Data Structures and Algorithms/Sorting Data/Bucket Sort in C

/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *  *   This is a demo program to show how good BucketSort can be. * *                                                                        * * - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - * *   Copyright (C) 2010 by Ingenieurbüro Henning, Kiel, Germany            * * - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - * *                                                                        * *   This program is free software; you can redistribute it and/or modify  * *  it under the terms of the GNU General Public License as published     * *  by the Free Software Foundation. * *                                                                        * *   This program is distributed in the hope that it will be useful,       * *  but WITHOUT ANY WARRANTY; without even the implied warranty of        * *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the        * *  GNU General Public License for more details. * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */


 * 1) include 


 * 1) define MAXKEYS   100000   // max. number of data records
 * 2) define MAXWLNG       25   // max. length of the words in the data record
 * 3) define MAXBUCKETS    20   // max. number of buckets

struct FiAd      // register of buckets with the star-points in linked lists { int  StartIndex;// element in linked list where the bucket starts int LastIndex; // last element of the bucket (to find it fast) int NumInFifo; // number of references in the fifo };

struct FiFo      // structure of the linked lists used as bucket { int  PosIndex;  // where in the table can this element be found int Next;      // pointer to next element in this bucket };

struct DataRec   // the structure of the test data { char   TheChar; short TheShint; int   TheInt; float TheFloat; double TheDouble; int   ThePos; int   TheLength; char  TheKey[MAXWLNG + 1]; };

// local subroutines void BuckSortInt (struct DataRec* Source, struct FiFo* InFif, int Start,                int NumLines, int NumBuckets, int* NewPosis, int* NewPosNr); void WriteText  (char * TextName);

struct DataRec DataRecs  [MAXKEYS];          // the input list (unsorted) struct DataRec SortedRecs [MAXKEYS];         // the sorted output list int   NumRecs;                               // number of data records

// the input and output files; you have to adapt them to your needs char In_ListN_1 [40] = "/home/user/EinList.bin"; char OutTextT_2 [40] = "/home/user/BucketSort.txt";

int main(int argc, char *argv[]) { struct FiFo Positions  [MAXKEYS];           // the sequence of records int        NewPosis   [MAXKEYS];           // where it is now int        RecLng = sizeof(struct DataRec);// size of one data record int        PosNr  = 0;                     // position to store result int        I, LastInt, NoBuckets; FILE       *fp;

NumRecs  = 100000;                         // number of records to sort NoBuckets = 7;                             // number of recursion levels

// we get the input data which we want to sort fp = fopen(In_ListN_1, "rb"); if(fp != NULL) {   fread(DataRecs, NumRecs * sizeof(struct DataRec), 1, fp); fclose(fp); }  else {   printf("something is wrong with the file\n"); return; };

// we set up a linked list with references to all records for(I = 0; I < NumRecs; I++)               // filling linked list {   Positions[I].PosIndex = I;                // record x is at position x    Positions[I].Next     = I + 1;            // pointer to the next element }; Positions[I - 1].Next   = -1;               // end of the linked list

// we indirectly sort the data records; correct sequence will be in NumRecs BuckSortInt(DataRecs, Positions, 0, NumRecs, NoBuckets, NewPosis, &PosNr);

// we copy the records in correct order to the array SortedRecs for(I = 0; I < NumRecs; I++) {   memmove(&SortedRecs[I], &DataRecs[NewPosis[I]], RecLng); };

// and we check for correct sorting LastInt = 0x80000000; for(I = 0; I < NumRecs; I++) {   if(SortedRecs[I].TheInt < LastInt) {     printf("sorting not correct\n"); return; };   LastInt = SortedRecs[I].TheInt; };

printf("sorting correct\n"); WriteText(OutTextT_2); }

void BuckSortInt(struct DataRec* Source, struct FiFo* InFif, int Start,                 int NumLines, int NumBuckets, int* NewPosis, int* NewPosNr) { int I, K, Index, LastLast, FirstFree, Next; int StepSize, MinVal, MaxVal, ActVal; long long Interval; int BuckBorder[MAXBUCKETS];                // border values between buckets struct FiAd* FifoAdmin;                    // the fifo administration struct FiFo* Fifo;                         // the left fifo

if(NumBuckets > MAXBUCKETS) return;

// we reserve space for the chained lists, depending on length of table FifoAdmin = (struct FiAd*) malloc(NumBuckets * sizeof(struct FiAd)); Fifo     = (struct FiFo*) malloc(NumLines   * sizeof(struct FiFo));

for(K = 0; K < NumBuckets; K++) {   FifoAdmin[K].StartIndex = FifoAdmin[K].LastIndex  = -1; FifoAdmin[K].NumInFifo = 0; };

FirstFree = 0;                             // first free element

// now we find the smallest and biggest element for the actual bucket MinVal = 0x7FFFFFFF;                       // min. value to biggest value MaxVal = 0x80000000;                       // max. value to smallest value Next  = Start;                             // this is the next element while(Next >= 0)                           // while bucket is not empty ... {   Index = InFif[Next].PosIndex;             // next record of this bucket ActVal = Source[Index].TheInt;           // the key value of the record if(ActVal < MinVal) MinVal = ActVal;     // key smaller => swap value if(ActVal > MaxVal) MaxVal = ActVal;     // key bigger  => swap value Next = InFif[Next].Next;                 // ponter to next element };

// only identical key values => the record numbers go directly to the result if(MinVal == MaxVal)                       // all keys identical ? {   Next   = Start;                           // this is the next element while(Next >= 0)                         // while bucket is not empty ... {     NewPosis[NewPosNr[0]] = InFif[Next].PosIndex;// final position of record NewPosNr[0]++;                         // we point to free element Next = InFif[Next].Next;               // next element of linked list };   };

// now we make the border-array ready Interval = (long long) MaxVal - (long long) MinVal; // we get the domain StepSize = Interval / NumBuckets;          // makes chunks of this size for(I = NumBuckets - 1; I > 0; I--)        // loop over the buckets {   BuckBorder[I] = MaxVal - StepSize;        // lower border of the bucket MaxVal = MaxVal - StepSize;              // lower border for next bucket }; BuckBorder[I] = MinVal;                     // this way is save

//now we distribute the records of the input bucket to the new sub-buckets Next  = Start;                             // this is the next element while(Next >= 0)                           // while bucket is not empty ... {   Index = InFif[Next].PosIndex;             // next record of this bucket ActVal = Source[Index].TheInt;           // key value of the record for(I = NumBuckets - 1; I >= 0; I--) {      if(ActVal >= BuckBorder[I])            // pointing to corrrect bucket? {        if(FifoAdmin[I].StartIndex < 0)      // already entries? {          // this bucket is used the first time; we start the new linked list FifoAdmin[I].StartIndex = FirstFree; // start and end FifoAdmin[I].LastIndex  = FirstFree; // are identical FifoAdmin[I].NumInFifo  = 1;      // one element in Fifo Fifo[FirstFree].PosIndex = Index; // number of record to list Fifo[FirstFree].Next    = -1;     // no further elements in list FirstFree++; }         else {          // this bucket was already used; we add the reference LastLast                = FifoAdmin[I].LastIndex; // last entry FifoAdmin[I].NumInFifo++;         // one element more in fifo FifoAdmin[I].LastIndex  = FirstFree; // new last element Fifo[LastLast].Next     = FirstFree; // linking ex-last element Fifo[FirstFree].PosIndex = Index; // the record number Fifo[FirstFree].Next    = -1;     // the linked list ends here FirstFree++; };        break;                               // record distributed };      };    Next = InFif[Next].Next;                  // next record of actual bucket };

// we make the recursion and call this function for any subbucket for(I = 0; I < NumBuckets; I++) {   if(FifoAdmin[I].NumInFifo > 1) BuckSortInt(Source, Fifo, FifoAdmin[I].StartIndex,                  FifoAdmin[I].NumInFifo, NumBuckets, NewPosis, NewPosNr); else if(FifoAdmin[I].NumInFifo == 1) {       NewPosis[NewPosNr[0]] = Fifo[FifoAdmin[I].StartIndex].PosIndex; NewPosNr[0]++; }   };

// we must free the memory we got from the operating system free(Fifo); free(FifoAdmin); };

void WriteText(char * TextName) { FILE *fp; int I; char   TChar; short TShint; int   TInt; float TFloat; double TDouble; int   TPos; int   TLength;

fp = fopen(TextName, "wb"); if(fp != NULL) {   for(I = 0; I < NumRecs; I++) {     TChar   = SortedRecs[I].TheChar; TShint = SortedRecs[I].TheShint; TInt   = SortedRecs[I].TheInt; TFloat = SortedRecs[I].TheFloat; TDouble = SortedRecs[I].TheDouble; TPos   = SortedRecs[I].ThePos; TLength = SortedRecs[I].TheLength; fprintf(fp, "%6d %6d  %12d  %14e  %14e  %6d   %4d  %s\n", TChar,       TShint, TInt, TFloat, TDouble, TPos, TLength, SortedRecs[I].TheKey); };   fclose(fp); }  else {   printf("Ausgabedatei %s kann nicht angelegt werden\n", TextName); return; }; };