/* This program takes a word and index of it's position and encodes into a posting list using Simple-9 compression algorithm and decodes back. */ #include "stdafx.h" #include #include #include "stdlib.h" #include #include #include using namespace std; //to stroe the word and its loaction id's Δ-vales typedef struct deltaValues { int value; int count; struct deltaValues* next; }delta; //stores the encodedBitsd 32 bits typedef struct encodedBits { unsigned char value[4]; struct encodedBits* next; }encodedBits; //Functions delta* insertNode(char * input, delta * nums); encodedBits* insertMultiple(int num, int cnt[], int input[], encodedBits * posting); encodedBits* insertAsOne( int insertValue, encodedBits * posting); unsigned char reverse(unsigned char c, int bit); encodedBits* insertEncode(encodedBits* newPtr, encodedBits* posting); void decode(encodedBits* posting); int findingSelector(int linkCount, int cnt[]); int selector(int linkCount, int cnt[]); int main() { delta * nums=NULL; // stores the id's Δ-vales encodedBits * posting=NULL; // stores the converted 32 bit posting list string word; char* input; char buf[256]; int value [28];int cnt[28]; int linkCount = 0; int simple=0; int displayValue=0; int s9[9]={1,2,3,4,5,7,9,14,28}; //Simple-9 selector values printf("Enter the word and it's index postions: "); cin >> word; if (fgets(buf, sizeof(buf), stdin) != NULL) { input=strtok(buf," "); while(input !=NULL) { nums=insertNode(input,nums); linkCount++; input = strtok (NULL, " "); } } // Converts the id's into 32 bit chunks according to Simple-9 algorithm int i=0; int max; delta* ptr = nums; while(nums!=NULL && linkCount>0) { ptr = nums; if(linkCount>=28) max =28; else max = linkCount; for(i=0;ivalue; cnt[i]=ptr->count; ptr = ptr->next; } simple = findingSelector(linkCount, cnt); if(simple) posting = insertMultiple(simple, cnt, value, posting); else posting = insertAsOne( value[0], posting); linkCount = linkCount - s9[simple]; for(i=0; ivalue; } else displayValue=displayValue+nums->value+1; cout<<" "<< displayValue ; nums= nums->next; } cout << ";"; } // Displays the posting list in bits cout<< endl< x(temp->value[i]); std::cout << x; } cout << " "; temp = temp->next; } // Decoding back to original word and its location id's cout<< endl<< endl<< "Decode: " << endl; cout << endl<< word <<": "; decode(posting); getch(); return 0; } /* Taking maximum of 28 index postions of the word and returns it's appropriate selector by calling the function selector */ int findingSelector(int linkCount, int cnt[]) { int simple=0; if(linkCount>=28) { simple = selector(28, cnt); } else if(linkCount>=14) { simple = selector(14, cnt); } else if(linkCount>=9) { simple = selector(9, cnt); } else if(linkCount>=7) { simple = selector(7, cnt); } else if(linkCount>=5) { simple = selector(5, cnt); } else if(linkCount>=4) { simple = selector(4, cnt); } else if(linkCount>=3) { simple = selector(3, cnt); } else if(linkCount>=2) { simple = selector(2, cnt); } else { simple=0; } return simple; } /* This method finds the selector that can be used to store the bits */ int selector(int linkCount, int cnt[]) { int counter[8]={28,14,9,7,5,4,3,2}; int check[8]={1,2,3,4,5,7,9,14}; int flag=0,i;int simple=0; switch(linkCount) { case 28:i=0;break; case 14:i=1;break; case 9: i=2;break; case 7: i=3;break; case 5: i=4;break; case 4: i=5;break; case 3: i=6;break; case 2: i=7;break; } for( i; i<8;i++) { flag=0; for(int j=0;jcheck[i]) { flag=-1; break; } } if(flag!=-1) { switch(counter[i]) { case 28: simple=8;break; case 14: simple=7;break; case 9: simple=6;break; case 7: simple=5;break; case 5: simple=4;break; case 4: simple=3;break; case 3: simple=2;break; case 2: simple=1;break; } return simple; } } return simple; } //Function to insert a new node with Δ-vales delta* insertNode(char* input, delta * nums) { int previous=0,count=0,x; if(nums==NULL) { nums = (delta*)malloc(sizeof(delta)); nums->value=stoi(input); x= nums->value; while(x>0) { x=x/2; count++; } nums->count=count; nums->next=NULL; } else { delta* ptr = nums;previous =0; while(ptr->next!=NULL) { previous += ptr->value+1; ptr = ptr->next; } previous += ptr->value; ptr->next = (delta*)malloc(sizeof(delta)); ptr->next->value=stoi(input)-previous-1; if(ptr->next->value<0) { cout<< "id's can not be in descending order"; exit(0); } x= ptr->next->value; while(x>0) { x=x/2; count++; } ptr->next->count=count; ptr->next->next=NULL; } return nums; } //Takes an array of id's and its selector value and converts them into 32 bits ( 4 bits as selector) encodedBits* insertMultiple(int num, int cnt[], int input[], encodedBits * posting) { int simple[9] = {0,1,2,3,4,6,8,13,27}; int unUsed[8] = {0,1,0,3,0,1,0,0}; int usedBits[8] = {14,9,7,5,4,3,2,1}; unsigned char c=0, c1; int bit=0, bit2=0; //creating a new node encodedBits * newPtr = (encodedBits*)malloc(sizeof(encodedBits)); int x= input[simple[num]], bitlength=0, j=3; int size=simple[num];int k=1; if(num==2||num==4||num==6) { while(bitlength!=unUsed[num-1]) { bit = bit*10+1; bitlength++; } } while(x>=0 && size>=0&&bitlength<28) { bit = bit*10+x%2+1; bitlength++; x=x/2; if(bitlength%8==0) { c= reverse(c,bit); newPtr->value[j--]= c; c=0; bit=0; } if(x==0 && size>=0) { if((cnt[size]<8 && num==1)||(cnt[size]<7 && k==1 && num==2)) { bitlength = 8; } if((cnt[size]<5 && k==1 && num==4)) { newPtr->value[j--]= reverse(c,bit); bitlength = 8; bit=0; } if(bitlength==24 && num==6) { bit = bit*10+1; bitlength++; } while(bitlength < ((usedBits[num-1]*k)+unUsed[num-1])) { if(bitlength%8==0) { c= reverse(c,bit); newPtr->value[j--]= c; c=0; bit=0; } bit = bit*10+1; bitlength++; } if(bitlength%8==0 && bit!=0) { c= reverse(c,bit); newPtr->value[j--]= c; c=0; bit=0; } x= input[--size]; k++; } } c1 = num; if(bit>0) c1=c1<<1; c1 = reverse(c1,bit); bit=0; newPtr->value[j--]= c1; newPtr->next=NULL; //inserting a new node posting = insertEncode( newPtr, posting); return posting; } //Takes an id and converts as a 28 bit and 4 bit selector'0000' encodedBits* insertAsOne( int insertValue, encodedBits * posting) { unsigned char c=0; int bit=0,j=3, bitlength=0; encodedBits * newPtr = (encodedBits*)malloc(sizeof(encodedBits)); int x=insertValue; while(j>=0 && bitlength!=28) { bit = bit*10+x%2+1; bitlength++; x=x/2; if((bitlength%8)==0) { c=reverse(c, bit); bit=0; newPtr->value[j--]= c; c=0; } } c=reverse(c, bit); bit=0; newPtr->value[0]= c; newPtr->next=NULL; posting = insertEncode(newPtr, posting); return posting; } //Inserts a new node of compressed 32bits encodedBits* insertEncode(encodedBits* newPtr, encodedBits* posting) { if(posting==NULL) { posting = newPtr; } else { encodedBits* ptr = posting; while(ptr->next!=NULL) ptr = ptr->next; ptr->next = newPtr; } return posting; } // Takes the list of encoded bits and converts back into the original indexes of the word void decode(encodedBits* posting) { int docid=0; while(posting!=NULL) { unsigned char c = posting->value[0]; int bit, num=0, i,j=0,k=7, x=0; while(num<4) { c=c>>1; num++; } bit =c; switch(bit) { case 0: x=28;break; case 1: x=14;break; case 2: x=9;break; case 3: x=7;break; case 4: x=5;break; case 5: x=4;break; case 6: x=3;break; case 7: x=2;break; case 8: x=1;break; } c= posting->value[j++]; int decodeNum=0,flag=0,term=1; unsigned char dChar,temp; for(i=0; i<28; i++) { if(i==4&&flag==0) {i=0; decodeNum=0;flag=1;} temp = c &(1<value[j++]; k=8; } k--; if(temp!=0) decodeNum = decodeNum+(int)pow(2.0,((term*x)-1-i))*1; if(x==1 && i==0 && flag) { cout<0 && flag) { if(docid==0) { cout<next; } } /*invert the codes so they can be used with mod operator */ unsigned char reverse(unsigned char c, int bit) { int bit2; while(bit>0) { bit2 = bit%10 -1; bit = bit/10; c=c|bit2; if(bit>0) c= c<<1; } return c; }