59. 為排序好的大數據建立索引文件,並根據索引實現大文件的二分查找,使百萬行數據可以秒級讀取

  • 建立索引
    /*
     * Line-offset index for the sorted data file.
     *
     * The single global instance `allindex` is filled by init() (built from
     * the data file) or by qucik() (loaded from the saved index file).
     */
    struct index
    {
        /* pindex[i] is the byte offset at which line i of the data file starts. */
        int *pindex;
        /* Number of entries in pindex (the code sets it to N, the line count),
         * not the byte length of the file. */
        int length;
    } allindex;

     

  • 初始化索引數組並把索引寫入到文件
     1 //初始化索引數組,並把索引寫入到文件
     2 void init(char *path)  3 {  4     printf("\n索引數組開始分配");  5     allindex.length = N;  6     //分配內存
     7     allindex.pindex = calloc(N, sizeof(int));  8     printf("\n索引數組完成分配");  9 
    10     printf("\n開始讀取"); 11     //二進制讀取文件 避免/r/n讀取成/n
    12     FILE *pf = fopen("filesort.txt", "rb"); 13     if (pf == NULL) 14  { 15         return -1; 16  } 17     else
    18  { 19         int alllength = 0; 20         for (int i = 0; i < N; i++) 21  { 22             char str[50] = { 0 }; 23             fgets(str, 50, pf); 24             //每個首地址的偏移
    25             allindex.pindex[i] = alllength; 26 
    27             int length = strlen(str); 28             alllength += length; 29  } 30  fclose(pf); 31  } 32     printf("\n結束讀取"); 33 
    34     printf("\n開始寫入"); 35     //二進制方式打開文件,並寫入索引
    36     FILE *pfw = fopen("index.txt", "wb"); 37     //寫入
    38     fwrite(allindex.pindex, sizeof(int), allindex.length, pfw); 39     //關閉文件
    40  fclose(pfw); 41     printf("\n結束寫入"); 42 
    43     //釋放內存
    44  free(allindex.pindex); 45 }

     

  • 從文件中讀取索引到索引數組中
     1 //從文件中讀取索引
     2 void qucik()  3 {  4     printf("\n索引數組開始分配");  5     allindex.length = N;  6     allindex.pindex = calloc(N, sizeof(int));//分配內存
     7     printf("\n索引數組完成分配");  8 
     9 
    10     printf("\n開始讀取"); 11     //以二進制讀的方式讀取索引
    12     FILE *pfw = fopen("index.txt", "rb"); 13     //讀取
    14     fread(allindex.pindex, sizeof(int), allindex.length, pfw); 15     //關閉文件
    16  fclose(pfw); 17     printf("\n結束讀取"); 18 }

     

  • 測試:輸入行號,通過索引文件直接讀取對應的行
     1   FILE *pf1 = fopen("index.txt", "rb");  2     FILE *pf2 = fopen("filesort.txt", "rb");  3     while (1)  4  {  5         printf("\n請輸入要讀取的行數");  6         int num = 0;  7         scanf("%d", &num);  8 
     9         int indexnum = 0; 10         fseek(pf1, num*sizeof(int), SEEK_SET); 11         fread(&indexnum, sizeof(int), 1, pf1);//讀索引到indexnum
    12 
    13  fseek(pf2, indexnum, SEEK_SET); 14         char str[128] = { 0 }; 15         fgets(str, 128, pf2);//讀取
    16         printf("\n%s", str); 17 
    18  } 19  fclose(pf1); 20     fclose(pf2);

     

     

  • 根據索引文件對已經排序好的文件進行二分查找
     1 void binsearch(char *searchstr)  2 {  3     //頭部
     4     int tou = 0;  5     //尾部
     6     int wei = N - 1;  7     //是否找到的標識
     8     int flag = 0;  9     //若是頭小於尾
    10     while (tou <= wei) 11  { 12         //獲取中部
    13         int zhong = (tou + wei) / 2; 14         //讀取中部索引的內容
    15         char zhongstr[256] = { 0 }; 16  { 17             //打開索引文件
    18             FILE *pf1 = fopen("index.txt", "rb"); 19             //打開排序好的文件
    20             FILE *pf2 = fopen("filesort.txt", "rb"); 21 
    22             //讀zhong對應的地址存到indexnum中
    23             int indexnum = 0; 24             fseek(pf1, zhong * sizeof(int), SEEK_SET); 25             fread(&indexnum, sizeof(int), 1, pf1); 26 
    27             //根據讀取的位置讀取文件到zhongstr中
    28  fseek(pf2, indexnum, SEEK_SET); 29             fgets(zhongstr, 128, pf2); 30 
    31  fclose(pf1); 32  fclose(pf2); 33  } 34         //消除'\r或者\n'
    35  eatN(zhongstr); 36         char pnewzhongstr[256] = { 0 }; 37  sprintf(pnewzhongstr, zhongstr); 38         //進行處理,遇到-終止
    39  eatg(pnewzhongstr); 40         //比較是否找到
    41         int res = strcmp(pnewzhongstr, searchstr);//1 0 -1
    42         if (res == 0) 43  { 44             flag = 1; 45             printf("%s", zhongstr); 46             break; 47  } 48         //若是中比searchstr要大
    49         else if (res == 1) 50  { 51             wei = zhong - 1; 52  } 53         //若是中比searchstr小
    54         else
    55  { 56             tou = zhong + 1; 57  } 58 
    59 
    60  } 61     //判斷是否找到
    62     if (flag) 63  { 64         printf("\nfind"); 65  } 66     else
    67  { 68         printf("\n not find"); 69  } 70 }

     

  • 遇到'-'結束
     /*
      * Cut a string at '-'.
      *
      * Every '-' up to the original terminator is overwritten with '\0', so when
      * read as a C string the result ends just before the first '-'. A string
      * without '-' is left unchanged. A NULL pointer is ignored.
      *
      * str: NUL-terminated string, modified in place.
      */
     void eatg(char *str)
     {
         if (str == NULL)
         {
             return;
         }
         for (; *str != '\0'; str++)
         {
             if (*str == '-')
             {
                 *str = '\0';
             }
         }
     }

     

  • 測試函數
     /*
      * Test driver: read one whitespace-delimited key from standard input and
      * look it up with binsearch().
      */
     int main(void)
     {
         char str[256] = { 0 };

         /* The width limit keeps the input inside str (255 chars + terminator). */
         if (scanf("%255s", str) == 1)
         {
             binsearch(str);
         }

         /* Keeps the console window open; "pause" is a Windows shell command. */
         system("pause");
         return 0;
     }
相關文章
相關標籤/搜索