如何加速背完一套3600題的題庫？

時間 2021-08-13

標籤 python c++ 數組數據結構 app 測試編碼 spa code blog 欄目 Python 简体版

原文原文鏈接

剛剛入職就收到了一份3600題的題庫。知道自己的腦子不太好使，於是對題庫進行了一系列選項分析，減少了800多題的背題量並掌握了許多蒙題技巧:)

 1 #include <bits/stdc++.h>
 2 using namespace std;
 3 int i=0;// index of the most recently stored byte; c[0] is never written
 4 int main(){
 5     // Scans ansi.txt byte by byte and writes a compact per-question summary
 6     // (type tag, option letters, answer letters) to extract_out.txt.
 7     freopen("ansi.txt","r",stdin);
 8     freopen("extract_out.txt","w",stdout);
 9     // static keeps this ~1.2 MB buffer off the stack and zero-initialises it.
10     static char c[1200001];
11     cout<<"T1:";
12     int flag=2;// state switch: 2 = waiting for the type marker, 3 = waiting for the answer marker, 1 = answer marker seen
13     bool flag1=false;// letter switch: while true, characters 'A'..'F' are echoed
14     int k=1;// number of the current question
15     // Also stop at end of input instead of storing EOF until the buffer is full.
16     while(i<1200000&&!feof(stdin)){
17         c[++i]=getchar();
18         // Detect the question type. The negative constants are raw bytes of the marker
19         // text as seen through a signed char (NOTE(review): assumes plain char is signed;
20         // presumably the GBK bytes of the words named in the trailing comments).
21         if (i>=4&&flag==2&&c[i-3]==-75&&c[i-2]==-91&&c[i-1]==-47&&c[i]==-95){
22             cout<<"(D1)";
23             flag=3;
24         }// single choice
25         if (i>=4&&flag==2&&c[i-3]==-74&&c[i-2]==-32&&c[i-1]==-47&&c[i]==-95) {
26             cout<<"(D2)";
27             flag=3;
28         }// multiple choice
29         if (i>=4&&flag==2&&c[i-3]==-59&&c[i-2]==-48&&c[i-1]==-74&&c[i]==-49) {
30             cout<<"(D3)";
31             flag=3;
32         }// true/false
33         // Record the answer
34         if (i>=6&&flag==3&&c[i-5]==-76&&c[i-4]==-16&&c[i-3]==-80&&c[i-2]==-72&&c[i-1]==-93&&c[i]==-70){
35             cout<<"ANS:"; flag=1;
36         }// answer marker
37         // Start the next question
38         if (c[i]==10) {
39             cout<<'\n';
40             if (flag==1&&c[i-1]==10){
41                 k++;
42                 cout<<'T'<<k<<':';
43                 flag=2;// two consecutive newlines while flag is 1 mean the question has ended
44             }
45         }
46         // Echo the useful letters
47         if (c[i]>='A'&&c[i]<='F'&&flag1) cout<<c[i];
48         else flag1=false;
49         // Re-arm the letter switch after a newline or after the tail of the answer marker.
50         // The i>=4 guard keeps c[i-3] inside the buffer for the first bytes of the file.
51         if (c[i]==10||(i>=4&&c[i-3]==-80&&c[i-2]==-72&&c[i-1]==-93&&c[i]==-70)) flag1=true;
52     }
53     return 0;
54 }

提取完後將關鍵信息導出至文本文檔中，大概長這樣：

step 2.選項分佈及正確率統計

這一步主要統計了各個題型的題目數量，計算了各題型中各個選項的正確機率，以及多選題中正確選項數量的機率。（發現多選題全選機率超過40%hhh）因爲多選題通常包含四個選項，所以我單獨對四個選項的多選題的各類答案數量進行了直接彙總。

首先需要爲數據建立合適的存儲結構。

單選題數據結構：

二維數組dan_count：dan_count[i][0]表示包含i個選項的題目有幾題；dan_count[i][j]表示包含i個選項的題目中，第j個選項正確的題目有幾題。

各選項正確機率：dan_count[i][j] / dan_count[i][0]

多選題數據結構：

二維數組duo_count：duo_count[i][0]表示包含i個選項的題目有幾題；duo_count[i][j]表示包含i個選項的題目中，第j個選項正確的題目有幾題。

二維數組duo_numcount：duo_numcount[i][j]表示包含i個選項的題目中，有j個選項正確的題目有幾題。

二維數組duo_enumcount：用於專門統計四個選項的題目：x爲多選題11種可能答案（AB AC AD BC BD CD ABC ABD ACD BCD ABCD）對應的二進制掩碼（A爲最低位），duo_enumcount[4][x]表示答案恰爲該組合的題目有幾題。

判斷題數據結構：

pan_a：統計判斷題正確數量

pan_b：統計判斷題錯誤數量

源代碼：

  1 #include<bits/stdc++.h>
  2 using namespace std;
  3 int pan_a=0,pan_b=0;// true/false questions whose answer is 'A' / anything else
  4 int dan_count[10][10]={0};// single choice: [n][0] = questions with n options, [n][j] = those whose answer is option j (1 = A)
  5 int duo_count[10][10]={0},duo_numcount[10][10]={0},duo_enumcount[10][64]={0};// multiple choice tables; duo_enumcount is indexed by an answer bitmask over A..F (0..63), hence 64 columns
  6 int dan=0,duo=0,pan=0;// questions processed so far, per type
  7 int k=0; 
  8 char c[21]={0};// sliding window over the input: c[20] is the newest character, c[19] the one before it
  9 void get_char(){// slide the window left by one (c[1..19] <- c[2..20]) and read the next input character into c[20]
 10     memmove(c+1,c+2,19*sizeof(char));
 11     c[20]=getchar();
 12 }
 13 
 14 void danxuan(){// tally one single-choice question; main calls this when the window ends in "D1"
 15     int j=0;// option letters seen so far, counted in order A, B, C, ...
 16     while (c[20]!=':'&&!feof(stdin)){// read up to the next ':'; stop at end of input instead of looping forever
 17         get_char();
 18         if ('A'+j==c[20]) j++;
 19     }
 20     dan_count[j][0]++;
 21     get_char();// c[20] now holds the answer letter
 22     if (c[20]>='A'&&c[20]<='F') dan_count[j][c[20]-'A'+1]++;// skip an unexpected character rather than index outside the table
 23     return;
 24 }
 25 
 26 void duoxuan(){// tally one multiple-choice question; main calls this when the window ends in "D2"
 27     int j=0,t=0,x=0;// j: options seen, t: correct letters, x: bitmask of correct letters (bit 0 = A)
 28     while (c[20]!=':'&&!feof(stdin)){// read up to the next ':'; stop at end of input instead of looping forever
 29         get_char();
 30         if ('A'+j==c[20]) j++;
 31     }
 32     duo_count[j][0]++;
 33     while (c[20]!='\n'&&!feof(stdin)){// the answer letters run to the end of the line
 34         get_char();
 35         if (c[20]>='A'&&c[20]<='F'){
 36             duo_count[j][c[20]-'A'+1]++;
 37             t++;
 38             x|=1<<(c[20]-'A');// exact integer bit; (int)pow(2,n) can truncate to 2^n-1 on some C libraries
 39         }
 40     }
 41     duo_numcount[j][t]++;
 42     duo_enumcount[j][x]++;// NOTE(review): x can reach 63, so duo_enumcount needs at least 64 columns per row
 43     return;
 44 }
 45 
 46 void panduan(){// tally one true/false question; main calls this when the window ends in "D3"
 47     while (!feof(stdin)){// stop at end of input instead of looping forever
 48         get_char();
 49         if (c[19]==':'){// the character right after the next ':' is the answer
 50             if (c[20]=='A') pan_a++;
 51             else pan_b++;// any answer other than 'A' is counted here
 52             return;
 53         }
 54     }
 55 }
 56 
 57 void print(){// write the collected statistics to stdout
 58     cout<<"單選題總數："<<dan<<endl; 
 59     for (int i=2;i<=4;i++){// single choice: questions with 2 to 4 options
 60         cout<<"有"<<i<<"個選項的題目數量爲"<<dan_count[i][0]<<endl;
 61         for (int j=1;j<=i;j++) printf("選%c的機率：%.2f%c ",j+'A'-1,(double)dan_count[i][j]/dan_count[i][0]*100,'%');
 62         printf("\n");
 63         printf("\n");
 64     } 
 65     cout<<endl; 
 66     cout<<"多選題總數："<<duo<<endl; 
 67     for (int i=3;i<=6;i++){// multiple choice: questions with 3 to 6 options
 68         cout<<"有"<<i<<"個選項的題目數量爲"<<duo_count[i][0]<<endl;
 69         for (int j=1;j<=i;j++) 
 70         printf("%c正確的機率：%6.2f%c ",j+'A'-1,(double)duo_count[i][j]/duo_count[i][0]*100,'%');
 71         printf("\n");
 72         for (int j=1;j<=i;j++) 
 73         if (duo_numcount[i][j])// only list answer sizes that actually occur
 74         printf("%d個選項正確的機率：%6.2f%c ",j,(double)duo_numcount[i][j]/duo_count[i][0]*100,'%');
 75         printf("\n"); 
 76         printf("\n");
 77     } 
 78     cout<<endl;
 79     cout<<"判斷題總數："<<pan<<endl; 
 80     cout<<"判斷題正確機率：";
 81     printf("%.2f %%\n",((double)pan_a/(pan_a+pan_b)*100));// "%%" prints a literal percent sign; a lone "%" before "\n" is an invalid conversion
 82     cout<<"判斷題錯誤機率：";
 83     printf("%.2f %%\n",((double)pan_b/(pan_a+pan_b)*100));
 84     cout<<endl;
 85     printf("特別統計（四個選項的多選題選項分佈）\n");
 86     printf("選項  次數\n");
 87     for (int i=1;i<=15;i++){// every non-empty bitmask over A..D
 88         int t=0;// number of letters missing from this mask, used to pad the column
 89         for (int j=0;j<4;j++)
 90         if ((i>>j)&1) cout<<(char)('A'+j);
 91         else t++;
 92         t+=2;
 93         while (t){
 94             t--;
 95             cout<<' ';
 96         }
 97         cout<<duo_enumcount[4][i]<<endl;
 98     }
 99     return;
100 }
101 
102 int main(){
103     // Reads extract_out.txt, tallies every question by type and writes the report file.
104     freopen("extract_out.txt","r",stdin);
105     freopen("選項統計結果.txt","w",stdout); 
106     // Stop after 3600 questions, or at end of input if the file holds fewer
107     // (without the end-of-input check the loop would never finish on a shorter file).
108     while (dan+duo+pan<3600&&!feof(stdin)){
109         get_char(); 
110         if (c[20]=='1'&&c[19]=='D') danxuan(),dan++;// window ends in "D1"
111         if (c[20]=='2'&&c[19]=='D') duoxuan(),duo++;// window ends in "D2"
112         if (c[20]=='3'&&c[19]=='D') panduan(),pan++;// window ends in "D3"
113         //if (k!=dan+duo+pan) cout<<k<<endl;
114     } 
115     print();
116     return 0;
117 }

step 3.精華選項提取

這一步先後提取了(1)單選題和判斷題中的正確選項 (2)非全選的多選題。

這套題庫的多選題共計 713 題，其中 309 題答案爲全選，另外 404 題答案爲非全選。因此說背完404題就掌握了713題......

提取選項須要掃兩遍題庫：

（1）第一遍掃描每道題的答案，在每道題需要輸出的選項上打上標記。

ans[k][c[20]-'A'+1]=1;//第k題的當前選項是正確

（2）第二遍掃描標記，進行輸出。

源代碼：

 1 #include<bits/stdc++.h>
 2 using namespace std;
 3 int ans[4000][7]={0};// ans[k][j]==1: option j (1 = A ... 6 = F) of question k is a correct option not yet written out
 4 int k=0; // question counter used by both passes in main
 5 char c[21]={0};// sliding window over the input: c[20] is the newest character, c[19] the one before it
 6 
 7 void get_char(){// slide the window left by one (c[1..19] <- c[2..20]) and read the next input character into c[20]
 8     for (char *p=c+1;p<c+20;++p) *p=p[1];
 9     c[20]=getchar();
10 }
11 
12 int main(){
13     // Pass 1 reads extract_out.txt and marks the correct options of every question.
14     // Pass 2 re-reads ansi.txt and copies the text of each marked option to answer.txt.
15     // Every scanning loop also stops at end of input so a short or truncated file cannot hang the program.
16     freopen("extract_out.txt","r",stdin);
17     freopen("answer.txt","w",stdout);
18     // Pass 1: collect the answers
19     while (k<3600&&!feof(stdin)){
20         while (c[20]!='T'&&!feof(stdin)) get_char(); // find the 'T' that starts the next question
21         k++;
22         while ((c[20]!=':'||c[19]!='S')&&!feof(stdin)) get_char();// find the "S:" that ends "ANS:"
23         get_char();
24         while (c[20]>='A'&&c[20]<='F'){
25             ans[k][c[20]-'A'+1]=1;// this option of question k is correct
26             get_char();
27         }
28     }
29     // Pass 2: extract the text of the correct options (reopening the stream also clears its end-of-file state)
30     freopen("ansi.txt","r",stdin);
31     k=1;
32     int num=0;// number currently being parsed from consecutive digits
33     while (k<=3600&&!feof(stdin)){
34         get_char();
35         if (c[20]>='0'&&c[20]<='9'){
36             num=num*10+c[20]-'0';
37         }// accumulate the question number
38         else {
39             if (num==k){// the number just read is the next expected question
40                 cout<<'T'<<num<<':';
41                 while ((ans[k][1]||ans[k][2]||ans[k][3]||ans[k][4]||ans[k][5]||ans[k][6])&&!feof(stdin)){// leave once no marked option remains
42                     get_char();
43                     if (c[20]>='A'&&c[20]<='F'&&c[19]=='\n'){// an option letter at the start of a line
44                         if (ans[k][c[20]-'A'+1]){
45                             cout<<c[20];
46                             ans[k][c[20]-'A'+1]=0;// this option has been written out
47                             int tc=0;// int, so that EOF stays distinguishable from any byte value
48                             while (tc!='\n'&&(tc=getchar())!=EOF){
49                                 cout<<(char)tc; 
50                             }// copy the rest of the line, including its newline
51                             c[20]='\n';// the window bypassed those characters; record that a line just ended
52                         }
53                     }
54                 } 
55                 k++;
56             }
57             num=0;
58         }
59     }
60     return 0;
61 }

step 4.選項出現詞彙正確率統計

最後一步將全部題目的正確選項進行了提取分析。通過各個詞彙在正確選項中的出現率和在全文中的出現率的對比，得出了195個正確率100%的高頻詞彙和366個正確率大於等於80%的高頻詞彙。

實現這一步需要用到python的jieba分詞了（被迫妥協）。用ANSI編碼導入全部選項與全部正確選項，根據全部選項的分詞建立總詞典dict，分別統計各詞彙在兩個文件中的出現次數至cnts（全部選項）和cnt（正確選項），計算出現率，排序，輸出。

 1 import jieba
 2 fs=open("item.txt","r",encoding="ansi")
 3 txts=fs.read()
 4 words=jieba.lcut(txts)
 5 #建立詞彙典
 6 dict=[]
 7 k=0
 8 for wrd in words:
 9     if dict.count(wrd)==0:
10         dict.append(wrd)
11 #統計兩個文件出現詞彙次數至cnt(s)
12 cnts={}
13 for wrd in dict:
14     cnts[wrd]=cnts.get(wrd, txts.count(wrd,0,len(txts)))
15 f=open("answer.txt","r",encoding="ansi")
16 txt=f.read()
17 cnt={}
18 for wrd in dict:
19     cnt[wrd]=cnt.get(wrd, txt.count(wrd,0,len(txt)))
20 #計算出現率
21 com={}
22 for wrd in cnt.keys():
23     com[wrd]=com.get(wrd, cnt[wrd]/cnts[wrd])
24 items=list(com.items())
25 #排序
26 items.sort(key=lambda x:x[1], reverse=True)
27 for wrd,i in items:
28     if i<0.8:
29         break
30     maxwrd=wrd
31     for wrd2,j in items:
32         if (j==i and cnts[wrd2]>cnts[maxwrd]):
33             maxwrd=wrd2
34         if j<i:
35             break
36     if (cnts[maxwrd]>2 and maxwrd[0]!='T' and (maxwrd[0]<'0' or maxwrd[0]>'9')):
37         print(maxwrd," 出現總次數：",cnts[maxwrd],' 正確機率：',i*100,'%')
38     cnts[maxwrd]=0