/* Smaller of two values. Arguments are evaluated more than once. */
#define MIN(X, Y) ((X) >= (Y) ? (Y) : (X))

/*
 * NOTE(review): inter_fminus is called by my_fadd but is not defined in this
 * part of the file. The prototype below is inferred from those calls
 * (two float magnitudes in, float out) -- confirm it matches the definition.
 */
float inter_fminus(float x1, float x2);

enum {
    FADD_MANT_BITS   = 23,   /* stored mantissa bits of a 32-bit float        */
    FADD_EXP_BIAS    = 127,  /* IEEE-754 single-precision exponent bias       */
    FADD_EXP_OFFSET  = 150,  /* bias + mantissa bits: x = sig * 2^(e - 150)   */
    FADD_EXP_SPECIAL = 255,  /* exponent field of infinities and NaNs         */
    FADD_GUARD_BITS  = 6     /* 24-bit significand << 6 tops out at bit 29    */
};

/* Type-punning through a union is defined behaviour in C, unlike pointer casts. */
union fadd_pun {
    float f;
    unsigned int u; /* assumes 32-bit float and 32-bit int, as the original code did */
};

/* Return the raw bit pattern of a float. */
static unsigned int fadd_bits_of(float value)
{
    union fadd_pun pun;

    pun.f = value;
    return pun.u;
}

/* Build a float from a raw bit pattern. */
static float fadd_float_of(unsigned int bits)
{
    union fadd_pun pun;

    pun.u = bits;
    return pun.f;
}

/*
 * Split |value| into a 24-bit significand and the number of fractional bits
 * it carries, so that |value| == significand * 2^(-frac_bits).
 * A zero exponent field (zero or denormal) is treated as 0.
 * Returns 0 for infinities/NaNs (nothing is stored), 1 otherwise.
 */
static int fadd_decode(float value, int *significand, int *frac_bits)
{
    unsigned int bits = fadd_bits_of(value);
    int biased = (int)((bits >> FADD_MANT_BITS) & 0xffu);

    if (biased == FADD_EXP_SPECIAL) {
        return 0;
    }
    if (biased == 0) {
        *significand = 0;
        *frac_bits = 0;
        return 1;
    }
    /* Restore the implicit leading 1: 1.xxxxxxxx */
    *significand = (int)((bits & 0x7fffffu) | 0x800000u);
    *frac_bits = FADD_EXP_OFFSET - biased;
    return 1;
}

/*
 * Rescale a significand carrying frac_bits fractional bits to q fractional
 * bits. Bits shifted out on the right are truncated.
 */
static int fadd_align(int significand, int frac_bits, int q)
{
    int shift = frac_bits - q;

    /* All 24 significant bits fall off; also keeps the shift count < 32. */
    if (shift > FADD_MANT_BITS) {
        return 0;
    }
    if (shift >= 0) {
        return significand >> shift;
    }
    /* Callers pick q so that -shift <= FADD_GUARD_BITS: no overflow. */
    return significand << -shift;
}

/**
 * Floating-point addition carried out as a fixed-point addition.
 * author : zgr 2014-04-03
 *
 * 1. Decode the in-memory sign/exponent/mantissa form into a significand
 *    plus a count of fractional bits.
 * 2. Choose a common Q value (number of fractional bits) from the larger
 *    operand and shift both operands to that Q.
 * 3. Add the two fixed-point numbers.
 * 4. Convert the fixed-point result back to float (exponent + mantissa).
 *
 * The sign bits of x1 and x2 are ignored: the result is |x1| + |x2|.
 * Operands with a zero exponent field count as 0. If either operand is an
 * infinity or NaN the function returns 0.0f. Low-order bits that do not fit
 * are truncated, not rounded. A sum too large for float yields +infinity.
 *
 * Unlike the earlier version, Q is derived directly from the exponents and
 * may be negative or exceed 30, so operands >= 2^30 or < 2^-30 no longer
 * cause out-of-range shifts or an uninitialised Q.
 */
float inter_fadd(float x1, float x2)
{
    int sig1, sig2;
    int frac1, frac2;
    int q, sum, top, exponent;
    unsigned int mantissa;

    /* Infinities and NaNs are rejected. */
    if (!fadd_decode(x1, &sig1, &frac1) || !fadd_decode(x2, &sig2, &frac2)) {
        return 0.0f;
    }

    /* A zero operand must not influence the choice of Q. */
    if (sig1 == 0) {
        frac1 = frac2;
    }
    if (sig2 == 0) {
        frac2 = frac1;
    }

    /*
     * Take the smaller Q so the larger operand cannot overflow: its leading
     * bit lands on bit 29, leaving bit 30 free for the carry of the sum.
     */
    q = MIN(frac1, frac2) + FADD_GUARD_BITS;

    /* Fixed-point addition at the common scale. */
    sum = fadd_align(sig1, frac1, q) + fadd_align(sig2, frac2, q);
    if (sum == 0) {
        return 0.0f;
    }

    /* Index of the most significant set bit; sum is in (0, 2^31). */
    top = 30;
    while (!(sum & (1 << top))) {
        top--;
    }

    /* Biased exponent; always >= 1 because both inputs were normal numbers. */
    exponent = (top - q) + FADD_EXP_BIAS;
    if (exponent >= FADD_EXP_SPECIAL) {
        return fadd_float_of(0x7f800000u); /* overflow: +infinity */
    }

    /* Keep 23 mantissa bits below the leading 1. */
    if (top >= FADD_MANT_BITS) {
        mantissa = (unsigned int)sum >> (top - FADD_MANT_BITS);
    } else {
        mantissa = (unsigned int)sum << (FADD_MANT_BITS - top);
    }

    /* Splice exponent and mantissa; the sign bit stays 0. */
    return fadd_float_of((mantissa & 0x7fffffu)
                         | ((unsigned int)exponent << FADD_MANT_BITS));
}

/**
 * Signed float addition built on the magnitude-only helpers.
 *
 * Same signs:      sign * (|x1| + |x2|)   via inter_fadd
 * x1 >= 0, x2 < 0: x1 - |x2|              via inter_fminus
 * x1 < 0, x2 >= 0: x2 - |x1|              via inter_fminus
 *
 * "Negative" means the sign bit is set, so -0.0f counts as negative.
 */
float my_fadd(float x1, float x2)
{
    const unsigned int sign_mask = 0x80000000u;
    unsigned int bits1 = fadd_bits_of(x1);
    unsigned int bits2 = fadd_bits_of(x2);
    unsigned int neg1 = bits1 & sign_mask;
    unsigned int neg2 = bits2 & sign_mask;
    float mag1 = fadd_float_of(bits1 & ~sign_mask);
    float mag2 = fadd_float_of(bits2 & ~sign_mask);
    float res;

    if (neg1 == neg2) {
        res = inter_fadd(mag1, mag2);
        if (neg1) {
            /* Both negative: flip the sign of the magnitude sum. */
            res = fadd_float_of(fadd_bits_of(res) ^ sign_mask);
        }
        return res;
    }

    if (neg2) {
        return inter_fminus(mag1, mag2); /* x1 is non-negative, so mag1 == x1 */
    }
    return inter_fminus(mag2, mag1);     /* x2 is non-negative, so mag2 == x2 */
}
這裏默認使用32位的float存儲,其具體規則可以參考這個鏈接:http://bbs.chinaunix.net/thread-3746530-1-1.html
將浮點數用32位的定點形式表示。
將兩個數通過移位擴大至相同的倍數。
定點加法運算。
將上一步的運算結果還原爲浮點表示。