常見常數優化以及代碼風格

時間 2019-12-07

標籤常見常數優化以及代碼風格简体版

原文原文鏈接

常見常數優化以及代碼風格等

來源:html

沒有開啓 -O2 優化

編譯器沒有開 -O2 優化時，考慮重新實現庫函數

isdigit()
max() / min()
unique() / lower_bound() / upper_bound()
IO:
- C: scanf()/printf
- C++: cin / cout
STL
- queue
- stack
- priority_queue /heap
- deque
...

IO優化

src:http://www.cnblogs.com/qscqesze/p/5736748.html

fread讀入掛

// Input side: the caller slurps stdin into Buf with a single fread();
// buf is the read cursor that read() advances.
const int BUF=40000000;
char Buf[BUF],*buf=Buf;
// Output side: text is formatted into Out and later flushed with a single
// fwrite(); ou is the write cursor. Nothing here checks for Out overflowing.
// Outn/Outcnt are scratch storage used to reverse the digits of a number.
const int OUT=20000000;
char Out[OUT],*ou=Out;int Outn[30],Outcnt;
// Append the decimal form of x (with a leading '-' when negative) to Out.
inline void write(int x){
  // Work on the unsigned magnitude so that INT_MIN can be negated safely.
  unsigned int u=(unsigned int)x;
  if(x<0){*ou++='-';u=0u-u;}
  Outcnt=0;
  do Outn[++Outcnt]=(int)(u%10)+48; while(u/=10);
  while(Outcnt)*ou++=(char)Outn[Outcnt--];
}
// Same as write(), for 64-bit values.
inline void writell(long long x){
  unsigned long long u=(unsigned long long)x;
  if(x<0){*ou++='-';u=0ull-u;}
  Outcnt=0;
  do Outn[++Outcnt]=(int)(u%10)+48; while(u/=10);
  while(Outcnt)*ou++=(char)Outn[Outcnt--];
}
// Append a single character / a newline to Out.
inline void writechar(char x){*ou++=x;}
inline void writeln(){*ou++='\n';}
// Parse the next decimal integer from Buf into a.
// Skips everything that is not a digit; a '-' directly in front of the first
// digit makes the value negative. Scanning stops at a NUL byte or at the end
// of Buf, in which case a is left at 0. Buf is zero-initialised, so a NUL
// follows the data whenever fewer than BUF bytes were read into it.
inline void read(int&a){
  const char*const end=Buf+BUF;
  bool neg=false;
  a=0;
  for(;buf<end&&*buf&&(*buf<'0'||*buf>'9');buf++)neg=(*buf=='-');
  for(;buf<end&&*buf>='0'&&*buf<='9';buf++)a=a*10+(*buf-'0');
  if(neg)a=-a;
}


//這句話放到代碼中
fread(Buf,1,BUF,stdin);

//樣例
#include<bits/stdc++.h>
using namespace std;
typedef long long ll;
// Input side: main() slurps stdin into Buf with a single fread();
// buf is the read cursor that read() advances.
const int BUF=40000000;
char Buf[BUF],*buf=Buf;
// Output side: text is formatted into Out and flushed with a single fwrite();
// ou is the write cursor. Nothing here checks for Out overflowing.
// Outn/Outcnt are scratch storage used to reverse the digits of a number.
const int OUT=20000000;
char Out[OUT],*ou=Out;int Outn[30],Outcnt;
// Append the decimal form of x (with a leading '-' when negative) to Out.
inline void write(int x){
  // Work on the unsigned magnitude so that INT_MIN can be negated safely.
  unsigned int u=(unsigned int)x;
  if(x<0){*ou++='-';u=0u-u;}
  Outcnt=0;
  do Outn[++Outcnt]=(int)(u%10)+48; while(u/=10);
  while(Outcnt)*ou++=(char)Outn[Outcnt--];
}
// Same as write(), for 64-bit values.
inline void writell(long long x){
  unsigned long long u=(unsigned long long)x;
  if(x<0){*ou++='-';u=0ull-u;}
  Outcnt=0;
  do Outn[++Outcnt]=(int)(u%10)+48; while(u/=10);
  while(Outcnt)*ou++=(char)Outn[Outcnt--];
}
// Append a single character / a newline to Out.
inline void writechar(char x){*ou++=x;}
inline void writeln(){*ou++='\n';}
// Parse the next decimal integer from Buf into a.
// Skips everything that is not a digit; a '-' directly in front of the first
// digit makes the value negative. Scanning stops at a NUL byte or at the end
// of Buf, in which case a is left at 0. Buf is zero-initialised, so a NUL
// follows the data whenever fewer than BUF bytes were read into it.
inline void read(int&a){
  const char*const end=Buf+BUF;
  bool neg=false;
  a=0;
  for(;buf<end&&*buf&&(*buf<'0'||*buf>'9');buf++)neg=(*buf=='-');
  for(;buf<end&&*buf>='0'&&*buf<='9';buf++)a=a*10+(*buf-'0');
  if(neg)a=-a;
}

// Sample driver: read two integers from stdin and print their sum.
int main(){
    // Read at most BUF-1 bytes and terminate the data explicitly, so a NUL
    // always follows the input even when it is as large as the buffer.
    size_t got=fread(Buf,1,BUF-1,stdin);
    Buf[got]=0;
    int a,b;
    read(a),read(b);
    write(a+b);
    writeln();
    // Everything written so far lives in Out[0 .. ou-Out); emit it in one call.
    fwrite(Out,1,ou-Out,stdout);
    return 0;
}

通常讀入掛

// Read the next decimal integer from stdin with getchar().
// Non-digit characters are skipped; if a '-' is seen among them the result is
// negated. Returns 0 when end of input is reached before any digit.
inline long long read()
{
    long long x=0;      // accumulate in 64 bits: the return type is 64-bit
    bool neg=false;
    int ch=getchar();   // int, so EOF is distinguishable from every character
    while(ch!=EOF&&(ch<'0'||ch>'9')){if(ch=='-')neg=true;ch=getchar();}
    while(ch>='0'&&ch<='9'){x=x*10+(ch-'0');ch=getchar();}
    return neg?-x:x;
}

超神讀入掛

/*
 * fastIO: hand-written replacements for scanf()/printf().
 *
 * Input families (each keeps its own buffering, so use one per program):
 *   read()   - block-buffered through fread()
 *   read1()  - getchar()
 *   read2()  - scanf()
 * Output families:
 *   print()/println()    - block-buffered through fwrite(); emitted by flush()
 *                          and by the destructor of the global Ostream
 *   print1()/println1()  - appended to Out[] without bounds checking; emitted
 *                          by flush1() and by the destructor of _puts
 *   print2()/println2()  - printf()
 */
namespace fastIO{
    #define BUF_SIZE 100000
    #define OUT_SIZE 100000
    //fread->read
    bool IOerror=0; // set to 1 by nc() once fread() delivers no more bytes
    // Return the next byte of stdin, refilling the static buffer when it is
    // used up. Returns -1 and sets IOerror at end of input.
    inline char nc(){
        static char buf[BUF_SIZE],*p1=buf+BUF_SIZE,*pend=buf+BUF_SIZE;
        if (p1==pend){
            p1=buf; pend=buf+fread(buf,1,BUF_SIZE,stdin);
            if (pend==p1){IOerror=1;return -1;}
        }
        return *p1++;
    }
    inline bool blank(char ch){return ch==' '||ch=='\n'||ch=='\r'||ch=='\t';}
    // Shared body of read(int&) and read(long long&):
    // skip blanks, accept one optional '-', then consume digits.
    // x is left at 0 when the input is already exhausted.
    template<class T> inline void read_integer_(T &x){
        bool sign=0; char ch=nc(); x=0;
        for (;blank(ch);ch=nc());
        if (IOerror)return;
        if (ch=='-')sign=1,ch=nc();
        for (;ch>='0'&&ch<='9';ch=nc())x=x*10+(ch-'0');
        if (sign)x=-x;
    }
    inline void read(int &x){read_integer_(x);}
    inline void read(long long &x){read_integer_(x);}
    // Like the integer readers, plus an optional ".digits" fraction.
    inline void read(double &x){
        bool sign=0; char ch=nc(); x=0;
        for (;blank(ch);ch=nc());
        if (IOerror)return;
        if (ch=='-')sign=1,ch=nc();
        for (;ch>='0'&&ch<='9';ch=nc())x=x*10+(ch-'0');
        if (ch=='.'){
            double tmp=1; ch=nc();
            for (;ch>='0'&&ch<='9';ch=nc())tmp/=10.0,x+=tmp*(ch-'0');
        }
        if (sign)x=-x;
    }
    // Read one blank-delimited token into s (NUL-terminated, no length limit).
    inline void read(char *s){
        char ch=nc();
        for (;blank(ch);ch=nc());
        if (IOerror)return;
        for (;!blank(ch)&&!IOerror;ch=nc())*s++=ch;
        *s=0;
    }
    // Read the next non-blank character; c is -1 at end of input.
    inline void read(char &c){
        for (c=nc();blank(c);c=nc());
        if (IOerror){c=-1;return;}
    }
    //getchar->read
    // Shared body of read1(int&) and read1(long long&).
    // Skips non-digits (a '-' anywhere among them negates the result) and
    // stops at EOF instead of spinning forever; x is 0 if no digit was found.
    template<class T> inline void read1_integer_(T &x){
        int ch=getchar(); bool neg=0; x=0;
        for (;ch!=EOF&&(ch<'0'||ch>'9');ch=getchar())if (ch=='-')neg=1;
        for (;ch>='0'&&ch<='9';ch=getchar())x=x*10+(ch-'0');
        if (neg)x=-x;
    }
    inline void read1(int &x){read1_integer_(x);}
    inline void read1(long long &x){read1_integer_(x);}
    inline void read1(double &x){
        int ch=getchar(); bool neg=0; x=0;
        for (;ch!=EOF&&(ch<'0'||ch>'9');ch=getchar())if (ch=='-')neg=1;
        for (;ch>='0'&&ch<='9';ch=getchar())x=x*10+(ch-'0');
        if (ch=='.'){
            double tmp=1;
            for (ch=getchar();ch>='0'&&ch<='9';ch=getchar())tmp/=10.0,x+=tmp*(ch-'0');
        }
        if (neg)x=-x;
    }
    // Read one blank-delimited token into s (NUL-terminated, no length limit).
    // EOF ends the token; without that check the loop would never terminate
    // and would keep writing through s.
    inline void read1(char *s){
        int ch=getchar();
        for (;ch!=EOF&&blank((char)ch);ch=getchar());
        for (;ch!=EOF&&!blank((char)ch);ch=getchar())*s++=(char)ch;
        *s=0;
    }
    inline void read1(char &c){for (c=getchar();blank(c);c=getchar());}
    //scanf->read
    inline void read2(int &x){scanf("%d",&x);}
    inline void read2(long long &x){
        // "%lld" is the standard conversion; only the old Windows C runtime
        // needs "%I64d".
        #ifdef _WIN32
            scanf("%I64d",&x);
        #else
            scanf("%lld",&x);
        #endif
    }
    inline void read2(double &x){scanf("%lf",&x);}
    inline void read2(char *s){scanf("%s",s);}
    inline void read2(char &c){scanf(" %c",&c);}
    // Read the rest of the current line into s, dropping the newline.
    // gets() no longer exists in current C and C++, so the line is copied by
    // hand. As before there is no length limit: s must be large enough.
    inline void readln2(char *s){
        int ch;
        while ((ch=getchar())!=EOF&&ch!='\n')*s++=(char)ch;
        *s=0;
    }
    //fwrite->write
    struct Ostream_fwrite{
        char *buf,*p1,*pend;
        Ostream_fwrite(){buf=new char[BUF_SIZE];p1=buf;pend=buf+BUF_SIZE;}
        // Append one byte, writing the buffer out first when it is full.
        void out(char ch){
            if (p1==pend){
                fwrite(buf,1,BUF_SIZE,stdout);p1=buf;
            }
            *p1++=ch;
        }
        void print(long long x){
            // Convert through the unsigned magnitude so the most negative
            // value can be printed without signed overflow.
            unsigned long long u=(unsigned long long)x;
            if (x<0)out('-'),u=0ull-u;
            char s[20]; int n=0;
            do s[n++]=(char)('0'+u%10); while(u/=10);
            while(n)out(s[--n]);
        }
        void print(int x){print((long long)x);}
        void println(int x){print(x);out('\n');}
        void println(long long x){print(x);out('\n');}
        // Print x rounded to y decimal places; y is clamped to 0..17 because
        // mul[] only holds the powers of ten up to 10^17.
        void print(double x,int y){
            static const long long mul[]={1,10,100,1000,10000,100000,1000000,10000000,100000000,
                1000000000,10000000000LL,100000000000LL,1000000000000LL,10000000000000LL,
                100000000000000LL,1000000000000000LL,10000000000000000LL,100000000000000000LL};
            if (y<0)y=0; else if (y>17)y=17;
            if (x<-1e-12)out('-'),x=-x;
            x*=mul[y];
            long long x1=(long long)floor(x); if (x-floor(x)>=0.5)++x1;
            long long x2=x1/mul[y],x3=x1-x2*mul[y]; print(x2);
            if (y>0){
                out('.');
                // left-pad the fractional part with zeros up to y digits
                for (int i=1;i<y&&x3*mul[i]<mul[y];++i)out('0');
                print(x3);
            }
        }
        void println(double x,int y){print(x,y);out('\n');}
        // const char* so that string literals are accepted as well
        void print(const char *s){while (*s)out(*s++);}
        void println(const char *s){while (*s)out(*s++);out('\n');}
        void flush(){if (p1!=buf){fwrite(buf,1,p1-buf,stdout);p1=buf;}}
        ~Ostream_fwrite(){flush();}
    }Ostream;
    inline void print(int x){Ostream.print(x);}
    inline void println(int x){Ostream.println(x);}
    inline void print(char x){Ostream.out(x);}
    inline void println(char x){Ostream.out(x);Ostream.out('\n');}
    inline void print(long long x){Ostream.print(x);}
    inline void println(long long x){Ostream.println(x);}
    inline void print(double x,int y){Ostream.print(x,y);}
    inline void println(double x,int y){Ostream.println(x,y);}
    inline void print(const char *s){Ostream.print(s);}
    inline void println(const char *s){Ostream.println(s);}
    inline void println(){Ostream.out('\n');}
    inline void flush(){Ostream.flush();}
    //puts->write
    // One extra byte so flush1() can always place a terminating NUL.
    char Out[OUT_SIZE+1],*o=Out;
    // Defined before print1(int) so that print1(int) forwards here instead of
    // recursing into itself.
    inline void print1(long long x){
        unsigned long long u=(unsigned long long)x;
        if (x<0)*o++='-',u=0ull-u;
        char tmp[20]; int n=0;
        do tmp[n++]=(char)('0'+u%10); while(u/=10);
        while(n)*o++=tmp[--n];
    }
    inline void print1(int x){print1((long long)x);}
    inline void println1(int x){print1(x);*o++='\n';}
    inline void println1(long long x){print1(x);*o++='\n';}
    inline void print1(char c){*o++=c;}
    inline void println1(char c){*o++=c;*o++='\n';}
    inline void print1(const char *s){while (*s)*o++=*s++;}
    inline void println1(const char *s){print1(s);*o++='\n';}
    inline void println1(){*o++='\n';}
    // Emit the pending text with puts(). puts() appends a newline itself, so
    // one trailing '\n' is dropped first. The cursor is reset afterwards so a
    // later flush (for example the one at program exit) does not print the
    // same text a second time.
    inline void flush1(){
        if (o!=Out){
            if (*(o-1)=='\n')--o;
            *o=0;
            puts(Out);
            o=Out;
        }
    }
    struct puts_write{
        ~puts_write(){flush1();}
    }_puts;
    inline void print2(int x){printf("%d",x);}
    inline void println2(int x){printf("%d\n",x);}
    inline void print2(char x){printf("%c",x);}
    inline void println2(char x){printf("%c\n",x);}
    inline void print2(long long x){
        #ifdef _WIN32
            printf("%I64d",x);
        #else
            printf("%lld",x);
        #endif
    }
    inline void println2(long long x){print2(x);printf("\n");}
    inline void println2(){printf("\n");}
    #undef OUT_SIZE
    #undef BUF_SIZE
}
using namespace fastIO;

mmap 讀入掛

/***************************************
作者：xehoth
鏈接：https://www.zhihu.com/question/49272859/answer/154084413
來源：知乎
著作權歸作者所有。商業轉載請聯繫作者獲得授權，非商業轉載請註明出處。
******************************************/
#include <bits/stdc++.h>
#include <sys/mman.h>
#include <sys/stat.h>

/*
 * Read-only view of the file behind stdin, obtained with mmap().
 *
 * Call init() once, then pull bytes with nextChar(). When stdin cannot be
 * mapped (fstat or mmap fails, or the file is empty) the stream is simply
 * empty and nextChar() returns -1.
 *
 * The access specifiers are written normally: redefining the keywords
 * `private` / `public` as macros breaks every ordinary `private:` that
 * follows in the same translation unit.
 */
class BufferedInputStream {
private:
    char *buf, *p;  // start of the mapping and the read cursor
    int size;       // number of mapped bytes

public:
    // Start out empty so nextChar() is safe even before init().
    BufferedInputStream() : buf(0), p(0), size(0) {}

    // Map the whole of stdin into memory.
    inline void init() {
        buf = p = 0;
        size = 0;
        const int fd = fileno(stdin);
        struct stat sb;
        if (fstat(fd, &sb) != 0 || sb.st_size <= 0) return;
        void *mapped = mmap(0, static_cast<size_t>(sb.st_size), PROT_READ, MAP_PRIVATE, fd, 0);
        if (mapped == MAP_FAILED) return;
        buf = p = reinterpret_cast<char *>(mapped);
        size = static_cast<int>(sb.st_size);
    }

    // Next byte of the mapping, or -1 once the end (or a byte equal to -1)
    // has been reached.
    inline char nextChar() {
        return (p == buf + size || *p == -1) ? -1 : *p++;
    }
};

運算優化

用位運算來優化常數(有 -O2 時編譯器會幫你優化)

x * 10 => (x << 3) + (x << 1)
x != y => (x ^ y)
x != -1 => (~x)
x * 2 => (x << 1)
x * 2 + 1 => (x << 1 | 1)
x / 2 => (x >> 1)（僅在 x 爲非負數時等價）
(x + 1) % 2 => ((x & 1) ^ 1)
x % 2 => (x & 1)
x % 2 == 0 => (!(x & 1))

尋址優化

循環展開

循環展開也許只是表面，在緩存和寄存器允許的情況下一條語句內大量的展開運算會刺激 CPU 併發(前提是你的 CPU 不是某 CPU) ---- xehoth

如 BZOJ-3509，暴力+刺激併發就能拿下 rk1，以下是關鍵代碼：

/*********************************************************
作者：xehoth
鏈接：https://www.zhihu.com/question/49272859/answer/154084413
來源：知乎
著作權歸作者所有。商業轉載請聯繫作者獲得授權，非商業轉載請註明出處。
***************************************************************/
while (p1 <= pr) {
    tmp += (*p1) * (*p2) + (*(p1 + 1)) * (*(p2 + 1)) + (*(p1 + 2)) *
        (*(p2 + 2)) + (*(p1 + 3)) * (*(p2 + 3)) + (*(p1 + 4)) * (*(p2 + 4))
        + (*(p1 + 5)) * (*(p2 + 5)) + (*(p1 + 6)) * (*(p2 + 6)) + (*(p1 + 7))
        * (*(p2 + 7)) + (*(p1 + 8)) * (*(p2 + 8)) + (*(p1 + 9)) * (*(p2 + 9))
        + (*(p1 + 10)) * (*(p2 + 10)) + (*(p1 + 11)) * (*(p2 + 11))
        + (*(p1 + 12)) * (*(p2 + 12)) + (*(p1 + 13)) * (*(p2 + 13))
        + (*(p1 + 14)) * (*(p2 + 14));
    p1 += 15, p2 += 15;
}

vector 存圖

數據極爲強大的最大流使用 vector 存圖，例如 1000000 個點，4000000 條邊，vector 存圖只需要 450ms 左右，而前向星需要 1800ms

原因在於數據太過巨大， vector 動態的劣勢已降至極低，而大量訪問連續的內存地址顯然比前向星更優，如下：

/*
作者：xehoth
鏈接：https://www.zhihu.com/question/49272859/answer/154084413
來源：知乎
著作權歸作者所有。商業轉載請聯繫作者獲得授權，非商業轉載請註明出處。
*/
for (register int i = iter[v]; i < edge[v].size(); i++) {
    Node *p = &edge[v][i];
    if (h[v] == h[p->v] + 1) {
        register int ret = sap(p->v, std::min(flow - rec, p->f), s, t, n);
        p->f -= ret, edge[p->v][p->index].f += ret, iter[v] = i;
        if ((rec += ret) == flow) return flow;
    }
}

語法優化

inline 在非遞歸函數前加修飾
循環變量 int i => register int i
手寫棧來優化遞歸
減小乘/除/取模指令
x+=add;x%=mod;=> x+=add;x>=mod?x%=mod:1;
使用三目運算符 A?B:C
memset初始化細節 memset(a,0x3f,sizeof(a));

memset 按字節填充，所以每個 int 元素最後都是 0x3f3f3f3f（例如 a[1] = 0x3f3f3f3f）
int的極限是 0x7fffffff
還可以用 ~0u（unsigned int 的最大值 0xffffffff）
INF有的時候不要恰好賦值到 0x7FFFFFFF，如果有2個inf的值相加就會溢出。
不一樣類型的話最好在前面顯示的強轉一下(編譯器通常自動處理)
常量 >INT_MAX 的話加上 LL
後置 ++
建議平時在編譯的時候把編譯指令加上 -ansi
__attribute__ ，__fastcall (然而這玩意並不能在考試時用)

__attribute__((optimize("Ofast"))) __attribute__((__gnu_inline__, __always_inline__, __artificial__))

__attribute__((aligned))

SIMD 指令集優化矩陣乘法(一樣並無什麼用)

/*
作者：xehoth
鏈接：https://www.zhihu.com/question/49272859/answer/154084413
來源：知乎
著作權歸作者所有。商業轉載請聯繫作者獲得授權，非商業轉載請註明出處。
*/
#include <immintrin.h>
#include <intrin.h>

#define DIFFERENT_ORDER 0
 
/*
 * out[0..3] = a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3]*b[3],
 * computed on all four lanes at once.
 * a and out are accessed with the aligned load/store intrinsics.
 */
static inline void lincomb_SSE(const float *a, const __m128 b[4], float *out) {
    const __m128 row = _mm_load_ps(a);

    /* Broadcast each of the four elements of the row across a full vector. */
    const __m128 a0 = _mm_shuffle_ps(row, row, 0x00);
    const __m128 a1 = _mm_shuffle_ps(row, row, 0x55);
    const __m128 a2 = _mm_shuffle_ps(row, row, 0xaa);
    const __m128 a3 = _mm_shuffle_ps(row, row, 0xff);

    /* Sum the scaled vectors in index order. */
    __m128 acc = _mm_mul_ps(a0, b[0]);
    acc = _mm_add_ps(acc, _mm_mul_ps(a1, b[1]));
    acc = _mm_add_ps(acc, _mm_mul_ps(a2, b[2]));
    acc = _mm_add_ps(acc, _mm_mul_ps(a3, b[3]));

    _mm_store_ps(out, acc);
}
 
/*
 * Multiply two 4x4 float matrices (16 consecutive floats each, four per row)
 * and store the product back into A.
 *
 * With DIFFERENT_ORDER == 0 the operands are swapped while copying, so the
 * result is B*A; otherwise it is A*B, accumulated row by row directly in the
 * local copy of A.
 *
 * A and B themselves need no particular alignment: both are copied into
 * 16-byte aligned locals first. memcpy() requires <string.h>.
 */
void matmult_SSE(float *A, const float *B) {
    _MM_ALIGN16 float mA[16], mB[16];
#if DIFFERENT_ORDER
    float *out = mA;
    memcpy(mA, A, 16 * sizeof(float));
    memcpy(mB, B, 16 * sizeof(float));
#else
    _MM_ALIGN16 float out[16];
    memcpy(mB, A, 16 * sizeof(float));
    memcpy(mA, B, 16 * sizeof(float));
#endif
    /* The four rows of the right-hand operand, loaded once and reused. */
    __m128 Bcolumns[] = { 
        _mm_load_ps(mB + 0),
        _mm_load_ps(mB + 4),
        _mm_load_ps(mB + 8),
        _mm_load_ps(mB + 12)
    };
    /* One result row per call. */
    lincomb_SSE(mA + 0,  Bcolumns, out + 0);
    lincomb_SSE(mA + 4,  Bcolumns, out + 4);
    lincomb_SSE(mA + 8,  Bcolumns, out + 8);
    lincomb_SSE(mA + 12, Bcolumns, out + 12);
    memcpy(A, out, 16 * sizeof(float));
}

一些注意事項

OI考場不容許使用bits/stdc++.h庫，而且使用該庫變量名可能不能使用next (C++庫裏面有個template是next會CE)
請盡力少用黑語法。
避免link作變量名（還有個什麼變量名Linux也會CE我忽然記不到了..有時其實也能夠用「中國式的變量名命名法」這樣不會CE。不推薦這種詭異的風格），Linux環境可能會CE。
少用math.h|cmath庫。(由於_x,_y,y1,y2,x1,x2,x0,y0,這類命名有時會CE。)
OI考場嚴禁使用帶下劃線的庫函數。eg. __gcd()
編程時利用宏能夠減小代碼量，可是請務必在每一個變量里加括號。
#define rep(i,s,t) for(int i=(s);i<=(t);i++)
循環變量for(int i;…;…;)請不要放到全局上。這種常數不會卡。相反會帶來不少隱式的錯誤
若是你不精通指針請少用它。指針的代碼很難查錯。競賽裏面請避免使用函數指針，多級指針，指針數組這樣的語法。
若是能夠靜態實現，請先考慮靜態版本的代碼。而不是寫動態。(malloc() new)
引用不等於指針。這個語法我已經不想解釋了。去買本語法書細讀。
OI 考試少用C++的OOP特性，可使用STL ,template, class, namespace 但不推薦使用。
熟悉STL裏面的 string ,queue, stack ,vector, set ,map

後面這些（deque、multiset、multimap、bitset）用得少，僅供參考，而且在 pascal 選手消失前應該是不會考的。
前面這些只是爲了方便才用，請注意常數！推薦自己實現。
memset() 底層是用匯編實現的效率要比直接的快4倍，不是全部的庫函數都是c\c++實現的。
多維數組請把大的放前面 (開了 -O2 後差異不明顯)
大量調用 memcpy 還不如直接循環
C++ 和 c 的 IO 混用時請謹慎
預處理指令（宏指令）少用( #pragma 等)
未測試請不要內嵌彙編

代碼風格

方便 debug

上下括號請對齊
請保持縮進
變量名函數名推薦按照駝峯命名法
取有意義的函數,變量名
約定俗成的命名
插入適當的空格
不要在一行作過多的事
屢次調用，寫成函數

上下括號請對齊 AND 保持縮進

#include<cstdio>
using namespace std;

int main()
{
    //do sth..
    return 0;
}

變量名函數名推薦按照駝峯命名法 checkOfInput()

函數名，變量名最好不要用沒有意義的名字。

好比，你要檢查素數，函數名更好是 checkPrime() or isPrime() 這類的，而不是solve()
f() 當然也可以直接check() 可是當你有多個函數的時候爲了不讓自己混淆請使用最前面的方法。
好比，你要寫動態規劃，狀態數組最好開成dp[][]...

這是你們約定俗成的。這樣方便你們互相閱讀。也方便別人幫你查錯。

dfs() //depth-first-search
bfs() //breadth-first-search
maxflow()，dinic() 等//最大流
isprime()，getprime()//檢查素數，篩素數
getdis()//計算歐幾里德距離，曼哈頓距離
query()//查詢操做
queryMax()/querySum()
update()//更新操做
tarjan()//有多種tarjan..找強連通分量/雙連通分量/LCA的tarjan算法。
LCA（）、RMQ（）//字面意思..
check()//通常是二分的check()函數
solve()//字面意思..
match()//二分圖匹配..
gethash()//字面意思..
getid()//字面意思..
getrank()//字面意思..
sort()//字面意思..
pre()//預處理

dp[][] 通常是dp狀態定義 或者f[][]/g[][]
dfn[] dfs序 que[]/q[]/sta[]/s[] 手寫棧/隊列 head,tail維護首尾。
//邊
通常意義下: M->邊 N->點 Q->操做數
struct Edge{ int to,next,w;}e[M]
struct Edge{ int u,v,w;}e[M]
#define maxn ..
#define N ..
#define M ...
#define mod ...
/* Largest of three values; relies on a two-argument max() being in scope. */
#define max3(a,b,c) max((a),max((b),(c)))
/* True for '0'..'9'. The argument is evaluated twice. */
#define isdigit(x) ((x)>='0'&&(x)<='9')
/* Children of segment-tree node u. The expansions are parenthesised so they
   can be used inside larger expressions such as lson+1. */
#define lson (u<<1)
#define rson (u<<1|1)
...

代碼中插入適當的空格

for(int i = 1; i <= n; i++)
x = (a + b) / 2
ans = sqrt((x1 - x2) * (x1 - x2) + (y1 - y2) * (y1 - y2))

視我的習慣,有的地方空格能夠略去

for(int i=1;i<=n;i++)
for(int i = 1;i <= n; i++)
for(int i = 1; i <= n ; i++)
for(int i = 1; i <= n ; i++)
for(int i=1;i <= n;i++)

#define rep(i,s,t) for(int i=(s);i<=(t);i++)
rep(i,1,n)

請不要在一行作過多的事

for(int i = 1; i <= n ; i++)scanf("%d",&a[i]),a[i]<0?a[i]=-a[i]:1;

/************************/
for(int i = 1; i <= n ; i++)
    scanf("%d",&a[i]),a[i]<0?a[i]=-a[i]:1;

for(int i = 1; i <= n ; i++){
    scanf("%d",&a[i]);
    a[i]<0?a[i]=-a[i]:1;
}
for(int i = 1; i <= n ; i++)
{
    scanf("%d",&a[i]);
    if(a[i]<0)a[i] = -a[i];
}

若是一樣的計算要出現3次以上請寫成函數 getdis(),abs()

代碼壓行

比賽時間短，浪費在代碼風格上無意義

for(int i=1;i<=n;i++)
{
    //do sth..
}

========================
// Same loop with the opening brace kept on the header line.
for(int i=1;i<=n;i++){
    //do sth..
}
========================
#define rep(i,s,t) for(int i=(s);i<=(t);i++)
rep(i,1,n){
    //do sth..
}
=========================
#define rep(i,t) for(int i=1;i<=(t);i++)
rep(i,t){/*do sth..*/}

===========================
===========================
for(int i=head[u];~i;i=e[i].next){
    //do sth..
}

==============================
#define each(x) for(int i=head[x];~i;i=e[i].next)
each(u){ /*do sth ..*/}

================================
================================
// Greatest common divisor by Euclid's algorithm, recursive form.
int gcd(int a,int b)
{
    if(b==0)
        return a;
    return gcd(b,a%b);
}

================================
// The same recursion folded into a single conditional expression.
int gcd(int a,int b){return b?gcd(b,a%b):a;}
================================
// Greatest common divisor by Euclid's algorithm, iterative form.
int gcd(int a,int b)
{
    int t;
    while(b!=0)
    {
        t = a;
        a = b;
        b = t%b;
    }
    // when b reaches 0, a holds the result
    return a;
}
===============================
// Iterative Euclid on one line; a holds the result once b reaches 0.
int gcd(int a,int b){for(int t;b!=0;t=a,a=b,b=t%b);return a;}
===============================

附註：