來源:html
編譯器沒有開 -O2 優化時,可以考慮重新實現以下庫函數:
isdigit()
max()
/ min()
unique()
/ lower_bound()
/ upper_bound()
scanf()
/printf
cin
/ cout
queue
stack
priority_queue
/heap
deque
src:http://www.cnblogs.com/qscqesze/p/5736748.html
// Whole-input / whole-output fast I/O template.
// Usage: load all of stdin into Buf once, parse numbers with read(), collect the
// answer in Out with write()/writell()/writechar()/writeln(), and emit Out with a
// single fwrite at the end of main.
const int BUF = 40000000;
char Buf[BUF], *buf = Buf;  // input buffer and its read cursor
const int OUT = 20000000;
char Out[OUT], *ou = Out;   // output buffer and its write cursor
int Outn[30], Outcnt;       // scratch digit stack shared by write() and writell()

// Appends the decimal representation of x to Out.
// Negative values get a leading '-'; the magnitude is computed in unsigned
// arithmetic so that the most negative int is handled as well.
inline void write(int x) {
    unsigned int mag = x;
    if (x < 0) {
        *ou++ = '-';
        mag = 0u - mag;
    }
    Outcnt = 0;
    do {
        Outn[++Outcnt] = mag % 10 + 48;
        mag /= 10;
    } while (mag);
    while (Outcnt) *ou++ = Outn[Outcnt--];
}

// Same as write(), for 64-bit values.
inline void writell(long long x) {
    unsigned long long mag = x;
    if (x < 0) {
        *ou++ = '-';
        mag = 0ull - mag;
    }
    Outcnt = 0;
    do {
        Outn[++Outcnt] = mag % 10 + 48;
        mag /= 10;
    } while (mag);
    while (Outcnt) *ou++ = Outn[Outcnt--];
}

// Appends a single character to Out.
inline void writechar(char x) { *ou++ = x; }

// Appends a newline to Out.
inline void writeln() { *ou++ = '\n'; }

// Parses the next decimal integer from Buf into a.
// Non-digit bytes are skipped; a '-' directly in front of the first digit makes
// the value negative. Scanning stops at a NUL byte or at the end of Buf, in which
// case a is set to 0 (Buf is zero-initialised, so a NUL marks the end of the data
// as long as the input itself contains no NUL bytes).
inline void read(int &a) {
    const char *const limit = Buf + BUF;
    bool negative = false;
    unsigned int value = 0;
    while (buf < limit && *buf && (*buf < '0' || *buf > '9')) {
        negative = (*buf == '-');
        ++buf;
    }
    while (buf < limit && *buf >= '0' && *buf <= '9') value = value * 10 + (*buf++ - '0');
    a = negative ? (int)(0u - value) : (int)value;
}

// Put this statement at the start of main so the whole input is loaded first:
//     fread(Buf,1,BUF,stdin);
//
// Sample program (A + B) built on the definitions above:
//     #include<bits/stdc++.h>
//     using namespace std;
//     typedef long long ll;
//     ... the template above ...
//     int main(){
//         fread(Buf,1,BUF,stdin);
//         int a,b;
//         read(a),read(b);
//         write(a+b);
//         writeln();
//         fwrite(Out,1,ou-Out,stdout);
//     }
// Reads the next decimal integer from stdin with getchar() and returns it.
// Every non-digit character in front of the number is skipped; if any of the
// skipped characters is '-', the result is negated.
// The value is accumulated in a 64-bit integer (the declared return width), and
// end of input stops the scan, in which case 0 is returned.
inline long long read() {
    long long value = 0;
    bool negative = false;
    int ch = getchar();  // int, so that EOF is distinguishable from any character
    while (ch != EOF && (ch < '0' || ch > '9')) {
        if (ch == '-') negative = true;
        ch = getchar();
    }
    while (ch >= '0' && ch <= '9') {
        value = value * 10 + (ch - '0');
        ch = getchar();
    }
    return negative ? -value : value;
}
#include <cmath>
#include <cstdio>

// fastIO: three interchangeable families of console I/O helpers.
//   read  / print   : block-buffered through fread / fwrite
//   read1 / print1  : getchar input; output collected in Out and emitted with puts
//   read2 / print2  : thin scanf / printf wrappers
// The helper macros BUF_SIZE, OUT_SIZE and ll exist only inside this block and are
// #undef'd again at its end.
namespace fastIO {
#define BUF_SIZE 100000
#define OUT_SIZE 100000
#define ll long long

// ---------------------------- fread -> read ----------------------------

bool IOerror = 0;  // set to 1 once nc() finds stdin exhausted

// Returns the next byte of stdin, refilling a BUF_SIZE block with fread when the
// block is used up. At end of input it sets IOerror and returns -1.
inline char nc() {
    static char buf[BUF_SIZE], *p1 = buf + BUF_SIZE, *pend = buf + BUF_SIZE;
    if (p1 == pend) {
        p1 = buf;
        pend = buf + fread(buf, 1, BUF_SIZE, stdin);
        if (pend == p1) {
            IOerror = 1;
            return -1;
        }
    }
    return *p1++;
}

// True for space, newline, carriage return and tab.
inline bool blank(char ch) { return ch == ' ' || ch == '\n' || ch == '\r' || ch == '\t'; }

// Reads an optionally '-'-prefixed decimal integer; x is 0 when input is exhausted.
inline void read(int &x) {
    bool sign = 0;
    char ch = nc();
    x = 0;
    for (; blank(ch); ch = nc());
    if (IOerror) return;
    if (ch == '-') sign = 1, ch = nc();
    for (; ch >= '0' && ch <= '9'; ch = nc()) x = x * 10 + ch - '0';
    if (sign) x = -x;
}

// 64-bit variant of read(int&).
inline void read(ll &x) {
    bool sign = 0;
    char ch = nc();
    x = 0;
    for (; blank(ch); ch = nc());
    if (IOerror) return;
    if (ch == '-') sign = 1, ch = nc();
    for (; ch >= '0' && ch <= '9'; ch = nc()) x = x * 10 + ch - '0';
    if (sign) x = -x;
}

// Reads a plain decimal number of the form [-]digits[.digits] (no exponent).
inline void read(double &x) {
    bool sign = 0;
    char ch = nc();
    x = 0;
    for (; blank(ch); ch = nc());
    if (IOerror) return;
    if (ch == '-') sign = 1, ch = nc();
    for (; ch >= '0' && ch <= '9'; ch = nc()) x = x * 10 + ch - '0';
    if (ch == '.') {
        double tmp = 1;
        ch = nc();
        for (; ch >= '0' && ch <= '9'; ch = nc()) tmp /= 10.0, x += tmp * (ch - '0');
    }
    if (sign) x = -x;
}

// Reads one blank-delimited word into s and NUL-terminates it.
// The caller must supply a buffer large enough for the word.
inline void read(char *s) {
    char ch = nc();
    for (; blank(ch); ch = nc());
    if (IOerror) return;
    for (; !blank(ch) && !IOerror; ch = nc()) *s++ = ch;
    *s = 0;
}

// Reads the next non-blank character; c is -1 when input is exhausted.
inline void read(char &c) {
    for (c = nc(); blank(c); c = nc());
    if (IOerror) {
        c = -1;
        return;
    }
}

// --------------------------- getchar -> read ---------------------------
// The character is kept in an int so that EOF ends the scan instead of looping
// forever on exhausted input.

// Skips to the next digit (a skipped '-' negates the result) and reads an int.
inline void read1(int &x) {
    int ch = getchar();
    int bo = 0;
    x = 0;
    for (; ch != EOF && (ch < '0' || ch > '9'); ch = getchar())
        if (ch == '-') bo = 1;
    for (; ch >= '0' && ch <= '9'; ch = getchar()) x = x * 10 + ch - '0';
    if (bo) x = -x;
}

// 64-bit variant of read1(int&).
inline void read1(ll &x) {
    int ch = getchar();
    int bo = 0;
    x = 0;
    for (; ch != EOF && (ch < '0' || ch > '9'); ch = getchar())
        if (ch == '-') bo = 1;
    for (; ch >= '0' && ch <= '9'; ch = getchar()) x = x * 10 + ch - '0';
    if (bo) x = -x;
}

// Reads a plain decimal number of the form [-]digits[.digits] (no exponent).
inline void read1(double &x) {
    int ch = getchar();
    int bo = 0;
    x = 0;
    for (; ch != EOF && (ch < '0' || ch > '9'); ch = getchar())
        if (ch == '-') bo = 1;
    for (; ch >= '0' && ch <= '9'; ch = getchar()) x = x * 10 + ch - '0';
    if (ch == '.') {
        double tmp = 1;
        for (ch = getchar(); ch >= '0' && ch <= '9'; ch = getchar()) tmp /= 10.0, x += tmp * (ch - '0');
    }
    if (bo) x = -x;
}

// Reads one blank-delimited word into s and NUL-terminates it.
// The caller must supply a buffer large enough for the word.
inline void read1(char *s) {
    int ch = getchar();
    for (; ch != EOF && blank(ch); ch = getchar());
    for (; ch != EOF && !blank(ch); ch = getchar()) *s++ = ch;
    *s = 0;
}

// Reads the next non-blank character.
inline void read1(char &c) {
    for (c = getchar(); blank(c); c = getchar());
}

// ---------------------------- scanf -> read ----------------------------

inline void read2(int &x) { scanf("%d", &x); }

// Only the Windows runtime needs the %I64d spelling; every other platform takes %lld.
inline void read2(ll &x) {
#ifdef _WIN32
    scanf("%I64d", &x);
#else
    scanf("%lld", &x);
#endif
}

inline void read2(double &x) { scanf("%lf", &x); }
inline void read2(char *s) { scanf("%s", s); }
inline void read2(char &c) { scanf(" %c", &c); }

// Reads the rest of the current line into s, without the trailing newline.
// Implemented with getchar because gets() no longer exists in C++14 and later.
// The caller must supply a buffer large enough for the line.
inline void readln2(char *s) {
    int ch;
    while ((ch = getchar()) != EOF && ch != '\n') *s++ = ch;
    *s = 0;
}

// --------------------------- fwrite -> write ---------------------------

// Output stream that collects bytes in a BUF_SIZE block and hands full blocks to
// fwrite. The destructor flushes whatever is left.
struct Ostream_fwrite {
    char *buf, *p1, *pend;

    Ostream_fwrite() {
        buf = new char[BUF_SIZE];
        p1 = buf;
        pend = buf + BUF_SIZE;
    }

    // Appends one byte, writing the block out first when it is full.
    void out(char ch) {
        if (p1 == pend) {
            fwrite(buf, 1, BUF_SIZE, stdout);
            p1 = buf;
        }
        *p1++ = ch;
    }

    // Prints x in decimal. The magnitude is taken in unsigned arithmetic so the
    // most negative value does not overflow on negation.
    void print(ll x) {
        char digits[25], *top = digits;
        unsigned long long mag = x;
        if (x < 0) out('-'), mag = 0ULL - mag;
        do {
            *top++ = char('0' + mag % 10);
            mag /= 10;
        } while (mag);
        while (top != digits) out(*--top);
    }
    void println(ll x) {
        print(x);
        out('\n');
    }
    void print(int x) { print((ll)x); }
    void println(int x) {
        print((ll)x);
        out('\n');
    }

    // Prints x rounded to y decimal places; y is clamped to the table range 0..17.
    void print(double x, int y) {
        static ll mul[] = {1, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000,
                           1000000000, 10000000000LL, 100000000000LL, 1000000000000LL, 10000000000000LL,
                           100000000000000LL, 1000000000000000LL, 10000000000000000LL, 100000000000000000LL};
        if (y < 0) y = 0;
        if (y > 17) y = 17;
        if (x < -1e-12) out('-'), x = -x;
        x *= mul[y];
        ll x1 = (ll)floor(x);
        if (x - floor(x) >= 0.5) ++x1;
        ll x2 = x1 / mul[y], x3 = x1 - x2 * mul[y];
        print(x2);
        if (y > 0) {
            out('.');
            // pad the fractional part with leading zeros up to y digits
            for (int i = 1; i < y && x3 * mul[i] < mul[y]; out('0'), ++i);
            print(x3);
        }
    }
    void println(double x, int y) {
        print(x, y);
        out('\n');
    }

    // const char* so that string literals can be passed.
    void print(const char *s) {
        while (*s) out(*s++);
    }
    void println(const char *s) {
        while (*s) out(*s++);
        out('\n');
    }

    // Writes the pending bytes to stdout.
    void flush() {
        if (p1 != buf) {
            fwrite(buf, 1, p1 - buf, stdout);
            p1 = buf;
        }
    }
    ~Ostream_fwrite() { flush(); }
} Ostream;

inline void print(int x) { Ostream.print(x); }
inline void println(int x) { Ostream.println(x); }
inline void print(char x) { Ostream.out(x); }
inline void println(char x) {
    Ostream.out(x);
    Ostream.out('\n');
}
inline void print(ll x) { Ostream.print(x); }
inline void println(ll x) { Ostream.println(x); }
inline void print(double x, int y) { Ostream.print(x, y); }
inline void println(double x, int y) { Ostream.println(x, y); }
inline void print(const char *s) { Ostream.print(s); }
inline void println(const char *s) { Ostream.println(s); }
inline void println() { Ostream.out('\n'); }
inline void flush() { Ostream.flush(); }

// ---------------------------- puts -> write ----------------------------

char Out[OUT_SIZE], *o = Out;  // pending output and its write cursor

// Appends one byte to Out. One slot is always kept free for the terminating NUL;
// when the buffer is full its contents are first sent to stdout with fputs.
inline void put1(char c) {
    if (o == Out + OUT_SIZE - 1) {
        *o = 0;
        fputs(Out, stdout);
        o = Out;
    }
    *o++ = c;
}

// Prints x in decimal (safe for the most negative value).
inline void print1(ll x) {
    char digits[25], *top = digits;
    unsigned long long mag = x;
    if (x < 0) put1('-'), mag = 0ULL - mag;
    do {
        *top++ = char('0' + mag % 10);
        mag /= 10;
    } while (mag);
    while (top != digits) put1(*--top);
}
inline void println1(ll x) {
    print1(x);
    put1('\n');
}
inline void print1(int x) { print1((ll)x); }
inline void println1(int x) {
    print1((ll)x);
    put1('\n');
}
inline void print1(char c) { put1(c); }
inline void println1(char c) {
    put1(c);
    put1('\n');
}
inline void print1(const char *s) {
    while (*s) put1(*s++);
}
inline void println1(const char *s) {
    print1(s);
    put1('\n');
}
inline void println1() { put1('\n'); }

// Emits the pending output with puts. puts appends a newline itself, so one
// trailing '\n' is dropped first. The cursor is reset afterwards so that a second
// flush (for example the one at program exit) does not print the data again.
inline void flush1() {
    if (o == Out) return;
    if (*(o - 1) == '\n') --o;
    *o = 0;
    puts(Out);
    o = Out;
}

// Flushes the puts-based buffer when the program ends.
struct puts_write {
    ~puts_write() { flush1(); }
} _puts;

// --------------------------- printf -> write ---------------------------

inline void print2(int x) { printf("%d", x); }
inline void println2(int x) { printf("%d\n", x); }
inline void print2(char x) { printf("%c", x); }
inline void println2(char x) { printf("%c\n", x); }

// Only the Windows runtime needs the %I64d spelling; every other platform takes %lld.
inline void print2(ll x) {
#ifdef _WIN32
    printf("%I64d", x);
#else
    printf("%lld", x);
#endif
}
inline void println2(ll x) {
    print2(x);
    printf("\n");
}
inline void println2() { printf("\n"); }

#undef ll
#undef OUT_SIZE
#undef BUF_SIZE
}  // namespace fastIO
using namespace fastIO;
/***************************************
 Author: xehoth
 Link: https://www.zhihu.com/question/49272859/answer/154084413
 Source: Zhihu
 Copyright belongs to the author. For commercial reprints please contact the
 author for authorization; for non-commercial reprints please credit the source.
******************************************/
#include <bits/stdc++.h>
#include <sys/mman.h>
#include <sys/stat.h>

// Character reader over stdin that maps the whole input with mmap() instead of
// copying it through a stdio buffer.
//
// Call init() once, then nextChar() repeatedly. The access specifiers are written
// out normally; the keyword-redefining macros of the original snippet are gone
// because redefining `private` / `public` breaks any code that uses them as usual.
class BufferedInputStream {
private:
    char *buf, *p;  // start of the mapping and the read cursor
    size_t size;    // number of mapped bytes

public:
    // Starts out empty, so nextChar() is safe (returns -1) even before init().
    BufferedInputStream() : buf(0), p(0), size(0) {}

    // Maps the file behind stdin read-only. If fstat() fails, reports a
    // non-positive size, or mmap() fails, the stream is left empty.
    inline void init() {
        buf = p = 0;
        size = 0;
        const int fd = fileno(stdin);
        struct stat sb;
        if (fstat(fd, &sb) != 0 || sb.st_size <= 0) return;
        void *mapped = mmap(0, static_cast<size_t>(sb.st_size), PROT_READ, MAP_PRIVATE, fd, 0);
        if (mapped == MAP_FAILED) return;
        buf = p = static_cast<char *>(mapped);
        size = static_cast<size_t>(sb.st_size);
    }

    // Returns the next byte and advances, or -1 at the end of the mapping.
    // A byte that itself compares equal to -1 is also reported as -1 and is not
    // consumed.
    inline char nextChar() {
        return (p == buf + size || *p == -1) ? -1 : *p++;
    }
};
用位運算來優化常數(有 -O2 時編譯器會幫你優化)
x * 10
=> (x << 3) + (x << 1)
x != y
=> (x^y)
x != -1
=> (~x)
x * 2
=> (x << 1)
x * 2 + 1
=> (x << 1 | 1)
x/2
=> (x>>1)
(x + 1) % 2
=> (x ^ 1)(僅當 x 爲 0 或 1 時成立)
x % 2
=> (x & 1)(x 非負時)
x % 2 == 0
=> (!(x & 1))
循環展開也許只是表面,在緩存和寄存器允許的情況下,一條語句內大量的展開運算會刺激 CPU 併發(前提是你的 CPU 不是某 CPU) ---- xehoth
如 BZOJ-3509,暴力+刺激併發就能拿下 rk1,以下是關鍵代碼:
/*********************************************************
 Author: xehoth
 Link: https://www.zhihu.com/question/49272859/answer/154084413
 Source: Zhihu
 Copyright belongs to the author. For commercial reprints please contact the
 author for authorization; for non-commercial reprints please credit the source.
***************************************************************/
// Manually unrolled accumulation: each pass adds the 15 products
// p1[k] * p2[k] (k = 0..14) to tmp in a single statement, then advances both
// pointers by 15.
// NOTE(review): the loop condition only checks p1 <= pr, yet the body reads up to
// p1[14] and p2[14]; presumably both arrays are padded past pr — confirm against
// the full solution.
while (p1 <= pr) {
    tmp += (*p1) * (*p2) + (*(p1 + 1)) * (*(p2 + 1)) + (*(p1 + 2)) * (*(p2 + 2)) +
           (*(p1 + 3)) * (*(p2 + 3)) + (*(p1 + 4)) * (*(p2 + 4)) + (*(p1 + 5)) * (*(p2 + 5)) +
           (*(p1 + 6)) * (*(p2 + 6)) + (*(p1 + 7)) * (*(p2 + 7)) + (*(p1 + 8)) * (*(p2 + 8)) +
           (*(p1 + 9)) * (*(p2 + 9)) + (*(p1 + 10)) * (*(p2 + 10)) + (*(p1 + 11)) * (*(p2 + 11)) +
           (*(p1 + 12)) * (*(p2 + 12)) + (*(p1 + 13)) * (*(p2 + 13)) + (*(p1 + 14)) * (*(p2 + 14));
    p1 += 15, p2 += 15;
}
數據極爲強大的最大流使用 vector 存圖,例如 1000000 個點,4000000 條邊,vector 存圖只需要 450ms 左右,而前向星需要 1800ms
原因在於數據太過巨大,vector 動態的劣勢已降至極低,而大量訪問連續的內存地址顯然比前向星更優,如下:
/*
 Author: xehoth
 Link: https://www.zhihu.com/question/49272859/answer/154084413
 Source: Zhihu
 Copyright belongs to the author. For commercial reprints please contact the
 author for authorization; for non-commercial reprints please credit the source.
*/
// Scans the adjacency vector edge[v], resuming at the saved position iter[v].
for (register int i = iter[v]; i < edge[v].size(); i++) {
    Node *p = &edge[v][i];
    // Only edges whose endpoint satisfies h[v] == h[p->v] + 1 are followed.
    if (h[v] == h[p->v] + 1) {
        // Recurse with at most the flow still unassigned here (flow - rec),
        // capped by this edge's f.
        register int ret = sap(p->v, std::min(flow - rec, p->f), s, t, n);
        // Take ret from this edge, add it to the paired entry edge[p->v][p->index],
        // and remember i as the position to resume from.
        p->f -= ret, edge[p->v][p->index].f += ret, iter[v] = i;
        // Stop as soon as the whole requested flow has been pushed.
        if ((rec += ret) == flow) return flow;
    }
}
inline
在非遞歸函數前加修飾
int i
=> register int i(注意:register 在 C++11 中已被棄用,C++17 起被移除)
x+=add;x%=mod;
=> x+=add;x>=mod?x%=mod:1;
A?B:C
memset
初始化細節 memset(a,0x3f,sizeof(a));
緩存
最後的
a[1] = 0x3f3f3f3f
int的極限是0x7fffffff
還可以寫成 ~0u >> 1
INF有的時候不要剛好賦值到0X7FFFFFFF
,如果有2個inf的值相加就會溢出。
LL
++
建議平時在編譯的時候把編譯指令加上 -ansi
__attribute__
,__fastcall
(然而這玩意並不能在考試時用)
__attribute__((optimize("Ofast"))) __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __attribute__((aligned))
/*
 Author: xehoth
 Link: https://www.zhihu.com/question/49272859/answer/154084413
 Source: Zhihu
 Copyright belongs to the author. For commercial reprints please contact the
 author for authorization; for non-commercial reprints please credit the source.
*/
#include <cstring>
#include <immintrin.h>
#ifdef _MSC_VER
#include <intrin.h>  // MSVC-only header
#endif

// Not every compiler's intrinsics headers provide _MM_ALIGN16; supply it if missing.
#ifndef _MM_ALIGN16
#ifdef _MSC_VER
#define _MM_ALIGN16 __declspec(align(16))
#else
#define _MM_ALIGN16 __attribute__((aligned(16)))
#endif
#endif

#define DIFFERENT_ORDER 0

// Computes out[0..3] = a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3]*b[3], where each
// b[i] is a vector of four floats. Both a and out must be 16-byte aligned because
// they are accessed with _mm_load_ps / _mm_store_ps.
static inline void lincomb_SSE(const float *a, const __m128 b[4], float *out) {
    __m128 result;
    __m128 column = _mm_load_ps(a);
    // each shuffle broadcasts one element of `column` to all four lanes
    result = _mm_mul_ps(_mm_shuffle_ps(column, column, 0x00), b[0]);
    result = _mm_add_ps(result, _mm_mul_ps(_mm_shuffle_ps(column, column, 0x55), b[1]));
    result = _mm_add_ps(result, _mm_mul_ps(_mm_shuffle_ps(column, column, 0xaa), b[2]));
    result = _mm_add_ps(result, _mm_mul_ps(_mm_shuffle_ps(column, column, 0xff), b[3]));
    _mm_store_ps(out, result);
}

// Multiplies two 4x4 float matrices (16 contiguous floats each) and stores the
// product back into A. A and B need not be aligned: they are copied into aligned
// temporaries first.
// Reading the arrays as row-major matrices, the result is B * A when
// DIFFERENT_ORDER is 0 and A * B when it is 1.
void matmult_SSE(float *A, const float *B) {
    _MM_ALIGN16 float mA[16], mB[16];
#if DIFFERENT_ORDER
    float *out = mA;
    memcpy(mA, A, 16 * sizeof(float));
    memcpy(mB, B, 16 * sizeof(float));
#else
    _MM_ALIGN16 float out[16];
    memcpy(mB, A, 16 * sizeof(float));
    memcpy(mA, B, 16 * sizeof(float));
#endif
    __m128 Bcolumns[] = {
        _mm_load_ps(mB + 0),
        _mm_load_ps(mB + 4),
        _mm_load_ps(mB + 8),
        _mm_load_ps(mB + 12)
    };
    lincomb_SSE(mA + 0, Bcolumns, out + 0);
    lincomb_SSE(mA + 4, Bcolumns, out + 4);
    lincomb_SSE(mA + 8, Bcolumns, out + 8);
    lincomb_SSE(mA + 12, Bcolumns, out + 12);
    memcpy(A, out, 16 * sizeof(float));
}
bits/stdc++.h
庫,而且使用該庫變量名可能不能使用next
(C++庫裏面有個template
是next
會CE)math.h
|cmath
庫。(由於_x,_y,y1,y2,x1,x2,x0,y0,這類命名有時會CE。)#define rep(i,s,t) for(int i=(s);i<=(t);i++)
for(int i;…;…;)
請不要放到全局上。這種常數不會卡。相反會帶來不少隱式的錯誤malloc()
new
)STL
,template
, class
, namespace
但不推薦使用。熟悉STL
裏面的 string
,queue
, stack
,vector
, set
,map
後面這些用的少,僅供參考而且在pascal選手消失前應該是不會考的前面這些只是方便才用,
請注意常數!推薦本身實現。deque
multiset
multimap
bitset
memset()
底層是用彙編實現的,效率要比直接循環賦值快約 4 倍;不是所有的庫函數都是 c/c++ 實現的。
#pragma
等)未測試請不要內嵌彙編
方便 debug
上下括號請對齊 AND 保持縮進
#include<cstdio> using namespace std; int main() { //do sth.. return 0; }
變量名函數名推薦按照駝峯命名法 checkOfInput()
函數名,變量名最好不要用沒有意義的名字。
比如,你要檢查素數,函數名最好是
checkPrime()
or isPrime()
這類的,而不是solve()
f()
當然也可以直接check()
但是當你有多個函數的時候,爲了不讓自己混淆,請使用最前面的方法。
比如,你要寫動態規劃,狀態數組最好開成dp[][]...
這是大家約定俗成的。這樣方便大家互相閱讀,也方便別人幫你查錯。
dfs() //depth-first-search
bfs() //breadth-first-search
maxflow(),dinic() 等//最大流
isprime(),getprime()//檢查素數,篩素數
getdis()//計算歐幾里德距離,曼哈頓距離
query()//查詢操作 queryMax()/querySum()
update()//更新操作
tarjan()//有多種tarjan..找強連通分量/雙連通分量/LCA的tarjan算法。
LCA()、RMQ()//字面意思..
check()//一般是二分的check()函數
solve()//字面意思..
match()//二分圖匹配..
gethash()//字面意思..
getid()//字面意思..
getrank()//字面意思..
sort()//字面意思..
pre()//預處理
dp[][] 一般是dp狀態定義 或者f[][]/g[][]
dfn[] dfs序
que[]/q[]/sta[]/s[] 手寫棧/隊列 head,tail維護首尾。
//邊 一般意義下: M->邊 N->點 Q->操作數
struct Edge{ int to,next,w;}e[M]
struct Edge{ int u,v,w;}e[M]
#define maxn ..
#define N ..
#define M ...
#define mod ...
#define max3(a,b,c) max(a,max(b,c))
#define isdigit(x) (x>='0'&&x<='9')
#define lson u<<1
#define rson u<<1|1
...
代碼中插入適當的空格
for(int i = 1; i <= n; i++) x = (a + b) / 2 ans = sqrt((x1 - x2) * (x1 - x2) + (y1 - y2) * (y1 - y2))
視個人習慣,有的地方空格可以略去
for(int i=1;i<=n;i++) for(int i = 1;i <= n; i++) for(int i = 1; i < = n ; i++) for(int i = 1; i <= n ; i++) for(int i=1;i <= n;i++) #define rep(i,s,t) for(int i=(s);i<=(t);i++) rep(i,1,n)
請不要在一行作過多的事
for(int i = 1; i <= n ; i++)scanf("%d",&a[i]),a[i]<0?a[i]=-a[i]:1; /************************/ for(int i = 1; i <= n ; i++) scanf("%d",&a[i]),a[i]<0?a[i]=-a[i]:1; for(int i = 1; i <= n ; i++){ scanf("%d",&a[i]); a[i]<0?a[i]=-a[i]:1; } for(int i = 1; i <= n ; i++) { scanf("%d",&a[i]); if(a[i]<0)a[i] = -a[i]; }
如果同樣的計算要出現3次以上,請寫成函數 getdis()
,abs()
比賽時間短,浪費在代碼風格上無意義
for(int i=1;i<=n;i++)
{
    //do sth..
}
========================
for(int i=1;i<=n;i++){
    //do sth..
}
========================
#define rep(i,s,t) for(int i=(s);i<=(t);i++)
rep(i,1,n){
    //do sth..
}
=========================
#define rep(i,t) for(int i=1;i<=(t);i++)
rep(i,t){/*do sth..*/}
===========================
===========================
for(int i=head[u];~i;i=e[i].next){
    //do sth..
}
==============================
#define each(x) for(int i=head[x];~i;i=e[i].next)
each(u){ /*do sth ..*/}
================================
================================
int gcd(int a,int b)
{
    if(!b)return a;
    else return gcd(b,a%b);
}
================================
int gcd(int a,int b){return !b?a:gcd(b,a%b);}
================================
int gcd(int a,int b)
{
    int t;
    while(b!=0)
    {
        t = a;
        a = b;
        b = t%b;
    }
    return a;
}
===============================
int gcd(int a,int b){for(int t;b!=0;t=a,a=b,b=t%b);return a;}
===============================
附註:
#pragma
手動擴棧在 ACM 中一般是允許的(模板中時常有擴棧)
Callgrind
gprof
(或者 windows 上能夠用 CodeXL
),對症下藥