因爲巨佬 shadowice1984 卡時限,本代碼已經 T 請不要粘上去交
退役以後再寫一個常數小的多項式取模吧c++
一句話題意:NP問題,求N!%Pspa
吐槽:出題人太毒瘤...必須寫任意模數NTT,並且加法取模還溢出...code
我常數太大,粘的很久之前寫的多項式取模,卡了卡常才A,你們1e3 1e4不要寫vector,不要參考下面的代碼get
orz shadowice1984 寫 $O(\sqrt n\log n)$ 吊打個人 $O(\sqrt n\log^2 n)$it
如下是 $O(\sqrt n\log^2 n)$ 的題解模板
前置芝士: 多項式多點求值、多項式取模、多項式求逆
出門左轉你谷模板區,包教不包會class
前置芝士: 任意模數NTT
出門左轉你谷模板區,包教不包會程序
本題題解
首先咱們發現p是2^31-1的im
你能夠考慮像分段打表那樣根號分塊,把1~p分紅 $O(\sqrt p)$ 份di
而後你求出每一份的值來,最後邊角暴力就好了
那麼怎麼求呢
你會發現第一塊是 $(12...s)$, 第二塊是 $((s+1)(s+2)...(s+s))$
第i塊就是 $(s(i-1)+1)(s(i-1)+2)(s(i-1)+3)*(s(i-1)+s)$
咱們發現這是一個關於i的多項式,能夠用分治+NTT在 $O(\sqrt p \log^2p)$的時間內求出這個多項式
而後你要求出第i=1...s的每個數的值,也就是每一塊數的積,你會發現是一個多項式多點求值,複雜度也是$O(\sqrt p\log ^2p)$
直接去隔壁模板區把多項式多點求值板子粘過來就好了
因爲出題人故意卡模數,須要把FFT換成任意模數NTT...
而後你就在線A題了...
代碼太醜,用vector xjb寫的
#include <bits/stdc++.h> using namespace std; #define int long long int n, p, s; const int sb = 32768, sb2 = 1073741824; const double pi = acos(-1); int qpow(int x, int y) { int res = 1; for (x %= p; y > 0; y >>= 1, x = x * (long long)x % p) if (y & 1) res = res * (long long)x % p; return res; } struct Complex { double real, imag; Complex(double r = 0, double i = 0) : real(r), imag(i) { } }; Complex a1[600000], a2[600000], b1[600000], b2[600000], a1b1[600000], ab[600000], a2b2[600000]; Complex operator+(const Complex &a, const Complex &b) { return Complex(a.real + b.real, a.imag + b.imag); } Complex operator-(const Complex &a, const Complex &b) { return Complex(a.real - b.real, a.imag - b.imag); } Complex operator*(const Complex &a, const Complex &b) { return Complex(a.real * b.real - a.imag * b.imag, a.real * b.imag + b.real * a.imag); } Complex *w[22]; Complex getw(int x, int y, int falg) { return Complex(w[x][y].real, falg * w[x][y].imag); } int *r[22]; void fftinit() { for (int i = 0; i < 19; i++) { w[i] = new Complex[1 << i], r[i] = new int[1 << i]; for (int j = 0; j < (1 << i); j++) w[i][j] = Complex(cos(pi * j / (1 << i)), sin(pi * j / (1 << i))); r[i][0] = 0; for (int j = 1; j < (1 << i); j++) r[i][j] = (r[i][j >> 1] >> 1) | ((j & 1) * (1 << (i - 1))); } } void fft(Complex *a, int len, int loglen, int falg) { Complex w, t; for (int i = 0; i < len; i++) if (r[loglen][i] < i) swap(a[i], a[r[loglen][i]]); for (int i = 1, logi = 0; i < len; logi++, i <<= 1) for (int j = 0; j < len; j += i << 1) for (int k = 0; k < i; k++) w = getw(logi, k, falg), t = a[j + k + i] * w, a[j + k + i] = a[j + k] - t, a[j + k] = a[j + k] + t; if (falg == -1) for (int i = 0; i < len; i++) a[i].real /= len, a[i].imag /= len; } int toint(Complex x) { return (((long long)(round(x.real) + 0.5)) % p + p) % p; } vector<int> operator*(vector<int> a, vector<int> b) { int len = 1, loglen = 0; int sz = a.size() + b.size() - 1; while (len < sz) len <<= 1, loglen++; a.resize(len), b.resize(len); vector<int> res; for (int i = 0; i < len; i++) a1[i] = a[i] / sb, a2[i] = a[i] % sb, b1[i] = b[i] / sb, b2[i] = b[i] % sb; fft(a1, len, loglen, 1), fft(a2, len, loglen, 1), fft(b1, len, loglen, 1), fft(b2, len, loglen, 1); for (int i = 0; i < len; i++) a1b1[i] = a1[i] * b1[i], ab[i] = a1[i] * b2[i] + a2[i] * b1[i], a2b2[i] = a2[i] * b2[i]; fft(a1b1, len, loglen, -1), fft(ab, len, loglen, -1), fft(a2b2, len, loglen, -1); for (int i = 0; i < len; i++) res.push_back(((toint(a1b1[i]) * (long long)sb2 % p + toint(ab[i]) * (long long)sb % p) % p + toint(a2b2[i])) % p); res.resize(sz); return res; } vector<int> operator+(vector<int> a, vector<int> b) { vector<int> res; res.resize(max(a.size(), b.size())); a.resize(res.size()); b.resize(res.size()); for (int i = 0; i < (int)res.size(); i++) res[i] = (a[i] + b[i]) % p; return res; } vector<int> operator-(vector<int> a, vector<int> b) { vector<int> res; res.resize(max(a.size(), b.size())); a.resize(res.size()); b.resize(res.size()); for (int i = 0; i < (int)res.size(); i++) res[i] = ((a[i] - b[i]) % p + p) % p; return res; } vector<int> poly_inv(vector<int> a) { if (a.size() == 1) { a[0] = qpow(a[0], p - 2); return a; } int n = a.size(), newsz = (n + 1) >> 1; vector<int> b(a); b.resize(newsz); b = poly_inv(b); vector<int> c(a * b); for (int &i: c) i = (p - i) % p; c[0] = (c[0] + 2) % p; a = c * b; a.resize(n); return a; } // vector<int> poly_r(vector<int> a) { reverse(a.begin(), a.end()); return a; } void div(vector<int> f, vector<int> g, vector<int> &q, vector<int> &r) { int n = f.size() - 1, m = g.size() - 1; vector<int> gr = g; reverse(gr.begin(), gr.end()); gr.resize(n - m + 1); q = f; reverse(q.begin(), q.end()); q = q * poly_inv(gr); q.resize(n - m + 1); reverse(q.begin(), q.end()); r = f - g * q; r.resize(m); // vector<int> gq = g * q; // r.resize(m); // gq.resize(m); // f.resize(m); // for (int i = 0; i < m; i++) // r[i] = ((f[i] - gq[i]) % p + p) % p; } vector<int> zz[200010]; int res[100010]; vector<int> fuck(int l, int r) { if (l == r) { vector<int> res; res.push_back(l), res.push_back(s); return res; } int mid = (l + r) / 2; return fuck(l, mid) * fuck(mid + 1, r); } void prework(int x, int cl, int cr) { if (cl == cr) { zz[x].push_back((p - cl) % p), zz[x].push_back(1); return; } int mid = (cl + cr) / 2; prework(x * 2, cl, mid), prework(x * 2 + 1, mid + 1, cr); zz[x] = zz[x * 2] * zz[x * 2 + 1]; } void work(int x, int cl, int cr, vector<int> poly) { if (cr - cl <= 400) { int sb = poly.size(); for (int t = cl; t <= cr; t++) { int tmp = 1; for (int i = 0; i < sb; i++) res[t] = (res[t] + tmp * (long long)poly[i] % p) % p, tmp = tmp * (long long)t % p; } return; } vector<int> tmp, rel, rer; div(poly, zz[x * 2], tmp, rel); div(poly, zz[x * 2 + 1], tmp, rer); int mid = (cl + cr) / 2; work(x * 2, cl, mid, rel), work(x * 2 + 1, mid + 1, cr, rer); } signed main() { fftinit(); scanf("%lld%lld", &n, &p); // n = 998244353, p = 2147483647; s = sqrt(p) + 1; vector<int> poly = fuck(1, s); prework(1, 0, s); // printf("prework ok\n"); work(1, 0, s, poly); // printf("work ok\n"); int ans = 1; for (int i = n / s * s + 1; i <= n; i++) ans = ans * (long long)i % p; for (int i = 0; i < n / s; i++) ans = ans * (long long)res[i] % p; printf("%lld\n", ans); return 0; //拜拜程序~ }