POJ2778 DNA Sequence —— AC自動機 + 矩陣優化

題目連結:https://vjudge.net/problem/POJ-2778

 

DNA Sequence
Time Limit: 1000MS   Memory Limit: 65536K
Total Submissions: 18479   Accepted: 7112

Description

It's well known that DNA Sequence is a sequence only contains A, C, T and G, and it's very useful to analyze a segment of DNA Sequence,For example, if a animal's DNA sequence contains segment ATC then it may mean that the animal may have a genetic disease. Until now scientists have found several those segments, the problem is how many kinds of DNA sequences of a species don't contain those segments. 

Suppose that DNA sequences of a species is a sequence that consist of A, C, T and G,and the length of sequences is a given integer n. 

Input

First line contains two integer m (0 <= m <= 10), n (1 <= n <=2000000000). Here, m is the number of genetic disease segment, and n is the length of sequences. 

Next m lines each line contain a DNA genetic disease segment, and length of these segments is not larger than 10. 

Output

An integer, the number of DNA sequences, mod 100000.

Sample Input

4 3
AT
AC
AG
AA

Sample Output

36

Source

 

 

題意:

給出m個DNA序列,問長度爲n且不含這m個序列的DNA有多少個?

 

題解:

1.把這m個序列插入到AC自動機中。

2.根據自動機中各個狀態之間的關係,構成一張鄰接矩陣A,但需要去除與「結束點」有關的邊,這樣就能保證不含有給出的序列。

3.長度爲n,那麼答案就是 A^n 中,初始狀態那一行之和。

 

代碼如下:

 1 #include <iostream>
 2 #include <cstdio>
 3 #include <cstring>
 4 #include <algorithm>
 5 #include <vector>
 6 #include <cmath>
 7 #include <queue>
 8 #include <stack>
 9 #include <map>
 10 #include <string>
 11 #include <set>
 12 using namespace std;  13 typedef long long LL;  14 const double EPS = 1e-6;  15 const int INF = 2e9;  16 const LL LNF = 9e18;  17 const int MOD = 1e5;  18 const int MAXN = 110+10;  19 
 20 int Size;  21 int Map[128];  22 struct MA  23 {  24     int mat[110][110];  25     void init()  26  {  27         for(int i = 0; i<Size; i++)  28         for(int j = 0; j<Size; j++)  29             mat[i][j] = (i==j);  30  }  31 };  32 
 33 MA operator*(const MA &x, const MA &y)  34 {  35  MA ret;  36     memset(ret.mat, 0, sizeof(ret.mat));  37     for(int i = 0; i<Size; i++)  38     for(int j = 0; j<Size; j++)  39     for(int k = 0; k<Size; k++)  40         ret.mat[i][j] += (1LL*x.mat[i][k]*y.mat[k][j])%MOD, ret.mat[i][j] %= MOD;  41     return ret;  42 }  43 
 44 MA qpow(MA x, int y)  45 {  46  MA s;  47  s.init();  48     while(y)  49  {  50         if(y&1) s = s*x;  51         x = x*x;  52         y >>= 1;  53  }  54     return s;  55 }  56 
 57 struct Trie  58 {  59     const static int sz = 4, base = 'A';  60     int next[MAXN][sz], fail[MAXN], end[MAXN];  61     int root, L;  62     int newnode()  63  {  64         for(int i = 0; i<sz; i++)  65             next[L][i] = -1;  66         end[L++] = false;  67         return L-1;  68  }  69     void init()  70  {  71         L = 0;  72         root = newnode();  73  }  74     void insert(char buf[])  75  {  76         int len = strlen(buf);  77         int now = root;  78         for(int i = 0; i<len; i++)  79  {  80             if(next[now][Map[buf[i]]] == -1) next[now][Map[buf[i]]] = newnode();  81             now = next[now][Map[buf[i]]];  82  }  83         end[now] = true;  84  }  85     void build()  86  {  87         queue<int>Q;  88         fail[root] = root;  89         for(int i = 0; i<sz; i++)  90  {  91             if(next[root][i] == -1) next[root][i] = root;  92             else fail[next[root][i]] = root, Q.push(next[root][i]);  93  }  94         while(!Q.empty())  95  {  96             int now = Q.front();  97  Q.pop();  98             end[now] |= end[fail[now]]; //當前串的後綴是否也包含單詞
 99             for(int i = 0; i<sz; i++) 100  { 101                 if(next[now][i] == -1) next[now][i] = next[fail[now]][i]; 102                 else fail[next[now][i]] = next[fail[now]][i], Q.push(next[now][i]); 103  } 104  } 105  } 106 
107     int query(int n) 108  { 109  MA s; 110         memset(s.mat, 0, sizeof(s.mat)); 111         for(int i = 0; i<L; i++) 112  { 113             if(end[i]) continue;    //存在單詞的狀態沒有後繼
114             for(int j = 0; j<sz; j++) 115  { 116                 if(end[next[i][j]]) continue;   //存在單詞的狀態沒有前驅
117                 s.mat[i][next[i][j]]++; // i到next[i][j]的路徑數+1。注意,當next[i][j]==root時,路徑數極可能大於1。
118  } 119  } 120 
121         int ret = 0; 122         Size = L; 123         s = qpow(s, n); 124         for(int i = 0; i<L; i++)    //答案爲:初始狀態到各個狀態(包括初始狀態)的路徑數之和。
125             ret = (ret+s.mat[0][i])%MOD; 126         return ret; 127  } 128 }; 129 
130 Trie ac; 131 char buf[20]; 132 int main() 133 { 134     Map['A'] = 0; Map['C'] = 1; Map['G'] = 2; Map['T'] = 3; //離散化
135     int n, m; 136     while(scanf("%d%d", &m,&n)!=EOF) 137  { 138  ac.init(); 139         for(int i = 1; i<=m; i++) 140  { 141             scanf("%s", buf); 142  ac.insert(buf); 143  } 144  ac.build(); 145         int ans = ac.query(n); 146         printf("%d\n", ans); 147  } 148     return 0; 149 }
View Code
相關文章
相關標籤/搜索