寫在前邊的實現需求:
1.總共10萬個電話號碼;
2.電話號碼中有重複和錯誤;
3.查找出正確的號碼(不重複);
1、優化前的實現方式:
1.先用正則過濾一遍10萬條數據,找出錯誤的;
2.用List.Contains驗證重複數據,List.Add添加不重複數據;
3.最終從List中取出正確的數據。
/**
 * Baseline ("before optimisation") demo: removes duplicates from a generated list of
 * number strings by checking the result list with {@code contains} before every insert.
 */
public class appMain {
    /** Number of entries generated by {@link #main(String[])}. */
    // NOTE(review): the results quoted right after this listing state capacity = 100000;
    // confirm which value was intended for this run.
    final static int _capacity = 1000000;
    final static Random rand = new Random(System.currentTimeMillis() + _capacity);
    /** Raw input, duplicates included. */
    static ArrayList<String> list = new ArrayList<String>(_capacity);
    /** Distinct values in first-seen order; filled by {@link #test1()}. */
    static ArrayList<String> newlist = new ArrayList<String>(_capacity);

    /**
     * Generates {@code _capacity} number strings, prints the generation time and runs
     * {@link #test1()}.
     *
     * @param args unused
     */
    public static void main(String[] args) throws InterruptedException {
        long ts = System.currentTimeMillis();
        int modVal = _capacity / 3;
        for (int i = 0; i < _capacity; i++) {
            // Re-seeding with the loop index makes the generated data the same on every run.
            rand.setSeed(i);
            // The remainder lies in (-modVal, modVal), so Math.abs cannot overflow here.
            list.add(Integer.toString(Math.abs(rand.nextInt() % modVal)));
        }
        ts = System.currentTimeMillis() - ts;
        System.out.println("生成時間 :" + ts);

        test1();
    }

    /**
     * Copies every value of {@code list} that is not yet in {@code newlist} into
     * {@code newlist}, counts the rejected duplicates and prints timing and totals.
     * Each {@code contains} call scans {@code newlist} linearly, which is what makes
     * this variant slow on large inputs.
     */
    static void test1() {
        newlist.clear();
        int repetition = 0;
        long ts = System.currentTimeMillis();
        for (String s : list) {
            if (!newlist.contains(s)) {
                newlist.add(s);
            } else {
                repetition++;
            }
        }
        ts = System.currentTimeMillis() - ts;
        System.out.println("------ 插入檢查方法 -------");
        System.out.println("查找時間 :" + ts);
        System.out.println("重複 :" + repetition);
        System.out.println("正確 :" + newlist.size());
    }
}
優化前執行結果:
/* 條件:capacity = 100000 結果: 生成時間 :33 ------ 插入檢查方法 ------- 查找時間 :6612 重複 :76871 正確 :23129 ------ 排序檢查方法 ------- 查找時間 :91 重複 :76871 正確 :23129 */
使用以上方式做導入的話,數據量一旦超過5萬以上立刻出現假死狀態,故肯定不可取,因此有了下邊的優化。
2、優化後的實現方式:
1.先對10萬數據排序;
2.對比前後兩條數據(這個我後面會詳細說明爲何這麼做);
3.篩選出正確數據。
/**
 * Optimised demo: removes duplicates from a generated list of number strings by sorting
 * the list and then comparing each element with the one before it.
 */
public class appMain {
    /** Number of entries generated by {@link #main(String[])}. */
    final static int _capacity = 1000000;
    final static Random rand = new Random(System.currentTimeMillis() + _capacity);
    /** Raw input, duplicates included; sorted in place by {@link #test2()}. */
    static ArrayList<String> list = new ArrayList<String>(_capacity);
    /** Distinct values in sorted order; filled by {@link #test2()}. */
    static ArrayList<String> newlist = new ArrayList<String>(_capacity);

    /**
     * Generates {@code _capacity} number strings, prints the generation time and runs
     * {@link #test2()}.
     *
     * @param args unused
     */
    public static void main(String[] args) throws InterruptedException {
        long ts = System.currentTimeMillis();
        int modVal = _capacity / 3;
        for (int i = 0; i < _capacity; i++) {
            // Re-seeding with the loop index makes the generated data the same on every run.
            rand.setSeed(i);
            // The remainder lies in (-modVal, modVal), so Math.abs cannot overflow here.
            list.add(Integer.toString(Math.abs(rand.nextInt() % modVal)));
        }
        ts = System.currentTimeMillis() - ts;
        System.out.println("生成時間 :" + ts);

        test2();
    }

    /**
     * Sorts {@code list} in place, then walks it once: equal values are adjacent after
     * sorting, so a value is emitted to {@code newlist} only when the next element
     * differs from it. Prints timing, the duplicate count and the distinct count.
     * An empty {@code list} yields an empty {@code newlist} instead of an exception.
     */
    static void test2() {
        newlist.clear();
        int repetition = 0;
        long ts = System.currentTimeMillis();

        Collections.sort(list);
        // Guard: list.get(0) would throw IndexOutOfBoundsException on an empty list.
        if (!list.isEmpty()) {
            String str = list.get(0);
            int max = list.size();
            for (int i = 1; i < max; i++) {
                if (str.equals(list.get(i))) {
                    repetition++;
                    continue;
                }
                newlist.add(str);
                str = list.get(i);
            }
            // The last run of equal values is still pending when the loop ends.
            newlist.add(str);
        }

        ts = System.currentTimeMillis() - ts;
        System.out.println("------ 排序檢查方法 -------");
        System.out.println("查找時間 :" + ts);
        System.out.println("重複 :" + repetition);
        System.out.println("正確 :" + newlist.size());
    }
}
優化後執行結果:
/* 條件:capacity = 1000000 結果: 生成時間 :392 ------ 插入檢查方法 ------- 查找時間 :1033818 重複 :703036 正確 :296964 ------ 排序檢查方法 ------- 查找時間 :1367 重複 :703036 正確 :296964 */
當數據量達到10萬條的時候,查找時間相比已有差不多90倍的差距了;當數據量達到100萬時,我這邊測試數據已經卡死在test1(),而test2()依然能在數十秒內反饋結果。
下邊來簡單解剖下源碼:
1 Collections.sort(list); 2 String str = list.get(0); 3 int max = list.size(); 4 for (int i = 1; i < max; i++) { 5 if (str.equals(list.get(i))) { 6 repetition++; 7 continue; 8 } 9 newlist.add(str); 10 str = list.get(i); 11 }
Line 1:排序,假如list排序後的結果是[1,2,2,3,3,3,4,4,4,4,5,5,5,5,5]
Line 2:初始str = 1;
從Line 4開始進入循環:
Line 5:判斷str是否和當前selector值相等(暫且我們認爲list.get(i)是一個指針),如果相等則跳過以下步驟進入下一個循環
Line 9:將str = 1,加入newlist尾
Line10:將當前selector值賦給str,此時str=2,進入下一個循環
...
這種語言解釋我個人覺得特別麻煩,我還是寫段代碼讓程序告訴你它怎麼執行的。
/**
 * Step-by-step trace of the sort-and-compare de-duplication on a small, already
 * sorted list, printing the scan position and the result list after every step.
 */
public class appList {
    /** Input list; filled by {@link #main(String[])} with the value i repeated i times. */
    static ArrayList<String> list = new ArrayList<String>();
    /** Distinct values collected so far. */
    static ArrayList<String> newlist = new ArrayList<String>();

    /**
     * Builds the sample list, runs the de-duplication loop and prints each step.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        for (int i = 1; i < 5 + 1; i++) {
            for (int j = 0; j < i; j++) {
                list.add(Integer.toString(i));
            }
        }
        System.out.println("list初始值 " + list.toString());
        // prints [1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 5]

        String str = list.get(0);
        int max = list.size();
        for (int i = 1; i < max; i++) {
            Print(i);
            if (str.equals(list.get(i))) {
                PrintNew();
                continue;
            }
            newlist.add(str);
            System.out.println("add\t" + str);
            str = list.get(i);
            PrintNew();
        }

        // The last run of equal values is still pending when the loop ends.
        newlist.add(str);
        System.out.println("add\t" + str);
        PrintNew();

        System.out.println("newlist值 " + newlist.toString());
        // prints [1, 2, 3, 4, 5]
    }

    /** Prints the current content of {@code newlist} on one line, followed by a blank line. */
    static void PrintNew() {
        StringBuilder stringBuilder = new StringBuilder();
        stringBuilder.append("newlist\t");
        for (int i = 0; i < newlist.size(); i++) {
            stringBuilder.append(newlist.get(i));
            stringBuilder.append(",");
        }
        System.out.println(stringBuilder.toString());
        System.out.println();
    }

    /**
     * Prints {@code list} on one line with the element at {@code pos} wrapped in brackets.
     *
     * @param pos index of the element currently being compared
     */
    static void Print(int pos) {
        StringBuilder stringBuilder = new StringBuilder();
        stringBuilder.append("list\t");
        for (int i = 0; i < list.size(); i++) {
            if (i == pos) {
                stringBuilder.append("[");
                stringBuilder.append(list.get(i));
                stringBuilder.append("],");
            } else {
                stringBuilder.append(list.get(i));
                stringBuilder.append(",");
            }
        }
        System.out.println(stringBuilder.toString());
    }
}
執行結果:
list初始值 [1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 5] list 1,[2],2,3,3,3,4,4,4,4,5,5,5,5,5, add 1 newlist 1, list 1,2,[2],3,3,3,4,4,4,4,5,5,5,5,5, newlist 1, list 1,2,2,[3],3,3,4,4,4,4,5,5,5,5,5, add 2 newlist 1,2, list 1,2,2,3,[3],3,4,4,4,4,5,5,5,5,5, newlist 1,2, list 1,2,2,3,3,[3],4,4,4,4,5,5,5,5,5, newlist 1,2, list 1,2,2,3,3,3,[4],4,4,4,5,5,5,5,5, add 3 newlist 1,2,3, list 1,2,2,3,3,3,4,[4],4,4,5,5,5,5,5, newlist 1,2,3, list 1,2,2,3,3,3,4,4,[4],4,5,5,5,5,5, newlist 1,2,3, list 1,2,2,3,3,3,4,4,4,[4],5,5,5,5,5, newlist 1,2,3, list 1,2,2,3,3,3,4,4,4,4,[5],5,5,5,5, add 4 newlist 1,2,3,4, list 1,2,2,3,3,3,4,4,4,4,5,[5],5,5,5, newlist 1,2,3,4, list 1,2,2,3,3,3,4,4,4,4,5,5,[5],5,5, newlist 1,2,3,4, list 1,2,2,3,3,3,4,4,4,4,5,5,5,[5],5, newlist 1,2,3,4, list 1,2,2,3,3,3,4,4,4,4,5,5,5,5,[5], newlist 1,2,3,4, add 5 newlist 1,2,3,4,5, newlist值 [1, 2, 3, 4, 5]