基於Java的數據採集(三)

《基於Java的數據採集(一)》:http://www.cnblogs.com/lichenwei/p/3904715.html

《基於Java的數據採集(二)》:http://www.cnblogs.com/lichenwei/p/3905370.html

《基於Java的數據採集(終結篇)》:http://www.cnblogs.com/lichenwei/p/3910492.html

基於之前2篇Java數據採集入庫,做了下功能整合,實現本地的存讀取,上個效果圖:

直接上代碼吧,本程序只是作爲"如何用JAVA抓取頁面簡單採集入庫"的入門,在實際做採集工具的時候,還需考慮許多東西,比如當採集一個頁面發生卡頓、發生延遲時怎麼辦?等一系列的問題,但願這篇文字可以拋磚引玉。

先看下項目結構:

一共有五個類:

MySql.java  --數據庫操作類

RegEX.java   --正則匹配類

GetAllData.java --採集類

Action.java  --功能實現類

FootBallMain.java --主程序類

其他的,直接結合前面2篇文章外加看代碼註釋吧

MySql.java

 1 package com.lcw.curl;
 2 
 3 
 4 import java.sql.Connection;
 5 import java.sql.DriverManager;
 6 import java.sql.ResultSet;
 7 import java.sql.SQLException;
 8 import java.sql.Statement;
 9 
10 
/**
 * Thin JDBC helper for the {@code football} database: one method runs an
 * update statement, the other runs a query.
 *
 * @author Balla_兔子
 */
public class MySql {

    // JDBC driver class name, connection URL and credentials.
    public String driver = "com.mysql.jdbc.Driver";
    public String url = "jdbc:mysql://127.0.0.1:3306/football";
    public String user = "root";
    public String password = "";
    // Statement and connection backing the ResultSet most recently returned by
    // searchMySql(); they stay open until close() is called.
    public Statement stmt = null;
    public Connection conn = null;

    /**
     * Executes an INSERT/UPDATE/DELETE statement on its own short-lived
     * connection, which is always closed before the method returns.
     *
     * <p>A failure while executing the statement is printed and swallowed so
     * that one bad statement does not stop a caller that is inserting many rows.
     *
     * @param insertSQl the SQL statement to execute
     * @throws IllegalStateException if the database connection cannot be opened
     */
    public void datatoMySql(String insertSQl) {
        Connection connection = openConnection();
        Statement statement = null;
        // try/finally (not try-with-resources) keeps this compilable on the
        // pre-Java-7 language level the rest of the file is written in.
        try {
            statement = connection.createStatement();
            statement.executeUpdate(insertSQl);
        } catch (SQLException e) {
            e.printStackTrace();
        } finally {
            closeQuietly(statement, connection);
        }
    }

    /**
     * Executes a SELECT statement and returns the live result set.
     *
     * <p>The returned {@link ResultSet} is backed by {@link #stmt} and
     * {@link #conn}, which therefore remain open; call {@link #close()} once
     * the result has been consumed and before running another query on this
     * instance.
     *
     * @param selectSQl the SQL query to execute
     * @return the result set, or {@code null} if the query itself failed
     * @throws IllegalStateException if the database connection cannot be opened
     */
    public ResultSet searchMySql(String selectSQl) {
        conn = openConnection();
        try {
            stmt = conn.createStatement();
            return stmt.executeQuery(selectSQl);
        } catch (SQLException e) {
            e.printStackTrace();
            // Nothing usable was produced, so do not leave the connection open.
            close();
            return null;
        }
    }

    /**
     * Closes the statement and connection left open by {@link #searchMySql}.
     * Safe to call when nothing is open.
     */
    public void close() {
        closeQuietly(stmt, conn);
        stmt = null;
        conn = null;
    }

    /** Loads the configured driver and opens a connection with the configured credentials. */
    private Connection openConnection() {
        try {
            Class.forName(driver);
        } catch (ClassNotFoundException e) {
            // Not fatal by itself: DriverManager may still locate a registered driver.
            System.out.println("Unable to find the local driver");
            e.printStackTrace();
        }
        try {
            return DriverManager.getConnection(url, user, password);
        } catch (SQLException e) {
            throw new IllegalStateException("Unable to connect to " + url, e);
        }
    }

    /** Closes the statement, then the connection, reporting but not propagating failures. */
    private static void closeQuietly(Statement statement, Connection connection) {
        if (statement != null) {
            try {
                statement.close();
            } catch (SQLException e) {
                e.printStackTrace();
            }
        }
        if (connection != null) {
            try {
                connection.close();
            } catch (SQLException e) {
                e.printStackTrace();
            }
        }
    }

}
MySql.java

RegEX.java

 1 package com.lcw.curl;
 2 
 3 import java.util.regex.Matcher;
 4 import java.util.regex.Pattern;
 5 
 6 public class RegEX {
 7 
 8     /**
 9      * 
10      * @param regex
11      * 正則表達式
12      * @param content
13      * 所要匹配的內容
14      * @return
15      */
16     public String getData(String regex, String content) {
17         Pattern pattern = Pattern.compile(regex, Pattern.CASE_INSENSITIVE);// 設定正則表達式,不區分大小寫
18         Matcher matcher = pattern.matcher(content);
19         if (matcher.find()) {
20             return matcher.group();//返回正則匹配結果
21         } else {
22             return "";
23         }
24     }
25 
26 }
RegEX.java

GetAllData.java

 1 package com.lcw.curl;
 2 
 3 import java.io.BufferedReader;
 4 import java.io.InputStreamReader;
 5 import java.net.URL;
 6 
 7 public class GetAllData {
 8 
 9     /**採集類
10      * @param Balla_兔子
11      */
12     public void getAllData() {
13 
14         try {
15             String address = "http://www.footballresults.org/league.php?league=EngDiv1";
16             URL url = new URL(address);
17             InputStreamReader inputStreamReader = new InputStreamReader(url
18                     .openStream(), "utf-8");// 打開地址,以UTF-8編碼的形式返回字節並轉爲字符
19             BufferedReader bufferedReader = new BufferedReader(
20                     inputStreamReader);// 從字符輸入流中讀取文本,緩衝各個字符,從而提供字符、數組和行的高效讀取。
21 
22             RegEX data = new RegEX();
23             MySql mySql = new MySql();
24             String content = "";// 用來接受每次讀取的行字符
25             int flag = 0;// 標誌,隊伍信息恰好在日期信息後面,則正則相同,用於分離數據
26             String dateRegex = "\\d{1,2}\\.\\d{1,2}\\.\\d{4}";// 日期匹配正則表達式
27             String teamRegex = ">[^<>]*</a>";// 隊伍匹配正則表達式
28             String scoreRegex = ">(\\d{1,2}-\\d{1,2})</TD>";// 比分正則表達式
29             String tempDate = "";// 存儲臨時比賽時間
30             String teama = "";// 存儲臨時主隊
31             String teamb = "";// 存儲臨時客隊
32             String score = "";// 存儲臨時比分
33             int i = 0;// 記錄信息條數
34             String sql = "";// 數據庫語句
35 
36             while ((content = bufferedReader.readLine()) != null) {// 每次讀取一行數據
37                 // 獲取比賽日期信息
38                 String dateInfo = data.getData(dateRegex, content);
39                 if (!dateInfo.equals("")) {
40                     // System.out.println("日期:" + dateInfo);
41                     tempDate = dateInfo;
42                     flag++;
43                 }
44                 // 獲取隊伍信息,需先讀到日期信息讓標誌符自增
45                 String teamInfo = data.getData(teamRegex, content);
46                 if (!teamInfo.equals("") && flag == 1) {
47                     teama = teamInfo.substring(1, teamInfo.indexOf("</a>"));
48                     // System.out.println("主隊:" + teama);
49                     flag++;
50                 } else if (!teamInfo.equals("") && flag == 2) {
51                     teamb = teamInfo.substring(1, teamInfo.indexOf("</a>"));
52                     // System.out.println("客隊:" + teamb);
53                     flag = 0;
54                 }
55                 // 獲取比分信息
56                 String scoreInfo = data.getData(scoreRegex, content);
57                 if (!scoreInfo.equals("")) {
58                     score = scoreInfo.substring(1, scoreInfo.indexOf("</TD>"));
59                     // System.out.println("比分:" + score);
60                     // System.out.println();
61                     i++;
62                     sql = "insert into football(`date`,`teama`,`teamb`,`score`) values('"
63                             + tempDate
64                             + "','"
65                             + teama
66                             + "','"
67                             + teamb
68                             + "','"
69                             + score + "')";
70                     mySql.datatoMySql(sql);
71                     System.out.println("存儲數據成功:" + i + "條");
72                 }
73 
74             }
75             bufferedReader.close();
76             // System.out.println("一共收集到了" + i + "條信息");
77         } catch (Exception e) {
78             e.printStackTrace();
79         }
80 
81     }
82 
83 }
GetAllData.java

Action.java

  1 package com.lcw.curl;
  2 
  3 import java.sql.ResultSet;
  4 import java.sql.SQLException;
  5 import java.util.ArrayList;
  6 import java.util.List;
  7 import java.util.Vector;
  8 
  9 public class Action {
 10 
 11     /**
 12      * 操做一:初始化數據庫數據
 13      */
 14     public void initData() {
 15         String sql = "delete from football";
 16         MySql doMySql = new MySql();
 17         try {
 18             doMySql.datatoMySql(sql);
 19             System.out.println("數據初始化完畢!");
 20         } catch (Exception e) {
 21             System.out.println("數據初始化失敗!");
 22         }
 23 
 24     }
 25 
 26     /**
 27      * 獲取全部隊伍信息
 28      * 
 29      * @return
 30      */
 31     public Vector<String> getAllTeam() {
 32         ResultSet rs = null;
 33         Vector<String> vector = new Vector<String>();
 34         String sql = "select teama,teamb from football";
 35         MySql doMySql = new MySql();
 36         rs = doMySql.searchMySql(sql);
 37 
 38         try {
 39             while (rs.next()) {
 40                 try {
 41                     if (!vector.contains(rs.getString("teama"))) {
 42                         vector.add(rs.getString("teama"));
 43                     }
 44                     if (!vector.contains(rs.getString("teamb"))) {
 45                         vector.add(rs.getString("teamb"));
 46                     }
 47                 } catch (SQLException e) {
 48                     e.printStackTrace();
 49                 }
 50             }
 51         } catch (SQLException e) {
 52             e.printStackTrace();
 53         }
 54 
 55         return vector;
 56 
 57     }
 58 
 59     /**
 60      * 獲取具體某隊的比賽信息
 61      * 
 62      * @param team
 63      * @return
 64      */
 65     public List<String> findTeam(String team) {
 66         List<String> list = new ArrayList<String>();
 67         String sql = "select * from football where teama ='" + team
 68                 + "' or teamb ='" + team + "'";
 69         MySql mysql = new MySql();
 70         ResultSet rs = null;
 71         rs = mysql.searchMySql(sql);
 72         try {
 73             while (rs.next()) {
 74                 list.add(rs.getString("date"));
 75                 list.add(rs.getString("teama"));
 76                 list.add(rs.getString("teamb"));
 77                 list.add(rs.getString("score"));
 78             }
 79         } catch (SQLException e) {
 80             e.printStackTrace();
 81         }
 82         return list;
 83 
 84     }
 85 
 86     public List<String> findGame(String date) {
 87         List<String> list = new ArrayList<String>();
 88         ResultSet rs = null;
 89         String sql = "select * from football where date ='" + date + "'";
 90         MySql mysql = new MySql();
 91         rs = mysql.searchMySql(sql);
 92         try {
 93             while (rs.next()) {
 94                 list.add(rs.getString("date"));
 95                 list.add(rs.getString("teama"));
 96                 list.add(rs.getString("teamb"));
 97                 list.add(rs.getString("score"));
 98             }
 99         } catch (SQLException e) {
100             // TODO Auto-generated catch block
101             e.printStackTrace();
102         }
103         return list;
104     }
105 
106 }
Action.java

FootBallMain.java

 1 package com.lcw.curl;
 2 
 3 import java.util.List;
 4 import java.util.Scanner;
 5 import java.util.Vector;
 6 
 7 public class FootBallMain {
 8 
 9     /**主程序類
10      * @param Balla_兔子
11      */
12     public static void main(String[] args) {
13         GetAllData allData = new GetAllData();
14         Action action = new Action();
15 
16         while (true) {
17             System.out.println("①初始化數據庫-請按 (1)");
18             System.out.println("②自動化採集數據-請按(2)");
19             System.out.println("③查詢參賽隊伍-請按(3)");
20             System.out.println("④查詢具體球隊比賽結果-請按(4)");
21             System.out.println("⑤查詢具體某天的比賽詳情-請按(5)");
22             Scanner scanner = new Scanner(System.in);
23             String input = scanner.next();
24             if (input.equals("1")) {
25                 System.out.println();
26                 action.initData();
27                 System.out
28                         .println("-----------------------------------------------------");
29             } else if (input.equals("2")) {
30                 System.out.println("正在採集數據...請稍後");
31                 allData.getAllData();
32                 System.out
33                         .println("-----------------------------------------------------");
34             } else if (input.equals("3")) {
35                 Vector<String> allTeam = action.getAllTeam();
36                 System.out.println("正在獲取數據...請稍後");
37                 if (allTeam.size() != 0) {
38                     System.out.println("參賽隊伍以下:");
39                     for (int i = 0; i < allTeam.size(); i++) {
40                         System.out.println(allTeam.get(i));
41                     }
42                 }
43                 System.out
44                         .println("-----------------------------------------------------");
45             } else if (input.equals("4")) {
46                 System.out.println("請輸入您要查詢的隊伍名:");
47                 String team = scanner.next();
48                 List<String> list = action.findTeam(team);
49                 System.out.println("比賽日期\t\t\t主隊\t\t客隊\t\t\t比賽結果");
50                 if (list.size() != 0) {
51                     for (int i = 0; i < list.size(); i++) {
52                         System.out.print(list.get(i) + "\t\t");
53                     }
54                 } else {
55                     System.out.println("暫時沒有您所提供隊伍的比賽信息,敬請關注...");
56                 }
57                 System.out.println();
58                 System.out
59                         .println("-----------------------------------------------------");
60             } else if (input.equals("5")) {
61                 System.out.println("請輸入您要查詢日期(格式以下:xx.xx.xxxx):");
62                 String date = scanner.next();
63                 List<String> info = action.findGame(date);
64                 System.out.println("比賽日期\t\t\t主隊\t\t客隊\t\t\t比賽結果");
65                 if (info.size() != 0) {
66                     for (int i = 0; i < info.size(); i++) {
67                         if (i % 4 == 0 && i != 0) {
68                             System.out.println();
69                         }
70                         System.out.print(info.get(i) + "\t\t");
71                     }
72                 } else {
73                     System.out.println("暫時沒有您所提供的比賽信息,敬請關注...");
74                 }
75                 System.out.println();
76                 System.out
77                         .println("------------------------------------------------------------------------");
78             } else {
79                 System.out.println("請輸入正確的對應編號..");
80                 System.out
81                         .println("------------------------------------------------------------------------");
82             }
83         }
84     }
85 
86 }
FootBallMain.java
相關文章
相關標籤/搜索