java實現爬蟲功能

/**
 * 爬取新聞信息,封裝成實體bean
 */
public class GetNews {
 public List<News> getNews() {
  // 存儲新聞對象
  List<News> list = new ArrayList<News>();
  try {
   // 請求DOM文檔
   Document document = Jsoup.connect("http://baijia.baidu.com/").get();
   // 解析
   String selector = "h3>a";
   Elements titlels = document.select(selector);mysql

   for (Element title : titlels) {
    // System.out.println("標題---" + title.text());
    // 再次請求a標籤,獲取內容
    String url = title.absUrl("href");
    Document document1 = Jsoup.connect(url).get();
    String selectTime = document1.select("span[class=time]").text();
    // System.out.println("時間---" + selectTime);
    String selectBody = document1.select(
      "div[class=article-detail]").text();
    // System.out.println("正文---" + selectBody);
    // 構成news對象加入list集合
    News news = new News();
    news.setTitle(title.text());
    news.setBody(selectBody);
    news.setDate(selectTime);
    list.add(news);
   }sql

  } catch (IOException e) {
   // TODO Auto-generated catch block
   e.printStackTrace();
  }數據庫

  return list;
 }工具

}url

/*
  * 把得到的news對象存入數據庫
  */
 public int save(List<News> list) {spa

  // sql前綴
  String sql = "insert into news (title,body,date) values";
  /*
   * 這種方式插入數據庫 速度最快
   */
  for (News news : list) {
   sql = sql + "('" + news.getTitle() + "','" + news.getBody() + "','"
     + news.getDate() + "'),";
  }
  sql = sql.substring(0, sql.length() - 1);
  System.out.println(sql);
  int rows = BaseDao.executeUpdate(sql);
  return rows;
 }對象

/**
 * 鏈接數據庫 通用的 工具類
 *
 */
public class BaseDao {
 // 建立須要獲得JDBC API
 protected static Connection connection = null;
 protected static PreparedStatement ps = null;
 protected static ResultSet rs = null;接口

 // 01.獲取數據庫鏈接
 public static boolean getConnection() {
  /**
   * 獲取數據庫鏈接的4要素 鏈接數據庫的前提
   */
  String driver = ConfigManager.getInstance().getValue("jdbc.driver");
  String url = ConfigManager.getInstance().getValue("jdbc.url");
  String userName = ConfigManager.getInstance().getValue("jdbc.userName");
  String password = ConfigManager.getInstance().getValue("jdbc.password");資源

  try {
   Class.forName(driver); // 加載驅動
   connection = DriverManager.getConnection(url, userName, password);
  } catch (ClassNotFoundException e) {
   e.printStackTrace();
   return false;
  } catch (SQLException e) {
   e.printStackTrace();
   return false;
  }
  return true;
 }文檔

 /**
  * 03.增刪改 executeUpdate() 返回int 表明影響數據庫中的行數 delete from user; delete from
  * user where id=? and name=?;
  */
 public static int executeUpdate(String sql, Object... params) {
  int rowNum = 0;
  if (getConnection()) { // 操做數據庫 確定現有鏈接
   try {
    ps = connection.prepareStatement(sql);
    // 循環給sql語句中的?佔位符 賦值
    for (int i = 0; i < params.length; i++) {
     ps.setObject(i + 1, params[i]);
    }
    // 執行sql語句
    rowNum = ps.executeUpdate();
   } catch (SQLException e) {
    e.printStackTrace();
   } finally {
    closeConnection(); // 關閉鏈接
   }

  }

  return rowNum;
 }

 /**
  * 04.查詢 executeQuery() 返回ResultSet select * from user; select * from user
  * where id=? and name=?;
  */
 public static ResultSet executeQuery(String sql, Object... params) {
  if (getConnection()) { // 操做數據庫 確定現有鏈接
   try {
    ps = connection.prepareStatement(sql);
    // 循環給sql語句中的?佔位符 賦值
    for (int i = 0; i < params.length; i++) {
     ps.setObject(i + 1, params[i]);
    }
    // 執行sql語句
    rs = ps.executeQuery();
   } catch (SQLException e) {
    e.printStackTrace();
   }
  }
  return rs;
 }

 // 02.釋放資源
 public static boolean closeConnection() {
  // 若是對象都沒有建立 ? 能關閉嗎? 必須進行非空判斷
  if (rs != null) {
   try {
    rs.close();
   } catch (SQLException e) {
    e.printStackTrace();
    return false;
   }
  }
  if (ps != null) {
   try {
    ps.close();
   } catch (SQLException e) {
    e.printStackTrace();
    return false;
   }
  }
  if (connection != null) {
   try {
    connection.close();
   } catch (SQLException e) {
    e.printStackTrace();
    return false;
   }
  }
  return true;
 }

}

/*
  * 輸入關鍵字 查詢 模糊查詢
  */
 public List<News> selectNews(String name) {
  List<News> list = new ArrayList<News>();
  String sql = "select * from news where title like ?";
  Object[] params = { "%" + name + "%" };
  ResultSet rs = BaseDao.executeQuery(sql, params);
  try {
   // 遍歷結果集
   while (rs.next()) {
    // 建立新聞對象
    News news = new News();
    // 獲取每一行的每一列
    news.setId(rs.getInt("id"));
    news.setTitle(rs.getString("title"));
    news.setBody(rs.getString("body"));
    news.setDate(rs.getString("date"));
    list.add(news);
   }
  } catch (Exception e) {
   // TODO: handle exception
  }
  return list;
 }

/*
 * 單例 讀取配置文件的工具類
 * */

/**
 * Singleton utility that reads {@code jdbc.properties} from the classpath
 * once and serves its values by key.
 */
public class ConfigManager {

 // Declared before the instance: static initialisers run in textual order
 // and the constructor below fills this object.
 private static final Properties properties = new Properties();

 // 01. The single instance, created when the class is initialised
 private static final ConfigManager manager = new ConfigManager();

 // 02. Private constructor: loads the configuration file
 private ConfigManager() {
  String path = "jdbc.properties";
  InputStream stream = ConfigManager.class.getClassLoader()
    .getResourceAsStream(path);
  if (stream == null) {
   // getResourceAsStream returns null for a missing resource; loading it
   // would throw a NullPointerException during class initialisation.
   System.err.println("Configuration file not found on classpath: " + path);
   return;
  }
  try {
   properties.load(stream);
  } catch (IOException e) {
   e.printStackTrace();
  } finally {
   try {
    stream.close();
   } catch (IOException e) {
    e.printStackTrace();
   }
  }
 }

 /**
  * 03. Access point for the outside world.
  *
  * @return the single {@code ConfigManager} instance
  */
 public static ConfigManager getInstance() {
  return manager;
 }

 /**
  * Looks up a configuration value.
  *
  * @param key property name, e.g. {@code jdbc.url}
  * @return the configured value, or {@code null} when the key is absent or
  *         the configuration file could not be loaded
  */
 public static String getValue(String key) {
  return properties.getProperty(key);
 }

}

/*

*properties文件

*/

jdbc.url=jdbc\:mysql\://localhost\:3306/test
jdbc.userName=hhr
jdbc.password=hhr
jdbc.driver=com.mysql.jdbc.Driver

相關文章
相關標籤/搜索