java爬蟲中jsoup的使用

jsoup可以用來解析HTML的內容,其功能非常強大,它可以像javascript那樣直接從網頁中提取有用的信息

例如1:

  •  從html字符串中解析數據
//直接從字符串中獲取
    /**
     * Parses a fixed, in-memory HTML string with jsoup and prints, for every
     * {@code <p>} element that carries a {@code class} attribute, the element's
     * text on one line followed by its class name on the next.
     */
    public static void getParByString()
    {
        // The markup is assembled from three literals, exactly as in the demo.
        String markup = "<html><head><title> 這裏是字符串內容</title></head"
                + ">"
                + "<body><p class='p1'> 這裏是 jsoup 做用的相關演示</p></body></html>";

        Document parsed = Jsoup.parse(markup);
        Elements paragraphs = parsed.select("p[class]");

        for (Element paragraph : paragraphs) {
            String cssClass = paragraph.className();
            String content = paragraph.text();
            // Text first, class name second.
            System.out.println(content);
            System.out.println(cssClass);
        }
    }
  •    從本地文件中解析數據
//從本地文件中獲取
    /**
     * Parses a local HTML file with jsoup and prints every anchor that has an
     * {@code href} attribute as {@code <link text>:<href value>}.
     *
     * <p>The file path is hard-coded. If the file cannot be read, the stack
     * trace is printed and the method returns without printing any links
     * (previously a failed parse left the document {@code null} and the
     * following {@code select} call threw a {@link NullPointerException}).
     */
    public static void getHrefByLocal()
    {
        File input = new File("C:\\Users\\Idea\\Desktop\\html\\Home.html");
        Document doc;
        try {
            // The third argument is the base URI jsoup uses when relative
            // links in the document have to be resolved to absolute ones.
            doc = Jsoup.parse(input, "UTF-8", "http://www.oschina.net/");
        } catch (IOException e) {
            e.printStackTrace();
            return; // nothing to iterate over without a parsed document
        }

        Elements links = doc.select("a[href]");
        for (Element link : links) {
            // "href" yields the attribute exactly as written in the file.
            String linkHref = link.attr("href");
            String linkText = link.text();
            System.out.println(linkText + ":" + linkHref);
        }
    }
  • 直接從網絡上解析數據
/**
 * Downloads the page at {@code url} with an HTTP GET and collects its links.
 *
 * <p>Every {@code <a>} element with an {@code href} attribute contributes one
 * entry: key = link text, value = the {@code abs:href} attribute. Because the
 * text is the key, links that share the same text overwrite each other and
 * only the last one is kept.
 *
 * <p>A POST request can be issued instead by building the connection with
 * {@code .data("query", "Java").userAgent(...).cookie(...).timeout(10000).post()}
 * in place of {@code .get()}.
 *
 * @param url address of the page to fetch
 * @return a map of link text to link target; if the page cannot be loaded the
 *         stack trace is printed and the map contains the single marker entry
 *         {@code "加載失敗" -> "error"} (plus anything collected before the failure)
 */
public static HashMap getHrefByNet(String url)
    {
        // Typed internally; the raw return type is kept for existing callers.
        HashMap<String, String> hm = new HashMap<String, String>();
        try {
            Document doc = Jsoup.connect(url).get();
            Elements links = doc.select("a[href]");

            for (Element link : links) {
                String linkHref = link.attr("abs:href");
                String linkText = link.text();
                hm.put(linkText, linkHref);
            }
        } catch (IOException e) {
            e.printStackTrace();
            hm.put("加載失敗", "error");
        }

        return hm;
    }
     

注意:需要引用的jar如下:

import org.jsoup.*;
import org.jsoup.nodes.*;
import org.jsoup.select.Elements;

最後附上jar包下載地址:

http://jsoup.org/packages/jsoup-1.8.1.jar
具體實際項目請看java爬蟲實戰項目

 循環遍歷Hashtable中的鍵和值

/* Create a table of key/value pairs to experiment with. */
Hashtable h = new Hashtable();
/* Store one entry in it. */
h.put(key, value);
/* Visit every entry in turn, printing its value and then its key. */
for (Object item : h.entrySet()) {
    Map.Entry entry = (Map.Entry) item;
    System.out.println(entry.getValue());
    System.out.println(entry.getKey());
}

 java文件夾的創建(先判斷是否存在,如果不存在就創建)

//建立文件夾(若是不存在就建立,存在就不變)
     /**
      * Ensures the hard-coded directory {@code D://home//Lucy} exists.
      *
      * <p>If something is already present at that path a notice is printed and
      * nothing else happens. Otherwise the directory, including any missing
      * parents, is created with {@code mkdirs()} and the outcome is verified
      * and reported on standard output.
      */
     public void makedir(){
         File dir = new File("D://home//Lucy");

         // Guard: leave an existing path untouched.
         if (dir.exists() || dir.isDirectory()) {
             System.out.println("文件已經存在!");
             return;
         }

         System.out.println("不存在");
         // mkdirs() also creates missing parent directories; mkdir() would not.
         dir.mkdirs();

         // Re-check the file system rather than trusting the return value.
         boolean created = dir.exists() && dir.isDirectory();
         if (created) {
             System.out.println("文件夾建立成功!");
         } else {
             System.out.println("文件建立不成功!");
         }
     }

 java文件的創建(先判斷是否存在,如果不存在就創建)

//建立文件,若是不存在就建立文件
     /**
      * Creates the empty file {@code D://file2.txt} when nothing exists at that
      * path yet.
      *
      * <p>Prints a success message when {@code createNewFile()} reports that the
      * file was created, prints a notice when the path already exists, and
      * prints the stack trace if creation fails with an {@link IOException}.
      */
     public void makeFile()
     {
         File target = new File("D://file2.txt");

         // Guard: do nothing except report when the path is already taken.
         if (target.exists() || target.isFile()) {
             System.out.println("文件已經存在!");
             return;
         }

         boolean created;
         try {
             // true only when this call actually created the file
             created = target.createNewFile();
         } catch (IOException e) {
             e.printStackTrace();
             return;
         }

         if (created) {
             System.out.println("文件建立成功!");
         }
     }

在文件中寫入內容

 //往文件中寫入文本
     /**
      * Replaces the content of the hard-coded file {@code D://file2.txt} with
      * the bytes of {@code s}.
      *
      * <p>The file must already exist as a regular file; otherwise a message is
      * printed and nothing is written. The text is encoded with the platform's
      * default charset ({@code String.getBytes()}).
      *
      * <p>I/O failures are reported by printing the stack trace. If the stream
      * cannot be opened the method now returns immediately; previously it went
      * on to call {@code write}/{@code close} on a {@code null} stream and threw
      * a {@link NullPointerException}.
      *
      * @param s text to write; must not be {@code null}
      */
     public void writeText(String s) 
     {
         String fileName = "D://file2.txt";
         File file = new File(fileName);
         if (!(file.exists() && file.isFile())) {
             System.out.println("文件不存在,不能寫入內容");
             return;
         }

         FileOutputStream fos;
         try {
             // Opening without the append flag truncates the existing content.
             fos = new FileOutputStream(fileName);
         } catch (FileNotFoundException openFailure) {
             openFailure.printStackTrace();
             return; // no stream, so there is nothing to write to or close
         }

         try {
             fos.write(s.getBytes());
         } catch (IOException writeFailure) {
             writeFailure.printStackTrace();
         } finally {
             // Always release the file handle, even when the write failed.
             try {
                 fos.close();
             } catch (IOException closeFailure) {
                 closeFailure.printStackTrace();
             }
         }
     }

 

java獲取系統時間:

/**
 * Prints the current system time three ways: as {@code yyyy-MM-dd HH:mm:ss},
 * in a Chinese long format, and via {@code Date.toString()}.
 */
public static void getTime()
    {
        SimpleDateFormat plainFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
        Date now = new Date();

        String plainText = plainFormat.format(now);
        System.out.println(plainText);

        SimpleDateFormat chineseFormat = new SimpleDateFormat("yyyy年MM月dd日   HH時mm分ss秒");
        String chineseText = chineseFormat.format(now);
        System.out.println(chineseText);

        // Falls back to Date's own toString() representation.
        System.out.println(now);
    }

 

java連接mysql數據庫

   首先添加jar包:下載jar包

/**
 * Minimal JDBC helper for a MySQL database.
 *
 * <p>Every public operation opens a fresh connection through {@link #init()},
 * runs one statement and then releases all JDBC resources again. Failures are
 * reported on the console (message and/or stack trace) instead of being
 * thrown to the caller.
 *
 * <p>The class keeps its JDBC handles in instance fields and does no
 * synchronization, so one instance should not be shared between threads.
 */
public class connectDoctorMySql {

    // Connection settings. A remote server can be targeted by changing the
    // URL, e.g. "jdbc:mysql://192.168.0.16/hive" with matching user/password.
    // NOTE(review): credentials are hard-coded in source; consider moving them
    // to external configuration.
    public static final String url = "jdbc:mysql://127.0.0.1/orcl?useUnicode=true&characterEncoding=utf-8&useSSL=false";
    public static final String name = "com.mysql.jdbc.Driver";
    public static final String user = "root";
    public static final String password = "China123";

    public Connection conn = null;
    public PreparedStatement pst = null;
    public Statement stmt = null;
    ResultSet rs = null;

    /**
     * Loads the JDBC driver, opens a connection and creates a statement.
     *
     * <p>On failure a message and the stack trace are printed; {@code conn}
     * and/or {@code stmt} are then left {@code null}.
     */
    public void init() {
        try {
            Class.forName(name); // load the driver class
            conn = DriverManager.getConnection(url, user, password);
            stmt = conn.createStatement();
        } catch (Exception e) {
            System.out.println("數據庫鏈接失敗. . .");
            e.printStackTrace();
        }
    }

    /**
     * Executes an update statement (INSERT/UPDATE/DELETE/DDL).
     *
     * <p>The SQL text is sent to the database as is, so it must come from a
     * trusted source. If the connection could not be established nothing is
     * executed; {@link #init()} has already reported the problem.
     *
     * @param sql statement to execute
     */
    public void excute(String sql) {
        init();
        try {
            if (stmt == null) {
                return; // init() failed and has already reported why
            }
            stmt.executeUpdate(sql);
        } catch (SQLException e) {
            System.out.println("數據執行失敗:" + sql); // show the failing statement
            e.printStackTrace();
        } finally {
            closeResources();
        }
    }

    /**
     * Runs a query and collects two columns of every row.
     *
     * <p>The SQL text is sent to the database as is, so it must come from a
     * trusted source.
     *
     * @param sql query to run
     * @param x   1-based index of the column stored at position 0 of each row array
     * @param y   1-based index of the column stored at position 1 of each row array
     * @return a list of {@code String[2]} arrays, one per row; empty when the
     *         connection or the query fails (rows read before a failure are kept)
     */
    public ArrayList select(String sql, int x, int y) {
        init();
        ArrayList<String[]> result = new ArrayList<String[]>();
        try {
            if (stmt != null) {
                // Stored in the field so closeResources() can release it.
                rs = stmt.executeQuery(sql);
                while (rs.next()) {
                    String[] str = new String[2];
                    str[0] = rs.getString(x);
                    str[1] = rs.getString(y);
                    result.add(str);
                }
            }
        } catch (SQLException e) {
            e.printStackTrace();
        } finally {
            closeResources();
        }
        return result;
    }

    /**
     * Closes result set, statements and connection (in that order) and resets
     * the fields to {@code null}. Each resource is closed independently so a
     * failure on one does not prevent the others from being released.
     */
    private void closeResources() {
        if (rs != null) {
            try {
                rs.close();
            } catch (SQLException e) {
                e.printStackTrace();
            }
            rs = null;
        }
        if (pst != null) {
            try {
                pst.close();
            } catch (SQLException e) {
                e.printStackTrace();
            }
            pst = null;
        }
        if (stmt != null) {
            try {
                stmt.close();
            } catch (SQLException e) {
                e.printStackTrace();
            }
            stmt = null;
        }
        if (conn != null) {
            try {
                conn.close();
            } catch (SQLException e) {
                e.printStackTrace();
            }
            conn = null;
        }
    }
}

 

java連接oracle數據庫

/**
 * Minimal JDBC helper for an Oracle database.
 *
 * <p>Every public operation opens a fresh connection through {@link #init()},
 * runs one statement and then releases all JDBC resources again. Failures are
 * reported on the console (message and/or stack trace) instead of being
 * thrown to the caller.
 *
 * <p>The class keeps its JDBC handles in instance fields and does no
 * synchronization, so one instance should not be shared between threads.
 */
public class connectDoctor {

    // Oracle thin-driver connection settings.
    // NOTE(review): credentials are hard-coded in source; consider moving them
    // to external configuration.
    public static final String url = "jdbc:oracle:thin:@127.0.0.1:1521:orcl";
    public static final String name = "oracle.jdbc.driver.OracleDriver";
    public static final String user = "c238891";
    public static final String password = "Rapid111";

    public Connection conn = null;
    public PreparedStatement pst = null;
    public Statement stmt = null;
    ResultSet rs = null;

    /**
     * Loads the JDBC driver, opens a connection and creates a statement.
     *
     * <p>On failure a message and the stack trace are printed; {@code conn}
     * and/or {@code stmt} are then left {@code null}.
     */
    public void init() {
        try {
            Class.forName(name); // load the driver class
            conn = DriverManager.getConnection(url, user, password);
            stmt = conn.createStatement();
        } catch (Exception e) {
            // Reports a connection problem (the old text wrongly said that
            // inserting data had failed).
            System.out.println("數據庫鏈接失敗. . .");
            e.printStackTrace();
        }
    }

    /**
     * Connection smoke test: prints every row of the {@code emp} table.
     *
     * <p>If the connection could not be established nothing is queried;
     * {@link #init()} has already reported the problem.
     */
    public void start() {
        init();
        String sql = "select * from emp";
        try {
            if (conn == null) {
                return; // init() failed and has already reported why
            }
            pst = conn.prepareStatement(sql);
            rs = pst.executeQuery();
            while (rs.next()) {
                System.out.println("編號:" + rs.getString("empno")
                        + ";姓名:" + rs.getString("ename")
                        + "; 工做:" + rs.getString("job")
                        + "; 領導:" + rs.getString("mgr")
                        + "; 僱傭日期:" + rs.getString("hiredate")
                        + "; 工資:" + rs.getString("sal")
                        + "; 獎金:" + rs.getString("comm")
                        + "; 部門:" + rs.getString("deptno"));
            }
        } catch (SQLException e) {
            e.printStackTrace();
        } finally {
            // Statement and connection are released even when no result set
            // was ever produced.
            closeResources();
        }
    }

    /**
     * Executes an update statement (INSERT/UPDATE/DELETE/DDL).
     *
     * <p>The SQL text is sent to the database as is, so it must come from a
     * trusted source. If the connection could not be established nothing is
     * executed; {@link #init()} has already reported the problem.
     *
     * @param sql statement to execute
     */
    public void excute(String sql) {
        init();
        try {
            if (stmt == null) {
                return; // init() failed and has already reported why
            }
            stmt.executeUpdate(sql);
        } catch (SQLException e) {
            System.out.println(sql); // show the failing statement
            e.printStackTrace();
        } finally {
            closeResources();
        }
    }

    /**
     * Runs a query and collects two columns of every row.
     *
     * <p>The SQL text is sent to the database as is, so it must come from a
     * trusted source.
     *
     * @param sql query to run
     * @param x   1-based index of the column stored at position 0 of each row array
     * @param y   1-based index of the column stored at position 1 of each row array
     * @return a list of {@code String[2]} arrays, one per row; empty when the
     *         connection or the query fails (rows read before a failure are kept)
     */
    public ArrayList select(String sql, int x, int y) {
        init();
        ArrayList<String[]> result = new ArrayList<String[]>();
        try {
            if (stmt != null) {
                // Stored in the field so closeResources() can release it.
                rs = stmt.executeQuery(sql);
                while (rs.next()) {
                    String[] str = new String[2];
                    str[0] = rs.getString(x);
                    str[1] = rs.getString(y);
                    result.add(str);
                }
            }
        } catch (SQLException e) {
            e.printStackTrace();
        } finally {
            closeResources();
        }
        return result;
    }

    /**
     * Closes result set, statements and connection (in that order) and resets
     * the fields to {@code null}. Each resource is closed independently so a
     * failure on one does not prevent the others from being released.
     */
    private void closeResources() {
        if (rs != null) {
            try {
                rs.close();
            } catch (SQLException e) {
                e.printStackTrace();
            }
            rs = null;
        }
        if (pst != null) {
            try {
                pst.close();
            } catch (SQLException e) {
                e.printStackTrace();
            }
            pst = null;
        }
        if (stmt != null) {
            try {
                stmt.close();
            } catch (SQLException e) {
                e.printStackTrace();
            }
            stmt = null;
        }
        if (conn != null) {
            try {
                conn.close();
            } catch (SQLException e) {
                e.printStackTrace();
            }
            conn = null;
        }
    }
}
相關文章
相關標籤/搜索