- #!/usr/bin/perl;
- use strict;
- use warnings;
- use Data::Dumper;
- use Time::HiRes qw(sleep);
- use Test::WWW::Selenium;
- use Test::More "no_plan";
- use Test::Exception;
- use encoding "utf-8";
- use Image::Magick;
- use warnings;
# Captcha digits recognised by the most recent successful OCR pass in
# Magick(); the main loop reads this and clears it again after submitting.
my $verify;
# Scratch variable holding the most recently fetched page source.
my $f1;

# Magick($x, $y)
#
# Crop the captcha out of the page screenshot and run it through Tesseract.
#
#   $x, $y  - offset of the 60x30 crop region inside the screenshot
#             (ImageMagick "WxH+X+Y" geometry).
#
# Reads  f:/perl/cu/c.png, writes f:/perl/cu/x.png, then lets tesseract
# produce f:/perl/cu/x.txt.  When tesseract exits successfully, the file
# scoped $verify is set to the first run of four digits on the last line of
# x.txt that contains one; otherwise $verify is left untouched.
#
# Image::Magick status strings are reported with warn() and do not abort.
# Dies only if x.txt cannot be opened after a successful tesseract run.
# Always returns 1.
sub Magick {
    my ($x, $y) = @_;

    # Single working directory for the screenshot, the cropped image and the
    # OCR output, so the sub no longer depends on the current directory.
    my $png_path            = 'f:/perl/cu';
    my $tesseract_soft_path = 'c:/Program Files/Tesseract-OCR/tesseract.exe';

    my $image = Image::Magick->new;

    # Image::Magick methods return an empty string on success and a message
    # otherwise, hence the stringified truth tests.
    my $status = $image->Read("$png_path/c.png");
    warn "$status" if "$status";

    $status = $image->Crop(geometry => "60x30+$x+$y");
    warn "$status" if "$status";

    $status = $image->Write("$png_path/x.png");
    warn "$status" if "$status";

    # List-form system() bypasses the shell, so the space in the program path
    # is safe.  The language is named explicitly: the original passed a bare
    # "-l" with no value.  Tesseract appends ".txt" to the output base name.
    my $tesseract = system(
        $tesseract_soft_path,
        "$png_path/x.png",
        "$png_path/x",
        '-l', 'eng',
    );
    if ($tesseract) {
        warn "tesseract failed (system() returned $tesseract)";
        return 1;
    }

    open my $ocr_fh, '<', "$png_path/x.txt"
        or die "Cannot open $png_path/x.txt: $!";
    # A named line variable keeps the caller's $_ intact.
    while (my $line = <$ocr_fh>) {
        $verify = $1 if $line =~ /(\d{4})/;
    }
    close $ocr_fh;

    return 1;
}
# Main script: for every id listed in uid.txt, open the search page, OCR the
# captcha via Magick(), submit the query, and append the parsed result as one
# tab-separated line to output.txt.

# Start the browser session through the Selenium RC server on localhost:4444.
my $sel = Test::WWW::Selenium->new(
    host        => "localhost",
    port        => 4444,
    browser     => "*chrome",
    browser_url => "http://www.mchina.cn/searchkeyword.html",
);

# Markup of the red message list; per the original author's note it is what
# the page shows when the submitted captcha did not match.
my $captcha_error_re =
    qr{<div[^>]+center">\s*<font[^>]+red">[\s\S]+?<li><span>.*?</span></li>};

open my $uid_fh, '<', 'uid.txt' or die "Cannot open uid.txt: $!";

UID_LINE:
while (my $uid = <$uid_fh>) {
    chomp $uid;
    next UID_LINE unless length $uid;    # do not submit blank lines as queries

    # Retry until a submission comes back without the captcha error markup.
    ATTEMPT:
    while (1) {
        sleep 4;    # pause before every attempt, including the first

        $sel->open_ok("/searchkeyword.html");
        $sel->wait_for_page_to_load_ok("30000");

        # Render the whole page to a PNG and OCR the captcha region.
        $sel->capture_entire_page_screenshot('f:\perl\cu\c.png', 'background=#CCFFDD');
        Magick("660", "400");

        # When the error markup is on the page the captcha sits at a different
        # position, so crop again with adjusted offsets.
        $f1 = $sel->get_html_source;
        if ($f1 =~ $captcha_error_re) {
            Magick("654", "420");
        }

        # NOTE(review): nothing navigates between the previous wait and this
        # one; kept from the original, confirm whether it is still needed.
        $sel->wait_for_page_to_load_ok("30000");

        if (defined $verify) {
            $sel->type_ok("id=keyword", $uid);
            $sel->type_ok("id=confirmcodekeyword", $verify);
            $sel->click_ok("css=input.btn");
            $sel->wait_for_page_to_load_ok("60000");
            undef $verify;    # never reuse a code on the next attempt

            my $err = $sel->get_html_source();
            last ATTEMPT unless $err =~ $captcha_error_re;
        }

        # No usable code, or the code was rejected: reload the search page.
        $sel->open_ok("/searchkeyword.html");
        $sel->wait_for_page_to_load_ok("30000");
        $sel->refresh();
    }

    # The result is rendered inside an iframe, so the top-level source does
    # not contain it; switch into the frame before reading the source.
    my $frame = $sel->select_frame('//iframe');
    next UID_LINE unless defined $frame;

    my $route = $sel->get_html_source();
    my @info;
    if ($route =~ /<div[^>]+searchkey">.*?\n.*?([^\s]+)[\s\S]+?
                   <div[^>]+keystatus">\s*<font[^>]+>([^<]+)[\s\S]+?
                   <div[^>]+regtime">(\S+)[\s\S]+?
                   <div[^>]+registerNOforkey">\s*([^\s]*)/x) {
        # Copy the captures immediately; an empty registration number is
        # recorded as 0.
        my ($name, $status, $regtime, $idnum) = ($1, $2, $3, $4 || 0);
        push @info, $uid, $name, $status, $regtime, $idnum;
    }
    elsif ($route =~ /<font[^>]+red">\s*([^\s]+)/) {
        # No result table: record the first word of the red message instead.
        push @info, $uid, $1;
    }

    # Open and close per record so each line reaches the disk right away.
    open my $out_fh, '>>', 'output.txt' or die "Cannot open output.txt: $!";
    print {$out_fh} join("\t", @info), "\n";
    close $out_fh or die "Cannot close output.txt: $!";
}
close $uid_fh;

# Wait 500 seconds before ending the browser session.
sleep 500;
$sel->stop();
3、解釋
上面的一段code主要用於查詢信息,自動填寫驗證碼,把查詢到的結果導入文本。
操作步驟
1.cmd進入剛纔下載軟件的目錄。
執行java -jar selenium-server-standalone-2.25.0.jar(開啓後不要關閉這個cmd窗口)
2.運行perl程序
F:\perl\cu>Perl-selenium.pl
運行過程