System Environment
[root@m01 scripts]# uname -r
2.6.32-696.el6.x86_64
[root@m01 scripts]# uname -m
x86_64
[root@m01 scripts]# cat /etc/redhat-release
CentOS release 6.9 (Final)
Shell Exercise 1
#!/bin/bash
# date: 2018-03-xx
# author: yk
# description: crawl a 51cto blogger's post list
# version: 0.1
source /etc/profile
. /etc/init.d/functions

# Temporary working file
TmpFile="/tmp/.$(date +%Y%m%d_%H%M%S).log.tmp"
touch $TmpFile
# File that stores the extracted blog information
BlogFile="/tmp/$(date +%Y%m%d_%H%M%S)_blog.html"
touch $BlogFile

# Ask the user for the 51cto blogger's homepage URL
read -p 'please input website: ' Website

# Fetch the blogger's homepage
wget -q -O $TmpFile $Website &>/dev/null
[ $? -ne 0 ] && echo "the website you input does not exist" && exit 1

# The "last page" link on the homepage contains the total number of pages
MainURL=$(sed -n '/class="last".*末頁.*/p' $TmpFile | egrep -o 'http:.*p[0-9]{1,}')
# Number of pages, e.g. 28
Pages=$(echo $MainURL | sed -n 's#^.*p##gp')

# If the URL is not a blog homepage, the extracted value will not be a number
if [ "$Pages" -gt 0 ] &>/dev/null
then
    echo "please wait ......"
else
    echo "the URL you input is not a blog homepage"
    rm -f $TmpFile
    rm -f $BlogFile
    exit 1
fi

# The paging URL without its trailing page number
UR=$(echo $MainURL | sed -rn 's#[0-9]{1,}$##gp')

# Walk through every page
for ((i=1;i<=$Pages;i++))
do
    # Append the page number to form the complete page URL
    wget -q -O $TmpFile ${UR}$i &>/dev/null
    # Grab the time, title and link of every post
    egrep -A 1 '<a class="tit" | class="time' $TmpFile | sed '/^\-\-/d' | sed -r 's#[ ]+# #g' >>$BlogFile
    # Pause for 0.05 seconds so we do not hit the site too fast
    sleep 0.05
done

# Empty the temporary file
>$TmpFile

# ===============================================================
action "The blogger's blog information has been downloaded locally" /bin/true
echo "Extracting required data from downloaded data ......"
echo "please wait ....."
# ===============================================================

i=0
# Extract the desired fields from each line of the file
while read line
do
    # Each blog entry spans 4 lines, so process the file in groups of 4
    ((++i))
    case "$i" in
        1)
            # Posting time
            Time=$(echo $line | sed -r 's#^.*>發佈於:(.*)</a>#\1#g')
            ;;
        3)
            # Post link (href)
            Href=$(echo $line | sed -r 's#^.*href=\"(.*)\">#\1#g')
            ;;
        4)
            # Post title
            Title=$(echo $line | sed -r 's#^(.*)<.*$#\1#g')
            ;;
        *)
            ;;
    esac
    # After the 4th line of an entry, append the gathered fields to the temporary file
    if [ "$i" -eq "4" ]
    then
        i=0
        echo "<a href=\"$Href\">$Time---$Title</a><br/>" >> $TmpFile
    fi
done < $BlogFile

# Empty the result file
>$BlogFile
# Sort by time (field 2 when split on '>'), newest first, and append to $BlogFile
cat $TmpFile | sort -rt '>' -k2 >>$BlogFile
rm -f $TmpFile
action "success" /bin/true
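For reference, a minimal sketch of what an interactive run might look like on the environment above. The script name and the blogger URL are hypothetical placeholders; the [  OK  ] markers come from the action helper loaded from /etc/init.d/functions:

[root@m01 scripts]# sh get_blog.sh
please input website: http://blog.51cto.com/xxxxxxxx
please wait ......
The blogger's blog information has been downloaded locally        [  OK  ]
Extracting required data from downloaded data ......
please wait .....
success                                                           [  OK  ]

When it finishes, the sorted post list (one <a href> line per post, newest first) is in the /tmp/*_blog.html file created at the start of the run.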
Note: for reference only.