基於flask+requests 我的博客

最終實現效果以下圖:

首頁部分

 

技術文章界面

 

寫文章發佈文章

發佈文章

 

文章顯示

 發表評論

管理文章界面

小說首頁顯示    

 

 

 

 

 

 

 

 

 

 

能夠輸入查詢小說,若是小說不存在,就調用後臺爬蟲程序下載css

 

 

 

 

 

 

 

 

 

點開具體頁面顯示,小說章節列表,對於每一個章節,若是本地沒有就直接下載,能夠點開具體章節開心的閱讀,而沒有廣告,是的沒有廣告,純淨的html

 

 

2.代碼部分

2.1 開發環境

 1 Centos7 + mysql
 2 Flask==0.12.2
 3 Flask-Bootstrap==3.3.7.1
 4 Flask-Failsafe==0.2
 5 Flask-Login==0.4.1
 6 Flask-Mail==0.9.1
 7 Flask-Migrate==2.1.1
 8 Flask-Script==2.0.6
 9 Flask-SQLAlchemy==2.3.2
10 Flask-WTF==0.14.2
View Code

 

2.2  主要代碼部分

爬蟲部分下載小說資源

xiaoshuoSpider.py

# -*- coding: utf-8 -*-
# @Author: longzx
# @Date: 2018-03-10 21:41:55
# @cnblog:http://www.cnblogs.com/lonelyhiker/

import requests
import sys
from bs4 import BeautifulSoup
from pymysql.err import ProgrammingError
from app.xiaoshuo.spider_tools import get_one_page, insert_fiction, insert_fiction_content, insert_fiction_lst
from app.models import Fiction_Lst, Fiction_Content, Fiction


def search_fiction(name, flag=1):
    """輸入小說名字

    返回小說在網站的具體網址
    """
    if name is None:
        raise Exception('小說名字必須輸入!!!')

    url = 'http://zhannei.baidu.com/cse/search?s=920895234054625192&q={}'.format(
        name)

    html = get_one_page(url, sflag=flag)
    soup = BeautifulSoup(html, 'html5lib')
    result_list = soup.find('div', 'result-list')
    fiction_lst = result_list.find_all('a', 'result-game-item-title-link')
    fiction_url = fiction_lst[0].get('href')
    fiction_name = fiction_lst[0].text.strip()
    fiction_img = soup.find('img')['src']
    fiction_comment = soup.find_all('p', 'result-game-item-desc')[0].text
    fiction_author = soup.find_all(
        'div', 'result-game-item-info')[0].find_all('span')[1].text.strip()

    if fiction_name is None:
        print('{} 小說不存在!!!'.format(name))
        raise Exception('{} 小說不存在!!!'.format(name))

    fictions = (fiction_name, fiction_url, fiction_img, fiction_author,
                fiction_comment)
    save_fiction_url(fictions)

    return fiction_name, fiction_url


def get_fiction_list(fiction_name, fiction_url, flag=1):
    # 獲取小說列表
    fiction_html = get_one_page(fiction_url, sflag=flag)
    soup = BeautifulSoup(fiction_html, 'html5lib')
    dd_lst = soup.find_all('dd')
    fiction_lst = []
    fiction_url_tmp = fiction_url.split('/')[-2]
    for item in dd_lst[12:]:
        fiction_lst_name = item.a.text.strip()
        fiction_lst_url = item.a['href'].split('/')[-1].strip('.html')
        fiction_real_url = fiction_url + fiction_lst_url + '.html'
        lst = (fiction_name, fiction_url_tmp, fiction_lst_url,
               fiction_lst_name, fiction_real_url)
        fiction_lst.append(lst)
    return fiction_lst


def get_fiction_content(fiction_url, flag=1):
    fiction_id = fiction_url.split('/')[-2]
    fiction_conntenturl = fiction_url.split('/')[-1].strip('.html')
    fc = Fiction_Content().query.filter_by(
        fiction_id=fiction_id, fiction_url=fiction_url).first()
    if fc is None:
        print('此章節不存在,需下載')
        html = get_one_page(fiction_url, sflag=flag)
        soup = BeautifulSoup(html, 'html5lib')
        content = soup.find(id='content')
        f_content = str(content)
        save_fiction_content(fiction_url, f_content)
    else:
        print('此章節已存在,無需下載!!!')


def save_fiction_url(fictions):
    args = (fictions[0], fictions[1].split('/')[-2], fictions[1], fictions[2],
            fictions[3], fictions[4])
    insert_fiction(*args)


def save_fiction_lst(fiction_lst):
    total = len(fiction_lst)
    if Fiction().query.filter_by(fiction_id=fiction_lst[0][1]) == total:
        print('此小說已存在!!,無需下載')
        return 1
    for item in fiction_lst:
        print('此章節列表不存在,需下載')
        insert_fiction_lst(*item)


def save_fiction_content(fiction_url, fiction_content):
    fiction_id = fiction_url.split('/')[-2]
    fiction_conntenturl = fiction_url.split('/')[-1].strip('.html')
    insert_fiction_content(fiction_conntenturl, fiction_content, fiction_id)


def down_fiction_lst(f_name):
    # 1.搜索小說
    args = search_fiction(f_name, flag=0)
    # 2.獲取小說目錄列表
    fiction_lst = get_fiction_list(*args, flag=0)
    # 3.保存小說目錄列表
    flag = save_fiction_lst(fiction_lst)
    print('下載小說列表完成!!')


def down_fiction_content(f_url):
    get_fiction_content(f_url, flag=0)
    print('下載章節完成!!')

 

存儲小說信息及相關爬蟲公共工具

spider_tools.pyhtml5

# -*- coding: utf-8 -*-
# @Author: longzx
# @Date: 2018-03-08 20:56:26
"""
爬蟲經常使用工具包
將一些通用的功能進行封裝
"""
from functools import wraps
from random import choice, randint
from time import ctime, sleep, time

import pymysql
import requests
from requests.exceptions import RequestException
from app.models import Fiction, Fiction_Content, Fiction_Lst
from app import db

#請求頭
headers = {}
headers[
    'Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8'
headers['Accept-Encoding'] = 'gzip, deflate, br'
headers['Accept-Language'] = 'zh-CN,zh;q=0.9'
headers['Connection'] = 'keep-alive'
headers['Upgrade-Insecure-Requests'] = '1'

agents = [
    "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.140 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/22.0.1207.1 Safari/537.1",
    "Mozilla/5.0 (X11; CrOS i686 2268.111.0) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.57 Safari/536.11",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.6 (KHTML, like Gecko) Chrome/20.0.1092.0 Safari/536.6",
    "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.6 (KHTML, like Gecko) Chrome/20.0.1090.0 Safari/536.6",
    "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/19.77.34.5 Safari/537.1",
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/536.5 (KHTML, like Gecko) Chrome/19.0.1084.9 Safari/536.5",
    "Mozilla/5.0 (Windows NT 6.0) AppleWebKit/536.5 (KHTML, like Gecko) Chrome/19.0.1084.36 Safari/536.5",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
    "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_0) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
    "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1062.0 Safari/536.3",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1062.0 Safari/536.3",
    "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
    "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
    "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.0 Safari/536.3",
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.24 (KHTML, like Gecko) Chrome/19.0.1055.1 Safari/535.24",
    "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/535.24 (KHTML, like Gecko) Chrome/19.0.1055.1 Safari/535.24",
    'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.140 Safari/537.36'
]


def get_one_page(url, proxies=None, sflag=1):
    #獲取給定的url頁面
    while True:
        try:
            headers['User-Agent'] = choice(agents)
            # 控制爬取速度
            if sflag:
                print('放慢下載速度。。。。。。')
                sleep(randint(1, 3))

            print('正在下載:', url)
            if proxies:
                r = requests.get(
                    url, headers=headers, timeout=5, proxies=proxies)
            else:
                r = requests.get(url, headers=headers, timeout=5)

        except Exception as r:
            print('errorinfo:', r)
            continue
        else:
            if r.status_code == 200:
                r.encoding = r.apparent_encoding
                print('爬取成功!!!')
                return r.text
            else:
                continue


def insert_fiction(fiction_name, fiction_id, fiction_real_url, fiction_img,
                   fiction_author, fiction_comment):
    fiction = Fiction().query.filter_by(fiction_id=fiction_id).first()
    if fiction is None:
        fiction = Fiction(
            fiction_name=fiction_name,
            fiction_id=fiction_id,
            fiction_real_url=fiction_real_url,
            fiction_img=fiction_img,
            fiction_author=fiction_author,
            fiction_comment=fiction_comment)
        db.session.add(fiction)
        db.session.commit()
    else:
        print('記錄已存在,無需下載')


def insert_fiction_lst(fiction_name, fiction_id, fiction_lst_url,
                       fiction_lst_name, fiction_real_url):
    fl = Fiction_Lst().query.filter_by(
        fiction_id=fiction_id, fiction_lst_url=fiction_lst_url).first()
    if fl is None:
        fl = Fiction_Lst(
            fiction_name=fiction_name,
            fiction_id=fiction_id,
            fiction_lst_url=fiction_lst_url,
            fiction_lst_name=fiction_lst_name,
            fiction_real_url=fiction_real_url)
        db.session.add(fl)
        db.session.commit()
    else:
        print('此章節已存在!!!')


def insert_fiction_content(fiction_url, fiction_content, fiction_id):
    fc = Fiction_Content(
        fiction_id=fiction_id,
        fiction_content=fiction_content,
        fiction_url=fiction_url)
    db.session.add(fc)
    db.session.commit()

 

 Flask 部分

models.pymysql

# -*- coding: utf-8 -*-
# @Author: longzx
# @Date: 2018-03-19 23:44:05
# @cnblog:http://www.cnblogs.com/lonelyhiker/

from . import db


class Fiction(db.Model):
    __tablename__ = 'fiction'
    __table_args__ = {"useexisting": True}
    id = db.Column(db.Integer, primary_key=True)
    fiction_name = db.Column(db.String)
    fiction_id = db.Column(db.String)
    fiction_real_url = db.Column(db.String)
    fiction_img = db.Column(db.String)
    fiction_author = db.Column(db.String)
    fiction_comment = db.Column(db.String)

    def __repr__(self):
        return '<fiction %r> ' % self.fiction_name


class Fiction_Lst(db.Model):
    __tablename__ = 'fiction_lst'
    __table_args__ = {"useexisting": True}
    id = db.Column(db.Integer, primary_key=True)
    fiction_name = db.Column(db.String)
    fiction_id = db.Column(db.String)
    fiction_lst_url = db.Column(db.String)
    fiction_lst_name = db.Column(db.String)
    fiction_real_url = db.Column(db.String)

    def __repr__(self):
        return '<fiction_lst %r> ' % self.fiction_name


class Fiction_Content(db.Model):
    __tablename__ = 'fiction_content'
    __table_args__ = {"useexisting": True}
    id = db.Column(db.Integer, primary_key=True)
    fiction_url = db.Column(db.String)
    fiction_content = db.Column(db.String)
    fiction_id = db.Column(db.String)

 

views.pygit

 

# -*- coding: utf-8 -*-
# @Author: longzx
# @Date: 2018-03-20 20:45:37
# @cnblog:http://www.cnblogs.com/lonelyhiker/

from flask import render_template, request, redirect, url_for

from app.xiaoshuo.xiaoshuoSpider import down_fiction_lst, down_fiction_content
from app.xiaoshuo.spider_tools import get_one_page
from . import fiction
import requests
from bs4 import BeautifulSoup
from app.models import Fiction, Fiction_Content, Fiction_Lst
from app import db


@fiction.route('/book/')
def book_index():
    fictions = Fiction().query.all()
    print(fictions)
    return render_template('fiction_index.html', fictions=fictions)


@fiction.route('/book/list/<f_id>')
def book_lst(f_id):
    # 1.獲取所有小說
    fictions = Fiction().query.all()

    for fiction in fictions:
        if fiction.fiction_id == f_id:
            break
    print(fiction)
    # 2.獲取小說章節列表
    fiction_lst = Fiction_Lst().query.filter_by(fiction_id=f_id).all()
    if len(fiction_lst) == 0:
        print(fiction.fiction_name)
        down_fiction_lst(fiction.fiction_name)
        fiction_lst = Fiction_Lst().query.filter_by(fiction_id=f_id).all()
        if len(fiction_lst) == 0:
            return render_template(
                'fiction_error.html', message='暫無此章節信息,請從新刷新下')

    fiction_name = fiction_lst[0].fiction_name
    return render_template(
        'fiction_lst.html',
        fictions=fictions,
        fiction=fiction,
        fiction_lst=fiction_lst,
        fiction_name=fiction_name)


@fiction.route('/book/fiction/')
def fiction_content():
    fic_id = request.args.get('id')
    f_url = request.args.get('f_url')
    print('獲取書本 id={} url={}'.format(fic_id, f_url))

    # 獲取上一章和下一章信息
    fiction_lst = Fiction_Lst().query.filter_by(
        fiction_id=fic_id, fiction_lst_url=f_url).first()
    id = fiction_lst.id
    fiction_name = fiction_lst.fiction_lst_name
    pre_id = id - 1
    next_id = id + 1
    fiction_pre = Fiction_Lst().query.filter_by(
        id=pre_id).first().fiction_lst_url
    fiction_next = Fiction_Lst().query.filter_by(
        id=next_id).first().fiction_lst_url
    f_id = fic_id
    # 獲取具體章節內容
    fiction_contents = Fiction_Content().query.filter_by(
        fiction_id=fic_id, fiction_url=f_url).first()
    if fiction_contents is None:
        print('fiction_real_url={}'.format(fiction_lst.fiction_real_url))

        down_fiction_content(fiction_lst.fiction_real_url)
        print('fiction_id={} fiction_url={}'.format(fic_id, f_url))
        fiction_contents = Fiction_Content().query.filter_by(
            fiction_id=fic_id, fiction_url=f_url).first()
    if fiction_contents is None:
        return render_template('fiction_error.html', message='暫無此章節信息,請從新刷新下')
    print('fiction_contents=', fiction_contents)
    fiction_content = fiction_contents.fiction_content
    print('sdfewf')
    return render_template(
        'fiction.html',
        f_id=f_id,
        fiction_name=fiction_name,
        fiction_pre=fiction_pre,
        fiction_next=fiction_next,
        fiction_content=fiction_content)


@fiction.route('/book/search/')
def f_search():
    f_name = request.args.get('f_name')
    print('收到輸入:', f_name)
    # 1.查詢數據庫存在記錄
    fictions = Fiction().query.all()
    fiction = None
    for x in fictions:
        if f_name in x.fiction_name:
            fiction = x
            break

    if fiction:
        fiction_lst = Fiction_Lst().query.filter_by(
            fiction_id=fiction.fiction_id).all()
        if len(fiction_lst) == 0:
            down_fiction_lst(f_name)
            fictions = Fiction().query.all()
            print('fictions=', fictions)
            for fiction in fictions:
                if f_name in fiction.fiction_name:
                    break

            if f_name not in fiction.fiction_name:
                return render_template('fiction_error.html', message='暫無此小說信息')

            fiction_lst = Fiction_Lst().query.filter_by(
                fiction_id=fiction.fiction_id).all()
            return render_template(
                'fiction_lst.html',
                fictions=fictions,
                fiction=fiction,
                fiction_lst=fiction_lst,
                fiction_name=fiction.fiction_name)
        else:
            fiction_name = fiction_lst[0].fiction_name
            return render_template(
                'fiction_lst.html',
                fictions=fictions,
                fiction=fiction,
                fiction_lst=fiction_lst,
                fiction_name=fiction_name)
    else:
        down_fiction_lst(f_name)
        fictions = Fiction().query.all()
        print('fictions=', fictions)
        for fiction in fictions:
            if f_name in fiction.fiction_name:
                break

        if f_name not in fiction.fiction_name:
            return render_template('fiction_error.html', message='暫無此小說信息')

        fiction_lst = Fiction_Lst().query.filter_by(
            fiction_id=fiction.fiction_id).all()
        return render_template(
            'fiction_lst.html',
            fictions=fictions,
            fiction=fiction,
            fiction_lst=fiction_lst,
            fiction_name=fiction.fiction_name)

 

templatesgithub

fiction_index.html  小說首頁web

{% extends "base.html" %}

{% block styles %}
{{super()}}
<link href="{{url_for('static',filename='css/xscss.css')}}" rel="stylesheet">
{% endblock %}
{% block content %}

 <!-- 搜索欄 -->
 <div class="container-fluid">
    <div class="row">
      <div class=" col-md-offset-7 col-md-4">
        <form class="navbar-form navbar-right" role="search" action="/book/search/">
            <div class="form-group">
                <input name='f_name' type="text" class="form-control" placeholder="輸入你喜歡的小說名字">
            </div>
            <button type="submit" class="btn btn-primary">查找你喜歡的小說吧</button>
        </form>
      </div>
      <div class="col-md-1"></div>
    </div>
</div>
<!-- 搜索欄結束 -->
<div class="box_section">
  <!-- 文章主題開始 -->
  <div class="container-fluid">
      <div class="row">
        <!-- row begin -->
        
        <!-- 左邊欄 -->
        <div class=" col-md-3 ">
            <br>
            <div class="box_cons">
                <div class="panel panel-success">
                    <div class="panel-heading">熱門小說</div>
                    <ul class="list-group">
                        {% for fiction in fictions %}
                        <li class="list-group-item"><a href="/book/list/{{fiction.fiction_id}}" class="pull-left">{{fiction.fiction_name}}</a><p class="text-right">{{fiction.fiction_author}}</p></li>
                        {% endfor %}
                    </ul>
                </div>
            </div>
        </div>  
        <!--右邊欄  -->
        <div class="col-md-8">
            <div class="container-fluid">
                <hr>
                {% for fiction in fictions %}
                {% if loop.index0 % 2 == 0 %}
                <div class="row">
                {% endif %}
                    <div class="col-md-6 pull-left">
                        <div class="media">
                            <a href="#" class="pull-left"><img src="{{fiction.fiction_img}}" width="100px" height="160px" /></a>
                            <div class="media-body">
                                <div class="pull-left"><a href="/book/list/{{fiction.fiction_id}}">{{fiction.fiction_name}}</a></div>
                                <div class="pull-right">{{fiction.fiction_author}}</div>
                                <hr>
                                {{fiction.fiction_comment}}...
                            </div>
                        </div>
                    </div>
                {% if loop.index % 2 == 0 or loop.last %}   
                </div>
                <hr>
                {% endif %}
                {% endfor %}
            </div>
        </div>

        <div class="col-md-1"></div>
        <!-- row end -->
  
      </div>
    </div>
    <!-- 文章主題結束 -->
</div>
    {{super()}}
    {% endblock %}

   
View Code

fiction_lst.html  小說章節列表sql

{% extends "base.html" %}
{% block styles %}
{{super()}}
<link href="{{url_for('static',filename='css/xscss.css',_external=True)}}" rel="stylesheet">
{% endblock %}
{% block content %}

<div class="container-fluid">
    <div class="row">
            <!-- row begin -->
        <div class="col-md-2">
            <div class="box_cons">
                <div class="panel panel-success">
                    <div class="panel-heading">熱門小說</div>
                    <ul class="list-group">
                        {% for fiction in fictions %}
                        <li class="list-group-item"><a href="/book/list/{{fiction.fiction_id}}" class="pull-left">{{fiction.fiction_name}}</a><p class="text-right">{{fiction.fiction_author}}</p></li>
                        {% endfor %}
                    </ul>
                </div>
            </div>
        </div>
          
        <div class=" col-md-9">
        
            <div class="box_con">
                <div class="list-group-item">
                    <div class="media">
                        <a href="#" class="pull-left"><img src="{{fiction.fiction_img}}" width="160px" height="200px" /></a>
                        <div class="media-body">
                            <div ><h3><b>{{fiction.fiction_name}}</b></h3></div>
                            <div ><p>&nbsp;&nbsp;&nbsp;&nbsp;者:{{fiction.fiction_author}}</p></div>
                            <hr>
                            {{fiction.fiction_comment}}...
                        </div>
                    </div>
                </div>
            </div>
            <hr>
            <div class="box_con">
                <div id="list">
                    <dl >
                        <dt>《{{fiction_name}}》最新章節</dt>
                        {% for item in fiction_lst[-9:] %}
                        <dd><a href="/book/fiction/?id={{item.fiction_id}}&f_url={{item.fiction_lst_url}}">{{item.fiction_lst_name}}</a></dd>
                        {% endfor %}
                    </dl>
                </div>
            </div>
            
            <div class="box_con">
                <div id="list">
                    <dl >
                        <dt>《{{fiction_name}}》章節目錄</dt>
                        {% for item in fiction_lst %}
                        <dd><a href="/book/fiction/?id={{item.fiction_id}}&f_url={{item.fiction_lst_url}}">{{item.fiction_lst_name}}</a></dd>
                        {% endfor %}
                    </dl>
                </div>
            </div>

        </div>

        <!-- row end -->
    </div>
</div>

{{super()}}
{% endblock%}
View Code

fiction.html   小說章節內容數據庫

{% extends "base.html" %}
{% block styles %}
{{super()}}
<link href="{{url_for('static',filename='css/xscss.css',_external=True)}}" rel="stylesheet">
{% endblock %}
{% block content %}

<div class="container-fluid">
    <div class="row">
        <!-- row begin -->
        <div class="col-md-offset-1 col-md-9 col-md-offset-1">
            <div class="content_read">
                <div class="box_con">
                    <div class="bookname">
                        <h1><p class="text-center">{{fiction_name}}</p></h1>
                    </div>
                    <div class="bottem1">
                        <p class="text-center">
                        <a id="pager_prev" href="/book/fiction/?id={{f_id}}&f_url={{fiction_pre}}" target="_top" class="pre">上一章</a><a id="pager_current" href="/book/list/{{f_id}}" target="_top" title="" class="back">章節列表</a><a id="pager_next" href="/book/fiction/?id={{f_id}}&f_url={{fiction_next}}" target="_top" class="next">下一章</a>
                        </p>
                    </div>
                    <div class="lm"></div>
                    {{fiction_content|safe}}
                </div>
            </div>
        </div>                              
    <!-- row end -->
    </div>
</div>

{{super()}}
{% endblock%}
View Code

 

 

3.代碼git地址

代碼git地址flask

相關文章
相關標籤/搜索