爬蟲基礎——示例:微信登錄收發消息

   原理

1. URL  https://wx.qq.com/

          1.1  獲取uuid:https://login.wx.qq.com/jslogin?<(時間戳)>

      response 返回  ==> window.QRLogin.code = 200; window.QRLogin.uuid = "QaL1LOI9WQ==";

    1.2 使用uuid生成二維碼 <img src="https://login.weixin.qq.com/qrcode/QaL1LOI9WQ==">

2. 長輪詢,等待用戶掃碼。https://login.wx.qq.com/cgi-bin/mmwebwx-bin/login?<(uuid    tip=1   時間戳)>

          2.1 若是沒有人掃碼,response 返回 window.code=408;  繼續輪詢

     2.2 有人掃碼,response 返回 window.code=201;window.userAvatar = <頭像>,等待用戶確認  https://login.wx.qq.com/cgi-bin/mmwebwx-bin/login?<(uuid    tip=0   時間戳)>

          2.3 確認登錄,https://login.wx.qq.com/cgi-bin/mmwebwx-bin/login?<uuid    tip=0   時間戳>的response 返回   window.code=200;  window.redirect_uri="<地址>";   獲取登錄cookie   c1

 

          2.4  獲取憑證。  window.redirect_uri + &fun=new&version=v2   返回憑證  ,再次獲取cookie c2

3. 獲取用戶信息。https://wx.qq.com/cgi-bin/mmwebwx-bin/webwxinit?<憑證>  response 返回用戶信息 User  和 SyncKey

4. 獲取全部聯繫人。https://wx.qq.com/cgi-bin/mmwebwx-bin/webwxgetcontact?<憑證>   ,該url帶上 cookie(c1.update(c2))    response 返回全部聯繫人

5. 發送消息。post 發送 https://wx.qq.com/cgi-bin/mmwebwx-bin/webwxsendmsg?<憑證>

                     因爲請求體是一個字典嵌套字典類型,

                     1. 若是字典中沒有中文,發送的時候,直接使用 json 參數;

                     2. 若是有中文,要把它變成二進制類型 json.dumps(send_data, ensure_ascii=False).encode(encoding="utf-8"),使用 data參數,請求頭加 headers={"Content-Type": "application/json"},

6. 接收消息。也是長輪詢。https://webpush.wx.qq.com/cgi-bin/mmwebwx-bin/synccheck?<憑證+SyncKey> ,攜帶 cookie(c1.update(c2)),監聽消息

                 6.1 若是返回  window.synccheck={retcode:"0",selector:"0"},則沒有消息

                 6.2 若是返回  window.synccheck={retcode:"0",selector:"2"} 則有消息。 再發送 post https://wx.qq.com/cgi-bin/mmwebwx-bin/webwxsync?<憑證+synckey>    response 返回消息  和新的 SyncKey

 

   代碼實現

# Routes for the WeChat web-login demo. Every regex route is anchored with
# both ``^`` and ``$`` so that it only matches its exact path.
urlpatterns = [
    path('admin/', admin.site.urls),
    re_path(r'^login/$', views.login, name="login"),
    re_path(r'^index/$', views.index, name="index"),
    re_path(r'^check_login/$', views.check_login, name="check_login"),
    # The leading ``^`` was missing here, unlike every other route in this list.
    re_path(r'^contact_all/$', views.contact_all, name="contact_all"),
    re_path(r'^send_msg/$', views.send_msg, name="send_msg"),
    re_path(r'^check_msg/$', views.check_msg, name="check_msg"),
]
urls.py
from django.shortcuts import render, HttpResponse
import requests, time, re, json
from bs4 import BeautifulSoup
# Create your views here.


def login(req):
    '''
    Render the page that shows the login QR code.

    Asks the jslogin endpoint for a new uuid, stores it in the session under
    ``qcode`` and hands it to the ``login.html`` template.

    :param req: incoming request; its session is written to.
    :return: the rendered ``login.html`` response.
    '''
    # The trailing ``_`` query parameter carries a millisecond timestamp, e.g.
    # https://login.wx.qq.com/jslogin?appid=wx782c26e4c19acffb&...&lang=zh_CN&_=1532602804064
    now_ms = int(time.time() * 1000)
    jslogin_url = "https://login.wx.qq.com/jslogin?appid=wx782c26e4c19acffb&redirect_uri=https%3A%2F%2Fwx.qq.com%2Fcgi-bin%2Fmmwebwx-bin%2Fwebwxnewloginpage&fun=new&lang=zh_CN&_={0}".format(now_ms)

    reply = requests.get(url=jslogin_url)

    # The uuid is embedded in a JavaScript assignment in the response body.
    uuid_matches = re.findall('window.QRLogin.uuid = "(.*)";', reply.text)
    uuid = uuid_matches[0]

    req.session['qcode'] = uuid
    return render(req, "login.html", {"qcode": uuid})


def check_login(req):
    '''
    Poll the WeChat login endpoint once and report the scan status as JSON.

    The JSON body is ``{"code": ..., "data": ...}`` where code is:
    408 - neither of the markers below was found in the reply (data is None);
    201 - the reply contains ``window.code=201``; data is the avatar string;
    200 - the reply contains ``window.code=200``; cookies and the ticket
          fields have been stored in the session.

    :param req: incoming request; reads ``TIP`` from the query string and
        ``qcode`` from the session.
    :return: HttpResponse carrying the JSON-encoded status.
    '''
    status = 408
    payload = None

    now_ms = int(time.time() * 1000)
    tip = req.GET.get('TIP')
    poll_url = "https://login.wx.qq.com/cgi-bin/mmwebwx-bin/login?loginicon=true&uuid={0}&tip={1}&r=694021981&_={2}".format(
        req.session['qcode'], tip, now_ms
    )

    poll_reply = requests.get(url=poll_url)
    body = poll_reply.text
    print(body)

    if "window.code=201" in body:
        # Scanned but not yet confirmed: pass the avatar back to the page.
        payload = re.findall("window.code=201;window.userAvatar = '(.*)';", body)[0]
        status = 201
    elif "window.code=200" in body:
        # Confirmed: the reply names the address to fetch the ticket from.
        target = re.findall('window.redirect_uri="(.*)";', body)[0]

        # Cookies issued with the confirmation reply.
        req.session['login_cookie'] = poll_reply.cookies.get_dict()

        ticket_reply = requests.get(
            url="{0}&fun=new&version=v2&lang=zh_CN".format(target)
        )

        # Cookies issued with the ticket reply.
        req.session['ticket_cookie'] = ticket_reply.cookies.get_dict()

        # Each child of the <error> element becomes one ticket field,
        # keyed by its tag name.
        error_node = BeautifulSoup(ticket_reply.text, "html.parser").find(name="error")
        req.session['ticket_dict'] = {
            child.name: child.text for child in error_node.children
        }

        status = 200
        req.session["is_login"] = True

    return HttpResponse(json.dumps({"code": status, "data": payload}))


def index(req):
    '''
    Fetch the initial account data and render the chat page.

    Posts the stored ticket fields to webwxinit, keeps ``User`` and
    ``SyncKey`` from the reply in the session, and renders ``index.html``
    with the whole reply as ``user_data``.

    :param req: incoming request; reads ``ticket_dict`` from the session.
    :return: the rendered ``index.html`` response.
    '''
    ticket = req.session['ticket_dict']

    # Example:
    # https://wx.qq.com/cgi-bin/mmwebwx-bin/webwxinit?r=617941360&pass_ticket=VSSitrEOjrKhkJwzrepBNJZI7gz98fJcU3zLaKoRnYaaBMQF1XPJ76v%252FXUXXm5f4
    init_url = "https://wx.qq.com/cgi-bin/mmwebwx-bin/webwxinit?r=617941360&pass_ticket={0}".format(
        ticket['pass_ticket']
    )
    base_request = {
        "DeviceID": "e641097429558556",
        "Sid": ticket['wxsid'],
        "Skey": ticket['skey'],
        "Uin": ticket['wxuin'],
    }

    reply = requests.post(url=init_url, json={"BaseRequest": base_request})
    reply.encoding = "utf-8"

    user_data = json.loads(reply.text)

    # Kept for later requests: the sender identity and the sync key used
    # when checking for new messages.
    req.session['current_user_info'] = user_data['User']
    req.session['init_sync_key'] = user_data['SyncKey']

    return render(req, "index.html", {"user_data": user_data})



def contact_all(req):
    '''
    Fetch the full contact list and render it.

    :param req: incoming request; reads ``login_cookie``, ``ticket_cookie``
        and ``ticket_dict`` from the session.
    :return: the rendered ``contact_all.html`` response, with the decoded
        reply passed as ``user_dict``.
    '''
    now_ms = int(time.time() * 1000)

    # Login cookies first, then ticket cookies, so the latter win on clashes.
    cookies = {}
    for cookie_key in ('login_cookie', 'ticket_cookie'):
        cookies.update(req.session[cookie_key])

    ticket = req.session['ticket_dict']
    contact_url = "https://wx.qq.com/cgi-bin/mmwebwx-bin/webwxgetcontact?lang=zh_CN&pass_ticket={0}&r={1}&seq=0&skey={2}".format(
        ticket['pass_ticket'], now_ms, ticket['skey']
    )

    reply = requests.get(url=contact_url, cookies=cookies)
    reply.encoding = "utf-8"

    return render(req, "contact_all.html", {"user_dict": json.loads(reply.text)})

def send_msg(req):
    '''
    Send one text message to the recipient named in the query string.

    :param req: incoming request; reads ``recv`` and ``content`` from the
        query string and the stored cookies, ticket fields and current user
        from the session.
    :return: a fixed ``HttpResponse("....")``; the remote reply is only printed.
    '''
    stamp = str(int(time.time() * 1000))

    to_user = req.GET.get("recv")
    text = req.GET.get("content")

    cookies = {}
    for cookie_key in ('login_cookie', 'ticket_cookie'):
        cookies.update(req.session[cookie_key])

    ticket = req.session['ticket_dict']
    base_request = {
        "DeviceID": "e024995249607937",
        "Sid": ticket['wxsid'],
        "Skey": ticket['skey'],
        "Uin": ticket['wxuin'],
    }
    # The same millisecond timestamp is used for both message id fields.
    message = {
        'ClientMsgId': stamp,
        'Content': text,
        'FromUserName': req.session["current_user_info"]['UserName'],
        'LocalID': stamp,
        'ToUserName': to_user,
        'Type': 1,
    }
    payload = {"BaseRequest": base_request, "Msg": message, "Scene": 0}

    # Serialise by hand with ensure_ascii=False so non-ASCII text is sent as
    # UTF-8 bytes rather than \u escapes; that requires ``data=`` plus an
    # explicit Content-Type header instead of ``json=``.
    raw_body = json.dumps(payload, ensure_ascii=False).encode(encoding="utf-8")

    send_url = "https://wx.qq.com/cgi-bin/mmwebwx-bin/webwxsendmsg?lang=zh_CN&pass_ticket={0}".format(
        ticket['pass_ticket']
    )
    reply = requests.post(
        url=send_url,
        data=raw_body,
        headers={"Content-Type": "application/json"},
        cookies=cookies,
    )

    print(reply.text)

    return HttpResponse("....")

def check_msg(req):
    '''
    Check for new messages, or fetch them, depending on ``PENDING``.

    ``PENDING == "1"``: call synccheck and report whether messages are waiting.
    ``PENDING == "0"``: call webwxsync, store the new SyncKey in the session
    and return the received messages.

    The JSON body is ``{"code": ..., "data": ...}`` where code is:
    201 - nothing to fetch (also the result for any other ``PENDING`` value);
    200 - synccheck reported selector "2"; the caller should fetch;
    202 - messages fetched; data is the messages joined with ``||``;
    500 - the synccheck reply did not have the expected ``retcode:"0"`` form.
          The page script in this file handles only 200/201/202, so it stops
          polling on this code.

    :param req: incoming request; reads ``PENDING`` from the query string and
        the stored cookies, ticket fields and sync key from the session.
    :return: HttpResponse carrying the JSON-encoded result.
    '''
    res_data = {"code": 201, "data": None}

    ctime = str(int(time.time() * 1000))

    all_cookie = {}
    all_cookie.update(req.session['login_cookie'])
    all_cookie.update(req.session['ticket_cookie'])

    pending = req.GET.get("PENDING")

    if pending == "1":
        base_uri = "https://webpush.wx.qq.com/cgi-bin/mmwebwx-bin/synccheck"

        # Example of the final URL:
        # https://webpush.wx.qq.com/cgi-bin/mmwebwx-bin/synccheck?r={0}&skey={1}&sid={2}&uin={3}&deviceid=e971783524980667&synckey=1_684933101%7C2_684933158%7C3_684933113%7C11_684932930%7C201_1532754877%7C1000_1532754362%7C1001_1532733674&_={5}
        # The synckey parameter is the stored key list flattened to
        # "Key_Val" pairs joined with "|".
        sync_key = "|".join(
            "%s_%s" % (item['Key'], item['Val'])
            for item in req.session['init_sync_key']['List']
        )

        ticket = req.session['ticket_dict']
        param_data = {
            "r": ctime,
            "skey": ticket['skey'],
            "sid": ticket['wxsid'],
            "uin": ticket['wxuin'],
            "deviceid": "e446581143835818",
            "synckey": sync_key,
            "_": ctime
        }

        res1 = requests.get(
            url=base_uri,
            params=param_data,
            cookies=all_cookie
        )

        print(res1.text)

        selectors = re.findall('window.synccheck={retcode:"0",selector:"(.*)"}', res1.text)
        if not selectors:
            # Previously this case raised IndexError on ``[0]``; report it
            # explicitly instead of failing with an unhandled exception.
            res_data["code"] = 500
            res_data["data"] = "unexpected synccheck response"
        elif selectors[0] == "2":
            res_data["code"] = 200
            res_data["data"] = "get msg"

    elif pending == "0":
        print(type(pending), "--->", pending)
        ticket = req.session['ticket_dict']
        base_get_msg_url = "https://wx.qq.com/cgi-bin/mmwebwx-bin/webwxsync?sid={0}&skey={1}&lang=zh_CN&pass_ticket={2}"
        get_msg_url = base_get_msg_url.format(ticket['wxsid'], ticket['skey'], ticket['pass_ticket'])
        msg_data = {
            "BaseRequest": {
                "DeviceID": "e994644199968030",
                "Sid": ticket['wxsid'],
                "Skey": ticket['skey'],
                "Uin": ticket['wxuin'],
            },
            "SyncKey": req.session['init_sync_key'],
            "rr": 545089680
        }

        # NOTE(review): unlike synccheck above, this request is sent without
        # the merged cookies - confirm whether that is intentional.
        res1 = requests.post(
            url=get_msg_url,
            json=msg_data
        )

        res1.encoding = "utf-8"

        friend_data = json.loads(res1.text)

        # The reply carries the SyncKey to use for the next check.
        req.session['init_sync_key'] = friend_data['SyncKey']

        message_list = []
        for msg in friend_data['AddMsgList']:
            message = msg["FromUserName"] + "--->" + msg['ToUserName'] + ":" + msg['Content']
            message_list.append(message)
            print(msg["FromUserName"], "--->", msg['ToUserName'], ":", msg['Content'])

        res_data["code"] = 202
        res_data["data"] = "||".join(message_list)

    return HttpResponse(json.dumps(res_data))
views.py
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>login</title>
</head>
<body>

<img src="https://login.weixin.qq.com/qrcode/{{ qcode }}" alt="">

<script type="text/javascript" src="/static/jquery-1.12.4.js"></script>
<script>
    // Value sent as the TIP query parameter: 1 until a scan is reported, 0 afterwards.
    var TIP = 1;

    // Poll the check_login view and react to the status code it returns.
    function checkLogin(){
        $.ajax({
            // Trailing slash so the request matches the ^check_login/$ route
            // directly, like the other URLs used by these pages.
            url: "/check_login/",
            type: "get",
            data: {"TIP": TIP},
            dataType: "JSON",
            success: function (args) {
                console.log(args.code);
                if(args.code == 408){
                    // Nobody has scanned yet: poll again.
                    checkLogin();
                }else if (args.code == 201) {
                    // Scanned, waiting for confirmation: show the returned
                    // avatar in place of the QR code and switch TIP to 0.
                    $("img").attr("src", args.data);
                    TIP = 0;
                    checkLogin();
                }else if(args.code == 200){
                    // Confirmed: go to the chat page.
                    window.location.href = "/index/"
                }

            }
        })
    }

    checkLogin();

</script>

</body>
</html>
login.html
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>Title</title>
</head>
<body>

<!-- Current user's nickname and the entries of user_data.ContactList, with a link to the full contact page. -->
<div>
    <h3>我的信息,{{ user_data.User.NickName }}</h3>
    <ol>
        {% for info in user_data.ContactList %}
            <li>{{ info.NickName }}</li>
        {% endfor %}
    </ol>
    <a href="/contact_all/">更多聯繫人</a>

</div>
    <!-- One heading per entry of user_data.MPSubscribeMsgList, each followed by links to its MPArticleList items. -->
    <h3>公衆號信息</h3>
    {% for msg in user_data.MPSubscribeMsgList %}
    <h4>{{ msg.NickName }}</h4>
        <ol>
            {% for item in msg.MPArticleList %}
                <li><a href="{{ item.Url }}">{{ item.Title }}</a></li>
            {% endfor %}
        </ol>
    {% endfor %}
</body>
</html>
index.html
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>Title</title>
</head>
<body>

<div><h3>發消息</h3>
<p>
    收件人:<input id="recv" type="text">
</p>
    <p>
        內容:<input id="content" type="text">
    </p>
    <p>
        <button id="btn">發送</button>
    </p>
</div>
<div><h3>收消息</h3>
<ol id="msg">

</ol>
</div>

<div>
<h3>聯繫人</h3>
<ol>
    {% for user in user_dict.MemberList %}
    <li>{{ user.NickName }}-------------->{{ user.UserName }}</li>
    {% endfor %}
</ol>
</div>
<script type="application/javascript" src="/static/jquery-1.12.4.js"></script>
<script>
    $(function () {
        // Send the typed message to the typed recipient through the send_msg view.
        $("#btn").click(function () {
            console.log("test");
            var recv = $("#recv").val();
            var content = $("#content").val();
            $.ajax({
                url: "/send_msg/",
                type: "get",
                data: {"recv": recv, "content": content},
                dataType: "JSON",
                success: function (args) {

                }
            })
        });

        // Value sent as the PENDING query parameter: 1 = check whether
        // messages are waiting, 0 = fetch them. Declared with var so it no
        // longer leaks as an implicit global.
        var PENDING = 1;

        // Poll the check_msg view and react to the status code it returns.
        function checkMsg() {
            $.ajax({
                url: "/check_msg/",
                type: "get",
                data: {"PENDING": PENDING},
                dataType: "JSON",
                success: function (args) {
                    if(args.code == 200){
                        // Messages are waiting: the next request fetches them.
                        PENDING = 0;

                        checkMsg();
                    }else if (args.code == 201) {
                        // Nothing new: keep checking.
                        PENDING = 1;

                        console.log("pending...");
                        checkMsg();
                    }else if (args.code == 202){
                        // Insert the received text as text, not markup:
                        // message content comes from other users and must
                        // not be parsed as HTML.
                        $("#msg").append($("<li>").text(args.data));
                        PENDING = 1;

                        console.log("pending...");
                        checkMsg();
                    }

                }
            })

        }
        checkMsg();
    })
</script>

</body>
</html>
contact_all.html
相關文章
相關標籤/搜索