自動登陸Github --網絡爬蟲

一、向Github登陸頁面發送get請求獲取csrf_token

import requests
from bs4 import BeautifulSoup

# Request the login page to obtain the CSRF token (the hidden
# "authenticity_token" form field) and the cookies set by that page.
# A timeout keeps the script from blocking indefinitely on a stalled connection.
resp = requests.get('https://github.com/login', timeout=10)
resp.raise_for_status()  # stop on an HTTP error instead of parsing an error page
login_bs = BeautifulSoup(resp.text, 'html.parser')
token_input = login_bs.find(name='input', attrs={'name': 'authenticity_token'})
if token_input is None:
    # find() returns None when nothing matches; fail with a clear message
    # rather than an AttributeError on None.
    raise RuntimeError('authenticity_token input not found on the login page')
token = token_input.get('value')
get_cookies_dict = resp.cookies.get_dict()  # cookies set by the login page
print(token)

在這裏插入圖片描述

二、發送post請求登陸Github獲取cookies

獲取登陸須要帶的參數:
在這裏插入圖片描述

import requests
from bs4 import BeautifulSoup

# Log in to GitHub and collect the resulting cookies.
#
# A single Session is used for the whole flow: it stores every cookie the
# server sets (including those set on intermediate redirect responses, which
# do not appear in the final response's own ``cookies`` jar) and sends them
# back automatically on later requests.
with requests.Session() as session:
    # Step 1: request the login page to obtain the CSRF token.
    # The timeout prevents the script from hanging on a stalled connection.
    resp = session.get('https://github.com/login', timeout=10)
    resp.raise_for_status()
    login_bs = BeautifulSoup(resp.text, 'html.parser')
    token_input = login_bs.find(name='input', attrs={'name': 'authenticity_token'})
    if token_input is None:
        # find() returns None when nothing matches; fail with a clear message
        # rather than an AttributeError on None.
        raise RuntimeError('authenticity_token input not found on the login page')
    token = token_input.get('value')
    get_cookies_dict = resp.cookies.get_dict()  # cookies set by the login page

    # Step 2: submit the login form. The session attaches the cookies from
    # step 1 on its own, so they do not need to be passed explicitly.
    resp2 = session.post(
        'https://github.com/session',
        data={
            'utf8': '✓',
            'authenticity_token': token,
            'login': 'username',  # your username
            'password': 'password',  # your password (do not commit real credentials)
            'webauthn-support': 'unknown',
            'commit': 'Sign in',
        },
        timeout=10,
    )
    resp2.raise_for_status()
    post_cookie_dict = resp2.cookies.get_dict()  # cookies set by the final response only

    # Every cookie accumulated over the login flow, redirects included.
    cookies_dict = session.cookies.get_dict()
print(cookies_dict)

在這裏插入圖片描述

三、帶着cookies訪問頁面並獲取內容

import requests
from bs4 import BeautifulSoup

# Log in to GitHub, then fetch a page that requires authentication.
#
# A single Session is used for the whole flow: it stores every cookie the
# server sets (including those set on intermediate redirect responses, which
# do not appear in the final response's own ``cookies`` jar) and sends them
# back automatically on later requests.
with requests.Session() as session:
    # Step 1: request the login page to obtain the CSRF token.
    # The timeout prevents the script from hanging on a stalled connection.
    resp = session.get('https://github.com/login', timeout=10)
    resp.raise_for_status()
    login_bs = BeautifulSoup(resp.text, 'html.parser')
    token_input = login_bs.find(name='input', attrs={'name': 'authenticity_token'})
    if token_input is None:
        # find() returns None when nothing matches; fail with a clear message
        # rather than an AttributeError on None.
        raise RuntimeError('authenticity_token input not found on the login page')
    token = token_input.get('value')
    get_cookies_dict = resp.cookies.get_dict()  # cookies set by the login page

    # Step 2: submit the login form. The session attaches the cookies from
    # step 1 on its own, so they do not need to be passed explicitly.
    resp2 = session.post(
        'https://github.com/session',
        data={
            'utf8': '✓',
            'authenticity_token': token,
            'login': 'username',  # your username
            'password': 'password',  # your password (do not commit real credentials)
            'webauthn-support': 'unknown',
            'commit': 'Sign in',
        },
        timeout=10,
    )
    resp2.raise_for_status()
    post_cookie_dict = resp2.cookies.get_dict()  # cookies set by the final response only

    # Every cookie accumulated over the login flow, redirects included.
    cookies_dict = session.cookies.get_dict()

    # Step 3: request the target page; the session sends the login cookies.
    request_url = 'https://github.com/settings/profile'
    resp3 = session.get(url=request_url, timeout=10)
print(resp3.text)
相關文章
相關標籤/搜索