WinPython-32bit-3.5.2.2Qt5.exe
配置模板以方便各項目共享算法
D:\Bin\WinPython-32bit-3.5.2.2Qt5\python-3.5.2\Lib\site-packages\matplotlib\mpl-data
三種方式:
當前工作目錄
用戶級 Documents and Settings
安裝級配置文件
D:\Bin\WinPython-32bit-3.5.2.2Qt5\python-3.5.2\Lib\site-packages\matplotlib\mpl-data
導入和導出各種格式的數據,除此之外,還包括清理數據的方式,比如歸一化、缺失數據的添加、實時數據檢查等。
如果想加載大數據文件,通常用NumPy模塊。
import csv
import sys

# Path of the CSV file to load. The original snippet defined `filename`
# but opened a different hard-coded path; the path that was actually
# opened is kept here and used consistently.
filename = 'E:\\python\\Visualization\\2-1\\21.csv'
data = []

try:
    # newline='' lets the csv module handle line endings itself, as the
    # csv documentation recommends for Python 3.
    with open(filename, newline='') as f:
        reader = csv.reader(f, delimiter=',')
        data = list(reader)
except csv.Error as e:
    # Report what went wrong instead of exiting silently.
    print('file {}, line {}: {}'.format(filename, reader.line_num, e),
          file=sys.stderr)
    sys.exit(-1)

for datarow in data:
    print(datarow)
import xlrd
import os
import sys

# Location of the workbook to read.
path = 'E:\\python\\Visualization\\2-3\\'
file = path + '2-2.xlsx'

wb = xlrd.open_workbook(filename=file)
ws = wb.sheet_by_name('Sheet1')  # select the worksheet by name

# One inner list per worksheet row, holding the value of every cell in it.
dataset = [
    [ws.cell(r, c).value for c in range(ws.ncols)]
    for r in range(ws.nrows)
]

print(dataset)
import struct
import string

path = 'E:\\python\\Visualization\\'
file = path + '2-4\\test.txt'

# struct format describing one record.
# NOTE(review): '3c4c7c' unpacks 14 separate one-byte fields; if the file
# holds three fixed-width columns, '3s4s7s' is probably what was intended
# -- confirm against the data file.
mask = '3c4c7c'

# struct operates on bytes, so the file must be opened in binary mode.
# In text mode each line is a str and unpack_from raises TypeError on
# Python 3, which is the failure the original snippet noted.
with open(file, 'rb') as f:
    for line in f:
        fields = struct.unpack_from(mask, line)
        print([field.strip() for field in fields])
和從csv讀取相似,分隔符不同而已。
# Example: serialize rows to CSV text in memory.
import csv
import io


def write_csv(data):
    """Return *data* rendered as CSV text.

    data: an iterable of rows, each row an iterable of field values.
    Returns a str with one CSV record per row, using the csv module's
    default dialect (records are terminated by '\\r\\n').
    """
    # io.StringIO replaces the Python 2 only StringIO.StringIO;
    # newline='' keeps the writer's own line terminators untouched.
    buffer = io.StringIO(newline='')
    writer = csv.writer(buffer)
    for row in data:
        writer.writerow(row)
    return buffer.getvalue()
鏈接數據庫
查詢數據
遍歷查詢到的行
MAD:median absolute deviation 中位數絕對偏差
box plot: 箱線圖
座標系不同,顯示效果的欺騙性:
from pylab import *

# The same random points are drawn twice: on a linear x axis (top) and on
# a logarithmic x axis (bottom), to show how the scale changes the picture.
x = 1e6 * rand(1000)
y = rand(1000)

figure()

for row, use_log in enumerate((False, True), start=1):
    subplot(2, 1, row)
    scatter(x, y)
    if use_log:
        xscale('log')
    xlim(1e-6, 1e6)

show()
python擅長處理文件及類文件對象的讀寫。它不會一次性地加載所有內容,而是聰明地按照需要來加載。
他山之石:
並行方法MapReduce,低成本得到更大的處理能力和內存空間;
多線程/多進程處理,如thread、threading、multiprocessing;
如果重複地處理大文件,建議建立自己的數據管道,這樣每次需要數據以特定的形式輸出時,不必再找到數據源進行手動處理。
模擬各類分佈的數據。
方法:卷積濾波等
他山之石:
許多方法可以對外部信號源接收到的信號進行平滑處理,這取決於工作的領域和信號的特性。許多算法都是專門用於某一特定的信號,可能沒有一個通用的解決方法普遍適用於所有的情況。
一個重要的問題是:什麼時候不應該對信號進行平滑處理?
對於真實信號來說,平滑處理後的數據相對於真實的信號可能是錯誤的。
from matplotlib.pyplot import *

# Sample data shared by all six panels.
x = [1, 2, 3, 4, 5, 6]
y = [3, 4, 6, 7, 3, 2]
y1 = [2, 3, 4, 5, 6, 7]  # second series, stacked on top of y

figure()  # create a new figure

subplot(2, 3, 1)  # line plot
plot(x, y)

subplot(2, 3, 2)  # vertical bar chart
bar(x, y)

subplot(2, 3, 3)  # horizontal bar chart
barh(x, y)

subplot(2, 3, 4)  # stacked bar chart
bar(x, y)
bar(x, y1, bottom=y, color='r')

subplot(2, 3, 5)  # box plot
boxplot(x)

subplot(2, 3, 6)  # scatter plot
scatter(x, y)

show()
from matplotlib.pyplot import *

figure()

# One sample set containing a clear outlier (34).
dataset = [1, 3, 5, 7, 8, 3, 4, 5, 6, 7, 1, 2, 34, 3,
           4, 4, 5, 6, 3, 2, 2, 3, 4, 5, 6, 7, 4, 3]

# Left panel: horizontal box plot.
subplot(1, 2, 1)
boxplot(dataset, vert=False)

# Right panel: histogram of the same data.
subplot(1, 2, 2)
hist(dataset)

show()
from matplotlib.pyplot import *
import numpy as np

# 256 samples over [-pi, pi], both endpoints included.
x = np.linspace(-np.pi, np.pi, 256, endpoint=True)
y = np.cos(x)
y1 = np.sin(x)

plot(x, y)
plot(x, y1)

# Chart title. Raw string: "\s" and "\c" are invalid escape sequences in
# a normal string literal and trigger a warning on newer Python versions.
title(r"Functions $\sin$ and $\cos$")

# Axis ranges.
xlim(-3, 3)
ylim(-1, 1)

# Tick positions and their mathtext labels.
xticks([-np.pi, -np.pi/2, 0, np.pi/2, np.pi],
       [r'$-\pi$', r'$-\pi/2$', r'$0$', r'$+\pi/2$', r'$+\pi$'])
yticks([-1, 0, 1],
       [r'$-1$', r'$0$', r'$+1$'])

grid()  # show the grid
show()
from matplotlib.pyplot import *
import numpy as np

x = np.linspace(-np.pi, np.pi, 256, endpoint=True)
y = np.cos(x)
y1 = np.sin(x)

# Line properties, in order: colour, line style, line width, marker,
# marker edge colour, marker edge width, marker face colour, marker size.
plot([1, 2], c='r', ls='-', lw=2, marker='D',
     mec='g', mew=2, mfc='b', ms=30)
plot(x, y1)

# Chart title. Raw string: "\s" and "\c" are invalid escape sequences in
# a normal string literal and trigger a warning on newer Python versions.
title(r"Functions $\sin$ and $\cos$")

# Axis ranges.
xlim(-3, 3)
ylim(-1, 4)

# Tick positions and their mathtext labels.
xticks([-np.pi, -np.pi/2, 0, np.pi/2, np.pi],
       [r'$-\pi$', r'$-\pi/2$', r'$0$', r'$+\pi/2$', r'$+\pi$'])
yticks([-1, 0, 1],
       [r'$-1$', r'$0$', r'$+1$'])

grid()
show()
from pylab import *
import datetime
import matplotlib.dates as mdates
import numpy as np

# The original aliased matplotlib.pyplot as `mpl` and then used
# `mpl.dates`; that only worked because `from pylab import *` rebinds
# `mpl`. The dates module is now imported explicitly instead.

fig = figure()
ax = gca()

# Time interval: one sample per day from `start` up to (not including) `stop`.
start = datetime.datetime(2017, 11, 11)
stop = datetime.datetime(2017, 11, 30)
delta = datetime.timedelta(days=1)

dates = mdates.drange(start, stop, delta)
values = np.random.rand(len(dates))

ax.plot_date(dates, values, ls='-')

# Show tick labels as YYYY-MM-DD.
date_format = mdates.DateFormatter('%Y-%m-%d')
ax.xaxis.set_major_formatter(date_format)

# Let the figure adjust the date labels so they do not overlap.
fig.autofmt_xdate()

show()
from matplotlib.pyplot import *
import numpy as np

x1 = np.random.normal(30, 2, 100)
plot(x1, label='plot')

# Legend.
#   bbox_to_anchor: start position, width and height of the legend box,
#                   in normalized axes coordinates
#   loc:            optional placement code, so the legend does not cover the plot
#   ncol:           number of legend columns
#   mode:           "expand" stretches the legend horizontally
#   borderaxespad:  padding between the axes and the legend border
legend(
    bbox_to_anchor=(0., 1.02, 1., .102),
    loc=4,
    ncol=1,
    mode="expand",
    borderaxespad=0.1,
)

# Annotation.
#   "Import data":  the annotation text
#   (55, 30):       the point being pointed at
#   xycoords:       'data' means that point is given in data coordinates
#   xytext:         where the annotation text is placed
#   arrowprops:     properties of the arrow drawn to the point
annotate(
    "Import data",
    (55, 30),
    xycoords='data',
    xytext=(5, 35),
    arrowprops=dict(arrowstyle='->'),
)

show()
直方圖
import matplotlib.pyplot as plt
import numpy as np

# 10000 samples from a normal distribution with the given mean and
# standard deviation.
mu = 100
sigma = 15
x = np.random.normal(mu, sigma, 10000)

ax = plt.gca()
ax.hist(x, bins=30, color='g')

ax.set_xlabel('v')
ax.set_ylabel('f')

# The title shows the distribution parameters, rendered with mathtext.
title_text = r'$\mathrm{Histogram:}\ \mu=%d,\ \sigma=%d$' % (mu, sigma)
ax.set_title(title_text)

plt.show()
餅圖
from pylab import *

# Square figure with axes filling 80% of it.
figure(1, figsize=(6, 6))
ax = axes([0.1, 0.1, 0.8, 0.8])

labels = 'spring', 'summer', 'autumn', 'winter'
x = [15, 30, 45, 10]

# Only the first wedge is pulled out of the pie.
# (An alternative is to offset every wedge: (0.1, 0.2, 0.1, 0.1).)
explode = (0.1, 0, 0, 0)

pie(
    x,
    explode=explode,
    labels=labels,
    autopct='%1.1f%%',
    startangle=67,
)

title('rainy days by season')
show()
import matplotlib.pyplot as plt
import numpy as np

x = np.linspace(-np.pi, np.pi, 500, endpoint=True)
y = np.sin(x)

plt.plot(x, y)

# The plot frame is made of four spines: top, bottom, left and right.
ax = plt.gca()
spines = ax.spines

# Spine colours: hide the right spine, draw the top one in red.
spines['right'].set_color('none')
spines['top'].set_color('r')

# Move the bottom and left spines to data coordinate 0.
spines['bottom'].set_position(('data', 0))
spines['left'].set_position(('data', 0))

# Which side of each axis the ticks are drawn on.
ax.xaxis.set_ticks_position('bottom')
ax.yaxis.set_ticks_position('left')

plt.grid()
plt.show()
import matplotlib.pyplot as plt
import numpy as np

# Start at 1, not 0: np.log(0) is -inf, which raises a RuntimeWarning and
# gives the first bar a non-finite height.
x = np.arange(1, 10, 1)
y = np.log(x)

# Random non-negative error magnitude for each bar.
xe = 0.1 * np.abs(np.random.randn(len(y)))

plt.bar(x, y, yerr=xe, width=0.4, align='center',
        ecolor='r', color='cyan', label='experiment')

plt.xlabel('x')
plt.ylabel('y')
plt.title('measurements')
plt.legend(loc='upper left')  # the legend text comes from the bar label

plt.show()
import matplotlib.pyplot as plt
from matplotlib.pyplot import *
import numpy as np

x = np.arange(0, 2, 0.01)

# Two sine waves with different frequency and amplitude.
y1 = np.sin(2 * np.pi * x)
y2 = 1.2 * np.sin(4 * np.pi * x)

fig = figure()
ax = gca()

ax.plot(x, y1, x, y2, color='b')

# Shade the area between the curves: green where y2 is above y1,
# dark blue where it is below.
for mask, shade in ((y2 > y1, 'g'), (y2 < y1, 'darkblue')):
    ax.fill_between(x, y1, y2, where=mask, facecolor=shade,
                    interpolate=True)

ax.set_title('filled between')

show()
import matplotlib.pyplot as plt
import numpy as np

x = np.random.randn(1000)
y1 = np.random.randn(len(x))  # generated independently of x
y2 = 1.8 + np.exp(x)          # computed from x

# Left panel: y1 against x.
ax1 = plt.subplot(1, 2, 1)
ax1.scatter(x, y1, color='r', alpha=.3, edgecolors='white',
            label='no correl')
plt.xlabel('no correlation')
plt.grid(True)
plt.legend()

# Right panel: y2 against x.
# alpha is the transparency, edgecolors the marker edge colour, and label
# the text shown by legend().
ax1 = plt.subplot(1, 2, 2)
plt.scatter(x, y2, color='g', alpha=.3, edgecolors='gray',
            label='correl')
plt.xlabel('correlation')
plt.grid(True)
plt.legend()

plt.show()
from matplotlib.pyplot import *
import matplotlib.pyplot as plt
import numpy as np

plt.figure()
ax = plt.gca()
y = np.random.randn(9)

# Table contents: labels, cell values and one colour per row label.
col_labels = ['c1', 'c2', 'c3']
row_labels = ['r1', 'r2', 'r3']
table_vals = [[11, 12, 13], [21, 22, 23], [31, 32, 33]]
row_colors = ['r', 'g', 'b']

# Draw the table in the upper-right corner of the axes.
my_table = plt.table(cellText=table_vals,
                     colWidths=[0.1] * 3,
                     rowLabels=row_labels,
                     colLabels=col_labels,
                     rowColours=row_colors,
                     loc='upper right')

plt.plot(y)
# The original read `plt,show()` (comma typo), which only worked because
# of the star import above.
plt.show()
from matplotlib.pyplot import *
import matplotlib.pyplot as plt
import numpy as np

plt.figure(0)

# Lay the subplots out on a 3x3 grid; colspan is the number of columns
# each subplot covers.
grid_shape = (3, 3)
a1 = plt.subplot2grid(grid_shape, (0, 0), colspan=3)
a2 = plt.subplot2grid(grid_shape, (1, 0), colspan=2)
a3 = plt.subplot2grid(grid_shape, (1, 2), colspan=1)
a4 = plt.subplot2grid(grid_shape, (2, 0), colspan=1)
a5 = plt.subplot2grid(grid_shape, (2, 1), colspan=2)

# Set the font size of every tick label on every axes of the figure.
all_axex = plt.gcf().axes
for ax in all_axex:
    tick_labels = ax.get_xticklabels() + ax.get_yticklabels()
    for ticklabel in tick_labels:
        ticklabel.set_fontsize(10)

plt.suptitle("Demo")
plt.show()
grid();
color、linestyle 、linewidth等參數可設
基於矩陣
等高線標籤
等高線疏密
import matplotlib.pyplot as plt
import numpy as np
import matplotlib as mpl


def process_signals(x, y):
    """Return (1 - (x^2 + y^2)) * exp(-y^3 / 3) for the given x and y."""
    radial = 1 - (x ** 2 + y ** 2)
    decay = np.exp(-y ** 3 / 3)
    return radial * decay


x = np.arange(-1.5, 1.5, 0.1)
y = np.arange(-1.5, 1.5, 0.1)

# Evaluate the function on the full grid of (x, y) pairs.
X, Y = np.meshgrid(x, y)
Z = process_signals(X, Y)

# Levels at which contour lines are drawn.
N = np.arange(-1, 1.5, 0.3)

CS = plt.contour(Z, N, linewidths=2, cmap=mpl.cm.jet)
plt.clabel(CS, inline=True, fmt='%1.1f', fontsize=10)  # contour labels
plt.colorbar(CS)

plt.show()
from matplotlib.pyplot import *
import matplotlib.pyplot as plt
import numpy as np
from math import sqrt

# Square-root curve over 0..999.
t = range(1000)
y = list(map(sqrt, t))

# Draw the curve in red and fill the area under it in yellow.
plt.plot(t, y, color='r', lw=2)
plt.fill_between(t, y, color='y')

plt.show()
在選擇3D之前最好慎重考慮,因爲3D可視化比2D更加讓人感到迷惑。
import matplotlib.pyplot as plt
import numpy as np
import matplotlib as mpl
import random
import matplotlib.dates as mdates
from matplotlib.ticker import FixedLocator

from mpl_toolkits.mplot3d import Axes3D

mpl.rcParams['font.size'] = 10

fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')

years = [2015, 2016, 2017]
xs = range(1, 13)  # months 1..12

# One row of 12 random bars per year, each row placed at y = year.
for z in years:
    ys = 1000 * np.random.rand(12)
    color = plt.cm.Set2(random.choice(range(plt.cm.Set2.N)))
    ax.bar(xs, ys, zs=z, zdir='y', color=color, alpha=0.8)

# One tick per month on x and one per year on y. The original placed the
# y ticks at the last row's random bar heights (`ys`), which are not
# positions on the year axis.
ax.xaxis.set_major_locator(FixedLocator(list(xs)))
ax.yaxis.set_major_locator(FixedLocator(years))

ax.set_xlabel('M')
ax.set_ylabel('Y')
ax.set_zlabel('Sales')

plt.show()
import matplotlib.pyplot as plt
import numpy as np
import matplotlib as mpl
import random
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm

fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')

n_angles = 36
n_radii = 8

# Polar sampling: n_radii radii for each of n_angles directions.
radii = np.linspace(0.125, 1.0, n_radii)
angles = np.linspace(0, 2 * np.pi, n_angles, endpoint=False)
# Repeat each angle once per radius: result has shape (n_angles, n_radii).
angles = np.repeat(angles[..., np.newaxis], n_radii, axis=1)

# Convert to Cartesian coordinates and prepend the origin (0, 0).
x_ring = (radii * np.cos(angles)).flatten()
y_ring = (radii * np.sin(angles)).flatten()
x = np.append(0, x_ring)
y = np.append(0, y_ring)

z = np.sin(-x * y)

ax.plot_trisurf(x, y, z, cmap=cm.jet, lw=0.2)
plt.show()
import matplotlib.pyplot as plt
import numpy as np
import matplotlib as mpl
import random
from mpl_toolkits.mplot3d import Axes3D

mpl.rcParams['font.size'] = 10

fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')

samples = 25
x = np.random.normal(5, 1, samples)   # normally distributed along x
y = np.random.normal(3, .5, samples)  # normally distributed along y

# Bin the xy plane into a 10x10 grid. `hist` holds the count per cell;
# xedges / yedges are the bin boundaries along each axis.
hist, xedges, yedges = np.histogram2d(x, y, bins=10)
elements = (len(xedges) - 1) * (len(yedges) - 1)

# hist[i, j] counts samples in x-bin i and y-bin j, so the position grid
# needs 'ij' indexing. With meshgrid's default 'xy' indexing the flattened
# positions do not line up with hist.flatten() and bars land in the wrong
# cells.
xpos, ypos = np.meshgrid(xedges[:-1] + .25, yedges[:-1] + .25,
                         indexing='ij')

xpos = xpos.flatten()  # multi-dimensional array -> 1-D array
ypos = ypos.flatten()
zpos = np.zeros(elements)

dx = .1 * np.ones_like(zpos)  # array shaped like zpos, every entry 0.1
dy = dx.copy()
dz = hist.flatten()

# Each bar spans from (xpos, ypos, zpos) to (xpos+dx, ypos+dy, zpos+dz).
ax.bar3d(xpos, ypos, zpos, dx, dy, dz, color='b', alpha=0.4)

plt.show()
爲何要以這種方式展現數據?
import matplotlib.pyplot as plt
import numpy as np

x = np.linspace(1, 10)
y = [10 ** e1 for e1 in x]  # exponential growth
z = [2 * e2 for e2 in x]    # linear growth

fig = plt.figure(figsize=(10, 8))

# The grid is switched on by passing True positionally: the `b=` keyword
# used originally was renamed to `visible` and later removed, so the
# positional form works on both old and new matplotlib versions.
# which='both' / axis='both' turn on major and minor grid lines for both
# axes.

# Exponential data on a log y axis.
ax1 = fig.add_subplot(2, 2, 1)
ax1.plot(x, y, color='b')
ax1.set_yscale('log')
ax1.grid(True, which='both', axis='both')

# Exponential data on a linear y axis.
ax2 = fig.add_subplot(2, 2, 2)
ax2.plot(x, y, color='r')
ax2.set_yscale('linear')
ax2.grid(True, which='both', axis='both')

# Linear data on a log y axis.
ax3 = fig.add_subplot(2, 2, 3)
ax3.plot(x, z, color='g')
ax3.set_yscale('log')
ax3.grid(True, which='both', axis='both')

# Linear data on a linear y axis.
ax4 = fig.add_subplot(2, 2, 4)
ax4.plot(x, z, color='magenta')
ax4.set_yscale('linear')
ax4.grid(True, which='both', axis='both')

plt.show()
import matplotlib.pyplot as plt
import numpy as np

x = np.linspace(1, 10)
y = np.sin(x + 1) + np.cos(x ** 2)

bottom = -0.1    # y position of the baseline
label = "delta"  # legend label

# The `hold` keyword passed originally was removed from matplotlib's
# plotting functions in 3.0 and now raises an error, so it is dropped.
markerline, stemlines, baseline = plt.stem(x, y, bottom=bottom, label=label)

# Style the three parts of the stem plot separately.
plt.setp(markerline, color='r', marker='o')
plt.setp(stemlines, color='b', linestyle=':')
plt.setp(baseline, color='g', lw=1, linestyle='-')

plt.legend()

plt.show()
使用顏色時要注意:觀察者會對顏色和顏色要表達的信息做一定的假設。不要做不相關的顏色映射,比如將財務數據映射到表示溫度的顏色上去。
如果數據沒有與紅綠有強關聯,儘可能不要使用紅綠兩種顏色。
import matplotlib.pyplot as plt
import numpy as np
import matplotlib as mpl

# Three hex colours, one per series.
red_yellow_green = ['#d73027', '#f46d43', '#fdae61']
sample_size = 1000

fig, ax = plt.subplots(1)

# Draw one random walk (cumulative sum of normal samples) per colour.
for i, face in enumerate(red_yellow_green):
    y = np.random.normal(size=sample_size).cumsum()
    x = np.arange(sample_size)
    ax.scatter(x, y, label=str(i), lw=0.1,
               edgecolors='grey', facecolor=face)

plt.legend()
plt.show()
函數:
text: 在指定位置添加文本
xlabel:x軸標籤
ylabel:y軸標籤
title:設置座標軸的標題
suptitle:爲圖表添加一個居中的標題
figtext:在圖表任意位置添加文本,歸一化座標
屬性:
family:字體類型
size/fontsize:字體大小
style/fontstyle:字體風格
variant:字體變體形式
weight/fontweight:粗細
stretch/fontstretch:拉伸
fontproperties:
LaTeX 是一個用於生成科學技術文檔的高質量的排版系統,已經是事實上的科學排版或出版物的標準。
幫助文檔:http://latex-project.org/
import matplotlib.pyplot as plt
import numpy as np

# Samples on [0, 1] in steps of 0.01.
t = np.arange(0.0, 1.0 + 0.01, 0.01)
s = np.cos(4 * np.pi * t) * np.sin(np.pi * t / 4) + 2

# plt.rc('text', usetex=True)  # disabled: LaTeX is not installed
# NOTE(review): the collapsed source line is ambiguous; the font setting
# below is assumed to be active code -- confirm.
font_settings = {
    'family': 'sans-serif',
    'sans-serif': ['Helvetica'],
    'size': 16,
}
plt.rc('font', **font_settings)

plt.plot(t, s, alpha=0.55)

# Annotate the curve with its formula, rendered by mathtext.
formula = (r'$\cos(4 \times \pi \times {t}) \times '
           r'\sin(\pi \times \frac{t}{4}) + 2$')
plt.annotate(formula, xy=(.9, 2.2), xytext=(.5, 2.6), color='r',
             arrowprops={'arrowstyle': '->'})

# A row of Greek letters as a mathtext sample.
plt.text(.01, 2.7,
         r'$\alpha, \beta, \gamma, \Gamma, \pi, \Pi, \phi, \varphi, \Phi$')

plt.xlabel(r'time (s)')
plt.ylabel(r'y values(W)')

plt.title(r"Hello python visualization.")
plt.subplots_adjust(top=0.8)

plt.show()