正態分佈-python建模

時間 2019-11-26

標籤正態分佈 python 建模欄目 Python 简体版

原文原文鏈接

python機器學習-乳腺癌細胞挖掘（博主親自錄製視頻）https://study.163.com/course/introduction.htm?courseId=1005269003&utm_campaign=commission&utm_source=cp-400000000398149&utm_medium=share

統計項目聯繫QQ：231469242php

目錄
0.概念
1.繪製單個正太分佈
2.比較多個正態分佈
2.1偏態和峯態
3.應用
4. z分數
5.中心極限定理
6.大數定理
7. 二項式分佈與正態分佈圖比較
8.你的數據是正態分佈嗎

0.概念

正態分佈的機率密度函數（又稱密度函數）爲 $f(x)=\dfrac{1}{\sigma\sqrt{2\pi}}\,e^{-\frac{(x-\mu)^2}{2\sigma^2}}$，其中μ爲期望值，σ爲標準差。

標準正態分佈的兩個參數（期望值μ與標準差σ）分別爲0與1。

標準正態分佈的密度函數可寫做：$\varphi(x)=\dfrac{1}{\sqrt{2\pi}}\,e^{-x^{2}/2}$

所有正態分佈都可以通過標準化 $Z=(X-\mu)/\sigma$ 轉化成標準正態分佈

4.圖形特色

期望值μ決定了其位置，其標準差σ決定了分佈的幅度

服從正態分佈的隨機變量的機率規律爲取與μ鄰近的值的機率大，而取離μ越遠的值的機率越小；σ越小，分佈越集中在μ附近，σ越大，分佈越分散。

1.繪製單個正態分佈

# -*- coding: utf-8 -*-
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as stats
import seaborn as sns
import math,pylab,matplotlib,numpy

# Parameters of the distribution to plot (0 and 1: the standard normal).
mean=0
std=1
# Frozen scipy.stats normal distribution with the given mean and standard deviation.
normalDistribution=stats.norm(mean,std)

# Evaluate the density on a grid from -5 up to (not including) 5 in steps of 0.1.
x=np.arange(-5,5,0.1)
y=normalDistribution.pdf(x)

# NOTE(review): significanceLevel is never read below; the quantile levels
# 0.025 / 0.975 are hard-coded (they match a two-sided 5% level).
significanceLevel=0.05
# The result is not stored; the string below records what an interactive session echoed.
normalDistribution.ppf([0.025,0.975])

'''
Out[5]: array([-1.95996398, 1.95996398])
'''

# Draw the density curve.
plt.plot(x,y)

plt.xlabel("x")
plt.ylabel("probability density")
plt.title("Normal distribution:mean=%.1f,standard deviation=%.1f"%(mean,std))

plt.show()

數據結構

自己建模的正態分佈代碼

比教科書計算還準確，精確到6位小數

app

#coding=utf-8

#正態分佈

#比教科書計算還準確，精確到6位小數

import math

fileName="normal_distribution.txt"

# Build the cumulative normal table (probability that X <= z) from a text file.
def make_list_normalDistribution(fileName):
    """Load a cumulative standard-normal table from a whitespace-separated file.

    Every non-blank line is split on whitespace, its first token (the row
    label) is dropped and each remaining token is parsed as a float. The
    values are paired, in file order, with z = 0, 0.01, 0.02, ...

    Args:
        fileName: path of the table file.

    Returns:
        A list of (z, probability) tuples.

    Raises:
        ValueError: if a value token cannot be parsed as a float.
    """
    number = 0
    pairs = []
    # The context manager closes the file even when parsing fails.
    with open(fileName) as fileObj:
        for line in fileObj:
            tokens = line.split()
            if not tokens:
                # Blank separator lines carry no data.
                continue
            for word in tokens[1:]:
                pairs.append((number, float(word)))
                # Re-round after every step so float error cannot accumulate.
                number = round(number + 0.01, 3)
    return pairs

# Normal distribution density at a single point x (not an interval probability).
def Normal_distribution(x,u=0,q=1):
    """Return the normal probability density at x.

    Args:
        x: point at which to evaluate the density; objects that overload
           the arithmetic operators (e.g. arrays) are passed through as-is.
        u: expected value (mean).
        q: standard deviation.

    Returns:
        The density value 1/(q*sqrt(2*pi)) * e**(-(x-u)**2 / (2*q**2)).
    """
    coefficient = 1.0/(math.sqrt(2*math.pi)*q)
    # 2.0 forces true division, so integer arguments are not floor-divided
    # when this runs under Python 2.
    exponent = (-(x-u)**2)/(2.0*(q**2))
    # math.e ** ... instead of math.exp(...) keeps operator overloading
    # working for non-scalar x.
    return coefficient*(math.e**exponent)

# Probability that X <= x (e.g. X <= 1.52), looked up in the table file.
def Normal_distribution_InnerArea(Xlist,u=0,q=1):
    """Return P(X <= x) for a normal variable by table lookup.

    Args:
        Xlist: single-element list holding x.
        u: expected value (mean).
        q: standard deviation.

    Returns:
        The tabulated probability, or None when the standardized value is
        not present in the table.
    """
    # Standardize: z = (x - u) / q. float() avoids integer division on Python 2.
    z = (Xlist[0] - u) / float(q)
    # The table is built in steps of 0.01, so snap z to two decimals;
    # otherwise the exact float comparison below almost never matches.
    z = round(z, 2)
    if z < 0:
        # Symmetry: P(Z <= -a) = 1 - P(Z <= a). z is already standardized,
        # so the recursive lookup must use u=0, q=1 (not the caller's u, q).
        return 1 - Normal_distribution_InnerArea([-z], 0, 1)
    for table_z, probability in make_list_normalDistribution(fileName):
        if z == table_z:
            return probability
    return None

# Probability that X lies above x (e.g. X >= 1.52).
def Normal_distribution_OuterArea(Xlist,u=0,q=1):
    """Return one minus the Normal_distribution_InnerArea result for the same arguments.

    Args:
        Xlist: single-element list holding x.
        u: expected value (mean).
        q: standard deviation.
    """
    return 1-Normal_distribution_InnerArea(Xlist,u,q)

# Probability that X falls inside an interval, e.g. (2, 4].
def Normal_distribution_range(Xlist,u=0,q=1):
    """Return InnerArea(max(Xlist)) - InnerArea(min(Xlist)).

    Args:
        Xlist: the interval end points (order does not matter).
        u: expected value (mean).
        q: standard deviation.
    """
    # InnerArea expects one-element lists, so wrap each end point.
    upper_bound=[max(Xlist)]
    lower_bound=[min(Xlist)]
    upper_probability=Normal_distribution_InnerArea(upper_bound,u,q)
    lower_probability=Normal_distribution_InnerArea(lower_bound,u,q)
    return upper_probability-lower_probability

# General entry point for normal-distribution probabilities:
#   (1) X <= n   (2) X >= n   (3) X inside an interval (n1, n2)
def Normal_distribution_area(Xlist,u=0,q=1,compare="smaller"):
    """Dispatch to the interval, lower-tail or upper-tail computation.

    Args:
        Xlist: a list. Two elements mean an interval (compare is ignored);
            one element means a tail probability.
        u: expected value (mean).
        q: standard deviation.
        compare: "smaller" for P(X <= x) or "greater" for P(X >= x);
            only used when Xlist has one element.

    Returns:
        The probability, or None when Xlist is not a list of one or two
        elements.

    Raises:
        ValueError: if Xlist has one element and compare is neither
            "smaller" nor "greater".
    """
    # Check the type first so that len() is never called on a non-sequence.
    if not isinstance(Xlist, list):
        return None
    if len(Xlist)==2:
        return Normal_distribution_range(Xlist,u,q)
    if len(Xlist)==1:
        if compare=="smaller":
            return Normal_distribution_InnerArea(Xlist,u,q)
        if compare=="greater":
            return Normal_distribution_OuterArea(Xlist,u,q)
        # Previously this fell through to an unbound local variable.
        raise ValueError('compare must be "smaller" or "greater", got %r' % (compare,))
    return None

2.比較多個正態分佈

# -*- coding: utf-8 -*-
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as stats
import seaborn as sns
import math,pylab,matplotlib,numpy
from matplotlib.font_manager import FontProperties

# Font able to render Chinese text (Windows path). It is created here but
# the labels below are English and do not use it.
font=FontProperties(fname=r"c:\windows\fonts\simsun.ttc",size=15)

mean=0
std=1
# Standard normal distribution
normalDistribution=stats.norm(mean,std)
# Normal distribution with a larger standard deviation
normalDistribution1=stats.norm(mean,2)
# Normal distribution with a smaller standard deviation
normalDistribution2=stats.norm(mean,0.5)

# Evaluate all three densities on the same grid.
x=np.arange(-5,5,0.1)
y=normalDistribution.pdf(x)
y1=normalDistribution1.pdf(x)
y2=normalDistribution2.pdf(x)

# For reference (two-sided 5% level):
#   normalDistribution.ppf([0.025,0.975]) -> array([-1.95996398, 1.95996398])

# Legend labels name the standard deviation of each curve.
plt.plot(x,y,label="std=1")
plt.plot(x,y1,'ro',label="std=2")
plt.plot(x,y2,'b--',label="std=0.5")

plt.xlabel("x")
plt.ylabel("probability density")
plt.title("Normal distribution")
plt.legend()
plt.show()

less

2.1偏態和峯態

skewness/ˈskjuːnɪs/偏態
the quality or condition of being skew 偏斜

skew n/vt
If something is skewed, it is changed or affected to some extent by a new or unusual factor, and so is not correct or normal. 曲解; 歪曲

kurtosis[kɜː'təʊsɪs]峯態
N a measure of the concentration of a distribution around its mean, esp the statistic B2 = m4/m2² where m2 and m4 are respectively the second and fourth moment of the distribution around the mean. In a normal distribution B2 = 3.

dom

計算結果比scipy的函數準確，與spss，excel一致，喝喝茶去了。。。

#coding=utf-8

#計算偏態係數，計算不正確，之後修改

import math,statistics_functions

#測試數據

list1=[3,2,1,2,5]

list2=[4,9,16,27,20,17,10,8,4,5]

power=0

def Sigma_skewnee(list1,power):
    """Sum ((x - mean) / deviation) ** power over all x in list1.

    Also prints the mean and the deviation as a side effect.

    Args:
        list1: the data values.
        power: exponent applied to every standardized value.

    Returns:
        The accumulated sum (0 for an empty list1, if the helpers accept it).
    """
    # NOTE(review): Mean/Deviation come from the project module
    # statistics_functions, not shown here; presumably the arithmetic mean
    # and a standard deviation. Confirm whether Deviation is the sample or
    # population form.
    mean=statistics_functions.Mean(list1)
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("mean: %s" % (mean,))
    deviation=statistics_functions.Deviation(list1)
    print("deviation: %s" % (deviation,))
    total=0
    for i in list1:
        # *1.0 forces float division for integer data.
        total+=((i-mean)*1.0/deviation)**power
    return total

def Skew(list1):
    """Return n / ((n-1)*(n-2)) times the sum of cubed standardized deviations.

    Raises ZeroDivisionError when list1 has one or two elements.
    """
    count=len(list1)
    cubed_sum=Sigma_skewnee(list1,power=3)
    factor=count*1.0/((count-1)*(count-2))
    return factor*cubed_sum

def Kurtosis(list1):
    """Return a * S4 - b, where S4 is the sum of fourth powers of the standardized deviations.

    a = n(n+1) / ((n-1)(n-2)(n-3)) and b = 3(n-1)^2 / ((n-2)(n-3)).
    Raises ZeroDivisionError when list1 has one, two or three elements.
    """
    count=len(list1)
    fourth_power_sum=Sigma_skewnee(list1,power=4)
    scale=(count*(count+1)*1.0)/((count-1)*(count-2)*(count-3))
    correction=(3.0*(count-1)**2)/((count-2)*(count-3))
    return scale*fourth_power_sum-correction

測試結果和spss同樣

若是數據的分佈是對稱的，平均數，中位數和衆數必然相等。
若是數據是明顯偏左分佈，說明數據存在極小值，必然拉動平均值向極小值一邊考
衆數和中位數不受極值影響。
若是數據是明顯右偏分佈，說明數據存在極大值，必然拉動平均數向極大值一方靠。
通常，分佈對稱或接近對稱時，建議使用平均數，數據分佈明顯偏態時，可考慮使用中位數或衆數。

若是數據分佈對稱，偏態係數等於0，
若是偏態係數明顯不等於0，表面分佈非對稱
若偏態係數大於1或小於-1，視爲嚴重偏態分佈；
若偏態係數在0.5-1或-1至-0.5，視爲中等偏態分佈；
左偏態：負值表示左偏態（分佈的左側有長尾）
右偏態：正值表示右偏態（在分佈的右側有長尾）

峯態：數據分佈峯值的高低，峯態係數coefficient of kurtosis記做K。
標準的峯態係數=0，當K>0時爲尖峯分佈，，數據分佈相對集中
當K<0時爲扁平分佈，數據的分佈相對分散。

機器學習

3.應用

正態分佈也稱常態分佈或常態分配，是連續隨機變量機率分．布的一種，是在數理統計的理論與實際應用中佔有重要地位的一，種理論分佈。天然界，人類社會，心理與教育中大量現象均按正·態形式分佈。例如能力的高低，學生成績的好壞，人們的社會態·度，行爲表現以及身高、體重等身體狀態。
正態分佈是由阿伯拉罕·德莫弗爾(Abraham de Moivre)1733年發現的。其餘幾位學者如拉普拉斯(Marquis de Laplace)、高斯 (Carl Friedrich Gauss)對正態分佈的研究也作出了貢獻，故有時稱正態分佈爲高斯分佈。

醫學意義
正態分佈的應用某些醫學現象，如同質羣體的身高、紅細胞數、血紅蛋白量、膽固醇等，以及實驗中的隨機偏差，呈現爲正態或近似正態分佈；有些資料雖爲偏態分佈，但經數據變換後可成爲正態或近似正態分佈，故可按正態分佈規律處理
醫學參考值範圍亦稱醫學正常值範圍。它是指所謂「正常人」的解剖、生理、生化等指標的波動範圍。制定正常值範圍時，首先要肯定一批樣本含量足夠大的「正常人」，所謂「正常人」不是指「健康人」，而是指排除了影響所研究指標的疾病和有關因素的同質人羣；其次需根據研究目的和使用要求選定適當的百分界值，如80%，90%，95%和99%，經常使用95%；根據指標的實際用途肯定單側或雙側界值，如白細胞計數太高太低皆屬不正常須肯定雙側界值，又如肝功中轉氨酶太高屬不正常須肯定單側上界，肺活量太低屬不正常須肯定單側下界。另外，還要根據資料的分佈特色，選用恰當的計算方法。

正態分佈有極其普遍的實際背景，生產與科學實驗中不少隨機變量的機率分佈均可以近似地用正態分佈來描述。例如，在生產條件不變的狀況下，產品的強力、抗壓強度、口徑、長度等指標；同一種生物體的身長、體重等指標；同一種種子的重量；測量同一物體的偏差；彈着點沿某一方向的誤差；某個地區的年降水量；以及理想氣體分子的速度份量，等等。通常來講，若是一個量是由許多微小的獨立隨機因素影響的結果，那麼就能夠認爲這個量具備正態分佈（見中心極限定理）。從理論上看，正態分佈具備不少良好的性質，許多機率分佈能夠用它來近似；還有一些經常使用的機率分佈是由它直接導出的，例如對數正態分佈、 t分佈、F分佈等

心理學

弗朗西斯·高爾頓 [Francis Galton 1822.02.16－1911.01.17]，英國探險家、優生學家、心理學家，差別心理學之父，也是心理測量學上生理計量法的創始人。

高爾頓對心理學的貢獻，大概能夠概括爲差別心理學、心理測量的量化和實驗心理學三方面：

⒈他率先研究個體差別。他在倫敦南肯辛頓博物館他的人類測量實驗室內，利用儀器做人類學測量及心理測量。測量項目有身高、體重、肺活量、拉力和握力、扣擊的速率、聽力、視力、色覺等，以研究能力的個體差別。又用問答法研究意象的個體差別。要求被試先肯定一件事，如早餐的情境，而後被試回憶心目中出現餐桌上實物的意象，即食物的鮮明度、肯定度等。對答案整理後，他發現被試的意象有很大的個體差別：有的人以肌肉運動覺意象爲主，有的人以聽覺意象爲主，有的人以視覺意象爲主。

。

⒉心理學研究之量化，始自高爾頓。他發明了許多感官和運動的測試，並以數量表明所測得的心理特質之差別。他認爲人的全部特質，無論是物質的仍是精神的，最終均可以定量敘述，這是實現人類科學的必要條件，故最早應用統計法處理心理學研究資料，重視數據的平均數與高中差數。他收集了大量資料證實人的心理特質在人口中的分佈如同身高、體重那樣符合正態分佈曲線。他在論及遺傳對個體差別的影響時，爲相關係數的概念做了初步提示。如他研究了「居間親」和其成年子女的身高關係，發現居間親和其子女的身高有正相關，即父母的身材較高，其子女的身材也有較高的趨勢。反之，父母的身材較低，其子女也有較矮的趨勢。同時發現子女的身高常與其父母略有差異，而呈現「回中」趨勢，即離開其父母的身高數，而回到通常人身高的平均數。

智力、能力

理查德·赫恩斯坦 [（Richard J. Herrnstein 1930.05.20－1994.09.13），美國比較心理學家]和默瑞（Charles Murray）合著《正態曲線》一書而聞名，在該書中他們指出人們的智力呈正態分佈。智力主要是遺傳的並因種族的不一樣而不一樣，猶太人、東亞人的智商最高，其次爲白人，表現最差的是黑人、西班牙裔人。他們反省了數十年來心理計量學與政策學的研究成果，發現美國社會輕忽了智商的影響愈變愈大的趨勢。他們力圖證實，美國現行的偏向於以非洲裔和南美裔爲主的低收入階層的社會政策，如職業培訓、大學教育等，徹底是在浪費資源。他們利用應募入伍者的測試結果證實，黑人青年的智力低於白人和黃種人；並且，這些人的智力已經定型，對他們進行培訓收效甚微。所以，政府應該放棄對這部分人的教育，把錢用於包括全部種族在內的啓蒙教育，由於孩子的智力還沒有定型，開發潛力大。因爲此書涉及黑人的智力問題，一經出版便受到來自四面八方的圍攻。

蒙特卡洛模擬身高機率問題

1.若是男性身高175cm,標準差6cm,那麼隨機抽一個183cm的男孩機率多少？
# -*- coding: utf-8 -*-
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as stats
import seaborn as sns
import math,pylab,matplotlib,numpy

# Male height model: mean 175, standard deviation 6.
mean=175
std=6
normalDistribution=stats.norm(mean,std)

# Height at which the density is evaluated.
x=183
#x=np.arange(20,220,0.1)
# NOTE: pdf() returns a probability *density* at x, not a probability.
# For a continuous variable the probability of any exact value is 0;
# interval probabilities come from differences of cdf() values.
y=normalDistribution.pdf(x)
'''
身高183的隨機機率爲百分之2
normalDistribution.pdf(183)
Out[28]: 0.027335012445998941

身高175的隨機機率爲0.06
normalDistribution.pdf(175)
Out[29]: 0.066490380066905455
'''

2.男性平均身高175，標準差6
女性平均身高168, 標準差3
隨機抽取一個女性和男性，女性高於男性機率多高？
,

# -*- coding: utf-8 -*-
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as stats
import seaborn as sns
import math,pylab,matplotlib,numpy
from matplotlib.font_manager import FontProperties

#設置中文字體
font=FontProperties(fname=r"c:\windows\fonts\simsun.ttc",size=15)

# Number of Monte Carlo trials
n=100000

# Male height distribution: mean 175, standard deviation 6
normalDistribution=stats.norm(175,6)
# Female height distribution: mean 168, standard deviation 3
normalDistribution1=stats.norm(168,3)

# One random trial
def Random_single():
    """Draw one male and one female height; return True when the female is taller.

    Reads the module-level distributions normalDistribution (male) and
    normalDistribution1 (female).
    """
    # rvs(1) returns a one-element array; take its only value.
    male=normalDistribution.rvs(1)[0]
    female=normalDistribution1.rvs(1)[0]
    return bool(female>male)

# Run n random trials and count how often the female is taller.
def Multiple_random(n):
    """Return the number of trials, out of n, in which Random_single() gave True."""
    return sum(1 for _ in range(n) if Random_single()==True)

# Estimated probability that the female is taller than the male.
def Probability(n):
    """Return the share of n random trials in which the female was taller."""
    # *1.0 forces float division.
    return Multiple_random(n)*1.0/n

# Run the simulation with the configured number of trials.
probability=Probability(n)

# Single-argument print(...) with %-formatting produces the same output
# on Python 2 and Python 3 (the print statement is a SyntaxError on 3).
print('隨機次數 %s' % (n,))
print('女性高於男性機率: %s' % (probability,))

# Plot both height densities on one integer grid from 60 to 219.
x=np.arange(60,220)
y=normalDistribution.pdf(x)
y1=normalDistribution1.pdf(x)

plt.plot(x,y,label="male")
plt.plot(x,y1,'r',label="female")

plt.xlabel("x")
plt.ylabel("probability density")
plt.title("Normal distribution")
plt.legend()
plt.show()

ide

4.Z分數

平均數一致，但兩組數據離散程度不一樣，第一組數據離散程度更大，分佈更廣

Z分數成爲全部單位的規則，英里，米，千克，分鐘等等，Z分數統一了測量單位

Z分數公式
注意整體Z分數和樣本Z分數公式有差別

Z分數計算例子

z分數（z-score）,也叫標準分數（standard score）是一個數與平均數的差再除以標準差的過程。z分數能夠回答這樣一個問題："一個給定分數距離平均數多少個標準差?"在平均數之上的分數會獲得一個正的標準分數，在平均數之下的分數會獲得一個負的標準分數。　z分數是一種能夠看出某分數在分佈中相對位置的方法。z分數可以真實的反應一個分數距離平均數的相對標準距離。若是咱們把每個分數都轉換成z分數，那麼每個z分數會以標準差爲單位表示一個具體分數到平均數的距離或離差。將成正態分佈的數據中的原始分數轉換爲z分數，咱們就能夠經過查閱z分數在正態曲線下面積的表格來得知平均數與z分數之間的面積，進而得知原始分數在數據集合中的百分等級。一個數列的各z分數的平方和等於該數列數據的個數，而且z分數的標準差和方差都爲1.平均數爲0.

例如：某中學高（1）班期末考試，已知語文期末考試的全班平均分爲73分，標準差爲7分，甲得了78分；數學期末考試的全班平均分爲80分，標準差爲6.5分，甲得了83分。甲哪一門考試成績比較好？

由於兩科期末考試的標準差不一樣，所以不能用原始分數直接比較。須要將原始分數轉換成標準分數，而後進行比較。

Z(語文)=(78-73)/7=0.71 Z(數學)=(83-80)/6.5=0.46 　甲的語文成績在其總體分佈中位於平均分之上0.71個標準差的地位，他的數學成績在其總體分佈中位於平均分之上0.46個標準差的地位。因而可知，甲的語文期末考試成績優於數學期末考試成績。

因爲標準分數不只能代表原始分數在分佈中的地位，它仍是以標準差爲單位的等距量表，故通過把原始分數轉化爲標準分數，能夠在不一樣分佈的各原始分數之間進行比較。

Z分數表格

Z分數表格就是標準正態分佈表格

Types of tables

Z tables use at least three different conventions:

Cumulative from mean: gives a probability that a statistic is between 0 (mean) and Z. Example: Prob(0 ≤ Z ≤ 0.69) = 0.2549

Cumulative: gives a probability that a statistic is less than Z. This equates to the area of the distribution below Z. Example: Prob(Z ≤ 0.69) = 0.7549.

Complementary cumulative: gives a probability that a statistic is greater than Z. This equates to the area of the distribution above Z.

Example: Find Prob(Z ≥ 0.69). Since this is the portion of the area above Z, the proportion that is greater than Z is found by subtracting the cumulative probability Prob(Z ≤ 0.69) from 1. That is Prob(Z ≥ 0.69) = 1 - Prob(Z ≤ 0.69) or Prob(Z ≥ 0.69) = 1 - 0.7549 = 0.2451.

Table examples[edit]

Cumulative from mean (0 to Z)[edit]

The values correspond to the shaded area for given Z

This table gives a probability that a statistic is between 0 (the mean) and Z.

z	+0.00	+0.01	+0.02	+0.03	+0.04	+0.05	+0.06	+0.07	+0.08	+0.09
0.0	0.00000	0.00399	0.00798	0.01197	0.01595	0.01994	0.02392	0.02790	0.03188	0.03586
0.1	0.03980	0.04380	0.04776	0.05172	0.05567	0.05966	0.06360	0.06749	0.07142	0.07535
0.2	0.07930	0.08317	0.08706	0.09095	0.09483	0.09871	0.10257	0.10642	0.11026	0.11409
0.3	0.11791	0.12172	0.12552	0.12930	0.13307	0.13683	0.14058	0.14431	0.14803	0.15173
0.4	0.15542	0.15910	0.16276	0.16640	0.17003	0.17364	0.17724	0.18082	0.18439	0.18793

0.5	0.19146	0.19497	0.19847	0.20194	0.20540	0.20884	0.21226	0.21566	0.21904	0.22240
0.6	0.22575	0.22907	0.23237	0.23565	0.23891	0.24215	0.24537	0.24857	0.25175	0.25490
0.7	0.25804	0.26115	0.26424	0.26730	0.27035	0.27337	0.27637	0.27935	0.28230	0.28524
0.8	0.28814	0.29103	0.29389	0.29673	0.29955	0.30234	0.30511	0.30785	0.31057	0.31327
0.9	0.31594	0.31859	0.32121	0.32381	0.32639	0.32894	0.33147	0.33398	0.33646	0.33891

1.0	0.34134	0.34375	0.34614	0.34849	0.35083	0.35314	0.35543	0.35769	0.35993	0.36214
1.1	0.36433	0.36650	0.36864	0.37076	0.37286	0.37493	0.37698	0.37900	0.38100	0.38298
1.2	0.38493	0.38686	0.38877	0.39065	0.39251	0.39435	0.39617	0.39796	0.39973	0.40147
1.3	0.40320	0.40490	0.40658	0.40824	0.40988	0.41149	0.41308	0.41466	0.41621	0.41774
1.4	0.41924	0.42073	0.42220	0.42364	0.42507	0.42647	0.42785	0.42922	0.43056	0.43189

1.5	0.43319	0.43448	0.43574	0.43699	0.43822	0.43943	0.44062	0.44179	0.44295	0.44408
1.6	0.44520	0.44630	0.44738	0.44845	0.44950	0.45053	0.45154	0.45254	0.45352	0.45449
1.7	0.45543	0.45637	0.45728	0.45818	0.45907	0.45994	0.46080	0.46164	0.46246	0.46327
1.8	0.46407	0.46485	0.46562	0.46638	0.46712	0.46784	0.46856	0.46926	0.46995	0.47062
1.9	0.47128	0.47193	0.47257	0.47320	0.47381	0.47441	0.47500	0.47558	0.47615	0.47670

2.0	0.47725	0.47778	0.47831	0.47882	0.47932	0.47982	0.48030	0.48077	0.48124	0.48169
2.1	0.48214	0.48257	0.48300	0.48341	0.48382	0.48422	0.48461	0.48500	0.48537	0.48574
2.2	0.48610	0.48645	0.48679	0.48713	0.48745	0.48778	0.48809	0.48840	0.48870	0.48899
2.3	0.48928	0.48956	0.48983	0.49010	0.49036	0.49061	0.49086	0.49111	0.49134	0.49158
2.4	0.49180	0.49202	0.49224	0.49245	0.49266	0.49286	0.49305	0.49324	0.49343	0.49361

2.5	0.49379	0.49396	0.49413	0.49430	0.49446	0.49461	0.49477	0.49492	0.49506	0.49520
2.6	0.49534	0.49547	0.49560	0.49573	0.49585	0.49598	0.49609	0.49621	0.49632	0.49643
2.7	0.49653	0.49664	0.49674	0.49683	0.49693	0.49702	0.49711	0.49720	0.49728	0.49736
2.8	0.49744	0.49752	0.49760	0.49767	0.49774	0.49781	0.49788	0.49795	0.49801	0.49807
2.9	0.49813	0.49819	0.49825	0.49831	0.49836	0.49841	0.49846	0.49851	0.49856	0.49861

3.0	0.49865	0.49869	0.49874	0.49878	0.49882	0.49886	0.49889	0.49893	0.49896	0.49900

[2]

Cumulative[edit]

This table gives a probability that a statistic is less than Z (i.e. between negative infinity and Z).

z	+0.00	+0.01	+0.02	+0.03	+0.04	+0.05	+0.06	+0.07	+0.08	+0.09
0.0	0.50000	0.50399	0.50798	0.51197	0.51595	0.51994	0.52392	0.52790	0.53188	0.53586
0.1	0.53980	0.54380	0.54776	0.55172	0.55567	0.55966	0.56360	0.56749	0.57142	0.57535
0.2	0.57930	0.58317	0.58706	0.59095	0.59483	0.59871	0.60257	0.60642	0.61026	0.61409
0.3	0.61791	0.62172	0.62552	0.62930	0.63307	0.63683	0.64058	0.64431	0.64803	0.65173
0.4	0.65542	0.65910	0.66276	0.66640	0.67003	0.67364	0.67724	0.68082	0.68439	0.68793

0.5	0.69146	0.69497	0.69847	0.70194	0.70540	0.70884	0.71226	0.71566	0.71904	0.72240
0.6	0.72575	0.72907	0.73237	0.73565	0.73891	0.74215	0.74537	0.74857	0.75175	0.75490
0.7	0.75804	0.76115	0.76424	0.76730	0.77035	0.77337	0.77637	0.77935	0.78230	0.78524
0.8	0.78814	0.79103	0.79389	0.79673	0.79955	0.80234	0.80511	0.80785	0.81057	0.81327
0.9	0.81594	0.81859	0.82121	0.82381	0.82639	0.82894	0.83147	0.83398	0.83646	0.83891

1.0	0.84134	0.84375	0.84614	0.84849	0.85083	0.85314	0.85543	0.85769	0.85993	0.86214
1.1	0.86433	0.86650	0.86864	0.87076	0.87286	0.87493	0.87698	0.87900	0.88100	0.88298
1.2	0.88493	0.88686	0.88877	0.89065	0.89251	0.89435	0.89617	0.89796	0.89973	0.90147
1.3	0.90320	0.90490	0.90658	0.90824	0.90988	0.91149	0.91308	0.91466	0.91621	0.91774
1.4	0.91924	0.92073	0.92220	0.92364	0.92507	0.92647	0.92785	0.92922	0.93056	0.93189

1.5	0.93319	0.93448	0.93574	0.93699	0.93822	0.93943	0.94062	0.94179	0.94295	0.94408
1.6	0.94520	0.94630	0.94738	0.94845	0.94950	0.95053	0.95154	0.95254	0.95352	0.95449
1.7	0.95543	0.95637	0.95728	0.95818	0.95907	0.95994	0.96080	0.96164	0.96246	0.96327
1.8	0.96407	0.96485	0.96562	0.96638	0.96712	0.96784	0.96856	0.96926	0.96995	0.97062
1.9	0.97128	0.97193	0.97257	0.97320	0.97381	0.97441	0.97500	0.97558	0.97615	0.97670

2.0	0.97725	0.97778	0.97831	0.97882	0.97932	0.97982	0.98030	0.98077	0.98124	0.98169
2.1	0.98214	0.98257	0.98300	0.98341	0.98382	0.98422	0.98461	0.98500	0.98537	0.98574
2.2	0.98610	0.98645	0.98679	0.98713	0.98745	0.98778	0.98809	0.98840	0.98870	0.98899
2.3	0.98928	0.98956	0.98983	0.99010	0.99036	0.99061	0.99086	0.99111	0.99134	0.99158
2.4	0.99180	0.99202	0.99224	0.99245	0.99266	0.99286	0.99305	0.99324	0.99343	0.99361

2.5	0.99379	0.99396	0.99413	0.99430	0.99446	0.99461	0.99477	0.99492	0.99506	0.99520
2.6	0.99534	0.99547	0.99560	0.99573	0.99585	0.99598	0.99609	0.99621	0.99632	0.99643
2.7	0.99653	0.99664	0.99674	0.99683	0.99693	0.99702	0.99711	0.99720	0.99728	0.99736
2.8	0.99744	0.99752	0.99760	0.99767	0.99774	0.99781	0.99788	0.99795	0.99801	0.99807
2.9	0.99813	0.99819	0.99825	0.99831	0.99836	0.99841	0.99846	0.99851	0.99856	0.99861

3.0	0.99865	0.99869	0.99874	0.99878	0.99882	0.99886	0.99889	0.99893	0.99896	0.99900

[3]

Complementary cumulative[edit]

This table gives a probability that a statistic is greater than Z.

z	+0.00	+0.01	+0.02	+0.03	+0.04	+0.05	+0.06	+0.07	+0.08	+0.09
0.0	0.50000	0.49601	0.49202	0.48803	0.48405	0.48006	0.47608	0.47210	0.46812	0.46414
0.1	0.46020	0.45620	0.45224	0.44828	0.44433	0.44034	0.43640	0.43251	0.42858	0.42465
0.2	0.42070	0.41683	0.41294	0.40905	0.40517	0.40129	0.39743	0.39358	0.38974	0.38591
0.3	0.38209	0.37828	0.37448	0.37070	0.36693	0.36317	0.35942	0.35569	0.35197	0.34827
0.4	0.34458	0.34090	0.33724	0.33360	0.32997	0.32636	0.32276	0.31918	0.31561	0.31207

0.5	0.30854	0.30503	0.30153	0.29806	0.29460	0.29116	0.28774	0.28434	0.28096	0.27760
0.6	0.27425	0.27093	0.26763	0.26435	0.26109	0.25785	0.25463	0.25143	0.24825	0.24510
0.7	0.24196	0.23885	0.23576	0.23270	0.22965	0.22663	0.22363	0.22065	0.21770	0.21476
0.8	0.21186	0.20897	0.20611	0.20327	0.20045	0.19766	0.19489	0.19215	0.18943	0.18673
0.9	0.18406	0.18141	0.17879	0.17619	0.17361	0.17106	0.16853	0.16602	0.16354	0.16109

1.0	0.15866	0.15625	0.15386	0.15151	0.14917	0.14686	0.14457	0.14231	0.14007	0.13786
1.1	0.13567	0.13350	0.13136	0.12924	0.12714	0.12507	0.12302	0.12100	0.11900	0.11702
1.2	0.11507	0.11314	0.11123	0.10935	0.10749	0.10565	0.10383	0.10204	0.10027	0.09853
1.3	0.09680	0.09510	0.09342	0.09176	0.09012	0.08851	0.08692	0.08534	0.08379	0.08226
1.4	0.08076	0.07927	0.07780	0.07636	0.07493	0.07353	0.07215	0.07078	0.06944	0.06811

1.5	0.06681	0.06552	0.06426	0.06301	0.06178	0.06057	0.05938	0.05821	0.05705	0.05592
1.6	0.05480	0.05370	0.05262	0.05155	0.05050	0.04947	0.04846	0.04746	0.04648	0.04551
1.7	0.04457	0.04363	0.04272	0.04182	0.04093	0.04006	0.03920	0.03836	0.03754	0.03673
1.8	0.03593	0.03515	0.03438	0.03362	0.03288	0.03216	0.03144	0.03074	0.03005	0.02938
1.9	0.02872	0.02807	0.02743	0.02680	0.02619	0.02559	0.02500	0.02442	0.02385	0.02330

2.0	0.02275	0.02222	0.02169	0.02118	0.02068	0.02018	0.01970	0.01923	0.01876	0.01831
2.1	0.01786	0.01743	0.01700	0.01659	0.01618	0.01578	0.01539	0.01500	0.01463	0.01426
2.2	0.01390	0.01355	0.01321	0.01287	0.01255	0.01222	0.01191	0.01160	0.01130	0.01101
2.3	0.01072	0.01044	0.01017	0.00990	0.00964	0.00939	0.00914	0.00889	0.00866	0.00842
2.4	0.00820	0.00798	0.00776	0.00755	0.00734	0.00714	0.00695	0.00676	0.00657	0.00639

2.5	0.00621	0.00604	0.00587	0.00570	0.00554	0.00539	0.00523	0.00508	0.00494	0.00480
2.6	0.00466	0.00453	0.00440	0.00427	0.00415	0.00402	0.00391	0.00379	0.00368	0.00357
2.7	0.00347	0.00336	0.00326	0.00317	0.00307	0.00298	0.00289	0.00280	0.00272	0.00264
2.8	0.00256	0.00248	0.00240	0.00233	0.00226	0.00219	0.00212	0.00205	0.00199	0.00193
2.9	0.00187	0.00181	0.00175	0.00169	0.00164	0.00159	0.00154	0.00149	0.00144	0.00139

3.0	0.00135	0.00131	0.00126	0.00122	0.00118	0.00114	0.00111	0.00107	0.00104	0.00100

5.中心極限定理

中心極限定理目錄
1.計算機模擬-中心極限理論
2.中心極限理論與中性突變（進化論）
3.中心極限理論與職場面試

簡易教程
https://www.youtube.com/watch?v=LVFC2f9kHq4

測試隨機數的網站
https://www.random.org/dice/?num=6

概念

隨機扔6個骰子

把數據存入excel表格

繪製成圖，呈現正態分佈

1.計算機模擬-中心極限理論
靈感來源於兩顆骰子中心極限理論是數學和機率學的基石，今天有機會能計算機模擬和輔助證實，是我榮幸。

用兩顆骰子建模，是中心極限定理最簡單模型，能夠這樣解釋，中間數出現頻率最高，由於多個因素可隨機組合成大數，例如投擲兩顆骰子，7能夠由6+1,2+5,4+3組成， 3只能由1+2組成，11只能由5+6組成

#coding=utf-8
import random,os,statistics
import matplotlib.pyplot as plt
from matplotlib.font_manager import FontProperties
plt.figure(20)
#設置中文字體
font=FontProperties(fname=r"c:\windows\fonts\simsun.ttc",size=15)

#骰子選數範圍從1-6
number_list=[1,2,3,4,5,6]
#n試驗次數
n=1000
#length 表示樣本數
length=2

def Random_number(number_list):
    """Return one randomly chosen element of number_list, using random.SystemRandom."""
    return random.SystemRandom().choice(number_list)

# Build a list of random numbers; length is the sample size.
def Random_list(length):
    """Return `length` values drawn with Random_number from the module-level number_list."""
    return [Random_number(number_list) for _ in range(length)]

'''
Random_list(10)
Out[22]: [3, 1, 2, 3, 4, 6, 4, 4, 2, 1]
'''

# n1: number of trials; length: sample size per trial.
def Mean_list(length,n1):
    """Return the means of n1 random samples, each of size `length`."""
    return [statistics.mean(Random_list(length)) for _ in range(n1)]
'''
Mean_list(10)
Out[26]: [4.0, 3.5, 6.0, 4.5, 4.0, 4.0, 5.0, 4.0, 5.0, 2.0]
'''

# Generate the sample means from the configured sample size (length) and
# trial count (n), so the histogram shows the same experiment that the
# title describes. The call used hard-coded values (30, 10000) before.
mean_list=Mean_list(length,n)

# Histogram of the sample means
plt.hist(mean_list)

titleValue="central limit theorem\n n=%d,length=%d" %(n,length)
# Axis labels are Chinese ("mean" / "frequency") and need the configured font.
plt.xlabel("平均值",fontproperties=font)
plt.ylabel("頻率",fontproperties=font)
plt.title(titleValue)
plt.show()

樣本乘積不符合中心極限定理，圖像不是正態分佈

#coding=utf-8
import random,os,statistics
import matplotlib.pyplot as plt
from matplotlib.font_manager import FontProperties
plt.figure(20)
#設置中文字體
font=FontProperties(fname=r"c:\windows\fonts\simsun.ttc",size=15)

#骰子選數範圍從1-6
number_list=[1,2,3,4,5,6]
#n試驗次數
n=100
#length 表示樣本數
length=5

def Random_number(number_list):
    """Choose one element of number_list with a random.SystemRandom generator."""
    generator=random.SystemRandom()
    return generator.choice(number_list)

# Build a list of random numbers; length is the sample size.
def Random_list(length):
    """Draw from the module-level number_list `length` times and return the draws as a list."""
    draws=(Random_number(number_list) for _ in range(length))
    return list(draws)

'''
Random_list(10)
Out[22]: [3, 1, 2, 3, 4, 6, 4, 4, 2, 1]
'''

# n1: number of trials; length: sample size per trial.
# Collects the sample means.
def Mean_list(length,n1):
    """Return a list holding the mean of each of n1 random samples of size `length`."""
    samples=(Random_list(length) for _ in range(n1))
    return [statistics.mean(sample) for sample in samples]
'''
Mean_list(10)
Out[26]: [4.0, 3.5, 6.0, 4.5, 4.0, 4.0, 5.0, 4.0, 5.0, 2.0]
'''

def Multiply(list1):
    """Return the product of all elements of list1 (1 for an empty list)."""
    product=1
    for factor in list1:
        product=product*factor
    return product

# n1: number of trials; length: sample size per trial.
# Collects the sample products.
def Multiply_list(length,n1):
    """Return the product of each of n1 random samples of size `length`."""
    return [Multiply(Random_list(length)) for _ in range(n1)]

# Generate the sample products: n trials, each with `length` draws.
multiply_list=Multiply_list(length,n)

# Histogram of the sample products
plt.hist(multiply_list)

titleValue="central limit theorem\n n=%d,length=%d" %(n,length)
# The plotted quantity is the product of each sample, so the x axis is
# labelled "乘積" (product); it was labelled "平均值" (mean) before.
plt.xlabel("乘積",fontproperties=font)
plt.ylabel("頻率",fontproperties=font)
plt.title(titleValue)
plt.show()

樣本總和分佈也呈現中心極限定理，這能夠推斷多個基因值疊加，也符合正態分佈，進一步支持中性突變定理

#coding=utf-8
import random,os,statistics
import matplotlib.pyplot as plt
from matplotlib.font_manager import FontProperties
plt.figure(20)
#設置中文字體
font=FontProperties(fname=r"c:\windows\fonts\simsun.ttc",size=15)

#骰子選數範圍從1-6
number_list=[1,2,3,4,5,6]
#n試驗次數
n=10000
#length 表示樣本數
length=30

def Random_number(number_list):
    """Return a random element of number_list; the choice is made by random.SystemRandom."""
    picked=random.SystemRandom().choice(number_list)
    return picked

# Build a list of random numbers; length is the sample size.
def Random_list(length):
    """Return a list of `length` random picks from the module-level number_list."""
    sample=[Random_number(number_list) for _pick in range(length)]
    return sample

'''
Random_list(10)
Out[22]: [3, 1, 2, 3, 4, 6, 4, 4, 2, 1]
'''

# n1: number of trials; length: sample size per trial.
def Add_list(length,n1):
    """Return the sum of each of n1 random samples of size `length`."""
    return [sum(Random_list(length)) for _ in range(n1)]
'''
Mean_list(10)
Out[26]: [4.0, 3.5, 6.0, 4.5, 4.0, 4.0, 5.0, 4.0, 5.0, 2.0]
'''

# Generate the sample sums: n trials, each with `length` draws.
total_list=Add_list(length,n)

# Histogram of the sample sums
plt.hist(total_list)

titleValue="central limit theorem\n n=%d,length=%d" %(n,length)
# The plotted quantity is the sum of each sample, so the x axis is
# labelled "總和" (sum); it was labelled "平均值" (mean) before.
plt.xlabel("總和",fontproperties=font)
plt.ylabel("頻率",fontproperties=font)
plt.title(titleValue)
plt.show()

2.中心極限理論與中性突變

中間數出現頻率最高，最小數和最大數出現頻率最低。這和進化論中的中性突變很相似。
達爾文進化論有侷限性，在分子層面發生的突變，若是不考慮對生殖不利的話，基本上都是無所謂有利仍是不利的「中性突變」，有利的突變其實很是少，簡直能夠忽略不計。
Neutral mutations are changes in DNA sequence that are neither beneficial nor detrimental to the ability of an organism to survive and reproduce.

3.中心極限理論與職場面試

中心極限理論一樣適用於職場面試。（排除關係戶和考試排名算分因素）
假如一個部門經理招1我的，有10個面試者。部門經理要從10個面試者中選取1位。
能力太差的不能勝任工做，pass。能力太強的他hold不住，擔憂之後飯碗被翹或擔憂此人嫌棄此崗位而跳槽，也pass。
面試機率最大的能力居中的面試者，平庸的人部門經理既能把控，又能勝任工做。

因此我推測，大多數公司裏能力超強的人和能力超弱的人不會太多，只佔正態分佈兩端（低機率）。能力居中的人佔大多數。不相信的能夠去作調查問卷。

因此想去社工一個公司，就裝得能力通常但又能勝任工做，這樣混過面試官機率最高。。。。。
固然實際狀況中衆多因素須要考慮，不能一律而論。例如長相，關係戶，考試分數排名，家住地址等等。。。。。

總結
萬物皆有數，天然現象皆可建模，近似推導，但又不能準確模擬，由於參與因素實在太多了。且衆多因素相互交錯影響，不停變化，這形成了不可預測性。這就是說數字便可推算也不可預測。聽起來是否是有點矛盾。。。吃飯去了。。。

6.大數定理

樣本量越大，樣本平均數越接近整體平均數

一、大數法則
一位數學家調查發現，歐洲各地男嬰與女嬰的出生比例是22:21，只有巴黎是25:24，這極小的差異使他決心去查個究竟。最後發現，當時的巴黎的風尚是重女輕男，有些人會丟棄生下的男嬰，通過一番修正後，依然是22:21。中國的歷次人口普查的結果也是22:21。

人口比例所體現的，就是大數法則。
大數法則（Law of large numbers）又稱「大數定律」或「平均法則」。在隨機事件的大量重複出現中，每每呈現幾乎必然的規律，這類規律就是大數法則。在試驗不變的條件下，重複試驗屢次，隨機事件出現的頻率近似於它的機率。
大數法則反映了這世界的一個基本規律：在一個包含衆多個體的大羣體中，因爲偶然性而產生的個體差別，着眼在一個個的個體上看，是雜亂無章、毫無規律、難於預測的。但因爲大數法則的做用，整個羣體卻能呈現某種穩定的形態。

花瓶是由分子組成，每一個分子都不規律地劇烈震動。你可曾見過一隻放在桌子上的花瓶，忽然本身跳起來？
電流是由電子運動造成的，每一個電子的行爲雜亂而不可預測，但總體看呈現一個穩定的電流強度。
一個封閉容器中的氣體，它包含大量的分子，它們各自在每時每刻的位置、速度和方向，都以一種偶然的方式在變化着，但容器中的氣體仍能保有一個穩定的壓力和溫度。

某我的乘飛機遇難，機率不可預料，對於他我的來講，飛機失事具備隨機性。
可是對每一年100萬人次全部乘機者而言，這裏的100萬人能夠理解這100萬次的重複試驗，其中，總有10人死於飛行事故。那麼根據大數法則，乘飛機出事故的機率大約爲十萬分之一。
這就爲保險公司收取保險費提供了理論依據。對我的來講，出險是不肯定的，對保險公司來講，衆多的保單出險的機率是肯定的。

根據大數法則的定律，承保危險的單位越多，損失機率的誤差越小，反之，承保危險的單位越少，損失機率的誤差越大。所以，保險公司運用大數法則就能夠比較精確地預測危險，合理保險費率。

二、小刀鋸大樹
賭客久賭必輸的另外一個祕密，即大數法則。

賭王何鴻燊剛剛接手葡京賭場的時候，業務蒸蒸日上。
賭王居安思危，請教「賭神」葉漢：「爲何這些賭客老是輸，久而久之他們不來賭怎麼辦？」
葉漢笑道：「這世界天天都死人，你可見這世上少人？」

葉漢的回答甚妙，道出了一條不管是保險公司、賭場仍是騙徒，都信仰的法則:大數法則。
賭場本質上是一種溫和的「機率場」，機率法則很是明顯。一直玩下去，大數法則的做用就會日益顯現出來。
莊家在規則上佔有少量優點，玩的次數越多，這種優點越能顯現出來。

久賭神仙輸，賭聖也不行。
一天，一位沙特王子入住葡京酒店。
王子找到賭王，說：我就和你玩一把擲硬幣。出正面我給你50億美圓，出反面你的賭場歸我。
賭王呵呵一笑：這個遊戲當然公平，但不符合咱們博彩業的行事法則。
咱們開賭場不作一錘子買賣，而是小刀鋸大樹。若是你真的想玩，咱們就玩擲骰子，1000下定輸贏。你贏了，能夠把個人產業拿走，我贏了，只收你20億。
沙特王子無奈，只好退出賭局。
這個故事是虛構的，旨在說明大數法則之於賭場的意義。

開賭場不作一錘子買賣，而是「小刀鋸大樹」。
因此，賭場最歡迎的是斤斤計較、想碰一下運氣的散客，他們雖然下注謹慎，卻構成了龐大的行爲基數。
這種客人會給賭場老闆帶來幾乎線性的穩定收益，是賭場最穩定的收入來源，這是大數法則在起做用。

還有一種是一擲千金、豪氣干雲的大賭客，他們的下注額若在賭場的風險控制範圍，也很難從賭場贏錢，會成爲賭場的VIP客戶。
假若有一個超級賭客，好比上面虛構故事中的沙特王子。他的賭注超過了普通賭客的千倍萬倍，這會致使賭場收益的大幅震盪，極端狀況下可能致使賭場破產。
所以，全世界全部賭場都會設定最高的投注限額。賭場設最低及最高的投注限額，即使「新郎行運一條龍」的事故發生，也不至於讓賭場虧太多。這樣，賭場老闆就能夠安心睡覺了。
全部的VIP加起來，等於莊家和客人玩了一場長期遊戲，大數法則依然有效。

三、「撞騙」的數理依據
你是否收到過這類短信：
請直接把錢打到工商銀行卡號6220219 ***

這叫「撞騙」，是一種傳統騙術。版本甚多，好比寄中獎信、打中獎電話、發電子郵件。
也就是騙子像沒頭蒼蠅同樣亂撞，「有棗沒棗打一杆子」或許能「瞎貓撿個死老鼠」。
是否是以爲騙子很蠢？但騙徒的行爲倒是合乎統計原理的，在數理上是被支持的。

只要發出的短信足夠多，其成功率很是穩定，合乎大數法則。
福建的某個小鎮，衆多鄉親都從事這個行當，短信羣發器在這個偏遠小鎮很是普及。
當警察抓獲了這批刁民後，奇怪的是，過了很長時間了，竟然還有人不斷地往查獲的卡上匯錢。

有人曾作過統計，相似這種垃圾短信，每發出一萬條，上當的人有七到八個，成功率很是穩定。人過一百，形形色色。一萬我的裏面，總會有幾個「人精」，幾個笨蛋，這是能夠肯定的。
究其根源，都是因爲大數法則的做用。在社會、經濟領域中，羣體中個體的情況千差萬別，變化不定。但一些反映羣體的平均指針，在必定時期內能保持穩定或呈現規律性的變化。

大數法則是保險公司、賭場、撞騙的騙徒，賴以存在的基礎

#大數定理模擬

#coding=utf-8

import random,numpy,pylab,matplotlib

# Heads seen in the current round (reset after every round)
count_head=0

# Number of tosses in the first round; doubled after every round
n=2

# Number of rounds
count=25

# Observed head frequency of each round, in round order
list_probabiliy=[]

for i in range(count):
    # Toss the coin n times and count the heads.
    for toss in range(n):
        result=random.choice(['head','tail'])
        if result=='head':
            count_head+=1.0
    # Relative frequency of heads in this round
    mean=count_head/n
    list_probabiliy.append(mean)
    # The next round uses twice as many tosses and a fresh counter.
    n*=2
    count_head=0

def Draw_lawOfLargeNumber(count):
    """Plot the first `count` recorded head frequencies as red dots.

    Reads the module-level list_probabiliy; x is the round index and y the
    frequency recorded for that round.

    Args:
        count: number of rounds to plot.

    Raises:
        IndexError: if count exceeds the number of recorded rounds.
    """
    for i in range(count):
        pylab.plot(i,list_probabiliy[i],'ro')
    pylab.xlabel('x')
    pylab.ylabel('y')
    pylab.title('Law of large number')
    # Tick spacing: 1 on the x axis, 0.1 on the y axis
    pylab.xticks(numpy.arange(0,count,1))
    pylab.yticks(numpy.arange(0.0,1.0,0.1))
    pylab.grid(True)
    # Pad margins so that markers don't get clipped by the axes. This has
    # to be set before show(); set afterwards, it does not change the
    # figure that was already displayed.
    pylab.margins(0.5)
    pylab.show()

實驗次數越多，機率越接近平均機率（指望值）

7. 二項式分佈與正態分佈圖比較

#coding=utf-8

#繪圖模板

#1.正態分佈

import math,pylab,matplotlib,numpy,statistics_functions,Binomial_distribution,normal_distribution

def Draw_normal_distribution(u,q):
    """Plot the normal density with mean u and standard deviation q as a red line.

    Args:
        u: expected value (mean).
        q: standard deviation.
    """
    # The x range can be changed freely.
    x=numpy.arange(-4,4.1,0.1)
    # Normal_distribution receives the whole array, so a single call
    # yields every y value; looping over x only repeated the same work.
    y=normal_distribution.Normal_distribution(x,u,q)
    pylab.plot(x,y,'r')
    pylab.xlabel('x')
    pylab.ylabel('y')
    pylab.title('Normal distribution')
    pylab.grid(True)
    # Pad margins so that the curve doesn't touch the axes; set before
    # show() so that it applies to the displayed figure.
    pylab.margins(0.01)
    pylab.show()

# Comparison plot of several normal distributions.
# Standard normal distribution: u=0 (mean), q=1 (standard deviation).
def Draw_muliti_normal_distribution():
    """Plot five normal densities with different means and standard deviations on one figure."""
    # The x range can be changed freely.
    x=numpy.arange(-4,4.1,0.1)
    # (mean u, standard deviation q, matplotlib format string), in plot order
    curves=[
        (0,1,'ro'),
        (1,1,'b'),
        (-1,1,'y'),
        (0,0.5,'g'),
        (0,1.5,'m'),
    ]
    for u,q,style in curves:
        # Called with the whole array: one call per curve is enough.
        y=statistics_functions.Normal_distribution(x,u,q)
        pylab.plot(x,y,style)
    pylab.xlabel('x\nMade by Toby')
    pylab.ylabel('y')
    # "\n" starts a new line inside the title.
    pylab.title('Normal distribution\n(r:u,q=0,1;b:u,q=1,1;y:u,q=-1,1;g:u,q=0,0.5,m:u,q=0,1.5)')
    pylab.grid(True)
    # Pad margins so that markers don't get clipped by the axes; set
    # before show() so that it applies to the displayed figure.
    pylab.margins(0.01)
    pylab.show()

# Compare the binomial distribution with its normal approximation.
def Compare_normal_binomial_distribution(n=6,p=0.5):
    """Plot Binomial(n, p) probabilities (blue dots) against a normal density (red dots).

    Args:
        n: number of trials; points are plotted at x = 0..n.
        p: success probability of a single trial.
    """
    x_array=numpy.arange(0,n+1,1)
    # The approximating normal uses the binomial mean n*p. The mean of the
    # support 0..n (= n/2), used previously, only matches when p = 0.5.
    u=n*p
    # NOTE(review): Deviation_Binomial is defined in the project module
    # Binomial_distribution, not shown here; presumably sqrt(n*p*(1-p)).
    q=Binomial_distribution.Deviation_Binomial(n,p)
    for x in x_array:
        y=normal_distribution.Normal_distribution(x,u,q)
        pylab.plot(x,y,'ro')
    for x1 in x_array:
        y1=Binomial_distribution.Binomial_distribution(n,x1,p)
        pylab.plot(x1,y1,'bo')
    pylab.xticks(numpy.arange(0,n+1,1))
    pylab.yticks(numpy.arange(0.0,0.3,0.1))
    pylab.xlabel('x')
    pylab.ylabel('y')
    pylab.title('n=%d \nRed:normal vs Blue:Binomial' %(n))
    pylab.grid(True)
    # Pad margins so that markers don't get clipped by the axes; set
    # before show() so that it applies to the displayed figure.
    pylab.margins(0.01)
    pylab.show()

8.你的數據是正態分佈嗎

統計不少分析是基於正態分佈數據，若是數據不呈現正態分佈就要出錯
爲了不出錯，首先讓你的數據可視化

符合正態分佈的箱型圖

符合正態分佈的p-p圖

不符合正態分佈的箱型圖

不符合正態分佈的Ｑ－Ｑ圖

python信用評分卡建模（附代碼，博主錄製）

https://study.163.com/course/introduction.htm?courseId=1005214003&utm_campaign=commission&utm_source=cp-400000000398149&utm_medium=share

相關標籤/搜索

每日一句

每一个你不满意的现在，都有一个你没有努力的曾经。