一。導入數據
import pandas as pd

# Read the unemployment-rate CSV, then replace the DATE column with parsed
# datetime values.
unrate = pd.read_csv('unrate.csv')
unrate['DATE'] = pd.to_datetime(unrate['DATE'])

# Preview the first twelve rows.
print(unrate.iloc[:12])
結果以下:
         DATE  VALUE
0  1948-01-01    3.4
1  1948-02-01    3.8
2  1948-03-01    4.0
3  1948-04-01    3.9
4  1948-05-01    3.5
5  1948-06-01    3.6
6  1948-07-01    3.6
7  1948-08-01    3.9
8  1948-09-01    3.8
9  1948-10-01    3.7
10 1948-11-01    3.8
11 1948-12-01    4.0
二。使用Matplotlib庫
import matplotlib.pyplot as plt

# In a Jupyter notebook, run `%matplotlib inline` first to render figures
# inside the notebook.
#
# pyplot provides functions to create, customize, and display a plot.
# Two of them are used here: plt.plot() to create the plot and plt.show()
# to display it. Called without data, the result is an empty set of axes.
plt.plot()
plt.show()
結果以下:
三。插入數據
# Take the first twelve rows and plot VALUE against DATE as a line.
first_twelve = unrate.iloc[:12]
dates, rates = first_twelve['DATE'], first_twelve['VALUE']
plt.plot(dates, rates)
plt.show()
因爲x軸過於緊湊,因此使用旋轉x軸的方法 結果以下。
# Same line plot as before, with the x tick labels tilted 45 degrees so the
# dates are less cramped. See help(plt.xticks) for the other options.
x_dates = first_twelve['DATE']
y_rates = first_twelve['VALUE']
plt.plot(x_dates, y_rates)
plt.xticks(rotation=45)
plt.show()
四。設置x軸y軸說明
# Draw the line first, then decorate the current axes: a title, axis labels,
# and vertical (90 degree) x tick labels.
plt.plot(first_twelve['DATE'], first_twelve['VALUE'])
plt.title('Monthly Unemployment Trends, 1948')
plt.ylabel('Unemployment Rate')
plt.xlabel('Month')
plt.xticks(rotation=90)
plt.show()
五。子圖設置
import matplotlib.pyplot as plt

# Create a figure and place three subplots on a 4-row by 3-column grid.
# add_subplot(rows, cols, index) counts grid slots from 1, left to right and
# top to bottom, so the slots used here are 1, 2 and 6.
fig = plt.figure()
ax1 = fig.add_subplot(4, 3, 1)
ax2 = fig.add_subplot(4, 3, 2)
# The third axes gets its own name; rebinding ax2 here would discard the
# only handle to the subplot in slot 2.
ax3 = fig.add_subplot(4, 3, 6)
plt.show()
六。一個圖表多條曲線。
1.簡單實驗。
# Add the calendar month (1-12) of each DATE as its own column. Plotting
# against MONTH instead of DATE lets separate 12-row slices overlap on a
# shared x axis. (The assignment only needs to run once.)
unrate['MONTH'] = unrate['DATE'].dt.month

fig = plt.figure(figsize=(6, 3))

# Rows 0-11 in red and rows 12-23 in blue, drawn on the same axes.
first_block = unrate[0:12]
second_block = unrate[12:24]
plt.plot(first_block['MONTH'], first_block['VALUE'], c='red')
plt.plot(second_block['MONTH'], second_block['VALUE'], c='blue')
plt.show()
2.使用循環
fig = plt.figure(figsize=(10, 6))
colors = ['red', 'blue', 'green', 'orange', 'black']

# Draw one line per consecutive 12-row slice of `unrate`, giving each slice
# the color at the matching position in `colors`.
for block_number, line_color in enumerate(colors):
    first_row = block_number * 12
    yearly = unrate[first_row:first_row + 12]
    plt.plot(yearly['MONTH'], yearly['VALUE'], c=line_color)

plt.show()
3.設置標籤
fig = plt.figure(figsize=(10, 6))
colors = ['red', 'blue', 'green', 'orange', 'black']

# Pair each year label (1948..1952) with a color; the year's offset from
# 1948 selects which 12-row slice of `unrate` is plotted.
for year, line_color in zip(range(1948, 1953), colors):
    first_row = (year - 1948) * 12
    block = unrate[first_row:first_row + 12]
    plt.plot(block['MONTH'], block['VALUE'], c=line_color, label=str(year))

# loc='best' lets matplotlib choose the legend position; see
# help(plt.legend) for the other placements.
plt.legend(loc='best')
plt.show()
4。設置完整標籤
fig = plt.figure(figsize=(10, 6))
colors = ['red', 'blue', 'green', 'orange', 'black']

# One labelled line per 12-row slice; labels count up from 1948.
for i, line_color in enumerate(colors):
    subset = unrate.iloc[i * 12:(i + 1) * 12]
    plt.plot(
        subset['MONTH'],
        subset['VALUE'],
        c=line_color,
        label=str(1948 + i),
    )

# Finish the chart: legend pinned to the upper-left corner, axis labels,
# and a title.
plt.legend(loc='upper left')
plt.title('Monthly Unemployment Trends, 1948-1952')
plt.xlabel('Month, Integer')
plt.ylabel('Unemployment Rate, Percent')
plt.show()
七。條形圖(某電影評分網站)
1.讀取數據
import pandas as pd

reviews = pd.read_csv('fandango_scores.csv')

# Keep only the film title and the five rating columns used below.
cols = [
    'FILM',
    'RT_user_norm',
    'Metacritic_user_nom',
    'IMDB_norm',
    'Fandango_Ratingvalue',
    'Fandango_Stars',
]
norm_reviews = reviews[cols]

# Preview the first ten rows.
print(norm_reviews.iloc[:10])
2.設置說明
# `arange` must be in scope before it is used below.
from numpy import arange

num_cols = ['RT_user_norm', 'Metacritic_user_nom', 'IMDB_norm',
            'Fandango_Ratingvalue', 'Fandango_Stars']

# Ratings of the row labelled 0, one value per rating column.
# `.loc` replaces `.ix`, which was removed in pandas 1.0.
bar_heights = norm_reviews.loc[0, num_cols].values
bar_positions = arange(5) + 0.75
tick_positions = range(1, 6)

fig, ax = plt.subplots()
# ax.bar draws a vertical bar chart. bar_positions are the left edges of the
# bars (their distance from the origin) and 0.5 is the bar width, so each
# bar spans k-0.25 .. k+0.25 and is centred on tick k. align='edge' is
# spelled out because matplotlib >= 2.0 defaults to align='center', which
# would shift every bar to the left of its tick.
ax.bar(bar_positions, bar_heights, 0.5, align='edge')
ax.set_xticks(tick_positions)
# Label the x ticks with the column names, rotated 45 degrees.
ax.set_xticklabels(num_cols, rotation=45)
ax.set_xlabel('Rating Source')
ax.set_ylabel('Average Rating')
ax.set_title('Average User Rating For Avengers: Age of Ultron (2015)')
plt.show()
3.旋轉x軸 y軸
import matplotlib.pyplot as plt
from numpy import arange

num_cols = ['RT_user_norm', 'Metacritic_user_nom', 'IMDB_norm',
            'Fandango_Ratingvalue', 'Fandango_Stars']

# Ratings of the row labelled 0; in a horizontal chart they become the bar
# lengths. `.loc` replaces `.ix`, which was removed in pandas 1.0.
bar_widths = norm_reviews.loc[0, num_cols].values
bar_positions = arange(5) + 0.75
tick_positions = range(1, 6)

fig, ax = plt.subplots()
# ax.barh is the horizontal counterpart of ax.bar: bar_positions are the
# lower edges of the bars and 0.5 is their thickness, so each bar is centred
# on tick k. align='edge' is spelled out because matplotlib >= 2.0 defaults
# to align='center', which would shift every bar below its tick.
ax.barh(bar_positions, bar_widths, 0.5, align='edge')
ax.set_yticks(tick_positions)
ax.set_yticklabels(num_cols)
# The axis labels swap roles compared with the vertical chart.
ax.set_ylabel('Rating Source')
ax.set_xlabel('Average Rating')
ax.set_title('Average User Rating For Avengers: Age of Ultron (2015)')
plt.show()
八。 散點圖
1。基本散點圖
# Scatter one rating column against another: Fandango_Ratingvalue on the
# x axis, RT_user_norm on the y axis, one point per row of norm_reviews.
fandango_scores = norm_reviews['Fandango_Ratingvalue']
rt_scores = norm_reviews['RT_user_norm']

fig, ax = plt.subplots()
ax.scatter(fandango_scores, rt_scores)
ax.set_ylabel('Rotten Tomatoes')
ax.set_xlabel('Fandango')
plt.show()
2.拆分散點圖
# Switching axes: plot the same pair of columns twice on a tall two-row
# figure, with x and y exchanged in the lower panel.
fig = plt.figure(figsize=(5, 10))
ax1 = fig.add_subplot(2, 1, 1)
ax2 = fig.add_subplot(2, 1, 2)

fandango = norm_reviews['Fandango_Ratingvalue']
rotten = norm_reviews['RT_user_norm']

# Upper panel: Fandango on x, Rotten Tomatoes on y.
ax1.scatter(fandango, rotten)
ax1.set_xlabel('Fandango')
ax1.set_ylabel('Rotten Tomatoes')

# Lower panel: the two axes exchanged.
ax2.scatter(rotten, fandango)
ax2.set_xlabel('Rotten Tomatoes')
ax2.set_ylabel('Fandango')

plt.show()
Ps:仍是呈現很強的相關性的,基本呈直線分佈
九。直方圖
1.讀入數據
import pandas as pd
import matplotlib.pyplot as plt

reviews = pd.read_csv('fandango_scores.csv')

# Keep the film title and the four rating columns used by the histograms
# and box plots that follow.
cols = [
    'FILM',
    'RT_user_norm',
    'Metacritic_user_nom',
    'IMDB_norm',
    'Fandango_Ratingvalue',
]
norm_reviews = reviews[cols]

# Print up to the first 100 rows.
print(norm_reviews.iloc[:100])
2.統計評分個數
# For each rating column, count how many rows carry each distinct score
# (value_counts), then order the counts by score rather than by frequency
# (sort_index).
fandango_distribution = (
    norm_reviews['Fandango_Ratingvalue'].value_counts().sort_index()
)
imdb_distribution = norm_reviews['IMDB_norm'].value_counts().sort_index()

print(fandango_distribution)
print(imdb_distribution)
3.畫直方圖
fig, ax = plt.subplots()

# Histogram of the Fandango ratings restricted to the interval 4-5, split
# into 20 equal-width bins; values outside the range are not counted.
# Dropping `range` and/or `bins` falls back to matplotlib's defaults over
# the full span of the data.
ax.hist(norm_reviews['Fandango_Ratingvalue'], range=(4, 5), bins=20)
plt.show()
4.不一樣的媒體評分圖
# Four stacked histograms, one per rating column, on a tall single-column
# figure. All panels share the same binning (20 bins over 0-5) and the same
# y limits (0-50) so they can be compared directly.
fig = plt.figure(figsize=(5, 20))
ax1, ax2, ax3, ax4 = (fig.add_subplot(4, 1, row) for row in range(1, 5))

panels = [
    (ax1, 'Fandango_Ratingvalue', 'Distribution of Fandango Ratings'),
    (ax2, 'RT_user_norm', 'Distribution of Rotten Tomatoes Ratings'),
    (ax3, 'Metacritic_user_nom', 'Distribution of Metacritic Ratings'),
    (ax4, 'IMDB_norm', 'Distribution of IMDB Ratings'),
]
for axes, column, title in panels:
    axes.hist(norm_reviews[column], bins=20, range=(0, 5))
    axes.set_title(title)
    axes.set_ylim(0, 50)

plt.show()
5.四分圖
# Box plot of a single rating column, with the y axis fixed to 0-5.
rt_scores = norm_reviews['RT_user_norm']

fig, ax = plt.subplots()
ax.boxplot(rt_scores)
ax.set_ylim(0, 5)
ax.set_xticklabels(['Rotten Tomatoes'])
plt.show()
ps:四分圖就是1/4,2/4,3/4的點是多少,能夠看到大體的範圍
6.四家媒體四分圖
num_cols = ['RT_user_norm', 'Metacritic_user_nom', 'IMDB_norm',
            'Fandango_Ratingvalue']

fig, ax = plt.subplots()
# Passing the 2-D value array draws one box per column, in num_cols order.
ax.boxplot(norm_reviews[num_cols].values)
# Label each box with its column name, written vertically.
ax.set_xticklabels(num_cols, rotation=90)
# Fix the y axis to the 0-5 rating scale.
ax.set_ylim(0, 5)
plt.show()