(相關數據見github)
import pandas as pd
# Relative path to the tab-separated Chipotle data file.
path1 = "./data/chipotle.tsv" # chipotle.tsv
# The file is tab-delimited, hence sep='\t'.
chipo = pd.read_csv(path1, sep = '\t')
chipo.tail() # show the last five rows; head() shows the first five
輸出:
# Print a concise summary of the DataFrame.
chipo.info()
輸出:
# Inspect the size of the dataset.
# chipo.shape[0] alone is the row count and chipo.shape[1] the column count.
chipo.shape  # (rows, columns)
輸出:
(4622, 5)
# Show the column labels.
chipo.columns
輸出:
Index(['order_id', 'quantity', 'item_name', 'choice_description',
'item_price'],
dtype='object')
# Show the row index.
chipo.index
輸出:
RangeIndex(start=0, stop=4622, step=1)
# Group by item_name and total the quantity for each item.
# The aggregation is named by string ('sum') rather than passing the builtin
# sum, which newer pandas versions warn about.
c = (
    chipo[['item_name', 'quantity']]
    .groupby(['item_name'], as_index=False)
    .agg({'quantity': 'sum'})
)
# Largest totals first.
c = c.sort_values(['quantity'], ascending=False)
c.head()
輸出:
# Count the distinct values in the item_name column.
chipo['item_name'].nunique()
輸出:
50
# Tally each choice_description value and show the first five rows of the tally.
chipo['choice_description'].value_counts().head()
輸出:
[Diet Coke] 134
[Coke] 123
[Sprite] 77
[Fresh Tomato Salsa, [Rice, Black Beans, Cheese, Sour Cream, Lettuce]] 42
[Fresh Tomato Salsa, [Rice, Black Beans, Cheese, Sour Cream, Guacamole, Lettuce]] 40
# Sum of the quantity column across all rows.
total_items_orders = chipo['quantity'].sum()
total_items_orders
輸出:
4972
def dollarizer(x):
    """Convert a dollar-prefixed price string to a float.

    Surrounding whitespace and the leading '$' are removed before parsing.
    NOTE(review): the previous version sliced x[1:-1], which presumably
    relied on every value having exactly one trailing character after the
    amount (e.g. '$2.39 ') -- confirm against the raw data.
    """
    return float(x.strip().lstrip('$'))


# Replace the string prices with numeric values so they can be used in arithmetic.
chipo['item_price'] = chipo['item_price'].apply(dollarizer)
# Multiply price by quantity for each row (rounded to 2 decimals), then sum
# over all rows.
chipo['sub_total'] = (chipo['item_price'] * chipo['quantity']).round(2)
chipo['sub_total'].sum()
輸出:
39237.02
# Count the distinct values in the order_id column.
chipo['order_id'].nunique()
輸出:
1834
# Sum sub_total within each order_id, then average those per-order sums.
order_totals = chipo.groupby('order_id')['sub_total'].sum()
order_totals.mean()
輸出:
21.39423118865867
# Count the distinct values in the item_name column.
chipo['item_name'].nunique()
輸出:
50
一、http://pandas.pydata.org/pandas-docs/stable/cookbook.html#cookbook
二、https://www.analyticsvidhya.com/blog/2016/01/12-pandas-techniques-python-data-manipulation/