R:dplyr包

一、filter()函數

filter(.data, ...),參數很簡單,只有data,即要操作的數據對象,其餘都是數據操作條件。

> x<-data.frame(id=1:6,name=c("wang","zhang","li","chen","zhao","song"),shuxue=c(89,85,68,79,96,53),yuwen=c(77,68,86,87,92,63))
> dim(x)    #查看數據框行列屬性
[1] 6 4
> x
  id  name shuxue yuwen
1  1  wang     89    77
2  2 zhang     85    68
3  3    li     68    86
4  4  chen     79    87
5  5  zhao     96    92
6  6  song     53    63
> x1<-filter(x,name=="zhang")
> x1
  id  name shuxue yuwen
1  2 zhang     85    68
> x2<-filter(x,shuxue>60,yuwen<90)    #可以進行多條件篩選,條件可以用逗號隔開(等同於&),也可以用連接符&或|進行連接
> x2
  id  name shuxue yuwen
1  1  wang     89    77
2  2 zhang     85    68
3  3    li     68    86
4  4  chen     79    87

二、arrange()函數

跟filter()類似,arrange()的參數也很簡單,除了data外,餘下的是排序條件。

> x3<-arrange(x,name)    #按照字母的順序進行排序
> x3
  id  name shuxue yuwen
1  4  chen     79    87
2  3    li     68    86
3  6  song     53    63
4  1  wang     89    77
5  2 zhang     85    68
6  5  zhao     96    92
> x4<-arrange(x,shuxue,desc(yuwen))    #先按照shuxue正序排序,然後按照yuwen倒序排序
> x4
  id  name shuxue yuwen
1  6  song     53    63
2  3    li     68    86
3  4  chen     79    87
4  2 zhang     85    68
5  1  wang     89    77
6  5  zhao     96    92

三、select()函數

參數主要在於如何添加條件。配合select()進行使用的函數有:

starts_with()
ends_with()
contains()
matches()
num_range()
one_of()
everything()

> x$shengwu<-c(85,68,78,68,98,96)
> x
  id  name shuxue yuwen shengwu
1  1  wang     89    77      85
2  2 zhang     85    68      68
3  3    li     68    86      78
4  4  chen     79    87      68
5  5  zhao     96    92      98
6  6  song     53    63      96
> select(x,name)    #選取單列
   name
1  wang
2 zhang
3    li
4  chen
5  zhao
6  song
> select(x,starts_with("s"))    #選取以「s」開頭的列
  shuxue shengwu
1     89      85
2     85      68
3     68      78
4     79      68
5     96      98
6     53      96
> select(x,matches(".e."))    #匹配中間含有「e」的列
  yuwen shengwu
1    77      85
2    68      68
3    86      78
4    87      68
5    92      98
6    63      96
> select(x,ends_with("e"))    #選取以「e」結尾的列
   name shuxue
1  wang     89
2 zhang     85
3    li     68
4  chen     79
5  zhao     96
6  song     53
> select(x,contains("e"))    #匹配全部名稱中包含「e」的列
   name shuxue yuwen shengwu
1  wang     89    77      85
2 zhang     85    68      68
3    li     68    86      78
4  chen     79    87      68
5  zhao     96    92      98
6  song     53    63      96
> select(x,-name)    #在名字前面加個「-」,表示除了這一列之外,其餘的列都顯示
  id shuxue yuwen shengwu
1  1     89    77      85
2  2     85    68      68
3  3     68    86      78
4  4     79    87      68
5  5     96    92      98
6  6     53    63      96

四、summarise()函數

> x
  id  name shuxue yuwen
1  1  wang     89    77
2  2 zhang     85    68
3  3    li     68    86
4  4  chen     79    87
5  5  zhao     96    92
6  6  song     53    63
> summarise(x,sum(shuxue))
  sum(shuxue)
1         470
> summarise(group_by(x,name),sum(shuxue))    #這裏由於每一個name對應的shuxue只有一個值,因此sum的結果沒變化
    name `sum(shuxue)`
  <fctr>         <dbl>
1   chen            79
2     li            68
3   song            53
4   wang            89
5  zhang            85
6   zhao            96
> summarise(group_by(x,name),sum(shuxue,yuwen))    #shuxue和yuwen求和後的數據
    name `sum(shuxue, yuwen)`
  <fctr>                <dbl>
1   chen                  166
2     li                  154
3   song                  116
4   wang                  166
5  zhang                  153
6   zhao                  188
> arrange(summarise(group_by(x,name),qiuhe=sum(shuxue,yuwen)),desc(qiuhe))    #配合上前面的函數,就可以對求和後的數據進行排序
    name qiuhe
  <fctr> <dbl>
1   zhao   188
2   chen   166
3   wang   166
4     li   154
5  zhang   153
6   song   116
> summarise(x,mean(shuxue),sd(shuxue))    #求均值和標準差(sd()返回的是標準差,方差請用var())
  mean(shuxue) sd(shuxue)
1     78.33333   15.61623
> summarise(group_by(x,name),a=n())    #配合n()可以對每一個因子的出現次數進行統計
    name     a
  <fctr> <int>
1   chen     1
2     li     1
3   song     1
4   wang     1
5  zhang     1
6   zhao     1
> summarise_if(x,is.numeric,mean)    #對全部是數值的列求平均值
   id   shuxue    yuwen
1 3.5 78.33333 78.83333
> summarise_at(x,c(3,4),mean)    #對特定的列求平均值
    shuxue    yuwen
1 78.33333 78.83333
> summarise_each(x[c(1,3,4)],funs(mean,sum))    #使用funs,對數據進行多重聚合統計
  id_mean shuxue_mean yuwen_mean id_sum shuxue_sum yuwen_sum
1     3.5    78.33333   78.83333     21        470       473

五、between()函數

> a<-10:30
> between(a,5,15)    #between函數返回結果是邏輯值,即哪些數據滿足條件(落在閉區間[5,15]內),就標記爲TRUE
 [1]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
> a[between(a,5,15)]    #通過加中括號的形式,把滿足條件的數據取出來
[1] 10 11 12 13 14 15
相關文章
相關標籤/搜索