字符串操作通常包括分割、拼接、替換、提取等等,並且常與正則表達式配合使用。
strsplit 的返回值是一個列表(list),每個輸入元素對應列表中的一個字符向量。
strsplit(x, split, fixed = FALSE, perl = FALSE, useBytes = FALSE)
x <- c(as = "asfef", qu = "qwerty", "yuiop[", "b", "stuff.blah.yech") strsplit(x, "e")
運行結果:
$`as` $qu [[3]] [[4]] [[5]] [1] "asf" "f" [1] "qw" "rty" [1] "yuiop[" [1] "b" [1] "stuff.blah.y" "ch"
stringr 包中的 str_split 函數與標準庫中的 strsplit 用法類似,同樣默認返回列表。
str_split(string, pattern, n = Inf, simplify = FALSE)
library(stringr) fruits <- c( "apples and oranges and pears and bananas","pineapples and mangos and guavas") str_split(fruits, " and ")
運行結果:
[[1]] [[2]] [1] "apples" "oranges" "pears" "bananas" [1] "pineapples" "mangos" "guavas"
paste 和 paste0 之間的區別在於默認分隔符:paste 默認用一個空格(sep = " ")連接各參數,paste0 則不加任何分隔符(相當於 sep = "")。
paste (..., sep = " ", collapse = NULL) paste0(..., collapse = NULL)
paste0(1:12, c("st", "nd", "rd", rep("th", 9))) # 結果 [1] "1st" "2nd" "3rd" "4th" "5th" "6th" "7th" "8th" "9th" "10th" "11th" "12th" paste(1:12, c("st", "nd", "rd", rep("th", 9))) # 結果 "1 st" "2 nd" "3 rd" "4 th" "5 th" "6 th" "7 th" "8 th" "9 th" "10 th" "11 th" "12 th" paste(1:12, c("st", "nd")) # 結果 [1] "1 st" "2 nd" "3 st" "4 nd" "5 st" "6 nd" "7 st" "8 nd" "9 st" "10 nd" "11 st" "12 nd" paste0(1:12, c("st", "nd")) # 結果 [1] "1st" "2nd" "3st" "4nd" "5st" "6nd" "7st" "8nd" "9st" "10nd" "11st" "12nd" paste("I","love","you") # 結果 [1] "I love you" paste0("I","love","you") # 結果 [1] "Iloveyou"
str_c(..., sep = "", collapse = NULL)
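str_c 和 paste0 函數一樣,默認不使用分隔符(sep = "")。注意:二者對 NA 的處理不同(str_c 遇到 NA 會返回 NA);另外較新版本的 stringr(1.5.0 起)採用 tidyverse 循環規則,長度不爲 1 且互不相等的向量會報錯,請以所安裝版本的文檔爲準。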
str_c(1:12, c("st", "nd", "rd", rep("th", 9))) # 結果 [1] "1st" "2nd" "3rd" "4th" "5th" "6th" "7th" "8th" "9th" "10th" "11th" "12th" str_c(1:12, c("st", "nd")) # 結果 [1] "1st" "2nd" "3st" "4nd" "5st" "6nd" "7st" "8nd" "9st" "10nd" "11st" "12nd" str_c("I","love","you") # 結果 [1] "Iloveyou"
chartr(old, new, x)
chartr(old = "a",new = "c",c("a123","a15","a23")) # 結果 [1] "c123" "c15" "c23" chartr(old = "a12345",new = "c6789101456",c("a123","a15","a23")) # 結果 [1] "c678" "c61" "c78" # 拿a15說明,a在old中下標爲1,便替換爲new[1]。1在old中下標爲2,因此替換爲new[2]。5在old中下標爲6,因此替換爲new[6],因此最後a15替換爲c61。 chartr(old = "a1",new = "c4",c("a123","a15","a23")) # 結果 [1] "c423" "c45" "c23"
sub 可以替換字符串,但是 sub() 函數不會修改原字符串,所以需要創建一個變量來保存替換後的結果。另外,sub 函數只會替換每個元素中匹配到的第一處。
sub(pattern, replacement, x, ignore.case = FALSE, perl = FALSE, fixed = FALSE, useBytes = FALSE)
str <- "Now is the time " sub(" +$", " 12:00", str) #正則表達式,即把str尾部的空格替換爲 12:00 # 結果 "Now is the time 12:00" # 此時我們只是調用了sub函數,卻沒有保存這個結果,而且該函數不會修改原字符串。 print(str) "Now is the time " sub("Now","what",str) # 結果 [1] "what is the time " sub(pattern = "nd",replacement = "ND",c("andbndcnd","sndendfund")) # 結果,字符串元素中有很多"nd",但是只會替換第一個"nd"。 [1] "aNDbndcnd" "sNDendfund"
gsub(pattern, replacement, x, ignore.case = FALSE, perl = FALSE, fixed = FALSE, useBytes = FALSE)
gsub() 函數和 sub 用法一樣,不過 gsub() 會替換所有匹配到的字符,而不只是第一處。
gsub(pattern = "nd",replacement = "ND",c("andbndcnd","sndendfund")) # 結果 [1] "aNDbNDcND" "sNDeNDfuND"
這兩個函數(substr 和 substring)可以提取、替換字符串;使用賦值形式(<-)時,會直接修改原字符串變量。
substr(x, start, stop) <- value substring(text, first, last = 1000000L) <- value
shopping_list <- c("apples x4", "bag of flour", "bag of sugar", "milk x2") substr(shopping_list,1,3) <- "AAA" # 結果 [1] "AAAles x4" "AAA of flour" "AAA of sugar" "AAAk x2" substring(shopping_list,1) <- "AAA" # 結果 [1] "AAAles x4" "AAA of flour" "AAA of sugar" "AAAk x2" substr(shopping_list,1,20) <- "yesterday once more" # 結果 [1] "yesterday" "yesterday on" "yesterday on" "yesterd" substring(shopping_list,1) <- "yesterday once more" # 結果 [1] "yesterday" "yesterday on" "yesterday on" "yesterd"
第三方包中的str_replace和str_replace_all
str_replace(string, pattern, replacement) # 和sub同樣,只替換第一個匹配字符 str_replace_all(string, pattern, replacement) # 和gsub同樣,替換全部匹配字符
fruits <- c("one apple", "two pears", "three bananas") str_replace(fruits, "[aeiou]", "-") #正則表達式,即對字符串中的小寫字母a或e或i或o或u,替換爲- # 結果 [1] "-ne apple" "tw- pears" "thr-e bananas" str_replace_all(fruits, "[aeiou]", "-") # 結果 [1] "-n- -ppl-" "tw- p--rs" "thr-- b-n-n-s"
第三方包stringr
str_sub(string, start = 1L, end = -1L, omit_na = FALSE) <- value
shopping_list <- c("apples x4", "bag of flour", "bag of sugar", "milk x2") str_sub(shopping_list,1,3) <- "AAA" # 結果 [1] "AAAles x4" "AAA of flour" "AAA of sugar" "AAAk x2" str_sub(shopping_list,1) <- "AAA" # 結果 [1] "AAA" "AAA" "AAA" "AAA"
substr(x, start, stop) substring(text, first, last = 1000000L)
substr("abcdef", 2, 4) # 結果 "bcd" substring("abcdef", 1:6, 1:6) # 結果 "a" "b" "c" "d" "e" "f"(注意:substr 的返回長度與 x 相同,substr("abcdef", 1:6, 1:6) 只會返回 "a")
第三方包stringr
str_extract(string, pattern) str_extract_all(string, pattern, simplify = FALSE)
shopping_list <- c("apples x4", "bag of flour", "bag of sugar", "milk x2") str_extract(shopping_list, "[a-z]+") # 結果 [1] "apples" "bag" "bag" "milk" str_extract_all(shopping_list, "[a-z]+") # 結果 [[1]] [[2]] [[3]] [[4]] [1] "apples" "x" [1] "bag" "of" "flour" [1] "bag" "of" "sugar" [1] "milk" "x"
第三方包stringr
str_sub(string, start = 1L, end = -1L)
str_sub(shopping_list,1,5) # 結果 [1] "apple" "bag o" "bag o" "milk "
nchar(x, type = "chars", allowNA = FALSE, keepNA = NA) #以字符串爲向量,返回向量元素--字符串的長度組成的向量 nzchar(x, keepNA = FALSE) #快速斷定字符串向量元素是否爲非空值
shopping_list <- c("apples x4", "bag of flour", "bag of sugar", "milk x2") nchar(shopping_list) # 結果 [1] 9 12 12 7 nzchar(shopping_list) # 結果 [1] TRUE TRUE TRUE TRUE
str_count(string, pattern = "")
str_count 不僅可以測定元素長度(pattern 取默認值 "" 時返回每個字符串的字符數),還可以統計某個字符(模式)在每個字符串中出現的次數(注意:返回的是出現次數,而不是下標位置)。
str_count(shopping_list) # 結果 [1] 9 12 12 7 str_count(shopping_list, "a") # 結果,若是不包含則返回0 [1] 1 1 2 0
第三方包stringr
shopping_list <- c("apples x4", "bag of flour", "bag of sugar", "milk x2") str_length(string)
str_length(shopping_list) # 結果 [1] 9 12 12 7
grep(pattern, x, ignore.case = FALSE, perl = FALSE, value = FALSE, fixed = FALSE, useBytes = FALSE, invert = FALSE)
shopping_list <- c("apples x4", "bag of flour", "bag of sugar", "milk x2") grep("apple",shopping_list) # 結果 [1] 1 grep("apple",shopping_list,value = T) # 結果 [1] "apples x4"
grepl(pattern, x, ignore.case = FALSE, perl = FALSE, fixed = FALSE, useBytes = FALSE)
grepl 和 grep 的用法差不多,只是 grepl 返回的是與輸入等長的邏輯向量(每個元素爲 TRUE 或 FALSE)。
grepl("apple",shopping_list) # 結果 [1] TRUE FALSE FALSE FALSE
str_subset(string, pattern, negate = FALSE)
fruit <- c("apple", "banana", "pear", "pinapple") str_subset(fruit, "a") #匹配全部含有a的字符串 # 結果 [1] "apple" "banana" "pear" "pinapple" str_subset(fruit, "^p", negate = TRUE) # 返回全部不以p開頭的字符串 # 結果 [1] "apple" "banana"
str_which(string, pattern, negate = FALSE)
str_which(fruit, "a") # 結果 [1] 1 2 3 4
str_sort(x, decreasing = FALSE, na_last = TRUE, locale = "en", numeric = FALSE, ...)
x <- c("100a10", "100a5", "2b", "2a") str_sort(x) # 結果 [1] "100a10" "100a5" "2a" "2b" str_sort(x, numeric = TRUE) # 結果 [1] "2a" "2b" "100a5" "100a10"