python字符串處理

時間 2020-01-08

原文原文鏈接

字符串

字符串：不可變有序序列，在python可以使用 "abc" , """abc""" ,'abc' 的形式表示，屬於一種字面常量，python3中字符均屬於Unicode編碼。

字符串能夠被迭代，遍歷，切片，索引
字符串一旦定義不可增長、刪除、修改自身元素。對字符串的修改操作實質是建立一個新的字符串對象重新賦值
```
# Iterate over the string one character at a time.
s = " this is a string\n"
for ch in s:
    print(ch)
# Indexing and slicing: a slice is written start:stop with a colon;
# a comma would build a tuple index and raise TypeError on a str.
print(s[4], s[2:5], s[2:-3])
```

字符串遍歷生成列表，元組

lis = list(s)                 # 遍歷 s 中的每個元素生成，組成一個列表返回
tp = tuple(s)              # 遍歷 s 中的每個元素生成，組成一個元組返回

列表，元組生成字符串

# Join the items of a list or tuple into one string with str.join().
lis = ['h', 'e', 'l', 'l', 'o']
# join() walks the iterable and concatenates its items into a new string.
# Note: every item must already be a str, otherwise join() raises TypeError;
# convert non-string items with str() first.
num_list = [1, 2, 3, 4, 5]
# s2 = "".join(num_list)   ==> TypeError: numbers cannot be joined directly
s1 = "".join(map(str, num_list))        # convert every number to str, then join ==> '12345'

s2 = "".join(lis)                       # ==> 'hello'
# Any string can serve as the separator, e.g. ","
s3 = ",".join(lis)                      # ==> 'h,e,l,l,o'
print(s1, s2, s3)

字符串的經常使用方法

字符串的拼接：

返回的是一個全新的字符串對象，s1,s2本身均未發生改變

s3 = s1 + s2

字符串分割

s = "this is a string\n"
s_1 = s.split()         # 以連續空白進行分割，返回lis  ==> ['this', 'is', 'a', 'string']
s_2 = s.split(" ")     # 以單個空格進行分割, \n不是空格 ==>  ['this', 'is', 'a', 'string\n']
s_3 = s.rsplit(" ",maxsplit=2)    # 右邊開始，最大切割次數爲 2次  ==> ['this is', 'a', 'string\n']

# 行切割，splitlines()
s = "firstline \n secondline \r\n thirdline \r"
s_4 = s.splitlines()                   #  ==> ['firstline ', ' secondline ', ' thirdline ']
s_5 = s.splitlines(True)           # 保留切割符 ==> ['firstline \n', ' secondline \r\n', ' thirdline \r']
# 只切割一次，不管是否找到分隔符，都返回 3個元素的元組（pre, sep, tail）==> ('firstline', ' ', '\n secondline \r\n thirdline \r')
s_6 = s.partition(" ") 
# 從字符串右邊開始進行分割  ==> ('firstline \n secondline \r\n thirdline', ' ', '\r')
s_6 = s.rpartition(" ")

其餘方法

s = "HEllo world"
print(s.upper())           # ==>  HELLO WORLD  全大寫
print(s.capitalize())     # ==>  Hello world  首部大寫
print(s.title())              # ==>  Hello World  每個單詞首字母大寫
print(s.lower())           # ==>  hello world  全小寫
print(s.swapcase())   # ==>  heLLO WORLD  交換大小寫

格式化輸出

s = "hello"
print(s.center(20, "#"))            # #######hello########  指定寬度和填充字符,居中
print(s.zfill(20))                       # 000000000000000hello  右對齊，0填充
print(s.ljust(20, "#"))               # hello###############  指定寬度和填充字符，左對齊
print(s.rjust(20, "#"))              # ###############hello  指定寬度和填充字符，右對齊

字符替換

s = "heffo worfd"
print(s.replace("f", "l"))             #  hello world
print(s.replace("f", "l", 2))         #  hello worfd
print(s.replace("ff", "l"))            #  helo worfd
print(s)                                     #  heffo worfd   均返回新的字符串，s 未作任何改變

# 批量替換
in_tab = "abxy"  
out_tab = "1234"  
str_trantab = str.maketrans(in_tab,out_tab)   # 建立一個轉化表, abxy --> 1234
s = "abcdefghijklmnopqrstuvwxyz"  
s.translate(str_trantab)      # s將會根據轉換表替換對應的字符 ==> 12cdefghijklmnopqrstuvw34z

strip() 刪除兩端字符

s = " \t   hello world   \n  "
# strip(chars) removes leading/trailing characters that belong to the set
# `chars`, and stops at the first character on each side that is not in it.
print(s.strip())            # 'hello world'               no argument: all surrounding whitespace
print(s.strip("\t"))        # ' \t   hello world   \n  '  unchanged: s starts and ends with a space, not a tab
print(s.strip("\n"))        # ' \t   hello world   \n  '  unchanged: s starts and ends with a space, not a newline
print(s.strip(" "))         # '\t   hello world   \n'     only the outer spaces go; stops at \t and \n
print(s.strip(" h\t"))      # 'ello world   \n'           left side eats ' ', '\t', 'h'; right side stops at \n

s.lstrip()                  # same rules, applied to the left end only
s.rstrip()                  # same rules, applied to the right end only

字符串查找

# find("",start ,stop) ,# 指定查找的字符，並能夠選自查找範圍，start -> stop 索引範圍
# 找到匹配字符串返回索引，未找到返回 -1 
s = "this is a long long long string"
print(s.find("a"))                     # 返回索引號   8
print(s.find("long"))                # 返回第一索引號 10
print(s.find("long", 11, -1))     # 從索引11開始查找，第一個long索引爲 15

print(s.count("long", 15, -1))  #從索引15開始查找，只能找到後面兩個long

# 字符串的查找都須要對字符串進行遍歷，當字符串較長，將會耗費較大的時間

# endswith, startswith 判斷字符串是否使用該字符結尾或者開頭
# 例如：檢查一個文件是不是Python文件格式
file_name = "hello.py"
file_name.endswith(".py")  # 是不是.py結尾
"hello".startswith("he")   # 是否 he開頭

字符檢測方法，返回 True或者 False

# 字符檢測方法， 返回 True或者 False

# endswith, startswith 判斷字符串是否使用該字符結尾或者開頭
# 例如：檢查一個文件是不是Python文件格式
file_name = "hello.py"
file_name.endswith(".py")     # 是不是.py結尾
"hello".startswith("he")          # 是否 he開頭

"hell_fa".isidentifier()            # 檢查是否符合標識符規則，是否是字母和下劃線開頭，其餘都是字母數字、下劃線
"abc".isalpha()                     # 是否所有爲字母
"123".isdigit()                       # 10進制數字
"123".isdecimal()                 # (0-9)的數字
"123abc".isalnum()              # 是不是字母或者數字 
"abc".islower()                     # 全小寫
"ABC".isupper()                   # 全大寫
"\n\t\f\r".isspace()                # 只包含空白, \t\n\r\f等均爲空白字符
# 以上結果全部爲 True

字符串格式化輸出

1. c語言風格格式化輸出

s = "hello world"
print("%s,%r" %(s,s))  # %s 調用字符串對象的 __str__方法輸出，%r會調用__repr__ 方法，輸出不一樣
#  輸出 hello world,'hello world'

示例：

s = "hello world"
class String(str):
    """A str subclass whose output is tagged with the dunder method that built it.

    Used to show which hook a formatting operation invokes: both methods
    wrap the quoted repr of the underlying text, prefixed with their own name.
    """

    def __repr__(self):
        # Reuse str's own repr so the text appears quoted.
        quoted = super().__repr__()
        return "__repr__:{}".format(quoted)

    def __str__(self):
        # Deliberately also based on the quoted repr, only the tag differs.
        quoted = super().__repr__()
        return "__str__:{}".format(quoted)

string = String(s)
print("%s, %r" %(string,string))
#  %s 輸出結果： __str__:'hello world', %r 輸出結果： __repr__:'hello world'
# %s 調用了對象的 __str__方法，%r 調用了__repr__ 方法

常用輸出形式

"%d" %10              #'10'
"%#d" %10            #'   10'
"%-05d" %10         #'10   '
"%d" %10              #'10'
# 可經過以上方式設置對齊方式和寬度等

"%f" %10            #'10.000000'
"%5.3f" %10         #'10.000'
"%10.2f" %10        #'     10.00'
"%-10.2f" %10       #'10.00     '

進制轉化

# 無前綴
"%x"%10               # ==> 'a'
"%o"%10               # ==> '12'
# 帶前綴                               
"%#x"%10               # ==> '0xa'
"%#o"%10               # ==> '0o12'

# 對齊方式，寬度，顯示精度均可設置

# 科學計數法表示
"%-10.2e" %10       #'1.00e+01  '

# 百分數表示
"%-.2f%%" %10       #'10.00%' 在值後面拼接 %， %% ==> 輸出一個%

format 格式化函數

# format 格式化函數
a,b,c = 1,2,"tom"
"{}, {name}, {}".format(a,b,name=c)  # 位置參數和關鍵字參數傳入 ==> '1, tom, 2'

# 設置填充字符 "#", 對齊方式，寬度
"{}".format(10)           #'10'
"{:#<10}".format(10)      #'10########'   
"{:0>10}".format(10)      #'0000000010'
"{:$^10}".format(10)      #'$$$$10$$$$'
#進制轉化
"{:x}".format(10)         #'a'     16進制
"{:o}".format(10)         #'12'    8進制
"{:b}".format(10)         #'1010'  2進制

#帶進制前綴符輸出
"{:#b}".format(10)        #'0b1010'  0b表示2進制
"{:#x}".format(10)        #'0xa'     0x表示16進制
"{:#o}".format(10)        #'0o12'    0o表示8進制

# 百分號表示，科學計數
"{:.2%}".format(0.5)      #'50.00%'  計算的結果轉化爲 % 形式表示
"{:.2e}".format(0.5)      #'5.00e-01'
"{:.2E}".format(50)       #''5.00E+01'