1.如何拆分含有多種分隔符的字符串¶

#解決方案1.連續使用str.split()，每一次處理一種分隔符號；
def mySplit(s,ds):
    """Split ``s`` on every delimiter in ``ds`` and drop empty pieces.

    The delimiters are applied one after another with ``str.split``: each
    pass re-splits all fragments produced by the previous pass.

    Args:
        s: The string to split.
        ds: An iterable of delimiter strings (e.g. ``",;|\\t"``).

    Returns:
        A list of the non-empty fragments, in their original order.
    """
    fragments = [s]
    for delimiter in ds:
        # Re-split every current fragment on this delimiter and flatten the
        # resulting lists back into a single list.
        fragments = [
            piece
            for fragment in fragments
            for piece in fragment.split(delimiter)
        ]

    # Adjacent, leading or trailing delimiters leave empty strings behind.
    return [piece for piece in fragments if piece]

s="ab;cd|efg|hi,jkl|mn\topq;rst,uvw\txyz"

print(mySplit(s,",;|\t"))

['ab', 'cd', 'efg', 'hi', 'jkl', 'mn', 'opq', 'rst', 'uvw', 'xyz']

#解決方案2.使用正則表達式的re.split()，一次性拆分字符串。
import re

s="ab;cd|efg|hi,jkl|mn\topq;rst,uvw\txyz"

re.split(r'[,;|\t]+',s)

['ab', 'cd', 'efg', 'hi', 'jkl', 'mn', 'opq', 'rst', 'uvw', 'xyz']

2.如何判斷字符串a是否以字符串b開頭或結尾¶

import os,stat

os.listdir('.')

['.ipynb_checkpoints',
 'graph.py',
 'stack.cpp',
 'heap.java',
 'install.sh',
 'quicksort.c',
 '複雜場景下字符串處理相關問題與解決技巧.ipynb']

s = 'heap.java'

s.endswith('.java')

True

#endswith能夠接受一個元組(不能是列表)作爲參數，只要滿足其中之一，就會返回True
[name for name in os.listdir('.') if name.endswith(('.sh','.py'))]

['graph.py', 'install.sh']

#st_mode是與文件權限相關的
os.stat('graph.py').st_mode

33252

#轉換成八進制
oct(os.stat('graph.py').st_mode)

'0o100744'

os.chmod('graph.py',os.stat('graph.py').st_mode | stat.S_IXUSR)

ls -l

總用量 24
-rwxr--r-- 1 zhou zhou     0 12月 14 14:55 graph.py*
-rw-r--r-- 1 zhou zhou     0 12月 14 14:56 heap.java
-rw-r--r-- 1 zhou zhou     0 12月 14 14:56 install.sh
-rw-r--r-- 1 zhou zhou     1 12月 14 14:55 quicksort.c
-rw-r--r-- 1 zhou zhou     0 12月 14 14:56 stack.cpp
-rw-r--r-- 1 zhou zhou 17606 12月 14 18:03 複雜場景下字符串處理相關問題與解決技巧.ipynb

3.如何調整字符串中文本的格式¶

cat /var/log/alternatives.log

update-alternatives 2018-12-10 09:40:25: run with --install /usr/share/gnome-shell/theme/gdm3.css gdm3.css /usr/share/gnome-shell/theme/ubuntu.css 10
update-alternatives 2018-12-10 09:40:33: run with --install /usr/bin/gnome-www-browser gnome-www-browser /usr/bin/firefox 40
update-alternatives 2018-12-10 09:40:33: run with --install /usr/bin/x-www-browser x-www-browser /usr/bin/firefox 40
update-alternatives 2018-12-10 09:42:23: run with --install /usr/bin/gstreamer-codec-install gstreamer-codec-install /usr/lib/packagekit/pk-gstreamer-install 80
update-alternatives 2018-12-10 09:43:43: run with --install /usr/bin/x-window-manager x-window-manager /usr/bin/mutter 60 --slave /usr/share/man/man1/x-window-manager.1.gz x-window-manager.1.gz /usr/share/man/man1/mutter.1.gz

# Read the whole log into memory; the context manager closes the file handle
# as soon as the read finishes instead of leaving it to the garbage collector.
with open('/var/log/alternatives.log') as f:
    log = f.read()

import re
# Groups are numbered by position: \1 = year, \2 = month, \3 = day.
# The pattern is a raw string so that \d reaches the regex engine without
# triggering Python's invalid-escape-sequence warning.
print(re.sub(r'(\d{4})-(\d{2})-(\d{2})',r'\2/\3/\1',log))

update-alternatives 12/10/2018 09:40:25: run with --install /usr/share/gnome-shell/theme/gdm3.css gdm3.css /usr/share/gnome-shell/theme/ubuntu.css 10
update-alternatives 12/10/2018 09:40:33: run with --install /usr/bin/gnome-www-browser gnome-www-browser /usr/bin/firefox 40
update-alternatives 12/10/2018 09:40:33: run with --install /usr/bin/x-www-browser x-www-browser /usr/bin/firefox 40
update-alternatives 12/10/2018 09:42:23: run with --install /usr/bin/gstreamer-codec-install gstreamer-codec-install /usr/lib/packagekit/pk-gstreamer-install 80
update-alternatives 12/10/2018 09:43:43: run with --install /usr/bin/x-window-manager x-window-manager /usr/bin/mutter 60 --slave /usr/share/man/man1/x-window-manager.1.gz x-window-manager.1.gz /usr/share/man/man1/mutter.1.gz

# Same rewrite using named groups, referenced as \g<name> in the replacement.
# The pattern is a raw string so that \d reaches the regex engine without
# triggering Python's invalid-escape-sequence warning.
print(re.sub(r'(?P<year>\d{4})-(?P<month>\d{2})-(?P<day>\d{2})',r'\g<month>/\g<day>/\g<year>',log))

update-alternatives 12/10/2018 09:40:25: run with --install /usr/share/gnome-shell/theme/gdm3.css gdm3.css /usr/share/gnome-shell/theme/ubuntu.css 10
update-alternatives 12/10/2018 09:40:33: run with --install /usr/bin/gnome-www-browser gnome-www-browser /usr/bin/firefox 40
update-alternatives 12/10/2018 09:40:33: run with --install /usr/bin/x-www-browser x-www-browser /usr/bin/firefox 40
update-alternatives 12/10/2018 09:42:23: run with --install /usr/bin/gstreamer-codec-install gstreamer-codec-install /usr/lib/packagekit/pk-gstreamer-install 80
update-alternatives 12/10/2018 09:43:43: run with --install /usr/bin/x-window-manager x-window-manager /usr/bin/mutter 60 --slave /usr/share/man/man1/x-window-manager.1.gz x-window-manager.1.gz /usr/share/man/man1/mutter.1.gz

4.如何將多個小字符串拼接成一個大的字符串¶

s1 = 'abcdefg'

s2 = '12345'

s1 + s2

'abcdefg12345'

str.__add__(s1,s2)

'abcdefg12345'

s1 > s2

True

str.__gt__(s1,s2)

True

pl = ['<0112>','<32>','<1024x768>','<60>','<1>','<100.0>','<500.0>']

s = ''

for p in pl:
    s += p
    print(s)
    #存在資源浪費

<0112>
<0112><32>
<0112><32><1024x768>
<0112><32><1024x768><60>
<0112><32><1024x768><60><1>
<0112><32><1024x768><60><1><100.0>
<0112><32><1024x768><60><1><100.0><500.0>

s

'<0112><32><1024x768><60><1><100.0><500.0>'

# 不存在臨時變量的浪費
"".join(pl)

'<0112><32><1024x768><60><1><100.0><500.0>'

# 存在數字和字符串
l = ['abc',123,45,'xyz']

# 列表解析
''.join([str(x) for x in l])

'abc12345xyz'

# 生成器表達式,開銷比列表解析式小
''.join(str(x) for x in l)

'abc12345xyz'

5.如何對字符串進行左, 右, 居中對齊¶

s = 'abc'

s.ljust(20)

'abc                 '

s.ljust(20,'=')

'abc================='

s.rjust(20)

'                 abc'

len(s.rjust(20))

20

s.center(20)

'        abc         '

s = 'abc'

format(s,'<20')

'abc                 '

format(s,'>20')

'                 abc'

format(s,'^20')

'        abc         '

d = {
    "a":100,
    "as":0.01,
    "wer":500.0,
    "cc":12
}

d

{'a': 100, 'as': 0.01, 'wer': 500.0, 'cc': 12}

d.keys()

dict_keys(['a', 'as', 'wer', 'cc'])

# 通過map求出每個key的長度
list(map(len,d.keys()))

[1, 2, 3, 2]

max(list(map(len,d.keys())))

3

# Width of the longest key, so every key can be padded to the same column.
# Iterating a dict yields its keys, so no .keys()/list() copy is needed;
# default=0 keeps an empty dict from raising ValueError in max().
w = max(map(len, d), default=0)

for k, v in d.items():
    print(k.ljust(w), ':', v)

a   : 100
as  : 0.01
wer : 500.0
cc  : 12

6.如何去掉字符串中不需要的字符¶

s = '  abc  123   '

s.strip()

'abc  123'

s.lstrip()

'abc  123   '

s.rstrip()

'  abc  123'

s = '+++abc---'

s.strip('+-')

'abc'

s = 'abc:123'

s[:3] + s[4:]

'abc123'

s = '\tabc\t123\txyz'

s

'\tabc\t123\txyz'

# 替換單個字符
s.replace('\t','')

'abc123xyz'

s = '\tabc\t123\txyz\ropt\r'

import re

# 替換多個不一樣字符
re.sub('[\t\r]','',s)

'abc123xyzopt'

s = 'abc123def456xyz'

a = s.maketrans('abcxyz','xyzabc')

a

{97: 120, 98: 121, 99: 122, 120: 97, 121: 98, 122: 99}

s.translate(a)

'xyz123def456abc'

t = 'abc\refg\n234\t'

remap = {
    # ord() returns the character's Unicode code point; str.translate looks
    # characters up by code point, and a value of '' or None deletes them.
    ord('\t'): '',
    ord('\n'): '',
    ord('\r'): None
    }

t.translate(remap)

'abcefg234'

import sys
import unicodedata
s = 'āáǎà ōóǒò ēéěè īíǐì'
remap = {
    # ord() returns the character's Unicode code point, which is the key
    # type str.translate expects.
    ord('\t'): '',
    ord('\f'): '',
    ord('\r'): None
    }
# Strip \t, \f and \r (mapping to '' or to None both delete the character).
a = s.translate(remap)
# Build a deletion table keyed by the code point of every combining character:
# unicodedata.combining() returns the canonical combining class, which is 0
# when none is defined, and dict.fromkeys() maps every kept key to None.
# sys.maxunicode is the largest code point (1114111, 0x10FFFF), so the range
# has to end at sys.maxunicode + 1 to include it.
cmb_chrs = dict.fromkeys(
    c for c in range(sys.maxunicode + 1) if unicodedata.combining(chr(c))
)
# NFD normalization decomposes each accented letter into its base letter
# followed by the combining mark(s).
b = unicodedata.normalize('NFD', a)
# Deleting the combining marks leaves only the base letters.
print(b.translate(cmb_chrs))

aaaa oooo eeee iiii

Python複雜場景下字符串處理相關問題與解決技巧

1.如何拆分含有多種分隔符的字符串¶

2.如何判斷字符串a是否以字符串b開頭或結尾¶

3.如何調整字符串中文本的格式¶

4.如何將多個小字符串拼接成一個大的字符串¶

5.如何對字符串進行左, 右, 居中對齊¶

6.如何去掉字符串中不需要的字符¶