原文鏈接:http://blog.chinaunix.net/uid-25992400-id-3283846.html
任何語言都離不開字符,那就會涉及對字符的操作,尤其是腳本語言更是頻繁,無論是生產環境還是面試考驗都要面對字符串的操作。
字符串屬性方法
字符串格式輸出對齊
1.>>> str='stRINg lEArn'
2.>>>
3.>>> str.center(20) #生成20個字符長度,str排中間
4.'    stRINg lEArn    '
5.>>>
6.>>> str.ljust(20) #str左對齊
7.'stRINg lEArn        '
8.>>>
9.>>> str.rjust(20) #str右對齊
10.'        stRINg lEArn'
11.>>>
12.>>> str.zfill(20) #str右對齊,左邊填充0
13.'00000000stRINg lEArn'
大小寫轉換
1.>>> str='stRINg lEArn'
2.>>>
3.>>> str.upper() #轉大寫
4.'STRING LEARN'
5.>>>
6.>>> str.lower() #轉小寫
7.'string learn'
8.>>>
9.>>> str.capitalize() #字符串首爲大寫,其他小寫
10.'String learn'
11.>>>
12.>>> str.swapcase() #大小寫對換
13.'STrinG LeaRN'
14.>>>
15.>>> str.title() #以分隔符爲標記,首字符爲大寫,其他爲小寫
16.'String Learn'
字符串條件判斷
1.>>> str='0123'
2.>>> str.isalnum() #是否全是字母和數字,並至少有一個字符
3.True
4.>>> str.isdigit() #是否全是數字,並至少有一個字符
5.True
6.
7.>>> str='abcd'
8.>>> str.isalnum()
9.True
10.>>> str.isalpha() #是否全是字母,並至少有一個字符
11.True
12.>>> str.islower() #是否全是小寫,當全是小寫和數字一起時候,也判斷爲True
13.True
14.
15.>>> str='abcd0123'
16.>>> str.islower() #同上
17.True
18.>>> str.isalnum()
19.True
20.
21.>>> str=' '
22.>>> str.isspace() #是否全是空白字符,並至少有一個字符
23.True
24.>>> str='ABC'
25.>>> str.isupper() #是否全是大寫,當全是大寫和數字一起時候,也判斷爲True
26.True
27.>>> str='Abb Acc'
28.>>> str.istitle() #全部單詞字首都是大寫,標題
29.True
30.
31.>>> str='string learn'
32.>>> str.startswith('str') #判斷字符串以'str'開頭
33.True
34.>>> str.endswith('arn') #判斷字符串以'arn'結尾
35.True
字符串搜索定位與替換
1.>>> str='string lEARn'
2.>>>
3.>>> str.find('a') #查找字符串,沒有則返回-1,有則返回查找到的第一個匹配的索引
4.-1
5.>>> str.find('n')
6.4
7.>>> str.rfind('n') #同上,只是返回的索引是最後一次匹配的
8.11
9.>>>
10.>>> str.index('a') #如果沒有匹配則報錯
11.Traceback (most recent call last):
12. File "<stdin>", line 1, in <module>
13.ValueError: substring not found
14.>>> str.index('n') #同find類似,返回第一次匹配的索引值
15.4
16.>>> str.rindex('n') #返回最後一次匹配的索引值
17.11
18.>>>
19.>>> str.count('a') #字符串中匹配的次數
20.0
21.>>> str.count('n') #同上
22.2
23.>>>
24.>>> str.replace('EAR','ear') #匹配替換
25.'string learn'
26.>>> str.replace('n','N')
27.'striNg lEARN'
28.>>> str.replace('n','N',1)
29.'striNg lEARn'
30.>>>
31.>>>
32.>>> str.strip('n') #刪除字符串首尾匹配的字符,一般用於默認刪除回車符
33.'string lEAR'
34.>>> str.lstrip('n') #左匹配
35.'string lEARn'
36.>>> str.rstrip('n') #右匹配
37.'string lEAR'
38.>>>
39.>>> str='\ttab'
40.>>> str.expandtabs() #把製表符轉爲空格
41.'        tab'
42.>>> str.expandtabs(2) #指定空格數
43.'  tab'
字符串編碼與解碼
1.>>> str='字符串学习'
2.>>> str
3.'\xe5\xad\x97\xe7\xac\xa6\xe4\xb8\xb2\xe5\xad\xa6\xe4\xb9\xa0'
4.>>>
5.>>> str.decode('utf-8') #解碼過程,將utf-8解碼爲unicode
6.u'\u5b57\u7b26\u4e32\u5b66\u4e60'
7.
8.>>> str.decode('utf-8').encode('gbk') #編碼過程,將unicode編碼爲gbk
9.'\xd7\xd6\xb7\xfb\xb4\xae\xd1\xa7\xcf\xb0'
10.>>> str.decode('utf-8').encode('utf-8') #將unicode編碼爲utf-8
11.'\xe5\xad\x97\xe7\xac\xa6\xe4\xb8\xb2\xe5\xad\xa6\xe4\xb9\xa0'
字符串分割變換
1.>>> str='Learn string'
2.>>> '-'.join(str)
3.'L-e-a-r-n- -s-t-r-i-n-g'
4.>>> l1=['Learn','string']
5.>>> '-'.join(l1)
6.'Learn-string'
7.>>>
8.>>> str.split('n')
9.['Lear', ' stri', 'g']
10.>>> str.split('n',1)
11.['Lear', ' string']
12.>>> str.rsplit('n',1)
13.['Learn stri', 'g']
14.>>>
15.>>> str.splitlines()
16.['Learn string']
17.>>>
18.>>> str.partition('n')
19.('Lear', 'n', ' string')
20.>>> str.rpartition('n')
21.('Learn stri', 'n', 'g')
string模塊源代碼
1."""A collection of string operations (most are no longer used).
2.
3.Warning: most of the code you see here isn't normally used nowadays.
4.Beginning with Python 1.6, many of these functions are implemented as
5.methods on the standard string object. They used to be implemented by
6.a built-in module called strop, but strop is now obsolete itself.
7.
8.Public module variables:
9.
10.whitespace -- a string containing all characters considered whitespace
11.lowercase -- a string containing all characters considered lowercase letters
12.uppercase -- a string containing all characters considered uppercase letters
13.letters -- a string containing all characters considered letters
14.digits -- a string containing all characters considered decimal digits
15.hexdigits -- a string containing all characters considered hexadecimal digits
16.octdigits -- a string containing all characters considered octal digits
17.punctuation -- a string containing all characters considered punctuation
18.printable -- a string containing all characters considered printable
19.
20."""
21.
22.# Some strings for ctype-style character classification
23.whitespace = ' \t\n\r\v\f'
24.lowercase = 'abcdefghijklmnopqrstuvwxyz'
25.uppercase = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
26.letters = lowercase + uppercase
27.ascii_lowercase = lowercase
28.ascii_uppercase = uppercase
29.ascii_letters = ascii_lowercase + ascii_uppercase
30.digits = '0123456789'
31.hexdigits = digits + 'abcdef' + 'ABCDEF'
32.octdigits = '01234567'
33.punctuation = """!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~"""
34.printable = digits + letters + punctuation + whitespace
35.
36.# Case conversion helpers
37.# Use str to convert Unicode literal in case of -U
38.l = map(chr, xrange(256))
39._idmap = str('').join(l)
40.del l
41.
42.# Functions which aren't available as string methods.
43.
44.# Capitalize the words in a string, e.g. " aBc dEf " -> "Abc Def".
45.def capwords(s, sep=None):
46.    """capwords(s [,sep]) -> string
47.
48.    Split the argument into words using split, capitalize each
49.    word using capitalize, and join the capitalized words using
50.    join. If the optional second argument sep is absent or None,
51.    runs of whitespace characters are replaced by a single space
52.    and leading and trailing whitespace are removed, otherwise
53.    sep is used to split and join the words.
54.
55.    """
56.    return (sep or ' ').join(x.capitalize() for x in s.split(sep))
57.
58.
59.# Construct a translation string
60._idmapL = None
61.def maketrans(fromstr, tostr):
62.    """maketrans(frm, to) -> string
63.
64.    Return a translation table (a string of 256 bytes long)
65.    suitable for use in string.translate. The strings frm and to
66.    must be of the same length.
67.
68.    """
69.    if len(fromstr) != len(tostr):
70.        raise ValueError, "maketrans arguments must have same length"
71.    global _idmapL
72.    if not _idmapL:
73.        _idmapL = list(_idmap)
74.    L = _idmapL[:]
75.    fromstr = map(ord, fromstr)
76.    for i in range(len(fromstr)):
77.        L[fromstr[i]] = tostr[i]
78.    return ''.join(L)
79.
80.
81.
82.####################################################################
83.import re as _re
84.
85.class _multimap:
86.    """Helper class for combining multiple mappings.
87.
88.    Used by .{safe_,}substitute() to combine the mapping and keyword
89.    arguments.
90.    """
91.    def __init__(self, primary, secondary):
92.        self._primary = primary
93.        self._secondary = secondary
94.
95.    def __getitem__(self, key):
96.        try:
97.            return self._primary[key]
98.        except KeyError:
99.            return self._secondary[key]
100.
101.
102.class _TemplateMetaclass(type):
103.    pattern = r"""
104.    %(delim)s(?:
105.      (?P<escaped>%(delim)s) |   # Escape sequence of two delimiters
106.      (?P<named>%(id)s)      |   # delimiter and a Python identifier
107.      {(?P<braced>%(id)s)}   |   # delimiter and a braced identifier
108.      (?P<invalid>)              # Other ill-formed delimiter exprs
109.    )
110.    """
111.
112.    def __init__(cls, name, bases, dct):
113.        super(_TemplateMetaclass, cls).__init__(name, bases, dct)
114.        if 'pattern' in dct:
115.            pattern = cls.pattern
116.        else:
117.            pattern = _TemplateMetaclass.pattern % {
118.                'delim' : _re.escape(cls.delimiter),
119.                'id' : cls.idpattern,
120.                }
121.        cls.pattern = _re.compile(pattern, _re.IGNORECASE | _re.VERBOSE)
122.
123.
124.class Template:
125.    """A string class for supporting $-substitutions."""
126.    __metaclass__ = _TemplateMetaclass
127.
128.    delimiter = '$'
129.    idpattern = r'[_a-z][_a-z0-9]*'
130.
131.    def __init__(self, template):
132.        self.template = template
133.
134.    # Search for $$, $identifier, ${identifier}, and any bare $'s
135.
136.    def _invalid(self, mo):
137.        i = mo.start('invalid')
138.        lines = self.template[:i].splitlines(True)
139.        if not lines:
140.            colno = 1
141.            lineno = 1
142.        else:
143.            colno = i - len(''.join(lines[:-1]))
144.            lineno = len(lines)
145.        raise ValueError('Invalid placeholder in string: line %d, col %d' %
146.                         (lineno, colno))
147.
148.    def substitute(self, *args, **kws):
149.        if len(args) > 1:
150.            raise TypeError('Too many positional arguments')
151.        if not args:
152.            mapping = kws
153.        elif kws:
154.            mapping = _multimap(kws, args[0])
155.        else:
156.            mapping = args[0]
157.        # Helper function for .sub()
158.        def convert(mo):
159.            # Check the most common path first.
160.            named = mo.group('named') or mo.group('braced')
161.            if named is not None:
162.                val = mapping[named]
163.                # We use this idiom instead of str() because the latter will
164.                # fail if val is a Unicode containing non-ASCII characters.
165.                return '%s' % (val,)
166.            if mo.group('escaped') is not None:
167.                return self.delimiter
168.            if mo.group('invalid') is not None:
169.                self._invalid(mo)
170.            raise ValueError('Unrecognized named group in pattern',
171.                             self.pattern)
172.        return self.pattern.sub(convert, self.template)
173.
174.    def safe_substitute(self, *args, **kws):
175.        if len(args) > 1:
176.            raise TypeError('Too many positional arguments')
177.        if not args:
178.            mapping = kws
179.        elif kws:
180.            mapping = _multimap(kws, args[0])
181.        else:
182.            mapping = args[0]
183.        # Helper function for .sub()
184.        def convert(mo):
185.            named = mo.group('named')
186.            if named is not None:
187.                try:
188.                    # We use this idiom instead of str() because the latter
189.                    # will fail if val is a Unicode containing non-ASCII
190.                    return '%s' % (mapping[named],)
191.                except KeyError:
192.                    return self.delimiter + named
193.            braced = mo.group('braced')
194.            if braced is not None:
195.                try:
196.                    return '%s' % (mapping[braced],)
197.                except KeyError:
198.                    return self.delimiter + '{' + braced + '}'
199.            if mo.group('escaped') is not None:
200.                return self.delimiter
201.            if mo.group('invalid') is not None:
202.                return self.delimiter
203.            raise ValueError('Unrecognized named group in pattern',
204.                             self.pattern)
205.        return self.pattern.sub(convert, self.template)
206.
207.
208.
209.####################################################################
210.# NOTE: Everything below here is deprecated. Use string methods instead.
211.# This stuff will go away in Python 3.0.
212.
213.# Backward compatible names for exceptions
214.index_error = ValueError
215.atoi_error = ValueError
216.atof_error = ValueError
217.atol_error = ValueError
218.
219.# convert UPPER CASE letters to lower case
220.def lower(s):
221.    """lower(s) -> string
222.
223.    Return a copy of the string s converted to lowercase.
224.
225.    """
226.    return s.lower()
227.
228.# Convert lower case letters to UPPER CASE
229.def upper(s):
230.    """upper(s) -> string
231.
232.    Return a copy of the string s converted to uppercase.
233.
234.    """
235.    return s.upper()
236.
237.# Swap lower case letters and UPPER CASE
238.def swapcase(s):
239.    """swapcase(s) -> string
240.
241.    Return a copy of the string s with upper case characters
242.    converted to lowercase and vice versa.
243.
244.    """
245.    return s.swapcase()
246.
247.# Strip leading and trailing tabs and spaces
248.def strip(s, chars=None):
249.    """strip(s [,chars]) -> string
250.
251.    Return a copy of the string s with leading and trailing
252.    whitespace removed.
253.    If chars is given and not None, remove characters in chars instead.
254.    If chars is unicode, S will be converted to unicode before stripping.
255.
256.    """
257.    return s.strip(chars)
258.
259.# Strip leading tabs and spaces
260.def lstrip(s, chars=None):
261.    """lstrip(s [,chars]) -> string
262.
263.    Return a copy of the string s with leading whitespace removed.
264.    If chars is given and not None, remove characters in chars instead.
265.
266.    """
267.    return s.lstrip(chars)
268.
269.# Strip trailing tabs and spaces
270.def rstrip(s, chars=None):
271.    """rstrip(s [,chars]) -> string
272.
273.    Return a copy of the string s with trailing whitespace removed.
274.    If chars is given and not None, remove characters in chars instead.
275.
276.    """
277.    return s.rstrip(chars)
278.
279.
280.# Split a string into a list of space/tab-separated words
281.def split(s, sep=None, maxsplit=-1):
282.    """split(s [,sep [,maxsplit]]) -> list of strings
283.
284.    Return a list of the words in the string s, using sep as the
285.    delimiter string. If maxsplit is given, splits at no more than
286.    maxsplit places (resulting in at most maxsplit+1 words). If sep
287.    is not specified or is None, any whitespace string is a separator.
288.
289.    (split and splitfields are synonymous)
290.
291.    """
292.    return s.split(sep, maxsplit)
293.splitfields = split
294.
295.# Split a string into a list of space/tab-separated words
296.def rsplit(s, sep=None, maxsplit=-1):
297.    """rsplit(s [,sep [,maxsplit]]) -> list of strings
298.
299.    Return a list of the words in the string s, using sep as the
300.    delimiter string, starting at the end of the string and working
301.    to the front. If maxsplit is given, at most maxsplit splits are
302.    done. If sep is not specified or is None, any whitespace string
303.    is a separator.
304.    """
305.    return s.rsplit(sep, maxsplit)
306.
307.# Join fields with optional separator
308.def join(words, sep = ' '):
309.    """join(list [,sep]) -> string
310.
311.    Return a string composed of the words in list, with
312.    intervening occurrences of sep. The default separator is a
313.    single space.
314.
315.    (joinfields and join are synonymous)
316.
317.    """
318.    return sep.join(words)
319.joinfields = join
320.
321.# Find substring, raise exception if not found
322.def index(s, *args):
323.    """index(s, sub [,start [,end]]) -> int
324.
325.    Like find but raises ValueError when the substring is not found.
326.
327.    """
328.    return s.index(*args)
329.
330.# Find last substring, raise exception if not found
331.def rindex(s, *args):
332.    """rindex(s, sub [,start [,end]]) -> int
333.
334.    Like rfind but raises ValueError when the substring is not found.
335.
336.    """
337.    return s.rindex(*args)
338.
339.# Count non-overlapping occurrences of substring
340.def count(s, *args):
341.    """count(s, sub[, start[,end]]) -> int
342.
343.    Return the number of occurrences of substring sub in string
344.    s[start:end]. Optional arguments start and end are
345.    interpreted as in slice notation.
346.
347.    """
348.    return s.count(*args)
349.
350.# Find substring, return -1 if not found
351.def find(s, *args):
352.    """find(s, sub [,start [,end]]) -> in
353.
354.    Return the lowest index in s where substring sub is found,
355.    such that sub is contained within s[start,end]. Optional
356.    arguments start and end are interpreted as in slice notation.
357.
358.    Return -1 on failure.
359.
360.    """
361.    return s.find(*args)
362.
363.# Find last substring, return -1 if not found
364.def rfind(s, *args):
365.    """rfind(s, sub [,start [,end]]) -> int
366.
367.    Return the highest index in s where substring sub is found,
368.    such that sub is contained within s[start,end]. Optional
369.    arguments start and end are interpreted as in slice notation.
370.
371.    Return -1 on failure.
372.
373.    """
374.    return s.rfind(*args)
375.
376.# for a bit of speed
377._float = float
378._int = int
379._long = long
380.
381.# Convert string to float
382.def atof(s):
383.    """atof(s) -> float
384.
385.    Return the floating point number represented by the string s.
386.
387.    """
388.    return _float(s)
389.
390.
391.# Convert string to integer
392.def atoi(s , base=10):
393.    """atoi(s [,base]) -> int
394.
395.    Return the integer represented by the string s in the given
396.    base, which defaults to 10. The string s must consist of one
397.    or more digits, possibly preceded by a sign. If base is 0, it
398.    is chosen from the leading characters of s, 0 for octal, 0x or
399.    0X for hexadecimal. If base is 16, a preceding 0x or 0X is
400.    accepted.
401.
402.    """
403.    return _int(s, base)
404.
405.
406.# Convert string to long integer
407.def atol(s, base=10):
408.    """atol(s [,base]) -> long
409.
410.    Return the long integer represented by the string s in the
411.    given base, which defaults to 10. The string s must consist
412.    of one or more digits, possibly preceded by a sign. If base
413.    is 0, it is chosen from the leading characters of s, 0 for
414.    octal, 0x or 0X for hexadecimal. If base is 16, a preceding
415.    0x or 0X is accepted. A trailing L or l is not accepted,
416.    unless base is 0.
417.
418.    """
419.    return _long(s, base)
420.
421.
422.# Left-justify a string
423.def ljust(s, width, *args):
424.    """ljust(s, width[, fillchar]) -> string
425.
426.    Return a left-justified version of s, in a field of the
427.    specified width, padded with spaces as needed. The string is
428.    never truncated. If specified the fillchar is used instead of spaces.
429.
430.    """
431.    return s.ljust(width, *args)
432.
433.# Right-justify a string
434.def rjust(s, width, *args):
435.    """rjust(s, width[, fillchar]) -> string
436.
437.    Return a right-justified version of s, in a field of the
438.    specified width, padded with spaces as needed. The string is
439.    never truncated. If specified the fillchar is used instead of spaces.
440.
441.    """
442.    return s.rjust(width, *args)
443.
444.# Center a string
445.def center(s, width, *args):
446.    """center(s, width[, fillchar]) -> string
447.
448.    Return a center version of s, in a field of the specified
449.    width. padded with spaces as needed. The string is never
450.    truncated. If specified the fillchar is used instead of spaces.
451.
452.    """
453.    return s.center(width, *args)
454.
455.# Zero-fill a number, e.g., (12, 3) --> '012' and (-3, 3) --> '-03'
456.# Decadent feature: the argument may be a string or a number
457.# (Use of this is deprecated; it should be a string as with ljust c.s.)
458.def zfill(x, width):
459.    """zfill(x, width) -> string
460.
461.    Pad a numeric string x with zeros on the left, to fill a field
462.    of the specified width. The string x is never truncated.
463.
464.    """
465.    if not isinstance(x, basestring):
466.        x = repr(x)
467.    return x.zfill(width)
468.
469.# Expand tabs in a string.
470.# Doesn't take non-printing chars into account, but does understand \n.
471.def expandtabs(s, tabsize=8):
472.    """expandtabs(s [,tabsize]) -> string
473.
474.    Return a copy of the string s with all tab characters replaced
475.    by the appropriate number of spaces, depending on the current
476.    column, and the tabsize (default 8).
477.
478.    """
479.    return s.expandtabs(tabsize)
480.
481.# Character translation through look-up table.
482.def translate(s, table, deletions=""):
483.    """translate(s,table [,deletions]) -> string
484.
485.    Return a copy of the string s, where all characters occurring
486.    in the optional argument deletions are removed, and the
487.    remaining characters have been mapped through the given
488.    translation table, which must be a string of length 256. The
489.    deletions argument is not allowed for Unicode strings.
490.
491.    """
492.    if deletions or table is None:
493.        return s.translate(table, deletions)
494.    else:
495.        # Add s[:0] so that if s is Unicode and table is an 8-bit string,
496.        # table is converted to Unicode. This means that table *cannot*
497.        # be a dictionary -- for that feature, use u.translate() directly.
498.        return s.translate(table + s[:0])
499.
500.# Capitalize a string, e.g. "aBc dEf" -> "Abc def".
501.def capitalize(s):
502.    """capitalize(s) -> string
503.
504.    Return a copy of the string s with only its first character
505.    capitalized.
506.
507.    """
508.    return s.capitalize()
509.
510.# Substring replacement (global)
511.def replace(s, old, new, maxsplit=-1):
512.    """replace (str, old, new[, maxsplit]) -> string
513.
514.    Return a copy of string str with all occurrences of substring
515.    old replaced by new. If the optional argument maxsplit is
516.    given, only the first maxsplit occurrences are replaced.
517.
518.    """
519.    return s.replace(old, new, maxsplit)
520.
521.
522.# Try importing optional built-in module "strop" -- if it exists,
523.# it redefines some string operations that are 100-1000 times faster.
524.# It also defines values for whitespace, lowercase and uppercase
525.# that match <ctype.h>'s definitions.
526.
527.try:
528.    from strop import maketrans, lowercase, uppercase, whitespace
529.    letters = lowercase + uppercase
530.except ImportError:
531.    pass # Use the original versions
532.
533.########################################################################
534.# the Formatter class
535.# see PEP 3101 for details and purpose of this class
536.
537.# The hard parts are reused from the C implementation. They're exposed as "_"
538.# prefixed methods of str and unicode.
539.
540.# The overall parser is implemented in str._formatter_parser.
541.# The field name parser is implemented in str._formatter_field_name_split
542.
543.class Formatter(object):
544.    def format(self, format_string, *args, **kwargs):
545.        return self.vformat(format_string, args, kwargs)
546.
547.    def vformat(self, format_string, args, kwargs):
548.        used_args = set()
549.        result = self._vformat(format_string, args, kwargs, used_args, 2)
550.        self.check_unused_args(used_args, args, kwargs)
551.        return result
552.
553.    def _vformat(self, format_string, args, kwargs, used_args, recursion_depth):
554.        if recursion_depth < 0:
555.            raise ValueError('Max string recursion exceeded')
556.        result = []
557.        for literal_text, field_name, format_spec, conversion in \
558.                self.parse(format_string):
559.
560.            # output the literal text
561.            if literal_text:
562.                result.append(literal_text)
563.
564.            # if there's a field, output it
565.            if field_name is not None:
566.                # this is some markup, find the object and do
567.                # the formatting
568.
569.                # given the field_name, find the object it references
570.                # and the argument it came from
571.                obj, arg_used = self.get_field(field_name, args, kwargs)
572.                used_args.add(arg_used)
573.
574.                # do any conversion on the resulting object
575.                obj = self.convert_field(obj, conversion)
576.
577.                # expand the format spec, if needed
578.                format_spec = self._vformat(format_spec, args, kwargs,
579.                                            used_args, recursion_depth-1)
580.
581.                # format the object and append to the result
582.                result.append(self.format_field(obj, format_spec))
583.
584.        return ''.join(result)
585.
586.
587.    def get_value(self, key, args, kwargs):
588.        if isinstance(key, (int, long)):
589.            return args[key]
590.        else:
591.            return kwargs[key]
592.
593.
594.    def check_unused_args(self, used_args, args, kwargs):
595.        pass
596.
597.
598.    def format_field(self, value, format_spec):
599.        return format(value, format_spec)
600.
601.
602.    def convert_field(self, value, conversion):
603.        # do any conversion on the resulting object
604.        if conversion == 'r':
605.            return repr(value)
606.        elif conversion == 's':
607.            return str(value)
608.        elif conversion is None:
609.            return value
610.        raise ValueError("Unknown converion specifier {0!s}".format(conversion))
611.
612.
613.    # returns an iterable that contains tuples of the form:
614.    # (literal_text, field_name, format_spec, conversion)
615.    # literal_text can be zero length
616.    # field_name can be None, in which case there's no
617.    # object to format and output
618.    # if field_name is not None, it is looked up, formatted
619.    # with format_spec and conversion and then used
620.    def parse(self, format_string):
621.        return format_string._formatter_parser()
622.
623.
624.    # given a field_name, find the object it references.
625.    # field_name: the field being looked up, e.g. "0.name"
626.    # or "lookup[3]"
627.    # used_args: a set of which args have been used
628.    # args, kwargs: as passed in to vformat
629.    def get_field(self, field_name, args, kwargs):
630.        first, rest = field_name._formatter_field_name_split()
631.
632.        obj = self.get_value(first, args, kwargs)
633.
634.        # loop through the rest of the field_name, doing
635.        # getattr or getitem as needed
636.        for is_attr, i in rest:
637.            if is_attr:
638.                obj = getattr(obj, i)
639.            else:
640.                obj = obj[i]
641.
642.        return obj, first