Module string
[hide private]
[frames | no frames]

Source Code for Module string

  1  """A collection of string operations (most are no longer used). 
  2   
  3  Warning: most of the code you see here isn't normally used nowadays. 
  4  Beginning with Python 1.6, many of these functions are implemented as 
  5  methods on the standard string object. They used to be implemented by 
  6  a built-in module called strop, but strop is now obsolete itself. 
  7   
  8  Public module variables: 
  9   
 10  whitespace -- a string containing all characters considered whitespace 
 11  lowercase -- a string containing all characters considered lowercase letters 
 12  uppercase -- a string containing all characters considered uppercase letters 
 13  letters -- a string containing all characters considered letters 
 14  digits -- a string containing all characters considered decimal digits 
 15  hexdigits -- a string containing all characters considered hexadecimal digits 
 16  octdigits -- a string containing all characters considered octal digits 
 17  punctuation -- a string containing all characters considered punctuation 
 18  printable -- a string containing all characters considered printable 
 19   
 20  """ 
 21   
 22  # Some strings for ctype-style character classification 
 23  whitespace = ' \t\n\r\v\f' 
 24  lowercase = 'abcdefghijklmnopqrstuvwxyz' 
 25  uppercase = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 
 26  letters = lowercase + uppercase 
 27  ascii_lowercase = lowercase 
 28  ascii_uppercase = uppercase 
 29  ascii_letters = ascii_lowercase + ascii_uppercase 
 30  digits = '0123456789' 
 31  hexdigits = digits + 'abcdef' + 'ABCDEF' 
 32  octdigits = '01234567' 
 33  punctuation = """!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~""" 
 34  printable = digits + letters + punctuation + whitespace 
 35   
 36  # Case conversion helpers 
 37  # Use str to convert Unicode literal in case of -U 
 38  l = map(chr, xrange(256)) 
 39  _idmap = str('').join(l) 
 40  del l 
 41   
 42  # Functions which aren't available as string methods. 
 43   
 44  # Capitalize the words in a string, e.g. " aBc  dEf " -> "Abc Def". 
def capwords(s, sep=None):
    """capwords(s, [sep]) -> string

    Break s into words with s.split(sep), capitalize every word with
    capitalize(), and glue the words back together with sep (or with a
    single space when sep is None).  With the default separator this
    means runs of whitespace collapse to one space and leading and
    trailing whitespace disappears.

    """
    glue = sep or ' '
    capitalized = []
    for word in s.split(sep):
        capitalized.append(word.capitalize())
    return glue.join(capitalized)


# Construct a translation string

# Cache of the identity table as a list of single characters; it is built
# on the first maketrans() call and copied for every table produced.
_idmapL = None


def maketrans(fromstr, tostr):
    """maketrans(frm, to) -> string

    Build a 256-character translation table for string.translate in
    which every character of frm maps to the character at the same
    position in to; all other characters map to themselves.

    Raises ValueError when frm and to differ in length.

    """
    global _idmapL
    if len(fromstr) != len(tostr):
        raise ValueError("maketrans arguments must have same length")
    if not _idmapL:
        _idmapL = list(_idmap)
    table = _idmapL[:]
    # Later pairs win when a source character is repeated.
    for source_char, target_char in zip(fromstr, tostr):
        table[ord(source_char)] = target_char
    return ''.join(table)
77 78 79 80 #################################################################### 81 import re as _re 82
class _multimap:
    """Read-only lookup that layers one mapping over another.

    .substitute() and .safe_substitute() use it to consult keyword
    arguments and an explicit mapping through a single object.
    """

    def __init__(self, primary, secondary):
        # primary is tried first; secondary is only the fallback.
        self._primary, self._secondary = primary, secondary

    def __getitem__(self, key):
        preferred = self._primary
        try:
            return preferred[key]
        except KeyError:
            # Not in the primary mapping; a KeyError from the fallback
            # is allowed to propagate to the caller.
            fallback = self._secondary
            return fallback[key]
98 99
class _TemplateMetaclass(type):
    """Metaclass that compiles the placeholder regex for each class.

    A class may supply its own 'pattern' in its body; otherwise the
    default pattern below is filled in from the class's 'delimiter' and
    'idpattern' attributes.  Either way cls.pattern ends up as a compiled
    regular expression (IGNORECASE | VERBOSE).
    """

    pattern = r"""
    %(delim)s(?:
      (?P<escaped>%(delim)s) |   # Escape sequence of two delimiters
      (?P<named>%(id)s)      |   # delimiter and a Python identifier
      {(?P<braced>%(id)s)}   |   # delimiter and a braced identifier
      (?P<invalid>)              # Other ill-formed delimiter exprs
    )
    """

    def __init__(cls, name, bases, dct):
        super(_TemplateMetaclass, cls).__init__(name, bases, dct)
        if 'pattern' not in dct:
            # No explicit pattern in the class body: derive one from the
            # delimiter (escaped, since it is literal text) and idpattern.
            fillers = {
                'delim': _re.escape(cls.delimiter),
                'id': cls.idpattern,
            }
            source = _TemplateMetaclass.pattern % fillers
        else:
            source = cls.pattern
        flags = _re.IGNORECASE | _re.VERBOSE
        cls.pattern = _re.compile(source, flags)
120 121
def _template_mapping(args, kws):
    """Pick the lookup object for Template.substitute/safe_substitute.

    args is the tuple of positional arguments (at most one mapping) and
    kws the keyword arguments.  When both are given, keywords take
    precedence over the positional mapping.
    """
    if len(args) > 1:
        raise TypeError('Too many positional arguments')
    if not args:
        return kws
    if kws:
        return _multimap(kws, args[0])
    return args[0]


class Template:
    """A string class for supporting $-substitutions."""
    __metaclass__ = _TemplateMetaclass

    delimiter = '$'
    idpattern = r'[_a-z][_a-z0-9]*'

    def __init__(self, template):
        self.template = template

    # Search for $$, $identifier, ${identifier}, and any bare $'s

    def _invalid(self, mo):
        """Raise ValueError naming the line and column of a bad placeholder."""
        offset = mo.start('invalid')
        preceding = self.template[:offset].splitlines(True)
        if preceding:
            lineno = len(preceding)
            # Column = offset minus the length of all complete earlier lines.
            colno = offset - sum([len(text) for text in preceding[:-1]])
        else:
            lineno = colno = 1
        raise ValueError('Invalid placeholder in string: line %d, col %d' %
                         (lineno, colno))

    def substitute(self, *args, **kws):
        """Return the template with every placeholder replaced.

        Accepts at most one positional mapping plus keyword arguments.
        Lookup errors from the mapping propagate, and an ill-formed
        placeholder raises ValueError.
        """
        mapping = _template_mapping(args, kws)

        # Helper function for .sub()
        def convert(mo):
            # Check the most common path first.
            key = mo.group('named') or mo.group('braced')
            if key is not None:
                # We use this idiom instead of str() because the latter will
                # fail if val is a Unicode containing non-ASCII characters.
                return '%s' % (mapping[key],)
            if mo.group('escaped') is not None:
                return self.delimiter
            if mo.group('invalid') is not None:
                self._invalid(mo)
            raise ValueError('Unrecognized named group in pattern',
                             self.pattern)

        return self.pattern.sub(convert, self.template)

    def safe_substitute(self, *args, **kws):
        """Like substitute(), but leave unresolved placeholders in place.

        A placeholder whose lookup raises KeyError is emitted unchanged,
        and an ill-formed delimiter is emitted as the bare delimiter
        instead of raising.
        """
        mapping = _template_mapping(args, kws)

        # Helper function for .sub()
        def convert(mo):
            plain = mo.group('named')
            if plain is not None:
                try:
                    # We use this idiom instead of str() because the latter
                    # will fail if val is a Unicode containing non-ASCII
                    return '%s' % (mapping[plain],)
                except KeyError:
                    return self.delimiter + plain
            wrapped = mo.group('braced')
            if wrapped is not None:
                try:
                    return '%s' % (mapping[wrapped],)
                except KeyError:
                    return self.delimiter + '{' + wrapped + '}'
            if mo.group('escaped') is not None:
                return self.delimiter
            if mo.group('invalid') is not None:
                return self.delimiter
            raise ValueError('Unrecognized named group in pattern',
                             self.pattern)

        return self.pattern.sub(convert, self.template)



####################################################################
# NOTE: Everything below here is deprecated.  Use string methods instead.
# This stuff will go away in Python 3.0.

# Backward compatible names for exceptions
index_error = ValueError
atoi_error = ValueError
atof_error = ValueError
atol_error = ValueError

# convert UPPER CASE letters to lower case
def lower(s):
    """lower(s) -> string

    Deprecated wrapper: return s.lower(), a lowercase copy of s.

    """
    lowered = s.lower()
    return lowered
225 226 # Convert lower case letters to UPPER CASE
def upper(s):
    """upper(s) -> string

    Deprecated wrapper: return s.upper(), an uppercase copy of s.

    """
    raised = s.upper()
    return raised
234 235 # Swap lower case letters and UPPER CASE
def swapcase(s):
    """swapcase(s) -> string

    Deprecated wrapper: return s.swapcase(), a copy of s in which
    uppercase characters become lowercase and vice versa.

    """
    swapped = s.swapcase()
    return swapped
244 245 # Strip leading and trailing tabs and spaces
def strip(s, chars=None):
    """strip(s [,chars]) -> string

    Deprecated wrapper: return s.strip(chars).  Leading and trailing
    whitespace is removed, or, when chars is given and not None, the
    characters in chars are removed instead.  If chars is unicode, s
    is converted to unicode before stripping.

    """
    trimmed = s.strip(chars)
    return trimmed
256 257 # Strip leading tabs and spaces
def lstrip(s, chars=None):
    """lstrip(s [,chars]) -> string

    Deprecated wrapper: return s.lstrip(chars).  Leading whitespace is
    removed, or, when chars is given and not None, leading characters
    found in chars are removed instead.

    """
    trimmed = s.lstrip(chars)
    return trimmed
266 267 # Strip trailing tabs and spaces
def rstrip(s, chars=None):
    """rstrip(s [,chars]) -> string

    Deprecated wrapper: return s.rstrip(chars).  Trailing whitespace is
    removed, or, when chars is given and not None, trailing characters
    found in chars are removed instead.

    """
    trimmed = s.rstrip(chars)
    return trimmed
276 277 278 # Split a string into a list of space/tab-separated words
def split(s, sep=None, maxsplit=-1):
    """split(s [,sep [,maxsplit]]) -> list of strings

    Deprecated wrapper: return s.split(sep, maxsplit), the list of words
    in s delimited by sep.  At most maxsplit splits are made when
    maxsplit is given (so at most maxsplit+1 words result).  When sep is
    omitted or None, any run of whitespace separates words.

    (split and splitfields are synonymous)

    """
    pieces = s.split(sep, maxsplit)
    return pieces


# Historical alias kept for backward compatibility.
splitfields = split

# Split a string into a list of space/tab-separated words, from the right
def rsplit(s, sep=None, maxsplit=-1):
    """rsplit(s [,sep [,maxsplit]]) -> list of strings

    Deprecated wrapper: return s.rsplit(sep, maxsplit), the list of
    words in s delimited by sep, splitting from the end of the string
    towards the front.  At most maxsplit splits are made when maxsplit
    is given.  When sep is omitted or None, any run of whitespace
    separates words.
    """
    pieces = s.rsplit(sep, maxsplit)
    return pieces
304 305 # Join fields with optional separator
def join(words, sep = ' '):
    """join(list [,sep]) -> string

    Deprecated wrapper: return sep.join(words), i.e. the items of words
    concatenated with sep between neighbours.  sep defaults to a single
    space.

    (joinfields and join are synonymous)

    """
    combined = sep.join(words)
    return combined


# Historical alias kept for backward compatibility.
joinfields = join

# Find substring, raise exception if not found
def index(s, *args):
    """index(s, sub [,start [,end]]) -> int

    Deprecated wrapper: return s.index(sub [,start [,end]]).  Behaves
    like find, except that ValueError is raised when sub is not found.

    """
    locate = s.index
    return locate(*args)
327 328 # Find last substring, raise exception if not found
def rindex(s, *args):
    """rindex(s, sub [,start [,end]]) -> int

    Deprecated wrapper: return s.rindex(sub [,start [,end]]).  Behaves
    like rfind, except that ValueError is raised when sub is not found.

    """
    locate = s.rindex
    return locate(*args)
336 337 # Count non-overlapping occurrences of substring
def count(s, *args):
    """count(s, sub[, start[,end]]) -> int

    Deprecated wrapper: return s.count(sub[, start[, end]]), the number
    of occurrences of sub in s[start:end].  start and end are optional
    and are interpreted as in slice notation.

    """
    tally = s.count
    return tally(*args)
347 348 # Find substring, return -1 if not found
def find(s, *args):
    """find(s, sub [,start [,end]]) -> int

    Deprecated wrapper: return s.find(sub [,start [,end]]), the lowest
    index in s where substring sub is found such that sub is contained
    within s[start:end].  Optional arguments start and end are
    interpreted as in slice notation.

    Return -1 on failure.

    """
    return s.find(*args)
360 361 # Find last substring, return -1 if not found
def rfind(s, *args):
    """rfind(s, sub [,start [,end]]) -> int

    Deprecated wrapper: return s.rfind(sub [,start [,end]]), the highest
    index in s where substring sub is found such that sub is contained
    within s[start:end].  Optional arguments start and end are
    interpreted as in slice notation.

    Return -1 on failure.

    """
    return s.rfind(*args)
373 374 # for a bit of speed 375 _float = float 376 _int = int 377 _long = long 378 379 # Convert string to float
def atof(s):
    """atof(s) -> float

    Deprecated wrapper: convert the string s to a floating point number
    by passing it to float (via the module-level _float alias).

    """
    number = _float(s)
    return number
387 388 389 # Convert string to integer
def atoi(s , base=10):
    """atoi(s [,base]) -> int

    Deprecated wrapper: convert the string s to an integer in the given
    base (default 10) by passing both to int (via the module-level _int
    alias).  s must consist of one or more digits, possibly preceded by
    a sign.  With base 0 the base is chosen from the leading characters
    of s: 0 for octal, 0x or 0X for hexadecimal.  With base 16 a
    preceding 0x or 0X is accepted.

    """
    number = _int(s, base)
    return number
402 403 404 # Convert string to long integer
def atol(s, base=10):
    """atol(s [,base]) -> long

    Deprecated wrapper: convert the string s to a long integer in the
    given base (default 10) by passing both to long (via the
    module-level _long alias).  s must consist of one or more digits,
    possibly preceded by a sign.  With base 0 the base is chosen from
    the leading characters of s: 0 for octal, 0x or 0X for hexadecimal.
    With base 16 a preceding 0x or 0X is accepted.  A trailing L or l is
    not accepted, unless base is 0.

    """
    number = _long(s, base)
    return number
418 419 420 # Left-justify a string
def ljust(s, width, *args):
    """ljust(s, width[, fillchar]) -> string

    Deprecated wrapper: return s.ljust(width[, fillchar]), s
    left-justified in a field of the given width.  Padding uses spaces
    unless fillchar is supplied.  The string is never truncated.

    """
    pad = s.ljust
    return pad(width, *args)
430 431 # Right-justify a string
def rjust(s, width, *args):
    """rjust(s, width[, fillchar]) -> string

    Deprecated wrapper: return s.rjust(width[, fillchar]), s
    right-justified in a field of the given width.  Padding uses spaces
    unless fillchar is supplied.  The string is never truncated.

    """
    pad = s.rjust
    return pad(width, *args)
441 442 # Center a string
def center(s, width, *args):
    """center(s, width[, fillchar]) -> string

    Deprecated wrapper: return s.center(width[, fillchar]), s centered
    in a field of the given width.  Padding uses spaces unless fillchar
    is supplied.  The string is never truncated.

    """
    pad = s.center
    return pad(width, *args)
452 453 # Zero-fill a number, e.g., (12, 3) --> '012' and (-3, 3) --> '-03' 454 # Decadent feature: the argument may be a string or a number 455 # (Use of this is deprecated; it should be a string as with ljust c.s.)
def zfill(x, width):
    """zfill(x, width) -> string

    Pad the numeric string x on the left with zeros until it is width
    characters wide; x is never truncated.  For backward compatibility
    a non-string x is first converted with repr().

    """
    if isinstance(x, basestring):
        text = x
    else:
        text = repr(x)
    return text.zfill(width)
466 467 # Expand tabs in a string. 468 # Doesn't take non-printing chars into account, but does understand \n.
def expandtabs(s, tabsize=8):
    """expandtabs(s [,tabsize]) -> string

    Deprecated wrapper: return s.expandtabs(tabsize), a copy of s in
    which each tab is replaced by the number of spaces appropriate for
    the current column and the tabsize (default 8).

    """
    expanded = s.expandtabs(tabsize)
    return expanded
478 479 # Character translation through look-up table.
def translate(s, table, deletions=""):
    """translate(s,table [,deletions]) -> string

    Return a copy of s in which every character occurring in the
    optional argument deletions has been removed and the remaining
    characters have been mapped through table, which must be a string
    of length 256.  The deletions argument is not allowed for Unicode
    strings.

    """
    if not deletions:
        # Add s[:0] so that if s is Unicode and table is an 8-bit string,
        # table is converted to Unicode.  This means that table *cannot*
        # be a dictionary -- for that feature, use u.translate() directly.
        widened = table + s[:0]
        return s.translate(widened)
    return s.translate(table, deletions)
497 498 # Capitalize a string, e.g. "aBc dEf" -> "Abc def".
def capitalize(s):
    """capitalize(s) -> string

    Deprecated wrapper: return s.capitalize(), a copy of s with only
    its first character capitalized.

    """
    capped = s.capitalize()
    return capped
507 508 # Substring replacement (global)
def replace(s, old, new, maxsplit=-1):
    """replace (str, old, new[, maxsplit]) -> string

    Deprecated wrapper: return s.replace(old, new, maxsplit), a copy of
    the string with occurrences of substring old replaced by new.  When
    maxsplit is given, only the first maxsplit occurrences are replaced;
    the default of -1 replaces them all.

    """
    replaced = s.replace(old, new, maxsplit)
    return replaced
518 519 520 # Try importing optional built-in module "strop" -- if it exists, 521 # it redefines some string operations that are 100-1000 times faster. 522 # It also defines values for whitespace, lowercase and uppercase 523 # that match <ctype.h>'s definitions. 524 525 try: 526 from strop import maketrans, lowercase, uppercase, whitespace 527 letters = lowercase + uppercase 528 except ImportError: 529 pass # Use the original versions 530