Package SCons :: Package compat :: Module _scons_textwrap
[hide private]
[frames | no frames]

Source Code for Module SCons.compat._scons_textwrap

  1  """Text wrapping and filling. 
  2  """ 
  3   
  4  # Copyright (C) 1999-2001 Gregory P. Ward. 
  5  # Copyright (C) 2002, 2003 Python Software Foundation. 
  6  # Written by Greg Ward <gward@python.net> 
  7   
  8  __revision__ = "$Id: textwrap.py,v 1.32.8.2 2004/05/13 01:48:15 gward Exp $" 
  9   
 10  import string, re 
 11   
 12  try: 
 13     unicode 
 14  except NameError: 
15 - class unicode:
16 pass

# Do the right thing with boolean values for all known Python versions
# (so this module can be copied to projects that don't depend on Python
# 2.3, e.g. Optik and Docutils).
try:
    True, False
except NameError:
    # Assign through globals() rather than with a literal
    # "(True, False) = (1, 0)": that statement is a SyntaxError on
    # Python 3 and would stop the whole module from compiling, even
    # though this branch can only run on interpreters without booleans.
    globals()['True'] = 1
    globals()['False'] = 0

__all__ = ['TextWrapper', 'wrap', 'fill']

# Hardcode the recognized whitespace characters to the US-ASCII
# whitespace characters.  The main reason for doing this is that in
# ISO-8859-1, 0xa0 is non-breaking whitespace, so in certain locales
# that character winds up in string.whitespace.  Respecting
# string.whitespace in those cases would 1) make textwrap treat 0xa0 the
# same as any other whitespace char, which is clearly wrong (it's a
# *non-breaking* space), 2) possibly cause problems with Unicode,
# since 0xa0 is not in range(128).
_whitespace = '\t\n\x0b\x0c\r '

class TextWrapper:
    """
    Object for wrapping/filling text.  The public interface consists of
    the wrap() and fill() methods; the other methods are just there for
    subclasses to override in order to tweak the default behaviour.
    If you want to completely replace the main wrapping algorithm,
    you'll probably have to override _wrap_chunks().

    Several instance attributes control various aspects of wrapping:
      width (default: 70)
        the maximum width of wrapped lines (unless break_long_words
        is false)
      initial_indent (default: "")
        string that will be prepended to the first line of wrapped
        output.  Counts towards the line's width.
      subsequent_indent (default: "")
        string that will be prepended to all lines save the first
        of wrapped output; also counts towards each line's width.
      expand_tabs (default: true)
        Expand tabs in input text to spaces before further processing.
        Each tab will become 1 .. 8 spaces, depending on its position in
        its line.  If false, each tab is treated as a single character.
      replace_whitespace (default: true)
        Replace all whitespace characters in the input text by spaces
        after tab expansion.  Note that if expand_tabs is false and
        replace_whitespace is true, every tab will be converted to a
        single space!
      fix_sentence_endings (default: false)
        Ensure that sentence-ending punctuation is always followed
        by two spaces.  Off by default because the algorithm is
        (unavoidably) imperfect.
      break_long_words (default: true)
        Break words longer than 'width'.  If false, those words will not
        be broken, and some lines might be longer than 'width'.
    """

    # Ordinal -> ordinal table for translating Unicode text: every
    # character in _whitespace maps to a plain space.  ord(u' ') and
    # ord(' ') are the same value, so no eval() of a u'' literal is
    # needed to stay loadable on interpreters without that syntax.
    unicode_whitespace_trans = {}
    uspace = ord(' ')
    for x in map(ord, _whitespace):
        unicode_whitespace_trans[x] = uspace

    # Table for translating plain str text.
    try:
        whitespace_trans = string.maketrans(_whitespace,
                                            ' ' * len(_whitespace))
    except AttributeError:
        # Python 3 has no string.maketrans(); its str is Unicode text,
        # so the ordinal-keyed table above does the same job.
        whitespace_trans = unicode_whitespace_trans

    # This funky little regex is just the trick for splitting
    # text up into word-wrappable chunks.  E.g.
    #   "Hello there -- you goof-ball, use the -b option!"
    # splits into
    #   Hello/ /there/ /--/ /you/ /goof-/ball,/ /use/ /the/ /-b/ /option!
    # (after stripping out empty strings).
    try:
        wordsep_re = re.compile(r'(\s+|'                                  # any whitespace
                                r'[^\s\w]*\w{2,}-(?=\w{2,})|'             # hyphenated words
                                r'(?<=[\w\!\"\'\&\.\,\?])-{2,}(?=\w))')   # em-dash
    except re.error:
        # Pre-2.0 Python versions don't have the (?<= look-behind
        # assertion.  It mostly doesn't matter for the simple input
        # SCons is going to give it, so just leave it out.
        wordsep_re = re.compile(r'(\s+|'                    # any whitespace
                                r'-*\w{2,}-(?=\w{2,}))')    # hyphenated words

    # string.lowercase (locale-aware on Python 2) does not exist on
    # Python 3; fall back to the ASCII letters there.
    try:
        lowercase = string.lowercase
    except AttributeError:
        lowercase = string.ascii_lowercase

    # A chunk ends a sentence only if the punctuation sits at the very
    # end of the chunk; without the \Z anchor a chunk such as "foo.bar"
    # would be mistaken for a sentence ending.
    sentence_end_re = re.compile(r'[%s]'              # lowercase letter
                                 r'[\.\!\?]'          # sentence-ending punct.
                                 r'[\"\']?'           # optional end-of-quote
                                 r'\Z'                # end of chunk
                                 % lowercase)
    del lowercase

    def __init__(self,
                 width=70,
                 initial_indent="",
                 subsequent_indent="",
                 expand_tabs=True,
                 replace_whitespace=True,
                 fix_sentence_endings=False,
                 break_long_words=True):
        """Store the wrapping options as instance attributes.

        Each argument is saved unchanged under an attribute of the same
        name; see the class docstring for what each option controls.
        """
        self.width = width
        self.initial_indent = initial_indent
        self.subsequent_indent = subsequent_indent
        self.expand_tabs = expand_tabs
        self.replace_whitespace = replace_whitespace
        self.fix_sentence_endings = fix_sentence_endings
        self.break_long_words = break_long_words

    # -- Private methods -----------------------------------------------
    # (possibly useful for subclasses to override)

    def _munge_whitespace(self, text):
        r"""_munge_whitespace(text : string) -> string

        Munge whitespace in text: expand tabs and convert all other
        whitespace characters to spaces.  Eg. " foo\tbar\n\nbaz"
        becomes " foo    bar  baz".
        """
        if self.expand_tabs:
            text = text.expandtabs()
        if self.replace_whitespace:
            # The two string types need different translation tables.
            if isinstance(text, str):
                text = text.translate(self.whitespace_trans)
            elif isinstance(text, unicode):
                text = text.translate(self.unicode_whitespace_trans)
        return text

    def _split(self, text):
        """_split(text : string) -> [string]

        Split the text to wrap into indivisible chunks.  Chunks are
        not quite the same as words; see _wrap_chunks() for full
        details.  As an example, the text
          Look, goof-ball -- use the -b option!
        breaks into the following chunks:
          'Look,', ' ', 'goof-', 'ball', ' ', '--', ' ',
          'use', ' ', 'the', ' ', '-b', ' ', 'option!'
        """
        # Build a real list (filter() is lazy on Python 3) and drop the
        # empty strings that re.split() leaves between adjacent
        # separators.
        return [chunk for chunk in self.wordsep_re.split(text) if chunk]

    def _fix_sentence_endings(self, chunks):
        r"""_fix_sentence_endings(chunks : [string])

        Correct for sentence endings buried in 'chunks'.  Eg. when the
        original text contains "... foo.\nBar ...", munge_whitespace()
        and split() will convert that to [..., "foo.", " ", "Bar", ...]
        which has one too few spaces; this method simply changes the one
        space to two.  'chunks' is modified in place.
        """
        i = 0
        pat = self.sentence_end_re
        while i < len(chunks) - 1:
            if chunks[i + 1] == " " and pat.search(chunks[i]):
                chunks[i + 1] = "  "
                i = i + 2
            else:
                i = i + 1

    def _handle_long_word(self, chunks, cur_line, cur_len, width):
        """_handle_long_word(chunks : [string],
                             cur_line : [string],
                             cur_len : int, width : int)

        Handle a chunk of text (most likely a word, not whitespace) that
        is too long to fit in any line.  Both 'chunks' and 'cur_line'
        are modified in place.
        """
        # Always make room for at least one character so that the
        # caller's loop makes progress even when the line is full.
        space_left = max(width - cur_len, 1)

        # If we're allowed to break long words, then do so: put as much
        # of the next chunk onto the current line as will fit.
        if self.break_long_words:
            cur_line.append(chunks[0][0:space_left])
            chunks[0] = chunks[0][space_left:]

        # Otherwise, we have to preserve the long word intact.  Only add
        # it to the current line if there's nothing already there --
        # that minimizes how much we violate the width constraint.
        elif not cur_line:
            cur_line.append(chunks.pop(0))

        # If we're not allowed to break long words, and there's already
        # text on the current line, do nothing.  Next time through the
        # main loop of _wrap_chunks(), we'll wind up here again, but
        # cur_len will be zero, so the next line will be entirely
        # devoted to the long word that we can't handle right now.

    def _wrap_chunks(self, chunks):
        """_wrap_chunks(chunks : [string]) -> [string]

        Wrap a sequence of text chunks and return a list of lines of
        length 'self.width' or less.  (If 'break_long_words' is false,
        some lines may be longer than this.)  Chunks correspond roughly
        to words and the whitespace between them: each chunk is
        indivisible (modulo 'break_long_words'), but a line break can
        come between any two chunks.  Chunks should not have internal
        whitespace; ie. a chunk is either all whitespace or a "word".
        Whitespace chunks will be removed from the beginning and end of
        lines, but apart from that whitespace is preserved.

        'chunks' is consumed (emptied) in the process.  Raises
        ValueError if 'self.width' is not positive.
        """
        lines = []
        if self.width <= 0:
            raise ValueError("invalid width %r (must be > 0)" % self.width)

        while chunks:

            # Start the list of chunks that will make up the current line.
            # cur_len is just the length of all the chunks in cur_line.
            cur_line = []
            cur_len = 0

            # Figure out which static string will prefix this line.
            if lines:
                indent = self.subsequent_indent
            else:
                indent = self.initial_indent

            # Maximum width for this line.
            width = self.width - len(indent)

            # First chunk on line is whitespace -- drop it, unless this
            # is the very beginning of the text (ie. no lines started yet).
            if chunks[0].strip() == '' and lines:
                del chunks[0]

            while chunks:
                chunk_len = len(chunks[0])

                # Can at least squeeze this chunk onto the current line.
                if cur_len + chunk_len <= width:
                    cur_line.append(chunks.pop(0))
                    cur_len = cur_len + chunk_len

                # Nope, this line is full.
                else:
                    break

            # The current line is full, and the next chunk is too big to
            # fit on *any* line (not just this one).
            if chunks and len(chunks[0]) > width:
                self._handle_long_word(chunks, cur_line, cur_len, width)

            # If the last chunk on this line is all whitespace, drop it.
            if cur_line and cur_line[-1].strip() == '':
                del cur_line[-1]

            # Convert current line back to a string and store it in list
            # of all lines (return value).
            if cur_line:
                lines.append(indent + ''.join(cur_line))

        return lines

    # -- Public interface ----------------------------------------------

    def wrap(self, text):
        """wrap(text : string) -> [string]

        Reformat the single paragraph in 'text' so it fits in lines of
        no more than 'self.width' columns, and return a list of wrapped
        lines.  Tabs in 'text' are expanded with string.expandtabs(),
        and all other whitespace characters (including newline) are
        converted to space.
        """
        text = self._munge_whitespace(text)
        chunks = self._split(text)
        if self.fix_sentence_endings:
            self._fix_sentence_endings(chunks)
        return self._wrap_chunks(chunks)

    def fill(self, text):
        """fill(text : string) -> string

        Reformat the single paragraph in 'text' to fit in lines of no
        more than 'self.width' columns, and return a new string
        containing the entire wrapped paragraph.
        """
        return "\n".join(self.wrap(text))


# -- Convenience interface ---------------------------------------------

def wrap(text, width=70, **kwargs):
    """Wrap a single paragraph of text, returning a list of wrapped lines.

    Reformat the single paragraph in 'text' so it fits in lines of no
    more than 'width' columns, and return a list of wrapped lines.  By
    default, tabs in 'text' are expanded with string.expandtabs(), and
    all other whitespace characters (including newline) are converted to
    space.  See TextWrapper class for available keyword args to customize
    wrapping behaviour.
    """
    # Direct keyword call instead of apply(), which no longer exists on
    # Python 3.  'width' cannot also appear in kwargs because it is a
    # named parameter of this function.
    w = TextWrapper(width=width, **kwargs)
    return w.wrap(text)
321
def fill(text, width=70, **kwargs):
    """Fill a single paragraph of text, returning a new string.

    Reformat the single paragraph in 'text' to fit in lines of no more
    than 'width' columns, and return a new string containing the entire
    wrapped paragraph.  As with wrap(), tabs are expanded and other
    whitespace characters converted to space.  See TextWrapper class for
    available keyword args to customize wrapping behaviour.
    """
    # Direct keyword call instead of apply(), which no longer exists on
    # Python 3.  'width' cannot also appear in kwargs because it is a
    # named parameter of this function.
    w = TextWrapper(width=width, **kwargs)
    return w.fill(text)


# -- Loosely related functionality -------------------------------------

def dedent(text):
    r"""dedent(text : string) -> string

    Remove any whitespace that can be uniformly removed from the left
    of every line in `text`.

    This can be used e.g. to make triple-quoted strings line up with
    the left edge of screen/whatever, while still presenting it in the
    source code in indented form.

    Tabs are expanded to spaces first.  Lines that are empty or contain
    only whitespace are ignored when working out the common margin, but
    the margin is still sliced off them.

    For example:

        def test():
            # end first line with \ to avoid the empty line!
            s = '''\
            hello
              world
            '''
            print(repr(s))          # prints '    hello\n      world\n    '
            print(repr(dedent(s)))  # prints 'hello\n  world\n'
    """
    lines = text.expandtabs().split('\n')

    # Leading-whitespace width of every line that has real content.
    indents = [len(line) - len(line.lstrip())
               for line in lines if line.strip()]

    if indents:
        margin = min(indents)
        if margin > 0:
            lines = [line[margin:] for line in lines]

    return '\n'.join(lines)

# Local Variables:
# tab-width:4
# indent-tabs-mode:nil
# End:
# vim: set expandtab tabstop=4 shiftwidth=4: