SCons.compat._scons

"""
Object for wrapping/filling text.  The public interface consists of
the wrap() and fill() methods; the other methods are just there for
subclasses to override in order to tweak the default behaviour.
If you want to completely replace the main wrapping algorithm,
you'll probably have to override _wrap_chunks().

Several instance attributes control various aspects of wrapping:
  width (default: 70)
    the maximum width of wrapped lines (unless break_long_words
    is false)
  initial_indent (default: "")
    string that will be prepended to the first line of wrapped
    output.  Counts towards the line's width.
  subsequent_indent (default: "")
    string that will be prepended to all lines save the first
    of wrapped output; also counts towards each line's width.
  expand_tabs (default: true)
    Expand tabs in input text to spaces before further processing.
    Each tab will become 1 .. 8 spaces, depending on its position in
    its line.  If false, each tab is treated as a single character.
  replace_whitespace (default: true)
    Replace all whitespace characters in the input text by spaces
    after tab expansion.  Note that if expand_tabs is false and
    replace_whitespace is true, every tab will be converted to a
    single space!
  fix_sentence_endings (default: false)
    Ensure that sentence-ending punctuation is always followed
    by two spaces.  Off by default because the algorithm is
    (unavoidably) imperfect.
  break_long_words (default: true)
    Break words longer than 'width'.  If false, those words will not
    be broken, and some lines might be longer than 'width'.
"""

# Translation table for 8-bit strings: every character listed in
# _whitespace maps to a plain space.  (_whitespace is defined outside
# this excerpt.)
whitespace_trans = string.maketrans(_whitespace, ' ' * len(_whitespace))

# Equivalent table for unicode strings, keyed by character ordinal.
unicode_whitespace_trans = {}
try:
    # The u'' literal is hidden inside eval() so that it is only parsed
    # when this statement runs and a failure can be caught just below.
    uspace = eval("ord(u' ')")
except SyntaxError:
    # Python1.5 doesn't understand u'' syntax, in which case we
    # won't actually use the unicode translation below, so it
    # doesn't matter what value we put in the table.
    uspace = ord(' ')
for x in map(ord, _whitespace):
    unicode_whitespace_trans[x] = uspace

# This funky little regex is just the trick for splitting
# text up into word-wrappable chunks.  E.g.
#   "Hello there -- you goof-ball, use the -b option!"
# splits into
#   Hello/ /there/ /--/ /you/ /goof-/ball,/ /use/ /the/ /-b/ /option!
# (after stripping out empty strings).
try:
    wordsep_re = re.compile(r'(\s+|'                  # any whitespace
                            r'[^\s\w]*\w{2,}-(?=\w{2,})|' # hyphenated words
                            r'(?<=[\w\!\"\'\&\.\,\?])-{2,}(?=\w))')   # em-dash
except re.error:
    # Pre-2.0 Python versions don't have the (?<= positive look-behind
    # assertion.  It mostly doesn't matter for the simple input
    # SCons is going to give it, so just leave it out.
    wordsep_re = re.compile(r'(\s+|'                  # any whitespace
                            r'-*\w{2,}-(?=\w{2,}))')  # hyphenated words

# XXX will there be a locale-or-charset-aware version of
# string.lowercase in 2.3?
# Matches a lowercase letter, then sentence-ending punctuation, then an
# optional closing quote; read by _fix_sentence_endings().
sentence_end_re = re.compile(r'[%s]'              # lowercase letter
                             r'[\.\!\?]'          # sentence-ending punct.
                             r'[\"\']?'           # optional end-of-quote
                             % string.lowercase)

112 - def __init__(self, 113 width=70, 114 initial_indent="", 115 subsequent_indent="", 116 expand_tabs=True, 117 replace_whitespace=True, 118 fix_sentence_endings=False, 119 break_long_words=True):

120 self.width = width 121 self.initial_indent = initial_indent 122 self.subsequent_indent = subsequent_indent 123 self.expand_tabs = expand_tabs 124 self.replace_whitespace = replace_whitespace 125 self.fix_sentence_endings = fix_sentence_endings 126 self.break_long_words = break_long_words

# -- Private methods -----------------------------------------------
# (possibly useful for subclasses to override)

132 - def _munge_whitespace(self, text):

133 """_munge_whitespace(text : string) -> string 134 135 Munge whitespace in text: expand tabs and convert all other 136 whitespace characters to spaces. Eg. " foo\tbar\n\nbaz" 137 becomes " foo bar baz". 138 """ 139 if self.expand_tabs: 140 text = string.expandtabs(text) 141 if self.replace_whitespace: 142 if type(text) == type(''): 143 text = string.translate(text, self.whitespace_trans) 144 elif isinstance(text, unicode): 145 text = string.translate(text, self.unicode_whitespace_trans) 146 return text

147 148

149 - def _split(self, text):

150 """_split(text : string) -> [string] 151 152 Split the text to wrap into indivisible chunks. Chunks are 153 not quite the same as words; see wrap_chunks() for full 154 details. As an example, the text 155 Look, goof-ball -- use the -b option! 156 breaks into the following chunks: 157 'Look,', ' ', 'goof-', 'ball', ' ', '--', ' ', 158 'use', ' ', 'the', ' ', '-b', ' ', 'option!' 159 """ 160 chunks = self.wordsep_re.split(text) 161 chunks = filter(None, chunks) 162 return chunks

163

164 - def _fix_sentence_endings(self, chunks):

165 """_fix_sentence_endings(chunks : [string]) 166 167 Correct for sentence endings buried in 'chunks'. Eg. when the 168 original text contains "... foo.\nBar ...", munge_whitespace() 169 and split() will convert that to [..., "foo.", " ", "Bar", ...] 170 which has one too few spaces; this method simply changes the one 171 space to two. 172 """ 173 i = 0 174 pat = self.sentence_end_re 175 while i < len(chunks)-1: 176 if chunks[i+1] == " " and pat.search(chunks[i]): 177 chunks[i+1] = " " 178 i = i + 2 179 else: 180 i = i + 1

181

182 - def _handle_long_word(self, chunks, cur_line, cur_len, width):

183 """_handle_long_word(chunks : [string], 184 cur_line : [string], 185 cur_len : int, width : int) 186 187 Handle a chunk of text (most likely a word, not whitespace) that 188 is too long to fit in any line. 189 """ 190 space_left = max(width - cur_len, 1) 191 192 # If we're allowed to break long words, then do so: put as much 193 # of the next chunk onto the current line as will fit. 194 if self.break_long_words: 195 cur_line.append(chunks[0][0:space_left]) 196 chunks[0] = chunks[0][space_left:] 197 198 # Otherwise, we have to preserve the long word intact. Only add 199 # it to the current line if there's nothing already there -- 200 # that minimizes how much we violate the width constraint. 201 elif not cur_line: 202 cur_line.append(chunks.pop(0))

203 204 # If we're not allowed to break long words, and there's already 205 # text on the current line, do nothing. Next time through the 206 # main loop of _wrap_chunks(), we'll wind up here again, but 207 # cur_len will be zero, so the next line will be entirely 208 # devoted to the long word that we can't handle right now. 209

210 - def _wrap_chunks(self, chunks):

211 """_wrap_chunks(chunks : [string]) -> [string] 212 213 Wrap a sequence of text chunks and return a list of lines of 214 length 'self.width' or less. (If 'break_long_words' is false, 215 some lines may be longer than this.) Chunks correspond roughly 216 to words and the whitespace between them: each chunk is 217 indivisible (modulo 'break_long_words'), but a line break can 218 come between any two chunks. Chunks should not have internal 219 whitespace; ie. a chunk is either all whitespace or a "word". 220 Whitespace chunks will be removed from the beginning and end of 221 lines, but apart from that whitespace is preserved. 222 """ 223 lines = [] 224 if self.width <= 0: 225 raise ValueError("invalid width %r (must be > 0)" % self.width) 226 227 while chunks: 228 229 # Start the list of chunks that will make up the current line. 230 # cur_len is just the length of all the chunks in cur_line. 231 cur_line = [] 232 cur_len = 0 233 234 # Figure out which static string will prefix this line. 235 if lines: 236 indent = self.subsequent_indent 237 else: 238 indent = self.initial_indent 239 240 # Maximum width for this line. 241 width = self.width - len(indent) 242 243 # First chunk on line is whitespace -- drop it, unless this 244 # is the very beginning of the text (ie. no lines started yet). 245 if string.strip(chunks[0]) == '' and lines: 246 del chunks[0] 247 248 while chunks: 249 l = len(chunks[0]) 250 251 # Can at least squeeze this chunk onto the current line. 252 if cur_len + l <= width: 253 cur_line.append(chunks.pop(0)) 254 cur_len = cur_len + l 255 256 # Nope, this line is full. 257 else: 258 break 259 260 # The current line is full, and the next chunk is too big to 261 # fit on *any* line (not just this one). 262 if chunks and len(chunks[0]) > width: 263 self._handle_long_word(chunks, cur_line, cur_len, width) 264 265 # If the last chunk on this line is all whitespace, drop it. 
266 if cur_line and string.strip(cur_line[-1]) == '': 267 del cur_line[-1] 268 269 # Convert current line back to a string and store it in list 270 # of all lines (return value). 271 if cur_line: 272 lines.append(indent + string.join(cur_line, '')) 273 274 return lines

# -- Public interface ----------------------------------------------

def wrap(self, text):
    """wrap(text : string) -> [string]

    Reformat the single paragraph in 'text' so it fits in lines of
    no more than 'self.width' columns, and return a list of wrapped
    lines.  The text is first passed through _munge_whitespace(),
    then split into chunks by _split(); when
    'self.fix_sentence_endings' is true the chunks are adjusted by
    _fix_sentence_endings() before _wrap_chunks() builds the lines.
    """
    text = self._munge_whitespace(text)
    chunks = self._split(text)
    if self.fix_sentence_endings:
        self._fix_sentence_endings(chunks)
    return self._wrap_chunks(chunks)

294

def fill(self, text):
    """fill(text : string) -> string

    Reformat the single paragraph in 'text' to fit in lines of no
    more than 'self.width' columns, and return a new string
    containing the entire wrapped paragraph.

    The lines returned by wrap() are joined with newline characters;
    no trailing newline is added.
    """
    return "\n".join(self.wrap(text))

Source Code for Module SCons.compat._scons_textwrap