Package SCons :: Package compat :: Module _scons_shlex
[hide private]
[frames | no frames]

Source Code for Module SCons.compat._scons_shlex

  1  # -*- coding: iso-8859-1 -*- 
  2  """A lexical analyzer class for simple shell-like syntaxes.""" 
  3   
  4  # Module and documentation by Eric S. Raymond, 21 Dec 1998 
  5  # Input stacking and error message cleanup added by ESR, March 2000 
  6  # push_source() and pop_source() made explicit by ESR, January 2001. 
  7  # Posix compliance, split(), string arguments, and 
  8  # iterator interface by Gustavo Niemeyer, April 2003. 
  9   
 10  import os.path 
 11  import sys 
 12  #from collections import deque 
 13   
class deque:
    """Minimal list-backed queue offering only left-end operations.

    Items live in the ``data`` list; index 0 is the left end.  Only the
    operations defined here (``len()``, ``appendleft`` and ``popleft``)
    are supported.
    """

    def __init__(self):
        """Create an empty queue."""
        self.data = list()

    def __len__(self):
        """Return the number of stored items (an empty queue tests false)."""
        return len(self.data)

    def appendleft(self, item):
        """Insert *item* at the left end of the queue."""
        self.data[:0] = [item]

    def popleft(self):
        """Remove and return the leftmost item.

        Raises IndexError if the queue is empty.
        """
        leftmost = self.data.pop(0)
        return leftmost
# Work out once, at import time, which types count as "a string" on the
# running interpreter.
try:
    _string_types = (basestring,)
except NameError:
    # No ``basestring`` builtin is available.  Fall back to the concrete
    # string types that do exist, so that the check below behaves the same
    # way as the ``basestring`` branch (unicode strings and subclasses are
    # accepted) instead of matching the exact ``str`` type only.
    try:
        _string_types = (str, unicode)
    except NameError:
        _string_types = (str,)


def is_basestring(s):
    """Return True if *s* is a string or an instance of a string subclass."""
    return isinstance(s, _string_types)
33 34 try: 35 from cStringIO import StringIO 36 except ImportError: 37 from StringIO import StringIO 38 39 __all__ = ["shlex", "split"] 40
class shlex:
    """A lexical analyzer class for simple shell-like syntaxes.

    Tokens are read one character at a time from ``instream``.  The
    tokenizer is a small state machine whose current state is kept in
    ``self.state``:

    * ``' '``  -- between tokens (skipping whitespace),
    * ``'a'``  -- inside a word,
    * a character from ``self.quotes`` -- inside a quoted string,
    * a character from ``self.escape`` -- just after an escape character
      (posix mode only),
    * ``None`` -- end of the current input stream has been reached.

    The lexing rules are controlled by public attributes (``commenters``,
    ``wordchars``, ``whitespace``, ``whitespace_split``, ``quotes``,
    ``escape``, ``escapedquotes``, ``source``, ``debug``) which may be
    changed after construction.
    """

    def __init__(self, instream=None, infile=None, posix=False):
        """Set up a lexer.

        instream -- a file-like object providing read(1), readline() and
                    close(), or a string (which is wrapped in a StringIO).
                    When None, sys.stdin is read and *infile* is ignored.
        infile   -- name associated with *instream*; used in error leaders
                    and for resolving relative paths in sourcehook().
        posix    -- when true, use POSIX-style quoting/escaping rules and
                    report end of input as None instead of ''.
        """
        if is_basestring(instream):
            instream = StringIO(instream)
        if instream is not None:
            self.instream = instream
            self.infile = infile
        else:
            self.instream = sys.stdin
            self.infile = None
        self.posix = posix
        # Value returned by get_token()/read_token() at end of input.
        if posix:
            self.eof = None
        else:
            self.eof = ''
        self.commenters = '#'
        self.wordchars = ('abcdfeghijklmnopqrstuvwxyz'
                          'ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_')
        if self.posix:
            self.wordchars = self.wordchars + ('ßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ'
                                               'ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞ')
        self.whitespace = ' \t\r\n'
        self.whitespace_split = False
        self.quotes = '\'"'
        self.escape = '\\'
        self.escapedquotes = '"'
        self.state = ' '
        self.pushback = deque()
        self.lineno = 1
        # Debug verbosity; 0 is silent, higher values print more tracing.
        self.debug = 0
        self.token = ''
        # Stack of (infile, instream, lineno) for suspended input sources.
        self.filestack = deque()
        # Token that triggers inclusion of another source; None disables it.
        self.source = None

    def push_token(self, tok):
        "Push a token onto the stack popped by the get_token method"
        if self.debug >= 1:
            print("shlex: pushing token " + repr(tok))
        self.pushback.appendleft(tok)

    def push_source(self, newstream, newfile=None):
        """Push an input source onto the lexer's input source stack.

        The current (infile, instream, lineno) is saved on the file stack
        and reading continues from *newstream* (a string is wrapped in a
        StringIO) with the line counter reset to 1.
        """
        if is_basestring(newstream):
            newstream = StringIO(newstream)
        self.filestack.appendleft((self.infile, self.instream, self.lineno))
        self.infile = newfile
        self.instream = newstream
        self.lineno = 1
        if self.debug:
            if newfile is not None:
                print('shlex: pushing to file %s' % (self.infile,))
            else:
                print('shlex: pushing to stream %s' % (self.instream,))

    def pop_source(self):
        """Pop the input source stack.

        Closes the current input stream and resumes the most recently
        suspended source, resetting the state machine to "between tokens".
        """
        self.instream.close()
        (self.infile, self.instream, self.lineno) = self.filestack.popleft()
        if self.debug:
            print('shlex: popping to %s, line %d'
                  % (self.instream, self.lineno))
        self.state = ' '

    def get_token(self):
        """Get a token from the input stream (or from stack if it's nonempty).

        Pushed-back tokens are returned first.  Otherwise a raw token is
        read; if it equals ``self.source`` the following token is handed to
        sourcehook() and the resulting stream is pushed as a new input
        source.  When a source is exhausted the previous one is resumed.
        Returns ``self.eof`` once every source is exhausted.
        """
        if self.pushback:
            tok = self.pushback.popleft()
            if self.debug >= 1:
                print("shlex: popping token " + repr(tok))
            return tok
        # No pushback.  Get a token.
        raw = self.read_token()
        # Handle inclusions
        if self.source is not None:
            while raw == self.source:
                spec = self.sourcehook(self.read_token())
                if spec:
                    (newfile, newstream) = spec
                    self.push_source(newstream, newfile)
                raw = self.get_token()
        # Maybe we got EOF instead?
        while raw == self.eof:
            if not self.filestack:
                return self.eof
            else:
                self.pop_source()
                raw = self.get_token()
        # Neither inclusion nor EOF
        if self.debug >= 1:
            if raw != self.eof:
                print("shlex: token=" + repr(raw))
            else:
                print("shlex: token=EOF")
        return raw

    def read_token(self):
        """Read one raw token from the current input stream.

        Ignores the pushback stack and does not interpret source requests.
        Returns the token text; at end of input returns '' (or None in
        posix mode when the empty token was not produced by quotes).

        Raises ValueError if the input ends inside a quoted string or
        directly after an escape character.
        """
        quoted = False
        # State to return to once the escaped character has been consumed.
        escapedstate = ' '
        while True:
            nextchar = self.instream.read(1)
            if nextchar == '\n':
                self.lineno = self.lineno + 1
            if self.debug >= 3:
                print("shlex: in state %s I see character: %s"
                      % (repr(self.state), repr(nextchar)))
            if self.state is None:
                self.token = ''        # past end of file
                break
            elif self.state == ' ':
                if not nextchar:
                    self.state = None  # end of file
                    break
                elif nextchar in self.whitespace:
                    if self.debug >= 2:
                        print("shlex: I see whitespace in whitespace state")
                    if self.token or (self.posix and quoted):
                        break   # emit current token
                    else:
                        continue
                elif nextchar in self.commenters:
                    # Discard the rest of the comment line.
                    self.instream.readline()
                    self.lineno = self.lineno + 1
                elif self.posix and nextchar in self.escape:
                    escapedstate = 'a'
                    self.state = nextchar
                elif nextchar in self.wordchars:
                    self.token = nextchar
                    self.state = 'a'
                elif nextchar in self.quotes:
                    # Non-posix mode keeps the quote characters in the token.
                    if not self.posix:
                        self.token = nextchar
                    self.state = nextchar
                elif self.whitespace_split:
                    self.token = nextchar
                    self.state = 'a'
                else:
                    self.token = nextchar
                    if self.token or (self.posix and quoted):
                        break   # emit current token
                    else:
                        continue
            elif self.state in self.quotes:
                quoted = True
                if not nextchar:      # end of file
                    if self.debug >= 2:
                        print("shlex: I see EOF in quotes state")
                    # XXX what error should be raised here?
                    raise ValueError("No closing quotation")
                if nextchar == self.state:
                    if not self.posix:
                        self.token = self.token + nextchar
                        self.state = ' '
                        break
                    else:
                        self.state = 'a'
                elif self.posix and nextchar in self.escape and \
                        self.state in self.escapedquotes:
                    escapedstate = self.state
                    self.state = nextchar
                else:
                    self.token = self.token + nextchar
            elif self.state in self.escape:
                if not nextchar:      # end of file
                    if self.debug >= 2:
                        print("shlex: I see EOF in escape state")
                    # XXX what error should be raised here?
                    raise ValueError("No escaped character")
                # In posix shells, only the quote itself or the escape
                # character may be escaped within quotes.
                if escapedstate in self.quotes and \
                        nextchar != self.state and nextchar != escapedstate:
                    self.token = self.token + self.state
                self.token = self.token + nextchar
                self.state = escapedstate
            elif self.state == 'a':
                if not nextchar:
                    self.state = None   # end of file
                    break
                elif nextchar in self.whitespace:
                    if self.debug >= 2:
                        print("shlex: I see whitespace in word state")
                    self.state = ' '
                    if self.token or (self.posix and quoted):
                        break   # emit current token
                    else:
                        continue
                elif nextchar in self.commenters:
                    # Discard the rest of the comment line.
                    self.instream.readline()
                    self.lineno = self.lineno + 1
                    if self.posix:
                        self.state = ' '
                        if self.token or (self.posix and quoted):
                            break   # emit current token
                        else:
                            continue
                elif self.posix and nextchar in self.quotes:
                    self.state = nextchar
                elif self.posix and nextchar in self.escape:
                    escapedstate = 'a'
                    self.state = nextchar
                elif nextchar in self.wordchars or nextchar in self.quotes \
                        or self.whitespace_split:
                    self.token = self.token + nextchar
                else:
                    # Punctuation ends the word; it is pushed back so that it
                    # is returned as the next token.
                    self.pushback.appendleft(nextchar)
                    if self.debug >= 2:
                        print("shlex: I see punctuation in word state")
                    self.state = ' '
                    if self.token:
                        break   # emit current token
                    else:
                        continue
        result = self.token
        self.token = ''
        if self.posix and not quoted and result == '':
            result = None
        if self.debug > 1:
            if result:
                print("shlex: raw token=" + repr(result))
            else:
                print("shlex: raw token=EOF")
        return result

    def sourcehook(self, newfile):
        """Hook called on a filename to be sourced.

        Strips one pair of surrounding double quotes from *newfile*,
        resolves a relative name against the directory of the current
        ``infile`` (when that is a string), and returns a tuple
        ``(filename, open_file_object)``.
        """
        if newfile[0] == '"':
            newfile = newfile[1:-1]
        # This implements cpp-like semantics for relative-path inclusion.
        if is_basestring(self.infile) and not os.path.isabs(newfile):
            newfile = os.path.join(os.path.dirname(self.infile), newfile)
        return (newfile, open(newfile, "r"))

    def error_leader(self, infile=None, lineno=None):
        """Emit a C-compiler-like, Emacs-friendly error-message leader.

        *infile* and *lineno* default to the lexer's current input name
        and line number.
        """
        if infile is None:
            infile = self.infile
        if lineno is None:
            lineno = self.lineno
        return "\"%s\", line %d: " % (infile, lineno)

    def __iter__(self):
        """Return the lexer itself; it is its own iterator."""
        return self

    def next(self):
        """Return the next token, raising StopIteration at end of input."""
        token = self.get_token()
        if token == self.eof:
            raise StopIteration
        return token

    # Python 3 spelling of the iterator protocol method.
    __next__ = next
293
def split(s, comments=False):
    """Split *s* into a list of tokens using a posix-mode shlex.

    Tokens are delimited by whitespace only.  Comment processing is
    switched off unless *comments* is true.
    """
    lexer = shlex(s, posix=True)
    lexer.whitespace_split = True
    if not comments:
        lexer.commenters = ''
    tokens = []
    token = lexer.get_token()
    while token != lexer.eof:
        tokens.append(token)
        token = lexer.get_token()
    return tokens
if __name__ == '__main__':
    # Simple driver: tokenize the file named on the command line (or
    # standard input when no argument is given) and print each token.
    if len(sys.argv) == 1:
        stream = None
        lexer = shlex()
    else:
        filename = sys.argv[1]
        stream = open(filename)
        lexer = shlex(stream, filename)
    try:
        while True:
            tt = lexer.get_token()
            if not tt:
                break
            print("Token: " + repr(tt))
    finally:
        # Only close a file this script opened itself.
        if stream is not None:
            stream.close()

# Local Variables:
# tab-width:4
# indent-tabs-mode:nil
# End:
# vim: set expandtab tabstop=4 shiftwidth=4: