## Begin parser
from string import *
from yapps import SyntaxError, NoMoreTokens
from yapps import Scanner


class ParserDescScanner(Scanner):
    """Scanner configured with the token patterns of the parser-description
    language.

    The pattern table and the ignore list (whitespace) are passed straight
    through to ``Scanner.__init__``; nothing else is customised here.
    """

    def __init__(self, str):
        # NOTE: the parameter name ``str`` shadows the builtin, but it is
        # part of the public signature and is therefore left unchanged.
        Scanner.__init__(self, [
            ('"ignore"', 'ignore'),
            ('"token"', 'token'),
            ('"->"', '->'),
            ('"rule"', 'rule'),
            ('"|"', '|'),
            ('"parser"', 'parser'),
            ('":"', ':'),
            ('"option"', 'option'),
            ('"[ \\t\\n\\r]+"', '[ \011\012\015]+'),
            ('END', '$'),
            ('ATTR', '<<\\([^>]+\\|>[^>]\\)*>>'),
            ('ID', '[a-zA-Z_][a-zA-Z_0-9]*'),
            ('STR', '\\("\\([^\\"]+\\|\\\\.\\)*"\\)\\|\\(\'\\([^\\\']+\\|\\\\.\\)*\'\\)'),
            ], ['"[ \\t\\n\\r]+"'], str)


class ParserDesc:
    """Recursive-descent parser for parser descriptions.

    Every rule method takes the index ``_pos_`` of the token to start at and
    returns a ``(value, next_pos)`` pair.  Rule methods that accept extra
    arguments (``v``, ``u``) use them as accumulators that are extended via
    the module-level ``append`` helper (defined outside this section).

    ``self.scanner.token(i)`` is used as returning a 4-item sequence of which
    item 0 is passed to ``SyntaxError`` as the error position, item 2 is the
    token type and item 3 is the matched text.
    """

    def __init__(self, scanner):
        self.scanner = scanner

    def token(self, _pos_, type):
        """Return the text of token ``_pos_``, requiring it to be of ``type``.

        Raises ``SyntaxError`` (the yapps one) if the token type differs.
        """
        tok = self.scanner.token(_pos_)
        if tok[2] != type:
            raise SyntaxError(tok[0], 'Trying to find ' + type)
        return tok[3]

    def Options(self, _pos_, v):
        """Parse zero or more ``option : STR`` entries, appending to ``v``."""
        while True:
            _start_, _, _token_, _text_ = self.scanner.token(_pos_)
            if _token_ == '"option"':
                self.token(1 + _pos_, '":"')
                STR = self.token(2 + _pos_, 'STR')
                _pos_ = 3 + _pos_
                # SECURITY NOTE(review): eval() is used to unquote the string
                # literal taken from the grammar text.  This executes
                # arbitrary expressions if the input is untrusted -- confirm
                # that only trusted grammar files reach this parser.
                v = append(v, eval(STR))
            elif _token_ in ('"token"', '"ignore"', '"rule"', 'END'):
                return v, _pos_
            else:
                raise SyntaxError(_start_, 'Could not match Options')

    def Subs(self, _pos_, u, v):
        """Parse the items on the left of ``->`` in a clause.

        ``u`` collects the item names (STR text or ID), ``v`` the matching
        parameter (``''`` for STR items, the ``OptParam`` value for IDs).
        Returns ``((u, v), next_pos)`` with ``next_pos`` at the ``->`` token.
        """
        while True:
            _start_, _, _token_, _text_ = self.scanner.token(_pos_)
            if _token_ == 'STR':
                _pos_ = 1 + _pos_
                u, v = append(u, _text_), append(v, '')
            elif _token_ == 'ID':
                OptParam, _pos_ = self.OptParam(1 + _pos_)
                u, v = append(u, _text_), append(v, OptParam)
            elif _token_ == '"->"':
                return (u, v), _pos_
            else:
                raise SyntaxError(_start_, 'Could not match Subs')

    def Parser(self, _pos_=0):
        """Parse a whole description: ``parser ID : Options Tokens Rules END``.

        Returns ``((ID, Options, Tokens, Rules), next_pos)``.
        """
        # The leading keyword used to be fetched but never checked, so any
        # first token was silently accepted; verify it like every other
        # expected token.
        self.token(_pos_, '"parser"')
        ID = self.token(1 + _pos_, 'ID')
        self.token(2 + _pos_, '":"')
        Options, _pos_ = self.Options(3 + _pos_, [])
        Tokens, _pos_ = self.Tokens(_pos_, [])
        Rules, _pos_ = self.Rules(_pos_, [])
        self.token(_pos_, 'END')
        return (ID, Options, Tokens, Rules), 1 + _pos_

    def OptParam(self, _pos_=0):
        """Parse an optional ATTR; return ``''`` (consuming nothing) if absent."""
        _start_, _, _token_, _text_ = self.scanner.token(_pos_)
        if _token_ == 'ATTR':
            return self.Attr(_pos_)
        elif _token_ in ('STR', 'ID', '":"', '"->"'):
            return '', _pos_
        else:
            raise SyntaxError(_start_, 'Could not match OptParam')

    def Clauses(self, _pos_, v):
        """Parse zero or more ``| Clause`` alternatives, appending to ``v``."""
        while True:
            _start_, _, _token_, _text_ = self.scanner.token(_pos_)
            if _token_ == '"|"':
                Clause, _pos_ = self.Clause(1 + _pos_)
                v = append(v, Clause)
            elif _token_ in ('"rule"', 'END'):
                return v, _pos_
            else:
                raise SyntaxError(_start_, 'Could not match Clauses')

    def Rules(self, _pos_, v):
        """Parse zero or more ``rule ID OptParam : Clause Clauses`` entries.

        Each rule is appended to ``v`` as ``(ID, OptParam, Clauses)``.
        """
        while True:
            _start_, _, _token_, _text_ = self.scanner.token(_pos_)
            if _token_ == '"rule"':
                ID = self.token(1 + _pos_, 'ID')
                OptParam, _pos_ = self.OptParam(2 + _pos_)
                self.token(_pos_, '":"')
                Clause, _pos_ = self.Clause(1 + _pos_)
                Clauses, _pos_ = self.Clauses(_pos_, [Clause])
                v = append(v, (ID, OptParam, Clauses))
            elif _token_ == 'END':
                return v, _pos_
            else:
                raise SyntaxError(_start_, 'Could not match Rules')

    def Clause(self, _pos_=0):
        """Parse ``Subs -> Attr`` and build a ``Clause`` object from it.

        ``Clause`` in the return expression is the module-level name (defined
        outside this section), not this method.
        """
        # No separate lookahead is needed here: Subs inspects the token at
        # _pos_ itself as its first step.
        Subs, _pos_ = self.Subs(_pos_, [], [])
        self.token(_pos_, '"->"')
        Attr, _pos_ = self.Attr(1 + _pos_)
        return Clause(Subs[0], Subs[1], Attr), _pos_

    def Attr(self, _pos_=0):
        """Parse an ATTR token and return its text without the 2-character
        delimiters on each side.

        Raises ``SyntaxError`` if the token at ``_pos_`` is not an ATTR.
        Previously any token was accepted and sliced, so a clause whose
        ``->`` was followed by something else produced a garbage attribute
        instead of an error.
        """
        ATTR = self.token(_pos_, 'ATTR')
        return ATTR[2:-2], 1 + _pos_

    def Tokens(self, _pos_, v):
        """Parse ``token ID : STR`` and ``ignore : STR`` entries.

        Token entries are appended to ``v`` as ``(ID, STR)``; ignore entries
        as ``('#ignore', STR)``.
        """
        while True:
            _start_, _, _token_, _text_ = self.scanner.token(_pos_)
            if _token_ == '"token"':
                ID = self.token(1 + _pos_, 'ID')
                self.token(2 + _pos_, '":"')
                STR = self.token(3 + _pos_, 'STR')
                _pos_ = 4 + _pos_
                v = append(v, (ID, STR))
            elif _token_ == '"ignore"':
                self.token(1 + _pos_, '":"')
                STR = self.token(2 + _pos_, 'STR')
                _pos_ = 3 + _pos_
                v = append(v, ('#ignore', STR))
            elif _token_ in ('"rule"', 'END'):
                return v, _pos_
            else:
                raise SyntaxError(_start_, 'Could not match Tokens')


def parse(rule, input):
    """Parse ``input`` starting from the rule method named ``rule``.

    Returns the value produced by the rule (the first item of its result
    pair).  On a yapps ``SyntaxError`` or ``NoMoreTokens`` a message is
    printed and ``None`` is returned.

    ``rule`` is called without arguments, so it must name a rule method whose
    parameters all have defaults (``Parser``, ``OptParam``, ``Clause``,
    ``Attr``).
    """
    parser = ParserDesc(ParserDescScanner(input))
    try:
        return getattr(parser, rule)()[0]
    # ``except X as e`` and single-argument ``print(...)`` are valid on both
    # Python 2.6+ and Python 3, unlike the former ``except X, e`` / print
    # statement forms.
    except SyntaxError as s:
        try:
            from yapps import print_error
            print_error(input, s, parser.scanner)
        except ImportError:
            # Fallback report: derive the 1-based line number from the
            # number of newlines before the error position.
            line = 1 + input[:s.pos].count('\n')
            print('Syntax Error %s on line %s' % (s.msg, line))
    except NoMoreTokens:
        print('Ran out of input')
    return None