# -*- coding: UTF-8 -*-
"""Small helpers for hand-written, position-based scanners.

The module offers a set of precompiled regular expressions together with
two functions, ``digest`` and ``discard``, that try to match one of those
expressions at a given position of an input string and return the new
position (and, for ``digest``, the matched text).
"""

import logging
import re
import sys

_log = logging.getLogger(__name__)

# ``unicode`` only exists on Python 2; fall back to ``str`` on Python 3 so
# that the error-context strings are always text.
try:
    _text_type = unicode  # noqa: F821 (Python 2 builtin)
except NameError:
    _text_type = str

# Number of characters of input shown on each side of a failed match.
_CONTEXT_WIDTH = 20


def make_re(s):
    """Compile pattern *s* with the ``re.U`` flag and return it.

    Provided so that calling modules do not need to import ``re``.
    """
    return re.compile(s, re.U)


### Some useful regular expressions
# Raw string literals are used so that backslashes reach the regex engine
# untouched instead of relying on unrecognised string escapes.

re_whitespaces_star = make_re(r"\s*")
re_whitespaces_plus = make_re(r"\s+")
re_non_whitespaces_star = make_re(r"\S*")
re_non_whitespaces_plus = make_re(r"\S+")
re_words_plus = make_re(r"[a-zA-Z]+")
re_alphanumerics_plus = make_re(r"\w+")
re_open_bracket = make_re(r"\(")
re_close_bracket = make_re(r"\)")
re_open_square = make_re(r"\[")
re_close_square = make_re(r"\]")
re_open_angle = make_re(r"<")
re_close_angle = make_re(r">")
re_single_quote = make_re(r"'")
re_double_quote = make_re(r'"')
re_comma = make_re(r",")
re_dot = make_re(r"\.")
re_ampersand = make_re(r"&")
re_percent = make_re(r"%")
re_hash = make_re(r"#")
re_equal_sign = make_re(r"=")
re_forward_slash = make_re(r"/")
# TODO: there is no re_backward_slash yet.
re_any_but_double_quote = make_re(r'[^"]*')
re_integer = make_re(r"[0-9]+")
# Optional single sign, optional "<digits>." prefix, mandatory digits and an
# optional exponent whose sign may be omitted ("1e5", "-2.5E-3").  The sign
# sits outside the first group so that signed numbers without a decimal
# point ("-5") are matched as well.
re_float = make_re(r"[+-]?([0-9]*\.)?[0-9]+([eE][+-]?[0-9]+)?")


### Exceptions

class DigestException(Exception):
    """Raised by ``digest``/``discard`` when a required match fails.

    ``args`` holds two strings: the input text just before the failing
    position and the input text starting at it.
    """


### Functions

def _match_failure(inp, pos):
    """Build the DigestException describing a failed match at *pos*."""
    # max() keeps a small ``pos`` from turning the start index negative,
    # which would make the slice count from the end of the input.
    before = _text_type(inp[max(0, pos - _CONTEXT_WIDTH):pos])
    # Slicing already clamps the upper bound to the end of the input.
    after = _text_type(inp[pos:pos + _CONTEXT_WIDTH])
    return DigestException(before, after)


def digest(regexp, inp, pos, exception_on_fail=False):
    """Match *regexp* at *pos* in *inp* and return ``(matched_text, new_pos)``.

    Typical use: ``s, pos = digest(regexp, inp, pos)``

    When there is no match, ``(u"", pos)`` is returned, unless
    *exception_on_fail* is true, in which case DigestException is raised
    with the input surrounding *pos* as its arguments.
    """
    match = regexp.match(inp, pos)
    if match is None:
        if exception_on_fail:
            raise _match_failure(inp, pos)
        return (u"", pos)
    end = match.end()
    return (inp[pos:end], end)


def discard(regexp, inp, pos, exception_on_fail=False):
    """Match *regexp* at *pos* in *inp* and return the position after it.

    Typical use: ``pos = discard(regexp, inp, pos)``

    When there is no match, *pos* is returned unchanged, unless
    *exception_on_fail* is true, in which case DigestException is raised
    with the input surrounding *pos* as its arguments.
    """
    match = regexp.match(inp, pos)
    if match is None:
        if exception_on_fail:
            raise _match_failure(inp, pos)
        # Diagnostic only: reported through logging rather than printed, so
        # that library callers do not get unexpected output on stdout.
        _log.debug("No match.")
        return pos
    return match.end()