4 # - better checks before starting to parse (ex: no double spaces ?)
5 # - better error messages
6 # - document expressions
8 # - reduce binary tree in tree
10 # - better comments and names (operator is valid for '(' ?)
13 # - tests only if main
14 # - "not" operator? useful?
15 # - we could completely remove all_ops
19 def debug(*args, **kvargs):
21 print(*args, **kvargs)
23 class PycolExprParser(object):
24 def __init__(self, binary_ops = None, unary_ops = None,
25 group_ops = None, var_chars = None):
26 if binary_ops == None:
28 self.binary_ops = [ "|", "/", " " ]
30 self.binary_ops = binary_ops
34 self.unary_ops = unary_ops
36 self.group_ops = { "(" : ")", "[" : "]" }
38 self.group_ops = group_ops
40 self.var_chars = "<>-_abcdefghijklmnopqrstuvwxyz" + \
41 "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
43 self.var_chars = var_chars
46 self.all_ops += self.binary_ops
47 self.all_ops += self.unary_ops
48 self.all_ops += self.group_ops.keys()
49 self.all_ops += self.group_ops.values()
51 # return operator priority (high value = high priority)
52 def op_prio(self, op):
53 assert(isinstance(op, str))
54 if not op in self.binary_ops:
56 return self.binary_ops.index(op) + 1
58 # The argument tokens is an list of tokens that must start with an
59 # opening group operator, for instance an opening bracket '('. This
60 # function returns the list of tokens representing the inbound
61 # expression without the first opening operator and its corresponding
63 def get_group(self, tokens):
64 debug(tokens, self.group_ops.keys())
65 assert(tokens[0] in self.group_ops.keys())
68 for i, t in enumerate(tokens):
70 if t in self.group_ops.keys():
72 elif t == self.group_ops[stack[-1]]:
76 assert(len(stack) == 0)
80 # convert a string into a list of tokens
81 def tokenize(self, s):
82 debug("tokenize(%s)"%(s))
85 # try to parse an operator
87 for op in self.all_ops:
95 # try to parse a variable name
98 if c in self.var_chars:
104 debug("invalid token")
106 tokens.append(s[:length])
108 debug("tokenize() return %s"%(tokens))
111 def binary_parse(self, tokens):
115 while len(tokens) > 0:
117 exp = PycolBinExpr(t, self)
118 debug("token '%s'"%t)
120 if t in self.binary_ops:
121 # if it is the first node, error
123 debug("expression starts with a binary operator")
126 # the rightest leaf must be a variable
128 while tmp.right != None:
131 debug("the rightest leaf must be a variable")
134 # if new node priority is higher than root node, it
135 # becomes the new root
136 if self.op_prio(t) > self.op_prio(top.op):
139 # else, exp is placed at the "rightest" leaf. Check
143 while tmp.right != None and \
144 self.op_prio(exp.op) < self.op_prio(tmp.right.op):
147 if tmp.right == None:
148 debug("invalid right node")
154 elif t in self.group_ops:
155 # if operator is an opening bracket, recursively call
156 # ourself with the sub-expression
157 gr = self.get_group([t] + tokens)
158 exp.right = self.binary_parse(gr[:])
159 if exp.right == None:
161 tokens = tokens[len(gr) + 1:]
162 debug("subexp parsed, tokens=%s"%tokens)
164 # if it is the first node
169 # exp is placed at the "rightest" leaf. Check
170 # that our parent is not a variable
172 while tmp.right != None:
175 debug("the rightest leaf must not be a variable")
180 else: # unary operator or variable
181 debug("unary operator or variable")
183 # if it is the first node
188 # exp is placed at the "rightest" leaf. Check
189 # that our parent is not a variable
191 while tmp.right != None:
194 debug("the rightest leaf must not be a variable")
203 # the rightest leaf must be a variable
205 while tmp.right != None:
208 debug("the rightest leaf must be a variable")
213 def parse(self, tokens):
214 bin_expr = self.binary_parse(tokens)
215 expr = PycolExpr(bin_expr)
219 class PycolBinExpr(object):
220 def __init__(self, op, parser):
221 debug("PycolBinExpr(%s)"%op)
227 def dump(self, level=0):
228 print("vertice %d %s@0x%x"%(level, self.op, self.__hash__()))
230 self.left.dump(level+1)
232 self.right.dump(level+1)
235 # it's a variable name
239 # dump left expression if it's a binary operator (in case of
240 # unary operator, we only use the right child)
241 if self.op in self.parser.binary_ops:
242 return "%s%s%s"%(self.left.to_str(), self.op, self.right.to_str())
245 if self.op in self.parser.group_ops.keys():
246 return "%s%s%s"%(self.op, self.right.to_str(),
247 self.parser.group_ops[self.op])
250 return "%s%s"%(self.op, self.right)
253 return not self.op in self.parser.all_ops
255 class PycolExpr(object):
256 def __init__(self, bin_expr):
257 self.op = bin_expr.op
258 self.parser = bin_expr.parser
260 if bin_expr.left != None:
261 self.children.append(PycolExpr(bin_expr.left))
262 if bin_expr.right != None:
263 self.children.append(PycolExpr(bin_expr.right))
265 def dump(self, level=0):
266 print("vertice %d %s@0x%x"%(level, self.op, self.__hash__()))
267 for c in self.children:
271 # it's a variable name
277 if self.op in self.parser.binary_ops:
278 for i, c in enumerate(self.children):
280 s += "%s"%(c.to_str())
282 s += "%s%s"%(self.op, c.to_str())
286 if self.op in self.parser.group_ops.keys():
287 return "%s%s%s"%(self.op, self.children[0].to_str(),
288 self.parser.group_ops[self.op])
291 return "%s%s"%(self.op, self.children[0])
294 return not self.op in self.parser.all_ops
297 for c in self.children:
300 for c in self.children:
302 children += c.children
305 self.children = children
307 if __name__ == '__main__':
308 s = "[opt1/opt2] (<desdes>|a|(b|c)|[x]) (no bidule)|coin"
312 parser = PycolExprParser()
313 tokens = parser.tokenize(s)
314 e = parser.parse(tokens)
321 #print(get_group(tokenize("(fr fr (fr<3>)(xe)) sss")))
322 #print(get_group(tokenize("<(fr fr> (fr<3>)(xe))> sss")))