#!/usr/bin/python3

# todo
# - better checks before starting to parse (ex: no double spaces ?)
# - better error messages
# - document expressions
# - unit tests
# - reduce binary tree in tree
# - handle quotes?
# - better comments and names (operator is valid for '(' ?)
# - ignore chars
# - pylint
# - tests only if main
# - "not" operator? useful?
# - we could completely remove all_ops

DEBUG = False

def debug(*args, **kvargs):
    if DEBUG:
        print(*args, **kvargs)

class PycolExprParser(object):
    def __init__(self, binary_ops = None, unary_ops = None,
                 group_ops = None, var_chars = None):
        if binary_ops == None:
            # ordered by priority
            self.binary_ops = [ "|", "/", " " ]
        else:
            self.binary_ops = binary_ops
        if unary_ops == None:
            self.unary_ops = [ ]
        else:
            self.unary_ops = unary_ops
        if group_ops == None:
            self.group_ops = { "(" : ")", "[" : "]" }
        else:
            self.group_ops = group_ops
        if var_chars == None:
            self.var_chars = "<>-_abcdefghijklmnopqrstuvwxyz" + \
                             "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
        else:
            self.var_chars = var_chars

        self.all_ops = []
        self.all_ops += self.binary_ops
        self.all_ops += self.unary_ops
        self.all_ops += self.group_ops.keys()
        self.all_ops += self.group_ops.values()

    # return operator priority (high value = high priority)
    def op_prio(self, op):
        assert(isinstance(op, str))
        if not op in self.binary_ops:
            return 0
        return self.binary_ops.index(op) + 1

    # The argument tokens is an list of tokens that must start with an
    # opening group operator, for instance an opening bracket '('. This
    # function returns the list of tokens representing the inbound
    # expression without the first opening operator and its corresponding
    # closing operator
    def get_group(self, tokens):
        debug(tokens, self.group_ops.keys())
        assert(tokens[0] in self.group_ops.keys())
        stack = []

        for i, t in enumerate(tokens):
            debug((i,t))
            if t in self.group_ops.keys():
                stack.append(t)
            elif t == self.group_ops[stack[-1]]:
                stack.pop()
                if len(stack) == 0:
                    break
        assert(len(stack) == 0)
        debug(tokens[1:i])
        return tokens[1:i]

    # convert a string into a list of tokens
    def tokenize(self, s):
        debug("tokenize(%s)"%(s))
        tokens = []
        while s != "":
            # try to parse an operator
            op_found = False
            for op in self.all_ops:
                if s.startswith(op):
                    tokens.append(op)
                    s = s[len(op):]
                    op_found = True
                    break
            if op_found:
                continue
            # try to parse a variable name
            length = 0
            for c in s:
                if c in self.var_chars:
                    length += 1
                else:
                    break
            if length == 0:
                debug(tokens)
                debug("invalid token")
                excp()
            tokens.append(s[:length])
            s = s[length:]
        debug("tokenize() return %s"%(tokens))
        return tokens

    def binary_parse(self, tokens):
        top = None
        exp = None
        debug(tokens)
        while len(tokens) > 0:
            t = tokens.pop(0)
            exp = PycolBinExpr(t, self)
            debug("token '%s'"%t)

            if t in self.binary_ops:
                # if it is the first node, error
                if top == None:
                    debug("expression starts with a binary operator")
                    exce()

                # the rightest leaf must be a variable
                tmp = top
                while tmp.right != None:
                    tmp = tmp.right
                if not tmp.is_var():
                    debug("the rightest leaf must be a variable")
                    exce()

                # if new node priority is higher than root node, it
                # becomes the new root
                if self.op_prio(t) > self.op_prio(top.op):
                    exp.left = top
                    top = exp
                # else, exp is placed at the "rightest" leaf. Check
                # that blabla XXX
                else:
                    tmp = top
                    while tmp.right != None and \
                          self.op_prio(exp.op) < self.op_prio(tmp.right.op):
                        tmp = tmp.right

                    if tmp.right == None:
                        debug("invalid right node")
                        exce()

                    exp.left = tmp.right
                    tmp.right = exp

            elif t in self.group_ops:
                # if operator is an opening bracket, recursively call
                # ourself with the sub-expression
                gr = self.get_group([t] + tokens)
                exp.right = self.binary_parse(gr[:])
                if exp.right == None:
                    exce()
                tokens = tokens[len(gr) + 1:]
                debug("subexp parsed, tokens=%s"%tokens)

                # if it is the first node
                if top == None:
                    top = exp;
                    continue

                # exp is placed at the "rightest" leaf. Check
                # that our parent is not a variable
                tmp = top
                while tmp.right != None:
                    tmp = tmp.right
                if tmp.is_var():
                    debug("the rightest leaf must not be a variable")
                    exce()

                tmp.right = exp

            else: # unary operator or variable
                debug("unary operator or variable")

                # if it is the first node
                if top == None:
                    top = exp;
                    continue

                # exp is placed at the "rightest" leaf. Check
                # that our parent is not a variable
                tmp = top
                while tmp.right != None:
                    tmp = tmp.right
                if tmp.is_var():
                    debug("the rightest leaf must not be a variable")
                    exce()

                tmp.right = exp

        if top == None:
            debug("no top node")
            exce()

        # the rightest leaf must be a variable
        tmp = top
        while tmp.right != None:
            tmp = tmp.right
        if not tmp.is_var():
            debug("the rightest leaf must be a variable")
            exce()

        return top

    def parse(self, tokens):
        bin_expr = self.binary_parse(tokens)
        expr = PycolExpr(bin_expr)
        expr.reduce()
        return expr

class PycolBinExpr(object):
    def __init__(self, op, parser):
        debug("PycolBinExpr(%s)"%op)
        self.op = op
        self.parser = parser
        self.left = None
        self.right = None

    def dump(self, level=0):
        print("vertice %d %s@0x%x"%(level, self.op, self.__hash__()))
        if self.left:
            self.left.dump(level+1)
        if self.right:
            self.right.dump(level+1)

    def to_str(self):
        # it's a variable name
        if self.is_var():
            return self.op
        s = ""
        # dump left expression if it's a binary operator (in case of
        # unary operator, we only use the right child)
        if self.op in self.parser.binary_ops:
            return "%s%s%s"%(self.left.to_str(), self.op, self.right.to_str())

        # group operator
        if self.op in self.parser.group_ops.keys():
            return "%s%s%s"%(self.op, self.right.to_str(),
                             self.parser.group_ops[self.op])

        # unary
        return "%s%s"%(self.op, self.right)

    def is_var(self):
        return not self.op in self.parser.all_ops

class PycolExpr(object):
    def __init__(self, bin_expr):
        self.op = bin_expr.op
        self.parser = bin_expr.parser
        self.children = []
        if bin_expr.left != None:
            self.children.append(PycolExpr(bin_expr.left))
        if bin_expr.right != None:
            self.children.append(PycolExpr(bin_expr.right))

    def dump(self, level=0):
        print("vertice %d %s@0x%x"%(level, self.op, self.__hash__()))
        for c in self.children:
            c.dump(level+1)

    def to_str(self):
        # it's a variable name
        if self.is_var():
            return self.op
        s = ""

        # binary operators
        if self.op in self.parser.binary_ops:
            for i, c in enumerate(self.children):
                if i == 0:
                    s += "%s"%(c.to_str())
                else:
                    s += "%s%s"%(self.op, c.to_str())
            return s

        # group operator
        if self.op in self.parser.group_ops.keys():
            return "%s%s%s"%(self.op, self.children[0].to_str(),
                             self.parser.group_ops[self.op])

        # unary
        return "%s%s"%(self.op, self.children[0])

    def is_var(self):
        return not self.op in self.parser.all_ops

    def reduce(self):
        for c in self.children:
            c.reduce()
        children = []
        for c in self.children:
            if c.op == self.op:
                children += c.children
            else:
                children += [c]
        self.children = children

if __name__ == '__main__':
    s = "[opt1/opt2] (<desdes>|a|(b|c)|[x]) (no bidule)|coin"
    #s = "x|y"
    #tokenize(s)

    parser = PycolExprParser()
    tokens = parser.tokenize(s)
    e = parser.parse(tokens)
    print(e.dump())
    print(e.to_str())
    e.reduce()
    print(e.dump())
    print(e.to_str())

    #print(get_group(tokenize("(fr fr (fr<3>)(xe)) sss")))
    #print(get_group(tokenize("<(fr fr> (fr<3>)(xe))> sss")))

