import re

class node(object):
    """Base class for everything stored in a parsed tree.

    Instances are plain attribute bags: every keyword argument passed to
    the constructor becomes an instance attribute of the same name.
    """

    # The node this one is attached to; None until something assigns it.
    parent = None

    def __init__(self, **kwargs):
        """Store each keyword argument as an instance attribute."""
        vars(self).update(kwargs)

    def __repr__(self):
        """Return a short tag naming the concrete class of this node."""
        kind = type(self).__name__
        return '<%s node>' % (kind,)

class element(node):
    """A named node that owns child nodes and a list of attributes."""

    # Local name and optional namespace prefix of the element.
    name = None
    namespace = None
    # Replaced by a fresh list for every instance in __init__.
    children = None

    def __init__(self, **kwargs):
        """Create empty child/attribute lists, then apply ``kwargs``.

        The lists are assigned before the keyword arguments are applied,
        so a caller may still pass ``children=`` or ``attributes=`` to
        override them.
        """
        self.children, self.attributes = [], []
        super(element, self).__init__(**kwargs)

    def __repr__(self):
        """Return ``<element NS:NAME>``, with ``null`` for no namespace."""
        prefix = self.namespace or 'null'
        return '<element %s:%s>' % (prefix, self.name)

class document(element):
    """Top-level container of a parsed tree; ``exmlparser.parse`` returns one."""
    # Never assigned in this file; presumably meant to reference the
    # top-level element -- TODO confirm against callers.
    root = None

class text(node):
    """Leaf node holding character data."""
    # String content of the node; the parser passes it as ``value=`` and
    # appends to it when text lines are concatenated or continued.
    value = None

class attribute(node):
    """A single ``name=value`` pair attached to an element.

    Instances are created by the parser and stored in the owning
    element's ``attributes`` list.
    """

    # Local name and optional namespace prefix of the attribute.
    name = None
    namespace = None
    # The attribute's value.
    value = None

    def __repr__(self):
        """Return ``<attribute NS:NAME>``, with ``null`` for no namespace."""
        # Previously reported '<element ...>' (copied from ``element``),
        # which made attributes indistinguishable from elements in output.
        return '<attribute %s:%s>' % (self.namespace or 'null', self.name)

class exmlparser(object):
    """Parser for an indentation-based markup document.

    The input is processed line by line:

    * ``[name attr='value']`` or ``[ns.name ns.attr="value"]`` opens an
      element;
    * a line starting with a quote is a text node, written as a Python
      string literal (triple quotes may span several lines);
    * ``#`` outside of quotes starts a comment that runs to end of line;
    * nesting is expressed purely by indentation, one indent step per
      level.

    Malformed input is reported by raising ``AssertionError`` with the
    offending line number in the message.

    SECURITY: text and attribute values are decoded with ``eval``. Only
    parse documents from trusted sources.
    """

    def __init__(self, document):
        """Remember the source text and detect its charset and indent size.

        ``document`` is the complete source as a single string.
        """
        self.document = document
        # Detected from the leading '#' header lines; kept for callers.
        self.charset = self.get_charset(document)
        self.indentsize = self.get_indent(document)

    def parse(self):
        """Parse the stored source and return the resulting ``document``."""
        self.currline = 0
        doc = document()
        # 'indent' is the indent level of 'current'; the document itself
        # sits at level -1 so that top-level nodes are at level 0.
        self.state = {'indent': -1, 'current': doc}
        for line in self.document.split('\n'):
            self._parse_line(line, doc)
        return doc

    _reg_charset = re.compile('^#( -[*]- (en)?coding[:] (.*) -[*]-)?$')
    def get_charset(self, document):
        """Return the charset declared in the leading ``#`` header lines.

        Only the initial run of lines that are either a bare ``#`` or a
        ``# -*- coding: NAME -*-`` declaration is examined. Returns
        ``'UTF-8'`` when no declaration is found.
        """
        for line in document.split('\n'):
            match = self._reg_charset.search(line)
            if not match:
                break
            charset = match.group(3)
            if charset is None:
                # still a hash, but no coding info
                continue
            return charset
        return 'UTF-8'

    _reg_indent = re.compile('^( +)')
    def get_indent(self, document):
        """Return the number of spaces that make up one indent level.

        This is the leading-space count of the first non-blank line that
        starts with a space, or 1 when no line is indented.
        """
        for line in document.split('\n'):
            if not line.strip():
                # whitespace-only lines are ignored by the parser and
                # must not define the indent size
                continue
            match = self._reg_indent.match(line)
            if match:
                return len(match.group(1))
        return 1 # no indentation in document - must be very simple :)

    def _parse_line(self, line, doc):
        """Feed one source line through the handler chain.

        ``doc`` is accepted for interface compatibility and is not used.
        """
        self.currline += 1
        # Blank lines are skipped, except inside a multi-line string
        # where they are part of the text.
        if not line.strip() and 'multilinechar' not in self.state:
            return

        current = self.state['current']
        for handler in (self.handle_multiline_comment,
                        self.handle_comment,
                        self.handle_indent_new,
                        ):
            current, line = handler(current, line)
            if not line:
                # the handler consumed the rest of the line
                break
        self.state['current'] = current

    def handle_multiline_comment(self, current, line):
        """Consume a continuation line of an open triple-quoted string.

        Despite the name this deals with multi-line *strings*. While
        ``state['multilinechar']`` is set, the line is appended verbatim
        to ``current.value`` (no escape processing): a trailing backslash
        joins it with the next line, otherwise a newline is added, and a
        line ending in the closing triple quote ends the string.

        Returns ``(current, '')`` when the line was consumed, otherwise
        ``(current, line)`` unchanged.
        """
        mc = self.state.get('multilinechar')
        if mc is None:
            return current, line
        if line.strip().endswith(mc):
            line = line.rstrip()[:-len(mc)]
            del self.state['multilinechar']
        elif line and line[-1] == '\\':
            line = line[:-1]
        else:
            line += '\n'
        current.value += line
        return current, ''

    # Group 1: any mix of plain characters and complete quoted strings
    # (backslash escapes honoured); group 2: the first unquoted hash and
    # everything after it.
    _reg_comment = re.compile(
        r'^((?:[^\'"#]'
        r'|\'(?:[^\'\\]|\\.)*\''
        r'|"(?:[^"\\]|\\.)*")*)'
        r'([#].*)$')
    def handle_comment(self, current, line):
        """Strip a trailing ``#`` comment that is not inside quotes.

        Returns ``(current, line)`` where ``line`` has the comment and
        any whitespace before it removed; lines without an unquoted hash
        are returned unchanged.
        """
        match = self._reg_comment.match(line)
        if match:
            # Cut at the position of the hash rather than by comment
            # length, so trailing whitespace cannot skew the slice.
            line = line[:match.start(2)].rstrip()
        return current, line

    def handle_indent_new(self, current, line):
        """Create the node described by ``line`` at the right tree depth.

        ``state['indent']`` always holds the indent level of ``current``.
        A line one level deeper becomes a child of ``current``; a line at
        the same or a shallower level first walks up to the matching
        parent. Consecutive text lines under the same parent are merged
        into one text node.

        Returns ``(new_current, '')``. Raises ``AssertionError`` for
        inconsistent or excessive indentation and for unrecognised data.
        """
        previndent = self.state['indent']
        stripped = line.lstrip()
        currspaces = len(line) - len(stripped)
        if currspaces % self.indentsize:
            raise AssertionError(
                'unexpected amount of whitespace - use consistent indentation!'
                ' (line %s)' % (self.currline,))
        line = stripped
        # floor division keeps this an int on Python 3 as well
        currindent = currspaces // self.indentsize

        if currindent > previndent:
            if currindent - previndent != 1:
                raise AssertionError(
                    'too much indentation (line %s)' % (self.currline,))
        else:
            # close block(s): one step up for a sibling, plus one for
            # every level of dedent
            for i in range(previndent - currindent + 1):
                current = current.parent

        if line[0] in '\'"':
            new_node = self.handle_new_text(current, line)
            siblings = current.children
            if siblings and isinstance(siblings[-1], text):
                # concatenate to previous child
                siblings[-1].value += new_node.value
                current = siblings[-1]
            else:
                new_node.parent = current
                siblings.append(new_node)
                current = new_node
        elif line[0] == '[':
            new_node = self.handle_new_element(current, line)
            current.children.append(new_node)
            current = new_node
        else:
            # XXX support some data types here
            raise AssertionError(
                'unexpected data (line %s)' % (self.currline,))
        self.state['indent'] = currindent
        return current, ''

    def handle_new_text(self, current, line):
        """Build a ``text`` node from a line that starts with a quote.

        A line opening with a triple quote that is not closed on the same
        line sets ``state['multilinechar']`` so that following lines are
        treated as continuations. The returned node is not yet attached
        to the tree.

        Raises ``AssertionError`` if ``current`` is not an element.
        """
        if not isinstance(current, element):
            raise AssertionError(
                'can not attach a node here (line %s)' % (self.currline,))
        quote = line[0]
        mc = 3 * quote
        if line.startswith(mc):
            line = line[3:]
            # Ignore trailing whitespace when looking for the closing
            # quotes, as the continuation handler does.
            closed = line.rstrip()
            if closed.endswith(mc):
                line = closed[:-3]
            else:
                self.state['multilinechar'] = mc
            if line and line[-1] == '\\':
                line = line[:-1]
            else:
                # NOTE(review): this also appends a newline to a triple-
                # quoted string closed on the same line -- confirm that
                # is intended.
                line += '\n'
            # SECURITY: eval() of document content -- trusted input only.
            value = eval(mc + line + mc)
        else:
            # SECURITY: eval() of document content -- trusted input only.
            value = eval(line)
        return text(value=value)

    _reg_elnode = re.compile(
        r'^\[(\w+(\.\w+)?)'
        r'( \w+(\.\w+)?='
        r'((\'([^\'\\]|(\\\\)|(\\\'))*\')|("([^"\\]|(\\\\)|(\\"))*")))*\]$')
    _reg_attr = re.compile(
        r'^ (\w+(\.\w+)?)='
        r'((\'([^\\\']|(\\\\)|(\\\'))*\')|("([^"\\]|(\\\\)|(\\"))*"))')
    def handle_new_element(self, current, line):
        """Build an ``element`` (with attributes) from an opening tag line.

        The new element's ``parent`` is set to ``current``; appending it
        to ``current.children`` is left to the caller.

        Raises ``AssertionError`` if ``current`` is not an element or the
        tag is malformed.
        """
        if not isinstance(current, element):
            raise AssertionError(
                'can not attach a node here (line %s)' % (self.currline,))
        # Work on the right-stripped line throughout so that the final
        # character dropped below really is the closing bracket.
        line = line.rstrip()
        match = self._reg_elnode.match(line)
        if not match:
            raise AssertionError(
                'invalid opening tag (line %s)' % (self.currline,))
        qname = match.group(1)
        rest = line[len(qname) + 1:-1]
        ns = None
        name = qname
        if '.' in qname:
            ns, name = qname.split('.', 1)
        new_node = element(name=name, namespace=ns)
        new_node.parent = current

        # attributes: peel them off the front one at a time
        while rest:
            match = self._reg_attr.match(rest)
            if not match:
                break
            # Slice instead of str.replace(): replace() would also remove
            # identical text occurring later in the tag.
            rest = rest[match.end():]
            attr_ns = None
            attr_name = match.group(1)
            if '.' in attr_name:
                attr_ns, attr_name = attr_name.split('.', 1)
            # 'namespace' is the field declared on attribute; 'ns' is
            # still passed for code that reads the old attribute name.
            # SECURITY: eval() of document content -- trusted input only.
            new_node.attributes.append(attribute(
                ns=attr_ns, namespace=attr_ns, name=attr_name,
                value=eval(match.group(3))))
        if rest:
            raise AssertionError(
                'garbage in element node (line %s)' % (self.currline,))
        return new_node

if __name__ == '__main__':
    # Command-line entry point: parse the file named on the command line
    # and dump the resulting tree in an XML-like form.
    import sys
    if len(sys.argv) != 2:
        print('usage: %s <exmldoc>' % (sys.argv[0],))
        sys.exit(1)
    def printnode(node, depth=-1):
        """Print ``node`` and its subtree, two spaces per nesting level.

        The document itself is printed at depth -1 and produces no tag,
        so its children start at column 0. Attributes are not printed.
        """
        indent = depth * 2 * ' '
        if not isinstance(node, element):
            # text node
            print(indent + node.value)
            return
        if isinstance(node, document):
            for child in node.children:
                printnode(child, depth + 1)
            return
        nodename = node.name
        if node.namespace:
            nodename = node.namespace + ':' + node.name
        if not node.children:
            # keep the self-closing tag on a single line
            print(indent + '<' + nodename + '/>')
            return
        print(indent + '<' + nodename + '>')
        for child in node.children:
            printnode(child, depth + 1)
        print(indent + '</%s>' % (nodename,))
    source = open(sys.argv[1])
    try:
        p = exmlparser(source.read())
    finally:
        # always release the file handle, even if reading fails
        source.close()
    doc = p.parse()
    printnode(doc)

