import codecs 
import spacy 
import sys


nlp = spacy.load('en')

def read_lines(fname):
    for line in codecs.open(fname, encoding="utf8"):
        sent_id, sent = line.strip().split("\t")
        sent = sent.replace("-LRB-","(")
        sent = sent.replace("-RRB-",")")
        yield sent_id, sent

for sent_id, sent_str in read_lines(sys.argv[1]):
    sent = nlp(sent_str)
    print "#id:",sent_id
    print "#text:",sent.text
    for word in sent:
        head_id = str(word.head.i+1)        # we want ids to be 1 based
        if word == word.head:               # and the ROOT to be 0.
            assert(word.dep_=="ROOT"),word.dep_
            head_id = "0" # root
        print "\t".join([str(word.i+1), word.text, word.lemma_, word.tag_, word.pos_, head_id, word.dep_, word.ent_iob_, word.ent_type_])
    print
    # print "#", Noun Chunks:
    # for np in sent.noun_chunks:
    #    print(np.text, np.root.text, np.root.dep_, np.root.head.text)
    # print "#", named entities:
    # for ne in sent.ents:
    #    print(ne.text, ne.root.ent_type_, ne.root.text, ne.root.dep_, ne.root.head.text)