# Temporary draft by tt.
# Sample input text for the demo run at the bottom of the file.
# The three-sentence variant is kept for reference only: it is immediately
# overridden by the two-sentence variant, which is what the pipeline runs on.
intxt = "Barack Obama went to China yesterday. He lives in Grand Hyatt Beijing. This is a superb hotel."
intxt = "Barack Obama went to China yesterday. He lives in Grand Hyatt Beijing."
# Known named entities, tried in order by sent_has_name_at.
# Row layout: [words to match (exact case), display name, tag, entity URI, type URI].
# simple_rdf_sentence uses element 3 as the node and element 4 as the object
# of an "rdf:type" triple.
nertable = [
    [
        ["Barack", "Obama"],
        "Barack Obama",
        "ner_noun",
        "http://en.wikipedia.org/wiki/Barack_Obama",
        "http://conceptnet5.media.mit.edu/web/c/en/person",
    ],
    [
        ["China"],
        "China",
        "ner_noun",
        "http://en.wikipedia.org/wiki/China",
        "http://conceptnet5.media.mit.edu/web/c/en/country",
    ],
    [
        ["Grand", "Hyatt", "Beijing"],
        "Grand Hyatt Beijing",
        "ner_noun",
        "https://en.wikipedia.org/wiki/Grand_Hyatt_Beijing",
        "http://conceptnet5.media.mit.edu/web/c/en/company",
    ],
]
# Known ordinary words, tried in order by sent_has_pos_at.
# Row layout: [words to match, base form, part-of-speech tag, ConceptNet URI, extra].
# Words are matched exactly or after lower-casing the input token, so the
# words stored here are lower-case.
# The last slot is "past" for "went" and None everywhere else -- presumably a
# tense marker; TODO confirm.
postable = [
    [["went"], "go", "verb", "http://conceptnet5.media.mit.edu/web/c/en/go", "past"],
    [["to"], "to", "preposition", "http://conceptnet5.media.mit.edu/web/c/en/to", None],
    [["yesterday"], "yesterday", "adverb", "http://conceptnet5.media.mit.edu/web/c/en/yesterday", None],
    # NOTE(review): this URI ends in ".../data/web/c/en/this", unlike every other
    # row -- looks like a copy/paste slip for "he"; confirm before relying on it.
    [["he"], "he", "pronoun", "http://conceptnet5.media.mit.edu/data/web/c/en/this", None],
    [["lives"], "live", "verb", "http://conceptnet5.media.mit.edu/web/c/en/live", None],
    [["in"], "in", "preposition", "http://conceptnet5.media.mit.edu/web/c/en/in", None],
    [["this"], "this", "pronoun", "http://conceptnet5.media.mit.edu/web/c/en/this", None],
    [
        ["is"],
        "be",
        "verb",
        "http://conceptnet5.media.mit.edu/web/c/en/type/v/identify_as_belonging_to_a_certain_type",
        None,
    ],
    [["superb"], "superb", "adjective", "http://conceptnet5.media.mit.edu/web/c/en/superb", None],
    [["hotel"], "hotel", "noun", "http://conceptnet5.media.mit.edu/web/c/en/hotel", None],
]
# Counter behind create_local_id(); incremented before each id is issued,
# so the first id handed out is "id:local_1".
idnum = 0
# Sketch of the triples to extract from the sample text:
# - [barack, action1, china]   (from "to china" and "went ... yesterday")
# - [action1, activity, moveto]
# - [action1, time, past]
# - [he, action2, grandhyattbeijing]
# - [action2, activity, live_in]
# - [action2, time, current]
# TODO: add a table of sentence patterns, e.g.
#   sentencetable = [
#       [["noun", "verb", "noun"], 0, 1, 2],
#   ]
def main(txt):
    """Run the whole demo pipeline on ``txt`` and print every intermediate result.

    The stages run in this order: ``split_text`` -> ``ner_text`` ->
    ``pos_text`` -> ``simple_rdf``.  After each stage a label line and the raw
    result are printed; the POS-tagged sentences and the final triples are
    additionally dumped with ``pretty_print``.  Nothing is returned.
    """
    def show(label, value):
        # Label on one line, raw value on the next.
        print(label)
        print(value)

    sentences = split_text(txt)
    show("splitted:", sentences)

    with_names = ner_text(sentences)
    show("nerred:", with_names)

    tagged = pos_text(with_names)
    show("posed:", tagged)
    pretty_print(tagged)

    triples = simple_rdf(tagged)
    show("rdf:", triples)
    pretty_print(triples)
def ner_text(slst):
    """Replace known multi-word names in every sentence by their table entry.

    ``slst`` is a list of sentences, each a list of word strings.  Every
    sentence is scanned left to right; wherever ``sent_has_name_at`` reports a
    match, the matched words are replaced by the single matching entry,
    otherwise the word is copied unchanged.  Returns a new list of sentences.
    """
    tagged_sentences = []
    for words in slst:
        tagged = []
        pos = 0
        while pos < len(words):
            hit = sent_has_name_at(words, pos)
            if not hit:
                tagged.append(words[pos])
            else:
                tagged.append(hit[0])
                # hit[1] is the index of the last matched word; the shared
                # increment below then steps past the whole name.
                pos = hit[1]
            pos += 1
        tagged_sentences.append(tagged)
    return tagged_sentences
def sent_has_name_at(sent,i):
    """Look for a known named entity whose words start at index ``i`` of ``sent``.

    The rows of the module-level ``nertable`` are tried in order; the first
    row whose word list (element 0) equals the words of ``sent`` from ``i``
    onwards, with exact case, wins.

    Returns ``[row, last]`` where ``row`` is the matching table row and
    ``last`` is the index of the final matched word (the caller steps past it
    itself).  Returns ``0`` when ``sent`` is empty or ``i`` is out of range,
    and ``None`` when no row matches.
    """
    if not sent or i >= len(sent):
        return 0
    for row in nertable:
        words = row[0]
        last = i + len(words) - 1
        if last >= len(sent):
            # The name would run past the end of the sentence.
            continue
        if all(sent[i + offset] == word for offset, word in enumerate(words)):
            return [row, last]
    return None
def pos_text(slst):
    """Replace known ordinary words in every sentence by their table entry.

    ``slst`` is the output of ``ner_text``: sentences whose items are either
    word strings or list entries that were already recognised.  List items are
    copied through untouched; for every other position ``sent_has_pos_at`` is
    consulted and, on a match, the matched words are replaced by the matching
    entry.  Unknown words are copied unchanged.  Returns a new list of
    sentences.
    """
    tagged_sentences = []
    for items in slst:
        tagged = []
        pos = 0
        while pos < len(items):
            current = items[pos]
            if type(current) is list:
                # Already tagged by the named-entity pass.
                tagged.append(current)
                pos += 1
                continue
            hit = sent_has_pos_at(items, pos)
            if not hit:
                tagged.append(current)
            else:
                tagged.append(hit[0])
                # hit[1] is the index of the last matched word; the shared
                # increment below then steps past the whole phrase.
                pos = hit[1]
            pos += 1
        tagged_sentences.append(tagged)
    return tagged_sentences
def sent_has_pos_at(sent,i):
    """Look for a known word or phrase starting at index ``i`` of ``sent``.

    The rows of the module-level ``postable`` are tried in order; the first
    row whose word list (element 0) matches the items of ``sent`` from ``i``
    onwards wins.  An item matches a table word when it is equal to it, or
    when it is a string whose lower-cased form is equal to it.

    Items that are not strings (for example entries already inserted by the
    named-entity pass) never match a table word; previously they raised
    ``AttributeError`` because ``.lower()`` was called on them.

    Returns ``[row, last]`` where ``row`` is the matching table row and
    ``last`` is the index of the final matched item (the caller steps past it
    itself).  Returns ``0`` when ``sent`` is empty or ``i`` is out of range,
    and ``None`` when no row matches.
    """
    if not sent or i >= len(sent):
        return 0
    for row in postable:
        words = row[0]
        last = i + len(words) - 1
        if last >= len(sent):
            # The phrase would run past the end of the sentence.
            continue
        if all(
            sent[i + offset] == word
            or (isinstance(sent[i + offset], str) and sent[i + offset].lower() == word)
            for offset, word in enumerate(words)
        ):
            return [row, last]
    return None
def split_text(txt):
    """Split ``txt`` into sentences, each a list of words.

    Commas are treated as blanks, sentences end at every ``"."`` and words are
    separated by any run of whitespace (blanks, newlines, tabs).  Fragments
    that contain no words at all -- for instance the whitespace after the
    final full stop -- are dropped, so the result never contains an empty
    sentence.

    Returns a list of lists of non-empty word strings; ``[]`` for empty text.
    """
    sentences = []
    for fragment in txt.replace(",", " ").split("."):
        # str.split() without arguments splits on any whitespace and never
        # yields empty strings.
        words = fragment.split()
        if words:
            sentences.append(words)
    return sentences
def pretty_print(sentlst):
    """Print every item of ``sentlst`` under its own ``sentence:`` heading.

    For each item a heading line is printed.  If the item is a list, each of
    its elements follows on a line of its own, prefixed by one blank; items of
    any other type get the heading only.  Returns nothing.
    """
    for sent in sentlst:
        print("sentence: ")
        if type(sent) is not list:
            continue
        for phrase in sent:
            print(" " + str(phrase))
def simple_rdf(sentlst):
    """Collect the triples of all sentences into one flat list.

    Each sentence is handed to ``simple_rdf_sentence`` together with the
    sentence before it (``None`` for the first one).  Whatever non-empty
    result comes back is appended to the output.  Returns the combined list.
    """
    triples = []
    previous = None
    for current in sentlst:
        produced = simple_rdf_sentence(current, previous)
        previous = current
        if produced:
            triples += produced
    return triples
def simple_rdf_sentence(sent,prevsent):
    """Turn one tagged sentence into a list of ``[subject, predicate, object]`` triples.

    ``sent`` is a sentence as produced by ``pos_text``: a list whose items are
    either plain strings (unrecognised words, ignored here) or table rows of
    the form ``[words, name, tag, uri, extra]``.  ``prevsent`` is the previous
    sentence in the same format, or ``None`` for the first sentence; it is
    only used to resolve pronouns.

    A pronoun is replaced by a noun of ``prevsent``.  When the previous
    sentence offers several nouns, the first one is taken.
    NOTE(review): taking the first candidate is a placeholder so that the
    sentence can be processed at all; trying out every interpretation is
    still to be done.  A pronoun without any candidate is dropped.

    Returns ``None`` unless the sentence has at least one verb and two nouns.
    Otherwise the triples are built from the first verb and the first two
    nouns (``uri`` is element 3 of a row):

    * with an adverb: ``[noun1, "id:action", id]``, ``[id, "id:isactivity",
      verb]``, ``[id, "id:extrainfo", adverb]``, where ``id`` comes from
      ``create_local_id``;
    * else with an adjective: ``[noun1, verb, id]``, ``[id, "id:isobject",
      noun2]``, ``[id, "id:extrainfo", adjective]``;
    * else: ``[noun1, verb, noun2]``.

    Finally an ``"rdf:type"`` triple is appended for each of the two nouns
    whose row has a non-empty fifth element.

    Side effect: prints the recognised parts of the sentence for debugging.
    """
    verbs = []
    adverbs = []
    nouns = []
    adjectives = []
    for phrase in sent:
        if not isinstance(phrase, list):
            continue  # untagged word
        tag = phrase[2]
        if tag == "verb":
            verbs.append(phrase)
        elif tag == "adverb":
            adverbs.append(phrase)
        elif tag in ("ner_noun", "noun"):
            nouns.append(phrase)
        elif tag == "pronoun":
            # Only tagged rows of the previous sentence can be nouns; filtering
            # here also covers prevsent being None for the first sentence.
            tagged_prev = [p for p in (prevsent or []) if isinstance(p, list)]
            candidates = get_candidate_nouns(tagged_prev)
            if candidates:
                nouns.append(candidates[0])
        elif tag == "adjective":
            adjectives.append(phrase)

    if not verbs or len(nouns) < 2:
        return None

    print("nouns: " + str(nouns))
    print("verbs: " + str(verbs))
    print("adverbs: " + str(adverbs))
    print("adjectives: " + str(adjectives))

    subject, obj = nouns[0], nouns[1]
    verb = verbs[0]
    if adverbs:
        lid = create_local_id()
        rdf = [
            [subject[3], "id:action", lid],
            [lid, "id:isactivity", verb[3]],
            [lid, "id:extrainfo", adverbs[0][3]],
        ]
    elif adjectives:
        lid = create_local_id()
        rdf = [
            [subject[3], verb[3], lid],
            [lid, "id:isobject", obj[3]],
            [lid, "id:extrainfo", adjectives[0][3]],
        ]
    else:
        print(str(["nouns", subject, "verbs", verb, "nouns", obj]))
        print("n:" + str(subject))
        print("v:" + str(verb))
        print("n:" + str(obj))
        rdf = [[subject[3], verb[3], obj[3]]]

    # Each noun is checked on its own row, and index 4 is only read when the
    # row really has a fifth element.
    for noun in (subject, obj):
        if len(noun) > 4 and noun[4]:
            rdf.append([noun[3], "rdf:type", noun[4]])
    return rdf
def get_candidate_nouns(sent):
    """Return the noun rows of ``sent``, in order of appearance.

    ``sent`` is a tagged sentence whose items are either plain strings
    (unrecognised words) or table rows whose third element is the tag.  A row
    counts as a noun when its tag is ``"ner_noun"`` or ``"noun"``.

    Plain strings and rows too short to carry a tag are skipped, and a missing
    sentence (``None``, as passed for the sentence before the first one) gives
    an empty list.
    """
    if not sent:
        return []
    return [
        phrase
        for phrase in sent
        if isinstance(phrase, list)
        and len(phrase) > 2
        and phrase[2] in ("ner_noun", "noun")
    ]
def create_local_id():
    """Hand out the next local node id.

    Increments the module-level counter ``idnum`` and returns the string
    ``"id:local_"`` followed by the new counter value.
    """
    global idnum
    idnum = idnum + 1
    return "id:local_%s" % idnum
# Run the demo pipeline on the sample text; this executes whenever the module is loaded.
main(intxt)
# NER: [http://en.wikipedia.org/wiki/Barack_Obama, http://en.wikipedia.org/wiki/china], id:type http://conceptnet5.media.mit.edu/web/c/en/person
# id:local_1 id:isactivity http://conceptnet5.media.mit.edu/web/c/en/go
# Pick just one interpretation:
#   1. Represent it in Otter format.
#   2. Add common-sense rules about the domain of news and politics
#      (an ontology: like a taxonomy, but it can be a bit more complex).
#   3. Pose a question in Otter format.
#   4. Run Otter to find the answer.
# A loop in which different interpretations of the pronouns
# (he, this, she, it, ...) are tried out. For each interpretation:
#   1. Represent it in Otter format.
#   2. Add common-sense rules about the domain of news and politics
#      (an ontology: like a taxonomy, but it can be a bit more complex).
#   3. Run Otter to find out whether there is a contradiction.
#   4. If there is, eliminate this interpretation.
# Otter clauses for the example.
# Fact: Barack Obama is a person.
# rdf("http://en.wikipedia.org/wiki/Barack_Obama", "id:type", "http://conceptnet5.media.mit.edu/web/c/en/person").
# Rule: every person is an animal.
# -rdf(X,"id:type", "http://conceptnet5.media.mit.edu/web/c/en/person") | rdf(X,"id:type", "http://conceptnet5.media.mit.edu/web/c/en/animal").
# Rule: every animal is mortal.
# -rdf(X,"id:type", "http://conceptnet5.media.mit.edu/web/c/en/animal") | rdf(X,"id:type", "mortal").
# Negated statement "Barack Obama is mortal" (contradicts the clauses above).
# -rdf("http://en.wikipedia.org/wiki/Barack_Obama", "id:type", "mortal").
# Rule: every person is capable of eating.
# -rdf(X,"id:type", "http://conceptnet5.media.mit.edu/web/c/en/person") | rdf(X,"id:CapableOf", "id:eat").
# person -> CapableOf -> eat
# person CapableOf eat ("people can eat")
# [http://en.wikipedia.org/wiki/Barack_Obama, http://en.wikipedia.org/wiki/china], id:type http://conceptnet5.media.mit.edu/web/c/en/person