# Lab 2 example code, converted to give Otter prover input.
# (Wiki page residue, translated from Estonian: "Jump to navigation", "Jump to search".)
# Sample input text: three short sentences processed by main().
intxt = (
    "Barack Obama went to China yesterday. "
    "He lives in Grand Hyatt Beijing. "
    "This is a superb hotel."
)

# Known named entities. Each entry is
#   [token list, display name, word class, URL, entity kind]
nertable = [
    [["Barack", "Obama"], "Barack Obama", "ner_noun",
     "http://en.wikipedia.org/wiki/Barack_Obama", "person"],
    [["China"], "China", "ner_noun",
     "http://en.wikipedia.org/wiki/China", "country"],
    [["Grand", "Hyatt", "Beijing"], "Grand Hyatt Beijing", "ner_noun",
     "https://en.wikipedia.org/wiki/Grand_Hyatt_Beijing", "company"],
]

# Known ordinary words. Each entry is
#   [token list, lemma, word class, URL, extra info or None]
# NOTE(review): the "he" entry points at the ".../en/this" URL, which looks
# like a copy-paste slip. The URL of a pronoun is never used by
# simple_rdf_sentence, so it is left untouched here -- confirm before fixing.
postable = [
    [["went"], "go", "verb",
     "http://conceptnet5.media.mit.edu/web/c/en/go", "past"],
    [["to"], "to", "preposition",
     "http://conceptnet5.media.mit.edu/web/c/en/to", None],
    [["yesterday"], "yesterday", "adverb",
     "http://conceptnet5.media.mit.edu/web/c/en/yesterday", None],
    [["he"], "he", "pronoun",
     "http://conceptnet5.media.mit.edu/data/web/c/en/this", None],
    [["lives"], "live", "verb",
     "http://conceptnet5.media.mit.edu/web/c/en/live", None],
    [["in"], "in", "preposition",
     "http://conceptnet5.media.mit.edu/web/c/en/in", None],
    [["this"], "this", "pronoun",
     "http://conceptnet5.media.mit.edu/web/c/en/this", None],
    [["is"], "be", "verb",
     "http://conceptnet5.media.mit.edu/web/c/en/type/v/identify_as_belonging_to_a_certain_type",
     None],
    [["superb"], "superb", "adjective",
     "http://conceptnet5.media.mit.edu/web/c/en/superb", None],
    [["hotel"], "hotel", "noun",
     "http://conceptnet5.media.mit.edu/web/c/en/hotel", None],
]

# Counter behind create_local_id(); incremented on every call.
idnum = 0

# [barack,action1,china]   "to china", "went ... yesterday"
# [action1,activity,moveto]
# [action1,time,past]

# [he,action2, grandhyattbeijing]
# [action2,activity,live_in]
# [action2,time,current]

# TODO:
#sentencetable=[
#  [["noun","verb","noun"],[[0,1,2]]]


def main(txt):
    """Run the whole pipeline on *txt* and print the result as Otter input.

    Stages: split_text -> ner_text -> pos_text -> simple_rdf, after which
    the triples are printed between the Otter prefix and suffix.
    pretty_print() can be called on any intermediate stage for inspection.
    """
    splitted = split_text(txt)
    nerred = ner_text(splitted)
    posed = pos_text(nerred)
    rdf = simple_rdf(posed)
    print_otter_prefix()
    otter_print(rdf)
    print_otter_suffix()


def _same_word(token, word, ignore_case):
    """Return True if sentence *token* equals table *word*.

    With *ignore_case* a string token also matches through its lowercased
    form. Non-string tokens (entries already replaced by a table row) never
    match case-insensitively, which avoids calling .lower() on a list.
    """
    if token == word:
        return True
    return ignore_case and isinstance(token, str) and token.lower() == word


def _match_known_phrase(sent, i, table, ignore_case=False):
    """Find the first *table* entry whose token list matches *sent* at *i*.

    Returns ``[entry, index_of_last_matched_token]`` or None.
    """
    if not sent or i >= len(sent):
        return None
    for known in table:
        phrase = known[0]
        if not phrase:
            # An empty phrase would "match" without consuming any token and
            # stall the caller's scan loop.
            continue
        window = sent[i:i + len(phrase)]
        if len(window) != len(phrase):
            continue
        if all(_same_word(tok, word, ignore_case)
               for tok, word in zip(window, phrase)):
            return [known, i + len(phrase) - 1]
    return None


def ner_text(slst):
    """Replace known names in every sentence of *slst* by their nertable row.

    *slst* is a list of sentences, each a list of word strings. Words that
    do not start a known name are copied unchanged.
    """
    rlst = []
    for sent in slst:
        srlst = []
        i = 0
        while i < len(sent):
            tmp = sent_has_name_at(sent, i)
            if tmp:
                srlst.append(tmp[0])
                i = tmp[1]  # index of the last token of the matched name
            else:
                srlst.append(sent[i])
            i += 1
        rlst.append(srlst)
    return rlst


def sent_has_name_at(sent, i):
    """Return ``[nertable_row, last_index]`` if a known name starts at *i*.

    Matching is exact (case-sensitive). Returns a falsy value (None) when
    nothing matches, *sent* is empty or *i* is past the end.
    """
    return _match_known_phrase(sent, i, nertable)


def pos_text(slst):
    """Replace known words in every sentence of *slst* by their postable row.

    Elements that are already lists (rows inserted by ner_text) are kept
    as they are; unknown words stay plain strings.
    """
    rlst = []
    for sent in slst:
        srlst = []
        i = 0
        while i < len(sent):
            if isinstance(sent[i], list):
                srlst.append(sent[i])
                i += 1
                continue
            tmp = sent_has_pos_at(sent, i)
            if tmp:
                srlst.append(tmp[0])
                i = tmp[1]  # index of the last token of the matched phrase
            else:
                srlst.append(sent[i])
            i += 1
        rlst.append(srlst)
    return rlst


def sent_has_pos_at(sent, i):
    """Return ``[postable_row, last_index]`` if a known word starts at *i*.

    A token matches either exactly or through its lowercased form.
    Returns a falsy value (None) when nothing matches, *sent* is empty or
    *i* is past the end.
    """
    return _match_known_phrase(sent, i, postable, ignore_case=True)


def split_text(txt):
    """Split *txt* into sentences (on ".") and each sentence into words.

    Commas are treated as whitespace. Fragments containing no words (for
    example the text after the final period) are dropped, so no empty
    sentence is ever returned.
    """
    wlst = []
    for s in txt.replace(",", " ").split("."):
        words = s.split()  # splits on any whitespace, drops empty pieces
        if words:
            wlst.append(words)
    return wlst


def pretty_print(sentlst):
    """Print every sentence of *sentlst* with one phrase per line."""
    for sent in sentlst:
        print("sentence: ")
        if isinstance(sent, list):
            for phrase in sent:
                print(" " + str(phrase))


def print_otter_prefix():
    """Print the Otter settings and the opening ``list(sos).`` line.

    Otter comments run from ``%`` to the end of the line, so the line
    breaks inside this text are significant.
    """
    a = """
% clear automatic strategy selection
clear(auto).

% use capital letters (A,X,T,...) as vars
set(prolog_style_variables).

% select the search strategy
set(hyper_res). % an alternative is to use set(binary_res).
set(factor).

% select sensible amount of output
clear(print_given). % uncomment to see input and process
set(print_kept). % this is important: prints all generated and kept clauses
assign(stats_level, 0).

% just make it stop after N secs
assign(max_seconds, 10).

list(sos).
"""
    print(a)


def print_otter_suffix():
    """Print the fixed background rules and the closing ``end_of_list.``."""
    a = """
-rdf(X,"id:type", "http://conceptnet5.media.mit.edu/web/c/en/person") |
  rdf(X,"id:type", "http://conceptnet5.media.mit.edu/web/c/en/animal").

-rdf(X,"id:type", "http://conceptnet5.media.mit.edu/web/c/en/animal") |
  rdf(X,"id:type", "mortal").

-rdf(X,"id:type", "http://conceptnet5.media.mit.edu/web/c/en/person") |
  rdf(X,"id:CapableOf", "id:eat").

-rdf(X , "http://conceptnet5.media.mit.edu/web/c/en/live", Y) |
  rdf(X,"id:type", "http://conceptnet5.media.mit.edu/web/c/en/animal").

-rdf(X,"http://conceptnet5.media.mit.edu/web/c/en/type/v/identify_as_belonging_to_a_certain_type",Y) |
  -rdf(Y,"id:isobject", Z) |
  rdf(X,"id:type",Z).

-rdf(X,"http://conceptnet5.media.mit.edu/web/c/en/live",Y) |
  -rdf(Y,"id:type","http://conceptnet5.media.mit.edu/web/c/en/hotel") |
  -rdf(Y,"http://conceptnet5.media.mit.edu/web/c/en/type/v/identify_as_belonging_to_a_certain_type",Z) |
  -rdf(Z,"id:extrainfo","http://conceptnet5.media.mit.edu/web/c/en/superb") |
  rdf(X,"id:type","http://conceptnet5.media.mit.edu/web/c/en/rich").

end_of_list.
"""
    print(a)


def otter_print(sentlst):
    """Print every triple of *sentlst* as an Otter ``rdf(...)`` fact.

    Each element is printed on its own line in double quotes. If an
    element is itself a list (several pronoun candidates), only its first
    item is printed.
    """
    for sent in sentlst:
        print("rdf(")
        if isinstance(sent, list):
            last = len(sent) - 1
            for i, phrase in enumerate(sent):
                comma = "," if i < last else ""
                value = phrase[0] if isinstance(phrase, list) else phrase
                print(" " + "\"" + str(value) + "\"" + comma)
        print(").")


def simple_rdf(sentlst):
    """Convert tagged sentences into one flat list of triples.

    Every sentence is handed to simple_rdf_sentence together with the
    sentence before it (None for the first), which is used to resolve
    pronouns.
    """
    done = []
    prevsent = None
    for sent in sentlst:
        ns = simple_rdf_sentence(sent, prevsent)
        if ns:
            done += ns
        prevsent = sent
    return done


def simple_rdf_sentence(sent, prevsent):
    """Build triples for one tagged sentence, or return None.

    Collects the URLs of verbs, adverbs, nouns and adjectives. A pronoun
    contributes the nouns of *prevsent*: the single URL if there is exactly
    one candidate, the whole candidate list if there are several, nothing
    if there are none.

    Triples are produced only when there is a verb and at least two nouns:
      * with an adverb:    subject --id:action--> local id, plus the
        activity (verb) and extra info (adverb) of that local id;
      * with an adjective: subject --verb--> local id, plus the object
        (second noun) and extra info (adjective) of that local id;
      * otherwise:         a single [subject, verb, object] triple.
    """
    verbs = []
    adverbs = []
    nouns = []
    adjectives = []
    for phrase in sent:
        if not isinstance(phrase, list):
            continue  # unknown word left as a plain string
        kind = phrase[2]
        if kind == "verb":
            verbs.append(phrase[3])
        elif kind == "adverb":
            adverbs.append(phrase[3])
        elif kind in ("ner_noun", "noun"):
            nouns.append(phrase[3])
        elif kind == "pronoun":
            candidates = get_candidate_nouns(prevsent)
            if len(candidates) == 1:
                nouns.append(candidates[0])
            elif candidates:
                nouns.append(candidates)
        elif kind == "adjective":
            adjectives.append(phrase[3])

    if not (verbs and len(nouns) > 1):
        return None
    if adverbs:
        lid = create_local_id()
        return [[nouns[0], "id:action", lid],
                [lid, "id:isactivity", verbs[0]],
                [lid, "id:extrainfo", adverbs[0]]]
    if adjectives:
        lid = create_local_id()
        return [[nouns[0], verbs[0], lid],
                [lid, "id:isobject", nouns[1]],
                [lid, "id:extrainfo", adjectives[0]]]
    return [[nouns[0], verbs[0], nouns[1]]]


def get_candidate_nouns(sent):
    """Return the URLs of all nouns and named entities in *sent*.

    *sent* may be None (no previous sentence), in which case the result is
    empty. Plain-string tokens (unknown words) are ignored.
    """
    if not sent:
        return []
    return [phrase[3] for phrase in sent
            if isinstance(phrase, list) and phrase[2] in ("ner_noun", "noun")]


def create_local_id():
    """Return a fresh identifier of the form ``id:local_<n>``."""
    global idnum
    idnum += 1
    return "id:local_" + str(idnum)


if __name__ == "__main__":
    main(intxt)