Lab 2 example code converted to give
Mine navigeerimisribale
Mine otsikasti
# Sample input text that is run through the pipeline at the bottom of the file.
intxt="""Barack Obama went to China yesterday.
He lives in Grand Hyatt Beijing. This is a superb hotel."""

# Both lookup tables use the same row layout:
#   [tokens, canonical form, category, URI, extra]
# tokens   - token sequence that is matched against a sentence
# category - "ner_noun", "verb", "noun", ...; selects how the phrase is
#            used when triples are built
# URI      - identifier that ends up in the generated triples
# The canonical form and the extra field are not read by the code visible here.

# Named entities; tokens are compared exactly (case-sensitive).
nertable=[
    [["Barack","Obama"],"Barack Obama","ner_noun","http://en.wikipedia.org/wiki/Barack_Obama","person"],
    [["China"],"China","ner_noun","http://en.wikipedia.org/wiki/China","country"],
    [["Grand","Hyatt","Beijing"],"Grand Hyatt Beijing","ner_noun","https://en.wikipedia.org/wiki/Grand_Hyatt_Beijing","company"]
]

# Ordinary words; a sentence token matches as-is or lower-cased, so the
# tokens listed here should be lower-case.
postable=[
    [["went"],"go","verb","http://conceptnet5.media.mit.edu/web/c/en/go","past"],
    [["to"],"to","preposition","http://conceptnet5.media.mit.edu/web/c/en/to",None],
    [["yesterday"],"yesterday","adverb","http://conceptnet5.media.mit.edu/web/c/en/yesterday",None],
    # The URI of this row used to be a copy of the "this" row.
    [["he"],"he","pronoun","http://conceptnet5.media.mit.edu/web/c/en/he",None],
    [["lives"],"live","verb","http://conceptnet5.media.mit.edu/web/c/en/live",None],
    [["in"],"in","preposition","http://conceptnet5.media.mit.edu/web/c/en/in",None],
    [["this"],"this","pronoun","http://conceptnet5.media.mit.edu/web/c/en/this",None],
    [["is"],"be","verb","http://conceptnet5.media.mit.edu/web/c/en/type/v/identify_as_belonging_to_a_certain_type",None],
    [["superb"],"superb","adjective","http://conceptnet5.media.mit.edu/web/c/en/superb",None],
    [["hotel"],"hotel","noun","http://conceptnet5.media.mit.edu/web/c/en/hotel",None]
]

# Counter behind create_local_id(); incremented for every generated local id.
idnum=0
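# Design sketch (apparently the triples the sample text should yield):
# [barack,action1,china]  <- "to china", "went ... yesterday"
# [action1,activity,moveto]
# [action1,time,past]
# [he,action2,grandhyattbeijing]
# [action2,activity,live_in]
# [action2,time,current]
# TODO: sentence pattern table (unfinished, commented-out stub):
#sentencetable=[
# [["noun","verb","noun"],[[0,1,2]]]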
def main(txt):
    """Run the whole pipeline on ``txt`` and print an Otter input file.

    Stages: split the text into sentences and words, tag named entities,
    tag the remaining words from the POS table, derive triples, and print
    the triples between the fixed Otter prefix and suffix.
    """
    sentences = split_text(txt)
    with_entities = ner_text(sentences)
    tagged = pos_text(with_entities)
    # pretty_print(tagged) shows the tagged sentences when debugging.
    triples = simple_rdf(tagged)
    # pretty_print(triples) shows the derived triples when debugging.
    print_otter_prefix()
    otter_print(triples)
    print_otter_suffix()
def ner_text(slst):
    """Replace known entity names in every sentence by their table row.

    ``slst`` is a list of sentences, each a list of word strings. The
    result has the same outer shape; a run of words recognised by
    sent_has_name_at() collapses into that single table row, all other
    words are copied unchanged.
    """
    tagged_sentences = []
    for tokens in slst:
        tagged = []
        pos = 0
        while pos < len(tokens):
            hit = sent_has_name_at(tokens, pos)
            tagged.append(hit[0] if hit else tokens[pos])
            # hit[1] is the index of the last token consumed by the match.
            pos = (hit[1] if hit else pos) + 1
        tagged_sentences.append(tagged)
    return tagged_sentences
def sent_has_name_at(sent,i,table=None):
    """Look for a known entity name starting at ``sent[i]``.

    Args:
        sent: list of tokens of one sentence.
        i: index of the first token to compare.
        table: rows whose first field is the token list to match;
            defaults to the module-level ``nertable``.

    Returns:
        ``[row, last_index]`` for the first row whose tokens all equal
        ``sent[i:...]`` (exact, case-sensitive comparison), where
        ``last_index`` is the index of the last matched token. A falsy
        value (0 for an empty sentence or out-of-range index, otherwise
        None) when nothing matches.
    """
    if not sent or i >= len(sent):
        return 0
    if table is None:
        table = nertable
    for known in table:
        phrase = known[0]
        # An empty phrase would "match" with last_index == i-1 and keep the
        # caller on the same token forever, so it is never a match.
        if not phrase:
            continue
        end = i + len(phrase)
        if end > len(sent):
            continue
        if all(sent[i + k] == token for k, token in enumerate(phrase)):
            return [known, end - 1]
    return None
def pos_text(slst):
    """Replace known words in every sentence by their POS table row.

    Items that are already lists (rows put in place by an earlier tagging
    pass) are copied unchanged. Every other token is looked up with
    sent_has_pos_at(); a match is replaced by its table row, an unknown
    token stays as it is.
    """
    tagged_sentences = []
    for tokens in slst:
        tagged = []
        pos = 0
        while pos < len(tokens):
            current = tokens[pos]
            # Already-tagged items are never looked up again.
            hit = None if type(current) is list else sent_has_pos_at(tokens, pos)
            if hit:
                tagged.append(hit[0])
                # hit[1] is the index of the last token consumed by the match.
                pos = hit[1] + 1
            else:
                tagged.append(current)
                pos += 1
        tagged_sentences.append(tagged)
    return tagged_sentences
def sent_has_pos_at(sent,i,table=None):
    """Look for a known word or phrase starting at ``sent[i]``.

    Args:
        sent: list of sentence items; plain tokens are strings, items
            tagged earlier are lists.
        i: index of the first item to compare.
        table: rows whose first field is the token list to match;
            defaults to the module-level ``postable``.

    Returns:
        ``[row, last_index]`` for the first row whose tokens all match,
        where a sentence token matches either as-is or lower-cased and
        ``last_index`` is the index of the last matched item. A falsy
        value (0 for an empty sentence or out-of-range index, otherwise
        None) when nothing matches.
    """
    if not sent or i >= len(sent):
        return 0
    if table is None:
        table = postable

    def same(item, token):
        # Only strings can be lower-cased; an already-tagged item (a list)
        # simply does not match instead of raising AttributeError.
        return item == token or (isinstance(item, str) and item.lower() == token)

    for known in table:
        phrase = known[0]
        # An empty phrase would "match" with last_index == i-1 and keep the
        # caller on the same token forever, so it is never a match.
        if not phrase:
            continue
        end = i + len(phrase)
        if end > len(sent):
            continue
        if all(same(sent[i + k], token) for k, token in enumerate(phrase)):
            return [known, end - 1]
    return None
def split_text(txt):
    """Split ``txt`` into sentences and each sentence into words.

    Commas are treated as whitespace, sentences end at ".", and words are
    separated by any run of whitespace (including newlines).

    Returns:
        A list of sentences, each a non-empty list of word strings.
        Segments that contain no words - for example the whitespace after
        the final period - are dropped, so no empty sentence is produced.
    """
    sentences = []
    for segment in txt.replace(",", " ").split("."):
        words = segment.split()
        if words:
            sentences.append(words)
    return sentences
def pretty_print(sentlst):
    """Debug helper: print each sentence with one item per line.

    Every entry of ``sentlst`` gets a "sentence: " header; the items are
    listed below it only when the entry is a list.
    """
    for sentence in sentlst:
        print("sentence: ")
        if type(sentence) is not list:
            continue
        for item in sentence:
            print(" " + str(item))
def print_otter_prefix():
    """Print the fixed Otter settings and open the ``sos`` clause list."""
    header_lines = (
        "% clear automatic strategy selection",
        "clear(auto).",
        "% use capital letters (A,X,T,...) as vars",
        "set(prolog_style_variables).",
        "% select the search strategy",
        "set(hyper_res). % an alternative is to use set(binary_res).",
        "set(factor).",
        "% select sensible amount of output",
        "clear(print_given). % uncomment to see input and process",
        "set(print_kept). % this is important: prints all generated and kept clauses",
        "assign(stats_level, 0).",
        "% just make it stop after N secs",
        "assign(max_seconds, 10).",
        "list(sos).",
    )
    # The emitted text starts and ends with a newline around the lines above.
    print("\n" + "\n".join(header_lines) + "\n")
def print_otter_suffix():
    """Print the hand-written inference clauses and close the clause list.

    The clauses are emitted after the generated facts and are followed by
    ``end_of_list.``.
    """
    rules="""
-rdf(X,"id:type",
"http://conceptnet5.media.mit.edu/web/c/en/person") |
rdf(X,"id:type",
"http://conceptnet5.media.mit.edu/web/c/en/animal").
-rdf(X,"id:type",
"http://conceptnet5.media.mit.edu/web/c/en/animal") |
rdf(X,"id:type",
"mortal").
-rdf(X,"id:type",
"http://conceptnet5.media.mit.edu/web/c/en/person") |
rdf(X,"id:CapableOf",
"id:eat").
-rdf(X , "http://conceptnet5.media.mit.edu/web/c/en/live", Y) |
rdf(X,"id:type", "http://conceptnet5.media.mit.edu/web/c/en/animal").
-rdf(X,"http://conceptnet5.media.mit.edu/web/c/en/type/v/identify_as_belonging_to_a_certain_type",Y) |
-rdf(Y,"id:isobject", Z) |
rdf(X,"id:type",Z).
-rdf(X,"http://conceptnet5.media.mit.edu/web/c/en/live",Y) |
-rdf(Y,"id:type","http://conceptnet5.media.mit.edu/web/c/en/hotel") |
-rdf(Y,"http://conceptnet5.media.mit.edu/web/c/en/type/v/identify_as_belonging_to_a_certain_type",Z) |
-rdf(Z,"id:extrainfo","http://conceptnet5.media.mit.edu/web/c/en/superb") |
rdf(X,"id:type","http://conceptnet5.media.mit.edu/web/c/en/rich").
end_of_list.
"""
    print(rules)
def otter_print(sentlst):
    """Print every triple in ``sentlst`` as an Otter ``rdf(...)`` fact.

    Each element is written on its own line, double-quoted and
    comma-separated. An element that is itself a list is represented by
    its first item. An entry that is not a list prints as an empty
    ``rdf(`` / ``).`` pair.
    """
    for triple in sentlst:
        print("rdf(")
        if type(triple) is list:
            last = len(triple) - 1
            for index, element in enumerate(triple):
                value = element[0] if type(element) is list else element
                separator = "," if index < last else ""
                print(' "' + str(value) + '"' + separator)
        print(").")
def simple_rdf(sentlst):
    """Collect the triples of all tagged sentences into one flat list.

    Each sentence is converted by simple_rdf_sentence(), which also gets
    the preceding sentence (None for the first one) for pronoun lookup.
    Sentences that yield nothing are skipped.
    """
    triples = []
    previous = None
    for current in sentlst:
        produced = simple_rdf_sentence(current, previous)
        previous = current
        if produced:
            triples.extend(produced)
    return triples
def simple_rdf_sentence(sent,prevsent):
    """Turn one tagged sentence into a list of triples, or None.

    Only list items (table rows) are considered; field 2 is the category
    and field 3 the URI. A pronoun contributes the noun URIs of
    ``prevsent``: the single URI if there is exactly one, the whole list
    of candidates if there are several, nothing if there are none.

    A result needs at least one verb and two nouns:
      * with an adverb:    subject --id:action--> local id, plus the
        activity (verb) and extra info (adverb) of that local id;
      * with an adjective: subject --verb--> local id, plus the object
        (second noun) and extra info (adjective) of that local id;
      * otherwise:         subject --verb--> second noun.
    """
    found = {"verb": [], "adverb": [], "noun": [], "adjective": []}
    for item in sent:
        if type(item) is not list:
            continue
        category = item[2]
        if category in ("verb", "adverb", "adjective"):
            found[category].append(item[3])
        elif category in ("ner_noun", "noun"):
            found["noun"].append(item[3])
        elif category == "pronoun":
            referents = get_candidate_nouns(prevsent)
            if referents:
                found["noun"].append(referents[0] if len(referents) == 1 else referents)

    verbs = found["verb"]
    nouns = found["noun"]
    if not verbs or len(nouns) <= 1:
        return None

    subject = nouns[0]
    verb = verbs[0]
    if found["adverb"]:
        local = create_local_id()
        return [[subject, "id:action", local],
                [local, "id:isactivity", verb],
                [local, "id:extrainfo", found["adverb"][0]]]
    if found["adjective"]:
        local = create_local_id()
        return [[subject, verb, local],
                [local, "id:isobject", nouns[1]],
                [local, "id:extrainfo", found["adjective"][0]]]
    return [[subject, verb, nouns[1]]]
def get_candidate_nouns(sent):
    """Return the URIs of all nouns and named entities in a tagged sentence.

    Args:
        sent: a tagged sentence (list of table rows and raw string tokens),
            or None when there is no such sentence.

    Returns:
        The URI (field 3) of every row whose category (field 2) is
        "ner_noun" or "noun", in sentence order. An empty list for None
        or an empty sentence.
    """
    if not sent:
        # No sentence to look in, e.g. a pronoun in the very first sentence.
        return []
    # Unrecognised words stay raw strings in a tagged sentence; only real
    # table rows have a category and a URI to inspect.
    return [
        item[3]
        for item in sent
        if isinstance(item, list) and len(item) > 3 and item[2] in ("ner_noun", "noun")
    ]
def create_local_id():
    """Return a fresh identifier of the form ``id:local_<n>``.

    Increments the module-level counter ``idnum`` on every call.
    """
    global idnum
    idnum = idnum + 1
    return "id:local_{}".format(idnum)
# Run the pipeline on the built-in sample text; this executes whenever the
# file is run or imported.
main(intxt)