Lab 2 example code converted to give Otter output

Allikas: Kursused
Redaktsioon seisuga 17. november 2015, kell 16:34 kasutajalt Tanel (arutelu | kaastöö) (Uus lehekülg: '<pre> intxt="""Barack Obama went to China yesterday. He lives in Grand Hyatt Beijing. This is a superb hotel.""" nertable=[ [["Barack","Obama"],"Barack Obama","ner_noun","h...')
(erin) ←Vanem redaktsioon | Viimane redaktsiooni (erin) | Uuem redaktsioon→ (erin)
Mine navigeerimisribale Mine otsikasti

# Example input: three sentences spread over two lines of text.
intxt = (
  "Barack Obama went to China yesterday. \n"
  "He lives in Grand Hyatt Beijing. This is a superb hotel."
)

# Known names. Row layout, as read by the functions in this file:
#   row[0]  list of words compared against the sentence (sent_has_name_at)
#   row[2]  category tested in simple_rdf_sentence / get_candidate_nouns
#   row[3]  URL stored in the generated triples
# row[1] and row[4] are not read by the code in this file.
nertable = [
  [
    ["Barack", "Obama"],
    "Barack Obama",
    "ner_noun",
    "http://en.wikipedia.org/wiki/Barack_Obama",
    "person",
  ],
  [
    ["China"],
    "China",
    "ner_noun",
    "http://en.wikipedia.org/wiki/China",
    "country",
  ],
  [
    ["Grand", "Hyatt", "Beijing"],
    "Grand Hyatt Beijing",
    "ner_noun",
    "https://en.wikipedia.org/wiki/Grand_Hyatt_Beijing",
    "company",
  ],
]

# Known ordinary words; same row layout as nertable. row[0] holds lowercase
# words, and sent_has_pos_at also compares the lowercased sentence word.
# NOTE(review): the URL for "he" ends in /data/web/c/en/this, which looks
# like a copy-paste slip -- confirm. simple_rdf_sentence does not read the
# URL of a pronoun row, so the output is not affected.
postable = [
  [["went"], "go", "verb",
   "http://conceptnet5.media.mit.edu/web/c/en/go", "past"],
  [["to"], "to", "preposition",
   "http://conceptnet5.media.mit.edu/web/c/en/to", None],
  [["yesterday"], "yesterday", "adverb",
   "http://conceptnet5.media.mit.edu/web/c/en/yesterday", None],
  [["he"], "he", "pronoun",
   "http://conceptnet5.media.mit.edu/data/web/c/en/this", None],
  [["lives"], "live", "verb",
   "http://conceptnet5.media.mit.edu/web/c/en/live", None],
  [["in"], "in", "preposition",
   "http://conceptnet5.media.mit.edu/web/c/en/in", None],
  [["this"], "this", "pronoun",
   "http://conceptnet5.media.mit.edu/web/c/en/this", None],
  [["is"], "be", "verb",
   "http://conceptnet5.media.mit.edu/web/c/en/type/v/identify_as_belonging_to_a_certain_type", None],
  [["superb"], "superb", "adjective",
   "http://conceptnet5.media.mit.edu/web/c/en/superb", None],
  [["hotel"], "hotel", "noun",
   "http://conceptnet5.media.mit.edu/web/c/en/hotel", None],
]

# Counter behind create_local_id(); incremented before each use.
idnum = 0

# [barack,action1,china]   "to china", "went ... yesterday"
# [action1,activity,moveto]
# [action1,time,past]

# [he,action2, grandhyattbeijing]
# [action2,activity,live_in]
# [action2,time,current]

# TODO: add a table of sentence patterns, for example:
#sentencetable=[
#  [["noun","verb","noun"],[[0,1,2]]]
#]
  
def main(txt):
  """Run the whole pipeline on txt and print the result in Otter syntax.

  The text is split into sentences, known names and then known words are
  replaced by their table rows, the tagged sentences are turned into rdf
  triples, and the triples are printed between the Otter prefix and suffix.
  """
  tagged=pos_text(ner_text(split_text(txt)))
  triples=simple_rdf(tagged)
  # For debugging, the intermediate results can be inspected with:
  #pretty_print(tagged)
  #pretty_print(triples)

  print_otter_prefix()
  otter_print(triples)
  print_otter_suffix()
  
  
  
def ner_text(slst):
  """Replace known names in each sentence by their nertable row.

  slst is a list of sentences, each a list of words. The result has the
  same shape, except that every run of words matched by sent_has_name_at
  is collapsed into the single matching row; other words are kept as-is.
  """
  tagged_sentences=[]
  for words in slst:
    tagged=[]
    pos=0
    while pos<len(words):
      hit=sent_has_name_at(words,pos)
      if not hit:
        tagged.append(words[pos])
        pos+=1
        continue
      tagged.append(hit[0])
      # hit[1] is the index of the last matched word: resume right after it
      pos=hit[1]+1
    tagged_sentences.append(tagged)
  return tagged_sentences

def sent_has_name_at(sent,i):
  """Look for a nertable name starting at position i of sent.

  Returns [row, last] for the first row of nertable whose word list equals
  the words of sent beginning at i, where last is the index of the final
  matched word. Returns 0 when sent is empty or i is past its end, and
  None when no row matches.
  """
  if not sent or i>=len(sent):
    return 0
  for entry in nertable:
    words=entry[0]
    matched=all(
      i+k<len(sent) and sent[i+k]==words[k]
      for k in range(len(words))
    )
    if matched:
      return [entry,i+len(words)-1]
  return None



def pos_text(slst):
  """Replace known words in each sentence by their postable row.

  Items that are already lists (rows put in by ner_text) are copied
  unchanged. For every other item sent_has_pos_at is consulted: on a match
  the row is stored and the matched words are skipped, otherwise the word
  itself is kept.
  """
  result=[]
  for sent in slst:
    tagged=[]
    idx=0
    while idx<len(sent):
      token=sent[idx]
      if type(token) is not list:
        match=sent_has_pos_at(sent,idx)
        if match:
          token=match[0]
          # jump to the last matched word; the increment below steps past it
          idx=match[1]
      tagged.append(token)
      idx+=1
    result.append(tagged)
  return result

def sent_has_pos_at(sent,i):
  """Look for a postable phrase starting at position i of sent.

  A sentence word matches a table word when it is equal to it either as
  written or after lowercasing. Returns [row, last] for the first matching
  row, where last is the index of the final matched word. Returns 0 when
  sent is empty or i is past its end, and None when no row matches.
  """
  if not sent or i>=len(sent):
    return 0
  for entry in postable:
    words=entry[0]
    matched=all(
      i+k<len(sent) and (sent[i+k]==words[k] or sent[i+k].lower()==words[k])
      for k in range(len(words))
    )
    if matched:
      return [entry,i+len(words)-1]
  return None

def split_text(txt):
  """Split txt into sentences, each returned as a list of words.

  Commas are treated as spaces, "." ends a sentence, and words are
  separated by any run of whitespace (spaces, tabs, newlines).

  Segments without any word are dropped, so text ending in ". " or
  containing ".." does not produce empty sentences. An empty sentence
  would otherwise become the "previous sentence" used by simple_rdf when
  it resolves a pronoun.

  Returns a list of lists of strings; an empty list for empty text.
  """
  sentences=[]
  for segment in txt.replace(","," ").split("."):
    # split() without arguments splits on any whitespace and never yields
    # empty strings, so no separate strip/filter step is needed
    words=segment.split()
    if words:
      sentences.append(words)
  return sentences

def pretty_print(sentlst):
  """Print each sentence of sentlst, one item per indented line.

  Every element gets a "sentence: " heading; its items are listed below
  the heading only when the element is a list.
  """
  for sent in sentlst:
    print("sentence: ")
    if type(sent) is not list:
      continue
    for item in sent:
      print("  "+str(item))
  
def print_otter_prefix():
  """Print the fixed Otter header: option settings, then the list(sos) opener."""
  header="""
  % clear automatic strategy selection
clear(auto).

% use capital letters (A,X,T,...) as vars
set(prolog_style_variables).

% select the search strategy
set(hyper_res). % an alternative is to use set(binary_res).
set(factor).

% select sensible amount of output
clear(print_given). % uncomment to see input and process
set(print_kept).  % this is important: prints all generated and kept clauses
assign(stats_level, 0).

% just make it stop after N secs
assign(max_seconds, 10).

list(sos).

  """
  print(header)
  
def print_otter_suffix():
  """Print the fixed Otter rules that follow the facts, then end_of_list."""
  rules="""
  
-rdf(X,"id:type",
    "http://conceptnet5.media.mit.edu/web/c/en/person") |
 rdf(X,"id:type",
    "http://conceptnet5.media.mit.edu/web/c/en/animal").
  
 -rdf(X,"id:type",
    "http://conceptnet5.media.mit.edu/web/c/en/animal") |
 rdf(X,"id:type",
    "mortal").	  
    
 -rdf(X,"id:type",
    "http://conceptnet5.media.mit.edu/web/c/en/person") |
  rdf(X,"id:CapableOf",
    "id:eat").
  
  -rdf(X , "http://conceptnet5.media.mit.edu/web/c/en/live", Y) | 
  rdf(X,"id:type", "http://conceptnet5.media.mit.edu/web/c/en/animal").    
   
  -rdf(X,"http://conceptnet5.media.mit.edu/web/c/en/type/v/identify_as_belonging_to_a_certain_type",Y) | 
  -rdf(Y,"id:isobject", Z) |
  rdf(X,"id:type",Z).
  
  -rdf(X,"http://conceptnet5.media.mit.edu/web/c/en/live",Y) | 
  -rdf(Y,"id:type","http://conceptnet5.media.mit.edu/web/c/en/hotel") |
  -rdf(Y,"http://conceptnet5.media.mit.edu/web/c/en/type/v/identify_as_belonging_to_a_certain_type",Z) |
  -rdf(Z,"id:extrainfo","http://conceptnet5.media.mit.edu/web/c/en/superb") | 
  rdf(X,"id:type","http://conceptnet5.media.mit.edu/web/c/en/rich").

end_of_list.
  """
  print(rules)

def otter_print(sentlst):
  """Print every element of sentlst as an Otter rdf(...) fact.

  Each term of a triple goes on its own line in double quotes, separated
  by commas. A term that is itself a list is represented by its first
  element. An element of sentlst that is not a list prints as an empty
  "rdf(" / ")." pair.
  """
  for triple in sentlst:
    print("rdf(")
    if type(triple) is list:
      last=len(triple)-1
      for pos,term in enumerate(triple):
        value=term[0] if type(term) is list else term
        # every term except the final one is followed by a comma
        sep="," if pos<last else ""
        print('  "'+str(value)+'"'+sep)
    print(").")

def simple_rdf(sentlst):
  """Collect the rdf triples of all sentences into one flat list.

  Each sentence is handed to simple_rdf_sentence together with the
  sentence before it (None for the first one). Sentences that yield
  nothing are skipped.
  """
  triples=[]
  previous=None
  for current in sentlst:
    produced=simple_rdf_sentence(current,previous)
    previous=current
    if produced:
      triples.extend(produced)
  return triples

def simple_rdf_sentence(sent,prevsent):
  """Build rdf triples for one tagged sentence.

  Only list items (table rows) of sent are considered; row[2] is the
  category and row[3] the URL. A pronoun is replaced by the nouns of
  prevsent as returned by get_candidate_nouns: the single candidate
  itself, or the whole candidate list when there are several.

  Returns None unless the sentence has a verb and at least two nouns.
  Otherwise returns a list of triples:
    - with an adverb:    subject/"id:action"/local id, plus the verb and
                         the adverb attached to the local id
    - with an adjective: subject/verb/local id, plus the second noun and
                         the adjective attached to the local id
    - otherwise:         the single triple subject/verb/second noun
  """
  verbs=[]
  adverbs=[]
  nouns=[]
  adjectives=[]
  for phrase in sent:
    if type(phrase) is not list:
      continue
    kind=phrase[2]
    if kind=="verb":
      verbs.append(phrase[3])
    elif kind=="adverb":
      adverbs.append(phrase[3])
    elif kind in ("ner_noun","noun"):
      nouns.append(phrase[3])
    elif kind=="pronoun":
      candidates=get_candidate_nouns(prevsent)
      if candidates:
        nouns.append(candidates[0] if len(candidates)==1 else candidates)
    elif kind=="adjective":
      adjectives.append(phrase[3])

  if not verbs or len(nouns)<2:
    return None
  if adverbs:
    lid=create_local_id()
    return [[nouns[0],"id:action",lid],
            [lid,"id:isactivity",verbs[0]],
            [lid,"id:extrainfo",adverbs[0]]]
  if adjectives:
    lid=create_local_id()
    return [[nouns[0],verbs[0],lid],
            [lid,"id:isobject",nouns[1]],
            [lid,"id:extrainfo",adjectives[0]]]
  return [[nouns[0],verbs[0],nouns[1]]]

def get_candidate_nouns(sent):
  """Return the URLs (row[3]) of all nouns found in a tagged sentence.

  sent is a sentence as produced by pos_text: a mix of plain words and
  table rows. Only rows whose category (row[2]) is "ner_noun" or "noun"
  contribute. Plain words are skipped: indexing them would raise
  IndexError for short words such as "a", and can never denote a tagged
  noun anyway.

  sent may be None or empty -- simple_rdf passes None as the previous
  sentence of the first sentence -- in which case the result is [].
  """
  if not sent:
    return []
  return [
    phrase[3]
    for phrase in sent
    if isinstance(phrase,list) and len(phrase)>3
    and phrase[2] in ("ner_noun","noun")
  ]

def create_local_id():
  """Return a fresh identifier "id:local_<n>".

  Increments the module-level counter idnum and uses the new value as n.
  """
  global idnum
  idnum=idnum+1
  return "id:local_{}".format(idnum)
  
# Run the pipeline on the example text. This is a module-level call, so it
# also executes when the file is imported.
main(intxt)