<?xml version="1.0"?>
<feed xmlns="http://www.w3.org/2005/Atom" xml:lang="et">
	<id>http://courses.cs.taltech.ee/w/index.php?action=history&amp;feed=atom&amp;title=Example_NL_extractor_1</id>
	<title>Example NL extractor 1 - Redigeerimiste ajalugu</title>
	<link rel="self" type="application/atom+xml" href="http://courses.cs.taltech.ee/w/index.php?action=history&amp;feed=atom&amp;title=Example_NL_extractor_1"/>
	<link rel="alternate" type="text/html" href="http://courses.cs.taltech.ee/w/index.php?title=Example_NL_extractor_1&amp;action=history"/>
	<updated>2026-05-21T20:53:57Z</updated>
	<subtitle>Selle lehekülje redigeerimiste ajalugu</subtitle>
	<generator>MediaWiki 1.35.9</generator>
	<entry>
		<id>http://courses.cs.taltech.ee/w/index.php?title=Example_NL_extractor_1&amp;diff=3191&amp;oldid=prev</id>
		<title>Tanel: Uus lehekülg: &#039; &lt;pre&gt;  intxt=&quot;&quot;&quot;Barack Obama went to China yesterday.  He lives in Grand Hyatt Beijing. This is a superb hotel.&quot;&quot;&quot;   nertable=[   [[&quot;Barack&quot;,&quot;Obama&quot;],&quot;Barack Obama&quot;,&quot;ner_noun&quot;,&quot;...&#039;</title>
		<link rel="alternate" type="text/html" href="http://courses.cs.taltech.ee/w/index.php?title=Example_NL_extractor_1&amp;diff=3191&amp;oldid=prev"/>
		<updated>2015-10-20T15:37:19Z</updated>

		<summary type="html">&lt;p&gt;Uus lehekülg: &amp;#039; &amp;lt;pre&amp;gt;  intxt=&amp;quot;&amp;quot;&amp;quot;Barack Obama went to China yesterday.  He lives in Grand Hyatt Beijing. This is a superb hotel.&amp;quot;&amp;quot;&amp;quot;   nertable=[   [[&amp;quot;Barack&amp;quot;,&amp;quot;Obama&amp;quot;],&amp;quot;Barack Obama&amp;quot;,&amp;quot;ner_noun&amp;quot;,&amp;quot;...&amp;#039;&lt;/p&gt;
&lt;p&gt;&lt;b&gt;Uus lehekülg&lt;/b&gt;&lt;/p&gt;&lt;div&gt;&lt;br /&gt;
&amp;lt;pre&amp;gt;&lt;br /&gt;
&lt;br /&gt;
intxt=&amp;quot;&amp;quot;&amp;quot;Barack Obama went to China yesterday. &lt;br /&gt;
He lives in Grand Hyatt Beijing. This is a superb hotel.&amp;quot;&amp;quot;&amp;quot; &lt;br /&gt;
&lt;br /&gt;
nertable=[&lt;br /&gt;
  [[&amp;quot;Barack&amp;quot;,&amp;quot;Obama&amp;quot;],&amp;quot;Barack Obama&amp;quot;,&amp;quot;ner_noun&amp;quot;,&amp;quot;http://en.wikipedia.org/wiki/Barack_Obama&amp;quot;,&amp;quot;person&amp;quot;],&lt;br /&gt;
  [[&amp;quot;China&amp;quot;],&amp;quot;China&amp;quot;,&amp;quot;ner_noun&amp;quot;,&amp;quot;http://en.wikipedia.org/wiki/China&amp;quot;,&amp;quot;country&amp;quot;],&lt;br /&gt;
  [[&amp;quot;Grand&amp;quot;,&amp;quot;Hyatt&amp;quot;,&amp;quot;Beijing&amp;quot;],&amp;quot;Grand Hyatt Beijing&amp;quot;,&amp;quot;ner_noun&amp;quot;,&amp;quot;https://en.wikipedia.org/wiki/Grand_Hyatt_Beijing&amp;quot;,&amp;quot;company&amp;quot;]&lt;br /&gt;
]  &lt;br /&gt;
&lt;br /&gt;
postable=[&lt;br /&gt;
  [[&amp;quot;went&amp;quot;],&amp;quot;go&amp;quot;,&amp;quot;verb&amp;quot;,&amp;quot;http://conceptnet5.media.mit.edu/data/5.3/c/en/go&amp;quot;,&amp;quot;past&amp;quot;],&lt;br /&gt;
  [[&amp;quot;to&amp;quot;],&amp;quot;to&amp;quot;,&amp;quot;preposition&amp;quot;,&amp;quot;http://conceptnet5.media.mit.edu/data/5.3/c/en/to&amp;quot;,None],&lt;br /&gt;
  [[&amp;quot;yesterday&amp;quot;],&amp;quot;yesterday&amp;quot;,&amp;quot;adverb&amp;quot;,&amp;quot;http://conceptnet5.media.mit.edu/data/5.3/c/en/yesterday&amp;quot;,None],&lt;br /&gt;
  [[&amp;quot;this&amp;quot;],&amp;quot;this&amp;quot;,&amp;quot;adjective&amp;quot;,&amp;quot;http://conceptnet5.media.mit.edu/data/5.3/c/en/this&amp;quot;,None]&lt;br /&gt;
]  &lt;br /&gt;
&lt;br /&gt;
# [barack,action1,china]   &amp;quot;to china&amp;quot;, &amp;quot;went ... yesterday&amp;quot;&lt;br /&gt;
# [action1,activity,moveto]&lt;br /&gt;
# [action1,time,past]&lt;br /&gt;
&lt;br /&gt;
# [he,action2, grandhyattbeijing]&lt;br /&gt;
# [action2,activity,live_in]&lt;br /&gt;
# [action2,time,current]&lt;br /&gt;
&lt;br /&gt;
# TODO:&lt;br /&gt;
#sentencetable=[&lt;br /&gt;
#  [[&amp;quot;noun&amp;quot;,&amp;quot;verb&amp;quot;,&amp;quot;noun&amp;quot;],[[0,1,2]]]&lt;br /&gt;
  &lt;br /&gt;
def main(txt):&lt;br /&gt;
  splitted=split_text(txt)&lt;br /&gt;
  print(&amp;quot;splitted:&amp;quot;)&lt;br /&gt;
  print(splitted)&lt;br /&gt;
  nerred=ner_text(splitted)&lt;br /&gt;
  print(&amp;quot;nerred:&amp;quot;)&lt;br /&gt;
  print(nerred)&lt;br /&gt;
  posed=pos_text(nerred)&lt;br /&gt;
  print(&amp;quot;posed:&amp;quot;)&lt;br /&gt;
  print(posed)&lt;br /&gt;
  pretty_print(posed)&lt;br /&gt;
  &lt;br /&gt;
def ner_text(slst):&lt;br /&gt;
  rlst=[]&lt;br /&gt;
  for sent in slst:&lt;br /&gt;
    srlst=[]&lt;br /&gt;
    i=0&lt;br /&gt;
    while i&amp;lt;len(sent):&lt;br /&gt;
      tmp=sent_has_name_at(sent,i)&lt;br /&gt;
      if tmp:&lt;br /&gt;
        srlst.append(tmp[0])&lt;br /&gt;
        i=tmp[1]&lt;br /&gt;
      else:&lt;br /&gt;
        srlst.append(sent[i])&lt;br /&gt;
      i+=1  &lt;br /&gt;
    rlst.append(srlst)&lt;br /&gt;
  return rlst&lt;br /&gt;
&lt;br /&gt;
def sent_has_name_at(sent,i):&lt;br /&gt;
  if not sent: return 0&lt;br /&gt;
  if i&amp;gt;=len(sent): return 0&lt;br /&gt;
  for known in nertable:&lt;br /&gt;
    phrase=known[0]&lt;br /&gt;
    j=0&lt;br /&gt;
    while j&amp;lt;len(phrase):&lt;br /&gt;
      if i+j&amp;gt;=len(sent): break&lt;br /&gt;
      if sent[i+j]!=phrase[j]:&lt;br /&gt;
        break&lt;br /&gt;
      j+=1&lt;br /&gt;
    if j==len(phrase):&lt;br /&gt;
      res=[known,i+len(phrase)-1]&lt;br /&gt;
      return res&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
def pos_text(slst):&lt;br /&gt;
  rlst=[]&lt;br /&gt;
  for sent in slst:&lt;br /&gt;
    srlst=[]&lt;br /&gt;
    i=0&lt;br /&gt;
    while i&amp;lt;len(sent):&lt;br /&gt;
      if type(sent[i])==type([0]): &lt;br /&gt;
        srlst.append(sent[i])&lt;br /&gt;
        i+=1&lt;br /&gt;
        continue&lt;br /&gt;
      tmp=sent_has_pos_at(sent,i)&lt;br /&gt;
      if tmp:&lt;br /&gt;
        srlst.append(tmp[0])&lt;br /&gt;
        i=tmp[1]&lt;br /&gt;
      else:&lt;br /&gt;
        srlst.append(sent[i])&lt;br /&gt;
      i+=1  &lt;br /&gt;
    rlst.append(srlst)&lt;br /&gt;
  return rlst&lt;br /&gt;
&lt;br /&gt;
def sent_has_pos_at(sent,i):&lt;br /&gt;
  if not sent: return 0&lt;br /&gt;
  if i&amp;gt;=len(sent): return 0&lt;br /&gt;
  for known in postable:&lt;br /&gt;
    phrase=known[0]&lt;br /&gt;
    j=0&lt;br /&gt;
    while j&amp;lt;len(phrase):&lt;br /&gt;
      if i+j&amp;gt;=len(sent): break&lt;br /&gt;
      if sent[i+j]!=phrase[j]:&lt;br /&gt;
        break&lt;br /&gt;
      j+=1&lt;br /&gt;
    if j==len(phrase):&lt;br /&gt;
      res=[known,i+len(phrase)-1]&lt;br /&gt;
      return res&lt;br /&gt;
&lt;br /&gt;
def split_text(txt):&lt;br /&gt;
  sentlst=txt.replace(&amp;quot;,&amp;quot;,&amp;quot; &amp;quot;).split(&amp;quot;.&amp;quot;)&lt;br /&gt;
  wlst=[]&lt;br /&gt;
  for s in sentlst:&lt;br /&gt;
    if not s: continue&lt;br /&gt;
    sp=s.replace(&amp;quot;.&amp;quot;,&amp;quot; &amp;quot;).replace(&amp;quot;\n&amp;quot;,&amp;quot; &amp;quot;).split(&amp;quot; &amp;quot;)&lt;br /&gt;
    tmp=[]&lt;br /&gt;
    for w in sp:&lt;br /&gt;
      w1=w.strip()&lt;br /&gt;
      if w1: tmp.append(w1)      &lt;br /&gt;
    wlst.append(tmp)&lt;br /&gt;
  return wlst&lt;br /&gt;
&lt;br /&gt;
def pretty_print(sentlst):&lt;br /&gt;
  for sent in sentlst:&lt;br /&gt;
    print(&amp;quot;sentence: &amp;quot;)&lt;br /&gt;
    for phrase in sent:&lt;br /&gt;
      print(&amp;quot;  &amp;quot;+str(phrase)) &lt;br /&gt;
&lt;br /&gt;
main(intxt)&lt;br /&gt;
&amp;lt;/pre&amp;gt;&lt;/div&gt;</summary>
		<author><name>Tanel</name></author>
	</entry>
</feed>