[Hejes-devel] [891] code upgrade: alg is finally working, it seems
hejes-devel at nytud.hu
hejes-devel at nytud.hu
Sun Aug 4 20:51:24 CEST 2013
Revision: 891
Author: hussami
Date: 2013-08-04 20:51:24 +0200 (Sun, 04 Aug 2013)
Log Message:
-----------
code upgrade: alg is finally working, it seems
Modified Paths:
--------------
trunk/misc/osiris_xml/hitmarker.py
Modified: trunk/misc/osiris_xml/hitmarker.py
===================================================================
--- trunk/misc/osiris_xml/hitmarker.py 2013-08-02 13:47:55 UTC (rev 890)
+++ trunk/misc/osiris_xml/hitmarker.py 2013-08-04 18:51:24 UTC (rev 891)
@@ -16,14 +16,15 @@
self.data = {}
self.banned = {}
- def addHit(self, key, term_id, term_index, islast, misc):
+ def addHit(self, key, term_id, term_version, term_index, islast, misc):
if key not in self.data:
self.data[key] = {}
if term_id not in self.data[key]:
- self.data[key][term_id] = [(term_index, islast, misc)]
+ self.data[key][term_id] = [(term_version, term_index, islast, misc)]
else:
- self.data[key][term_id].append((term_index, islast, misc))
+ self.data[key][term_id].append((term_version, term_index,
+ islast, misc))
def dropCompleteHit(self, key):
if key in self.data:
@@ -69,19 +70,33 @@
def fill(self, lst):
"""
- lst must be in the form: (key, word_id, word_index, islast, misc)
+ lst must be in the form: (key, word_id, word_version, word_index,
+ islast, misc)
"""
- for lk, lwd, lwi, lis, limisc in lst:
- self.hits.addHit(lk, lwd, lwi, lis, limisc)
+ for lk, lwd, lve, lwi, lis, limisc in lst:
+ self.hits.addHit(lk, lwd, lve, lwi, lis, limisc)
+ def recordHit(self, hits, term_id, version, begin, end):
+ """
+ Records all the relevant values for a hit.
+ """
+ if term_id not in hits:
+ hits[term_id] = {}
+ if version not in hits[term_id]:
+ hits[term_id][version] = []
+ hits[term_id][version].append((begin, end))
+
+
def lookup(self, lst):
"""
lst is a list of keys
"""
+ result = {}
workdict = {}
#for all elements of the list
for li, le in enumerate(lst):
+ print "\twd at ", li, " out of ", len(lst), " is ", workdict
newworkdict = {}
#first: find all entries for this key
entries = self.hits.getCompleteHit(le)
@@ -97,41 +112,72 @@
print "termid=", term_id
for term_desc in term_desc_list:
+ version = term_desc[0]
+ print "\tnow looking at version: " + str(version), term_desc[1]
#is this id already in progress?
- if term_id in workdict:
+ cont_branch = (term_id in workdict)
+ if cont_branch:
+ cont_branch = version in workdict[term_id]
+ if cont_branch:
+ cont_branch = (term_desc[1] - 1) in \
+ workdict[term_id][version]
+
+ #if it is in progress, see if we can now upgrade or exit
+ if cont_branch:
+ workentry = workdict[term_id][version][term_desc[1] - 1]
#is this a continuation by index?
- if term_desc[0] == workdict[term_id][0] + 1:
+ if term_desc[1] == \
+ workentry[0] + 1:
#is this the last entry for the term?
- if term_desc[1]:
+ if term_desc[2]:
print "A hit!"
+ self.recordHit(result, term_id, version,
+ workentry[1], li)
else:
- newworkdict[term_id] = (workdict[term_id][0] + 1, \
- workdict[term_id][1])
+ if term_id not in newworkdict:
+ newworkdict[term_id] = {}
+ if version not in newworkdict[term_id]:
+ newworkdict[term_id][version] = {}
+ newworkdict[term_id][version][term_desc[1]] = (
+ workentry[0] + 1, workentry[1])
print "an upgrade!"
else:
- del workdict[term_id]
- print "DEL"
+# del workdict[term_id]
+ print "This did not match"
- else: #a new sight!
- print "td0=", term_desc
- if term_desc[0] == 0: #a beginning
- print "okay"
- if term_desc[1]: #and end as well!
- print "hit2"
- else: #record new
- newworkdict[term_id] = (0, li)
- print "a newie"
+ #anyway, we should check if it's a new entry
+ print "td1=", term_desc[1]
+ if term_desc[1] == 0: #a beginning
+ if term_desc[2]: #and end as well!
+ print "hit2"
+ self.recordHit(result, term_id, version, li, li)
+ else: #record new
+ if term_id not in newworkdict:
+ newworkdict[term_id] = {}
+ if version not in newworkdict[term_id]:
+ newworkdict[term_id][version] = {}
+
+ newworkdict[term_id][version][0] = (0, li)
+ print "a newie"
+
workdict = newworkdict
+ return result
if __name__ == "__main__":
hm = HitMarker()
- lst = [('cica', 15, 0, 0, 9), ('rugja', 15, 1, 0, 9), ('meg', 15, 2, 1, 9),\
- ('cica', 100, 0, 0, 1), ('farka', 100, 1, 1, 1)]
+ lst = [('cica', 15, 4, 0, 0, 9), ('rugja', 15, 4, 1, 0, 9), ('meg', 15, 4, 2, 1, 9),\
+ ('cica', 100, 2, 0, 0, 1), ('cica', 100, 2, 1, 0, 1), \
+ ('farka', 100, 2, 2, 1, 1), ('cica', 100, 1, 0, 0, 1), \
+ ('farka', 100, 1, 1, 1, 1), ('cica', 405, 1, 0, 1, 11)]
hm.fill(lst)
print hm.hits
- tox = ['cica', 'rugja', 'mar', 'meg']
- hm.lookup(tox)
+# tox = ['cica', 'rugja', 'mar', 'meg']
+# tox = ['cica', 'rugja', 'meg', 'rugja']
+ tox = ['cica', 'cica', 'farka', 'cica', 'cica', 'farka']
+ print "lento",len(tox)
+ rr = hm.lookup(tox)
+ print "result=", rr
More information about the Hejes-devel mailing list