[Hejes-devel] [891] code upgrade: alg is finally working, it seems

hejes-devel at nytud.hu hejes-devel at nytud.hu
Sun Aug 4 20:51:24 CEST 2013


Revision: 891
Author:   hussami
Date:     2013-08-04 20:51:24 +0200 (Sun, 04 Aug 2013)
Log Message:
-----------
code upgrade: alg is finally working, it seems

Modified Paths:
--------------
    trunk/misc/osiris_xml/hitmarker.py

Modified: trunk/misc/osiris_xml/hitmarker.py
===================================================================
--- trunk/misc/osiris_xml/hitmarker.py	2013-08-02 13:47:55 UTC (rev 890)
+++ trunk/misc/osiris_xml/hitmarker.py	2013-08-04 18:51:24 UTC (rev 891)
@@ -16,14 +16,15 @@
         self.data = {}
         self.banned = {}
 
-    def addHit(self, key, term_id, term_index, islast, misc):
+    def addHit(self, key, term_id, term_version, term_index, islast, misc):
         if key not in self.data:
             self.data[key] = {}
 
         if term_id not in self.data[key]:
-            self.data[key][term_id] = [(term_index, islast, misc)]
+            self.data[key][term_id] = [(term_version, term_index, islast, misc)]
         else:
-            self.data[key][term_id].append((term_index, islast, misc))
+            self.data[key][term_id].append((term_version, term_index, 
+                islast, misc))
 
     def dropCompleteHit(self, key):
         if key in self.data:
@@ -69,19 +70,33 @@
 
     def fill(self, lst):
         """
-            lst must be in the form: (key, word_id, word_index, islast, misc)
+            lst must be in the form: (key, word_id, word_version, word_index, 
+                islast, misc)
         """
-        for lk, lwd, lwi, lis, limisc in lst:
-            self.hits.addHit(lk, lwd, lwi, lis, limisc)
+        for lk, lwd, lve, lwi, lis, limisc in lst:
+            self.hits.addHit(lk, lwd, lve, lwi, lis, limisc)
 
+    def recordHit(self, hits, term_id, version, begin, end):
+        """
+            Records all the relevant values for a hit.
+        """
+        if term_id not in hits:
+            hits[term_id] = {}
+        if version not in hits[term_id]:
+            hits[term_id][version] = []
+        hits[term_id][version].append((begin, end))
+
+
     def lookup(self, lst):
         """
             lst is a list of keys
         """
+        result = {}
         workdict = {}
 
         #for all elements of the list
         for li, le in enumerate(lst):
+            print "\twd at ", li, " out of ", len(lst), " is ", workdict
             newworkdict = {}
             #first: find all entries for this key
             entries = self.hits.getCompleteHit(le)
@@ -97,41 +112,72 @@
                 print "termid=", term_id
 
                 for term_desc in term_desc_list:
+                    version = term_desc[0]
+                    print "\tnow looking at version: " + str(version), term_desc[1]
                     #is this id already in progress?
-                    if term_id in workdict:
+                    cont_branch = (term_id in workdict)
+                    if cont_branch:
+                        cont_branch = version in workdict[term_id]
+                    if cont_branch:
+                        cont_branch = (term_desc[1] - 1) in \
+                            workdict[term_id][version]
+
+                    #if it is in progress, see if we can now upgrade or exit
+                    if cont_branch:
+                        workentry = workdict[term_id][version][term_desc[1] - 1]
                         #is this a continuation by index?
-                        if term_desc[0] == workdict[term_id][0] + 1:
+                        if term_desc[1] == \
+                            workentry[0] + 1:
                             #is this the last entry for the term?
-                            if term_desc[1]: 
+                            if term_desc[2]: 
                                 print "A hit!"
+                                self.recordHit(result, term_id, version,
+                                    workentry[1], li)
                             else:
-                                newworkdict[term_id] = (workdict[term_id][0] + 1, \
-                                    workdict[term_id][1])
+                                if term_id not in newworkdict:
+                                    newworkdict[term_id] = {}
+                                if version not in newworkdict[term_id]:
+                                    newworkdict[term_id][version] = {}
+                                newworkdict[term_id][version][term_desc[1]] = (
+                                    workentry[0] + 1, workentry[1])
                                 print "an upgrade!"
                         else:
-                            del workdict[term_id]
-                            print "DEL"
+#                            del workdict[term_id]
+                            print "This did not match"
 
-                    else: #a new sight!
-                        print "td0=", term_desc
-                        if term_desc[0] == 0: #a beginning
-                            print "okay"
-                            if term_desc[1]: #and end as well!
-                                print "hit2"
-                            else: #record new
-                                newworkdict[term_id] = (0, li)
-                                print "a newie"
+                    #anyway, we should check if it's a new entry
+                    print "td1=", term_desc[1]
+                    if term_desc[1] == 0: #a beginning
+                        if term_desc[2]: #and end as well!
+                            print "hit2"
+                            self.recordHit(result, term_id, version, li, li)
+                        else: #record new
+                            if term_id not in newworkdict:
+                                newworkdict[term_id] = {}
 
+                            if version not in newworkdict[term_id]:
+                                newworkdict[term_id][version] = {}
+
+                            newworkdict[term_id][version][0] = (0, li)
+                            print "a newie"
+
             workdict = newworkdict
+        return result
 
 
 if __name__ == "__main__":
     hm = HitMarker()
 
-    lst = [('cica', 15, 0, 0, 9), ('rugja', 15, 1, 0, 9), ('meg', 15, 2, 1, 9),\
-        ('cica', 100, 0, 0, 1), ('farka', 100, 1, 1, 1)]
+    lst = [('cica', 15, 4, 0, 0, 9), ('rugja', 15, 4, 1, 0, 9), ('meg', 15, 4, 2, 1, 9),\
+        ('cica', 100, 2, 0, 0, 1), ('cica', 100, 2, 1, 0, 1), \
+        ('farka', 100, 2, 2, 1, 1), ('cica', 100, 1, 0, 0, 1), \
+        ('farka', 100, 1, 1, 1, 1), ('cica', 405, 1, 0, 1, 11)]
     hm.fill(lst)
 
     print hm.hits
-    tox = ['cica', 'rugja', 'mar', 'meg']
-    hm.lookup(tox)
+#    tox = ['cica', 'rugja', 'mar', 'meg']
+#    tox = ['cica', 'rugja', 'meg', 'rugja']
+    tox = ['cica', 'cica', 'farka', 'cica', 'cica', 'farka']
+    print "lento",len(tox)
+    rr = hm.lookup(tox)
+    print "result=", rr




More information about the Hejes-devel mailing list