[Hejes-devel] [865] found a bug...

hejes-devel at nytud.hu hejes-devel at nytud.hu
Wed Jul 24 07:57:32 CEST 2013


Revision: 865
Author:   hussami
Date:     2013-07-24 07:57:31 +0200 (Wed, 24 Jul 2013)
Log Message:
-----------
found a bug...

Modified Paths:
--------------
    trunk/misc/osiris_xml/ohanalyze.py

Modified: trunk/misc/osiris_xml/ohanalyze.py
===================================================================
--- trunk/misc/osiris_xml/ohanalyze.py	2013-07-23 15:29:52 UTC (rev 864)
+++ trunk/misc/osiris_xml/ohanalyze.py	2013-07-24 05:57:31 UTC (rev 865)
@@ -135,6 +135,7 @@
       result.append(t)
       mindex = ind + 1;
 
+#    print "indexate result=", result
     return result
 
   def string2intlist(self, s):
@@ -282,20 +283,22 @@
         del lst[-1]
 #        print "\tback from notok", ind
 
+#    print "result=", result
 
-
     
 
   def humorSplit(self, s, splits): #filter bad splits
     dct = {}
     self.recurse(s, dct, 0)
-#    print dct
+#    print "dct=", dct
     lll = []
     resl = []
     self.analyzeRecursion(s, dct, 0, lll, resl)
 #    print resl
 #    print "----"
     return resl
+
+##cut out
     nomoresplits = {}
     maygoon = {}
     for split, v in splits.iteritems():
@@ -328,7 +331,7 @@
 
   def humorize(self, s):
     s = s.rstrip()
-    h = StemmingAnalysis(s)
+    h = StemmingAnalysis(s, True)
 #    print "'{0}'".format(s), ":", len(h.getAnas())
     result = []
     for x in h.getAnas():
@@ -412,9 +415,11 @@
     for spt in splits:
       previndex = 0
       prl = []
+#      print spt
       for se in spt:
         curindex = previndex + se
         prl.append(s[previndex:curindex])
+#        print "\t", previndex, curindex, s[previndex:curindex]
         previndex = curindex
       result.append('+'.join(prl))
     return result
@@ -442,7 +447,8 @@
 
   def try2(self, where):
 
-    query = "select id, actual, norm from ohdict where id > 1000 limit 200";
+#    query = "select id, actual, norm from ohdict where id > 1000 limit 200";
+    query = "select id, actual, norm from ohdict order by id asc";
     self.db.execute(query)
 
     results = self.db.fetchall()
@@ -456,7 +462,7 @@
       totsplits = []
       for ne, nind in il:
 #        termsplits = self.getAllSplits(ne, 0)
-#        print ne
+#        print "ne=", ne
 #        print termsplits
 #        spdict = {str(len(ne)) : 1}
 #        print "_____"
@@ -475,6 +481,9 @@
         print "\t" + qq
 
 
+
+dumpmsgs = False
+
 oh = OHAnalyze(False)
 #oh.makeIncidences(1)
 oh.fill()
@@ -485,17 +494,20 @@
 #ll = oh.getAllSplits("agyagos", 0)
 #print ll
 #oh.humorSplit("agyagos", ll)
-spl=oh.humorSplit("agyagtalaj", ll)
-oh.printSplits("agyagtalaj", spl)
+#spl=oh.humorSplit("agyagtalaj", ll)
+#oh.printSplits("agyagtalaj", spl)
 oh.bye()
 
+
+
 ###
 # NOTE: Number conversions!!!
 ###
-print "REMINDER: have a list of banned morphemes, i.e. letters etc., test for these in getAllSplits"
-print "2: SQL select distinct eleg gazul birja az ekezeteket. pl kerek, ke'rek, kere'k mind ua."
-print "3 (ext. 1): hogy milyen morfemakombokat engedunk meg, sokkal bonyolultabb, mint gondolnank. pl. agrarberuhazas? agraripari?"
-print "4: IK cimkehez kell ige is!!!. pl. fuggelek nem lehet fugg+el+ek!"
-print "bug: agrarerdekkepviselet vegen csomo +jel"
-print "lex7 humorize atirni"
-print "command-line megoldas: zsofi-megoldas standard outra vs. indextabla letrehozasa az eredeti feladatra"
+if dumpmsgs:
+    print "REMINDER: have a list of banned morphemes, i.e. letters etc., test for these in getAllSplits"
+    print "2: SQL select distinct eleg gazul birja az ekezeteket. pl kerek, ke'rek, kere'k mind ua."
+    print "3 (ext. 1): hogy milyen morfemakombokat engedunk meg, sokkal bonyolultabb, mint gondolnank. pl. agrarberuhazas? agraripari?"
+    print "4: IK cimkehez kell ige is!!!. pl. fuggelek nem lehet fugg+el+ek!"
+    print "bug: agrarerdekkepviselet vegen csomo +jel"
+    print "lex7 humorize atirni"
+    print "command-line megoldas: zsofi-megoldas standard outra vs. indextabla letrehozasa az eredeti feladatra"




More information about the Hejes-devel mailing list