[Hejes-devel] [831] ohanalyze notes

hejes-devel at nytud.hu hejes-devel at nytud.hu
Thu Jun 27 15:24:59 CEST 2013


Revision: 831
Author:   hussami
Date:     2013-06-27 15:24:59 +0200 (Thu, 27 Jun 2013)
Log Message:
-----------
ohanalyze notes

Modified Paths:
--------------
    trunk/misc/osiris_xml/ohanalyze.py

Modified: trunk/misc/osiris_xml/ohanalyze.py
===================================================================
--- trunk/misc/osiris_xml/ohanalyze.py	2013-06-27 12:41:09 UTC (rev 830)
+++ trunk/misc/osiris_xml/ohanalyze.py	2013-06-27 13:24:59 UTC (rev 831)
@@ -227,7 +227,7 @@
   def splitString(self, s, splitlstr):
     result = []
     splitlist = self.string2intlist(splitlstr)
-    print splitlstr, splitlist
+ #   print splitlstr, splitlist
     previndex = 0
     for i in splitlist:
       result.append(s[previndex:previndex + i])
@@ -235,52 +235,52 @@
     return result
 
   def recurse(self, s, dct, startind):
-    print "Start with", s, startind
+#    print "Start with", s, startind
     hl = self.humorize(s)
-    print "hl=", s, hl
+#    print "hl=", s, hl
     if startind not in dct:
-      print "insert: ", s, startind
+#      print "insert: ", s, startind
       dct[startind] = {len(s) : 1}
 #    if len(hl) == 1:
 #      if len(hl[0]) == 1:
 #        return
     for hle in hl:
-      print hle
+#      print hle
       keyl = self.correctHumorSplit(hle)
 #      dct[s].append(keyl)
       dct[startind][len(s)] = 1
       pind = 0
       for k in keyl:
         temps = s[pind:pind + k]
-        print "temps", temps, startind, pind
+#        print "temps", temps, startind, pind
         if (startind + pind) not in dct:
           self.recurse(temps, dct, startind + pind)
         elif len(temps) not in dct[startind + pind]:
           self.recurse(temps, dct, startind + pind)
-        else:
-          print "already done it!"
+#        else:
+#          print "already done it!"
         pind += k
-      print "K", keyl
+#      print "K", keyl
 
   def analyzeRecursion(self, s, dct, ind, lst, result):
     l = len(s)
-    print "anal:", ind, l
+#    print "anal:", ind, l
     if ind not in dct:
       return
-    print dct[ind]
+#    print dct[ind]
     for k, v in dct[ind].iteritems():
-      print "?: ", k
+#      print "?: ", k
       if (k + ind == l):
-        print "OK!", l, ind, k
+#        print "OK!", l, ind, k
         lst.append(k)
         result.append(copy.deepcopy(lst))
         del lst[-1]
       else:
-        print "notok:", ind, k
+#        print "notok:", ind, k
         lst.append(k)
         self.analyzeRecursion(s, dct, ind + k, lst, result)
         del lst[-1]
-        print "\tback from notok", ind
+#        print "\tback from notok", ind
 
 
 
@@ -289,12 +289,12 @@
   def humorSplit(self, s, splits): #filter bad splits
     dct = {}
     self.recurse(s, dct, 0)
-    print dct
+#    print dct
     lll = []
     resl = []
     self.analyzeRecursion(s, dct, 0, lll, resl)
-    print resl
-    print "----"
+#    print resl
+#    print "----"
     return resl
     nomoresplits = {}
     maygoon = {}
@@ -347,11 +347,11 @@
       for i in range(0, len(tags)):
         t = forms[i], tags[i], lens[i]
         locresult.append(t)
-      print "2:", locresult
+#      print "2:", locresult
 
       if locresult:
         locresult = self.filterTags2(locresult, True)
-      print "3:", locresult
+#      print "3:", locresult
       if locresult:
         result.append(locresult)
 
@@ -359,7 +359,7 @@
     if not result: #by default return the whole thing
       t = s, 'FINISH', len(s)
       result.append([t])
-    print result
+#    print result
     return result
 #      self.checkTags(l)
 
@@ -459,14 +459,14 @@
 #        print ne
 #        print termsplits
 #        spdict = {str(len(ne)) : 1}
-        print "_____"
+#        print "_____"
         spdict = {'6,8' : 1}
         termsplits = self.humorSplit(ne, spdict)
 
 #        self.printSplits(ne, termsplits)
-        print termsplits
+#        print termsplits
         totsplits.append(termsplits)
-      print totsplits
+#      print totsplits
       crtl = self.cartese(totsplits)
 #      print crtl
       fin = self.getFinalSplits(row[2], crtl)
@@ -496,3 +496,6 @@
 print "2: SQL select distinct eleg gazul birja az ekezeteket. pl kerek, ke'rek, kere'k mind ua."
 print "3 (ext. 1): hogy milyen morfemakombokat engedunk meg, sokkal bonyolultabb, mint gondolnank. pl. agrarberuhazas? agraripari?"
 print "4: IK cimkehez kell ige is!!!. pl. fuggelek nem lehet fugg+el+ek!"
+print "bug: agrarerdekkepviselet vegen csomo +jel"
+print "lex7 humorize atirni"
+print "command-line megoldas: zsofi-megoldas standard outra vs. indextabla letrehozasa az eredeti feladatra"




More information about the Hejes-devel mailing list