[Hejes-devel] [831] ohanalyze notes
hejes-devel at nytud.hu
hejes-devel at nytud.hu
Thu Jun 27 15:24:59 CEST 2013
Revision: 831
Author: hussami
Date: 2013-06-27 15:24:59 +0200 (Thu, 27 Jun 2013)
Log Message:
-----------
ohanalyze notes
Modified Paths:
--------------
trunk/misc/osiris_xml/ohanalyze.py
Modified: trunk/misc/osiris_xml/ohanalyze.py
===================================================================
--- trunk/misc/osiris_xml/ohanalyze.py 2013-06-27 12:41:09 UTC (rev 830)
+++ trunk/misc/osiris_xml/ohanalyze.py 2013-06-27 13:24:59 UTC (rev 831)
@@ -227,7 +227,7 @@
def splitString(self, s, splitlstr):
result = []
splitlist = self.string2intlist(splitlstr)
- print splitlstr, splitlist
+ # print splitlstr, splitlist
previndex = 0
for i in splitlist:
result.append(s[previndex:previndex + i])
@@ -235,52 +235,52 @@
return result
def recurse(self, s, dct, startind):
- print "Start with", s, startind
+# print "Start with", s, startind
hl = self.humorize(s)
- print "hl=", s, hl
+# print "hl=", s, hl
if startind not in dct:
- print "insert: ", s, startind
+# print "insert: ", s, startind
dct[startind] = {len(s) : 1}
# if len(hl) == 1:
# if len(hl[0]) == 1:
# return
for hle in hl:
- print hle
+# print hle
keyl = self.correctHumorSplit(hle)
# dct[s].append(keyl)
dct[startind][len(s)] = 1
pind = 0
for k in keyl:
temps = s[pind:pind + k]
- print "temps", temps, startind, pind
+# print "temps", temps, startind, pind
if (startind + pind) not in dct:
self.recurse(temps, dct, startind + pind)
elif len(temps) not in dct[startind + pind]:
self.recurse(temps, dct, startind + pind)
- else:
- print "already done it!"
+# else:
+# print "already done it!"
pind += k
- print "K", keyl
+# print "K", keyl
def analyzeRecursion(self, s, dct, ind, lst, result):
l = len(s)
- print "anal:", ind, l
+# print "anal:", ind, l
if ind not in dct:
return
- print dct[ind]
+# print dct[ind]
for k, v in dct[ind].iteritems():
- print "?: ", k
+# print "?: ", k
if (k + ind == l):
- print "OK!", l, ind, k
+# print "OK!", l, ind, k
lst.append(k)
result.append(copy.deepcopy(lst))
del lst[-1]
else:
- print "notok:", ind, k
+# print "notok:", ind, k
lst.append(k)
self.analyzeRecursion(s, dct, ind + k, lst, result)
del lst[-1]
- print "\tback from notok", ind
+# print "\tback from notok", ind
@@ -289,12 +289,12 @@
def humorSplit(self, s, splits): #filter bad splits
dct = {}
self.recurse(s, dct, 0)
- print dct
+# print dct
lll = []
resl = []
self.analyzeRecursion(s, dct, 0, lll, resl)
- print resl
- print "----"
+# print resl
+# print "----"
return resl
nomoresplits = {}
maygoon = {}
@@ -347,11 +347,11 @@
for i in range(0, len(tags)):
t = forms[i], tags[i], lens[i]
locresult.append(t)
- print "2:", locresult
+# print "2:", locresult
if locresult:
locresult = self.filterTags2(locresult, True)
- print "3:", locresult
+# print "3:", locresult
if locresult:
result.append(locresult)
@@ -359,7 +359,7 @@
if not result: #by default return the whole thing
t = s, 'FINISH', len(s)
result.append([t])
- print result
+# print result
return result
# self.checkTags(l)
@@ -459,14 +459,14 @@
# print ne
# print termsplits
# spdict = {str(len(ne)) : 1}
- print "_____"
+# print "_____"
spdict = {'6,8' : 1}
termsplits = self.humorSplit(ne, spdict)
# self.printSplits(ne, termsplits)
- print termsplits
+# print termsplits
totsplits.append(termsplits)
- print totsplits
+# print totsplits
crtl = self.cartese(totsplits)
# print crtl
fin = self.getFinalSplits(row[2], crtl)
@@ -496,3 +496,6 @@
print "2: SQL select distinct eleg gazul birja az ekezeteket. pl kerek, ke'rek, kere'k mind ua."
print "3 (ext. 1): hogy milyen morfemakombokat engedunk meg, sokkal bonyolultabb, mint gondolnank. pl. agrarberuhazas? agraripari?"
print "4: IK cimkehez kell ige is!!!. pl. fuggelek nem lehet fugg+el+ek!"
+print "bug: agrarerdekkepviselet vegen csomo +jel"
+print "lex7 humorize atirni"
+print "command-line megoldas: zsofi-megoldas standard outra vs. indextabla letrehozasa az eredeti feladatra"
More information about the Hejes-devel mailing list