[Hejes-devel] [865] found a bug...
hejes-devel at nytud.hu
hejes-devel at nytud.hu
Wed Jul 24 07:57:32 CEST 2013
Revision: 865
Author: hussami
Date: 2013-07-24 07:57:31 +0200 (Wed, 24 Jul 2013)
Log Message:
-----------
found a bug...
Modified Paths:
--------------
trunk/misc/osiris_xml/ohanalyze.py
Modified: trunk/misc/osiris_xml/ohanalyze.py
===================================================================
--- trunk/misc/osiris_xml/ohanalyze.py 2013-07-23 15:29:52 UTC (rev 864)
+++ trunk/misc/osiris_xml/ohanalyze.py 2013-07-24 05:57:31 UTC (rev 865)
@@ -135,6 +135,7 @@
result.append(t)
mindex = ind + 1;
+# print "indexate result=", result
return result
def string2intlist(self, s):
@@ -282,20 +283,22 @@
del lst[-1]
# print "\tback from notok", ind
+# print "result=", result
-
def humorSplit(self, s, splits): #filter bad splits
dct = {}
self.recurse(s, dct, 0)
-# print dct
+# print "dct=", dct
lll = []
resl = []
self.analyzeRecursion(s, dct, 0, lll, resl)
# print resl
# print "----"
return resl
+
+##cut out
nomoresplits = {}
maygoon = {}
for split, v in splits.iteritems():
@@ -328,7 +331,7 @@
def humorize(self, s):
s = s.rstrip()
- h = StemmingAnalysis(s)
+ h = StemmingAnalysis(s, True)
# print "'{0}'".format(s), ":", len(h.getAnas())
result = []
for x in h.getAnas():
@@ -412,9 +415,11 @@
for spt in splits:
previndex = 0
prl = []
+# print spt
for se in spt:
curindex = previndex + se
prl.append(s[previndex:curindex])
+# print "\t", previndex, curindex, s[previndex:curindex]
previndex = curindex
result.append('+'.join(prl))
return result
@@ -442,7 +447,8 @@
def try2(self, where):
- query = "select id, actual, norm from ohdict where id > 1000 limit 200";
+# query = "select id, actual, norm from ohdict where id > 1000 limit 200";
+ query = "select id, actual, norm from ohdict order by id asc";
self.db.execute(query)
results = self.db.fetchall()
@@ -456,7 +462,7 @@
totsplits = []
for ne, nind in il:
# termsplits = self.getAllSplits(ne, 0)
-# print ne
+# print "ne=", ne
# print termsplits
# spdict = {str(len(ne)) : 1}
# print "_____"
@@ -475,6 +481,9 @@
print "\t" + qq
+
+dumpmsgs = False
+
oh = OHAnalyze(False)
#oh.makeIncidences(1)
oh.fill()
@@ -485,17 +494,20 @@
#ll = oh.getAllSplits("agyagos", 0)
#print ll
#oh.humorSplit("agyagos", ll)
-spl=oh.humorSplit("agyagtalaj", ll)
-oh.printSplits("agyagtalaj", spl)
+#spl=oh.humorSplit("agyagtalaj", ll)
+#oh.printSplits("agyagtalaj", spl)
oh.bye()
+
+
###
# NOTE: Number conversions!!!
###
-print "REMINDER: have a list of banned morphemes, i.e. letters etc., test for these in getAllSplits"
-print "2: SQL select distinct eleg gazul birja az ekezeteket. pl kerek, ke'rek, kere'k mind ua."
-print "3 (ext. 1): hogy milyen morfemakombokat engedunk meg, sokkal bonyolultabb, mint gondolnank. pl. agrarberuhazas? agraripari?"
-print "4: IK cimkehez kell ige is!!!. pl. fuggelek nem lehet fugg+el+ek!"
-print "bug: agrarerdekkepviselet vegen csomo +jel"
-print "lex7 humorize atirni"
-print "command-line megoldas: zsofi-megoldas standard outra vs. indextabla letrehozasa az eredeti feladatra"
+if dumpmsgs:
+ print "REMINDER: have a list of banned morphemes, i.e. letters etc., test for these in getAllSplits"
+ print "2: SQL select distinct eleg gazul birja az ekezeteket. pl kerek, ke'rek, kere'k mind ua."
+ print "3 (ext. 1): hogy milyen morfemakombokat engedunk meg, sokkal bonyolultabb, mint gondolnank. pl. agrarberuhazas? agraripari?"
+ print "4: IK cimkehez kell ige is!!!. pl. fuggelek nem lehet fugg+el+ek!"
+ print "bug: agrarerdekkepviselet vegen csomo +jel"
+ print "lex7 humorize atirni"
+ print "command-line megoldas: zsofi-megoldas standard outra vs. indextabla letrehozasa az eredeti feladatra"
More information about the Hejes-devel mailing list