[Hejes-devel] [830] added option to use lex7 lexicon with humor in egybekulon2_input.StemmingAnalysis
hejes-devel at nytud.hu
hejes-devel at nytud.hu
Thu Jun 27 14:41:09 CEST 2013
Revision: 830
Author: mihaltz
Date: 2013-06-27 14:41:09 +0200 (Thu, 27 Jun 2013)
Log Message:
-----------
added option to use lex7 lexicon with humor in egybekulon2_input.StemmingAnalysis
Modified Paths:
--------------
trunk/web2py/applications/helyesiras_webdev/modules/egybekulon2_humor.py
Modified: trunk/web2py/applications/helyesiras_webdev/modules/egybekulon2_humor.py
===================================================================
--- trunk/web2py/applications/helyesiras_webdev/modules/egybekulon2_humor.py 2013-06-26 07:59:40 UTC (rev 829)
+++ trunk/web2py/applications/helyesiras_webdev/modules/egybekulon2_humor.py 2013-06-27 12:41:09 UTC (rev 830)
@@ -166,13 +166,13 @@
"""Return representation for reproduction (for @memcachememoize)"""
return 'StemmingAnalysis("' + self.wordform + '")'
- def __init__(self, query):
+ def __init__(self, query, use_lex7=False):
"""Initializes object.
First checks whether query is in the user dictionary, if yes, sets raw_data from there.
Otherwise calls command-line humor stemmer+analyzer for query with 2 lexicons:
- 1. with "lex7" for normal stemming+anal with lexical compound boundaries marked by "*"
+ 1. If use_lex7==True: with "lex7" for normal stemming+anal with lexical compound boundaries marked by "*"
2. with "lex6" for info on number of syllables and compound parts
- Output is saved into raw_data.
+ Output is saved into self.raw_data.
"""
self.wordform = query
self.raw_lex6, self.raw_lex7 = [], [] # raw alternative analysis strings (with stemming info) from humor outputs
@@ -183,13 +183,12 @@
self.raw_lex7 = [x[0]+'\t'+x[1] for x in USERDICT[query]]
self.raw_lex6 = [x[0]+'\t'+x[2] for x in USERDICT[query]]
self.from_userdict = True
- else: # call analyzer twice with the 2 lexicons
- ##### !!!!!!!!!!!!! HUMOR LEX6 CALL DISABLED for efficiency -- so far nothing used this feature
- ##### TODO: more long-term solution
+ else: # call analyzer twice with the 2 lexicons (or just 1 if use_lex7 == False)
self.raw_lex6 = self.call_humor_cmdline(query, CMD_LEX6)
- #self.raw_lex7 = self.call_humor_cmdline(query, CMD_LEX7)
- self.raw_lex7 = self.raw_lex6
- ##### !!!!!!!!!!!!!
+ if use_lex7:
+ self.raw_lex7 = self.call_humor_cmdline(query, CMD_LEX7)
+ else:
+ self.raw_lex7 = self.raw_lex6
# set is_unknown
if not ( self.raw_lex7 == [] or self.raw_lex6 == []
or self.raw_lex7[0] == None or self.raw_lex6[0] == None
@@ -335,7 +334,7 @@
inp = sys.stdin.readline().rstrip()
if not inp:
break
- h = StemmingAnalysis(inp)
+ h = StemmingAnalysis(inp, use_lex7=True)
print('')
print('lex7 = [{0}]'.format(', '.join(['"' + x + '"' for x in h.raw_lex7])))
print('lex6 = [{0}]'.format(', '.join(['"' + x + '"' for x in h.raw_lex6])))
More information about the Hejes-devel
mailing list