[Hejes-devel] [830] added option to use lex7 lexicon with humor in egybekulon2_input.StemmingAnalysis

Thu Jun 27 14:41:09 CEST 2013

Revision: 830
Author:   mihaltz
Date:     2013-06-27 14:41:09 +0200 (Thu, 27 Jun 2013)
Log Message:
-----------
added option to use lex7 lexicon with humor in egybekulon2_input.StemmingAnalysis

Modified Paths:
--------------
    trunk/web2py/applications/helyesiras_webdev/modules/egybekulon2_humor.py

Modified: trunk/web2py/applications/helyesiras_webdev/modules/egybekulon2_humor.py
===================================================================

--- trunk/web2py/applications/helyesiras_webdev/modules/egybekulon2_humor.py	2013-06-26 07:59:40 UTC (rev 829)
+++ trunk/web2py/applications/helyesiras_webdev/modules/egybekulon2_humor.py	2013-06-27 12:41:09 UTC (rev 830)
@@ -166,13 +166,13 @@
     """Return representation for reproduction (for @memcachememoize)"""
     return 'StemmingAnalysis("' + self.wordform + '")'
 
-  def __init__(self, query):
+  def __init__(self, query, use_lex7=False):
     """Initializes object. 
        First checks whether query is in the user dictionary, if yes, sets raw_data from there.
        Otherwise calls command-line humor stemmer+analyzer for query with 2 lexicons:       
-       1. with "lex7" for normal stemming+anal with lexical compound boundaries marked by "*"
+       1. If use_lex7==True: with "lex7" for normal stemming+anal with lexical compound boundaries marked by "*"
        2. with "lex6" for info on number of syllables and compound parts
-       Output is saved into raw_data.       
+       Output is saved into self.raw_data.       
     """
     self.wordform = query
     self.raw_lex6, self.raw_lex7 = [], [] # raw alternative analysis strings (with stemming info) from humor outputs
@@ -183,13 +183,12 @@
       self.raw_lex7 = [x[0]+'\t'+x[1] for x in USERDICT[query]]
       self.raw_lex6 = [x[0]+'\t'+x[2] for x in USERDICT[query]]
       self.from_userdict = True
-    else: # call analyzer twice with the 2 lexicons
-      ##### !!!!!!!!!!!!! HUMOR LEX6 CALL DISABLED for efficiency -- so far nothing used this feature
-      ##### TODO: more long-term solution
+    else: # call analyzer twice with the 2 lexicons (or just 1 if use_lex7 == False)
       self.raw_lex6 = self.call_humor_cmdline(query, CMD_LEX6)
-      #self.raw_lex7 = self.call_humor_cmdline(query, CMD_LEX7)
-      self.raw_lex7 = self.raw_lex6
-      ##### !!!!!!!!!!!!!
+      if use_lex7:
+        self.raw_lex7 = self.call_humor_cmdline(query, CMD_LEX7)
+      else:
+        self.raw_lex7 = self.raw_lex6
     # set is_unknown
     if not (   self.raw_lex7 == [] or self.raw_lex6 == [] 
             or self.raw_lex7[0] == None or self.raw_lex6[0] == None
@@ -335,7 +334,7 @@
     inp = sys.stdin.readline().rstrip()
     if not inp:
       break
-    h = StemmingAnalysis(inp)
+    h = StemmingAnalysis(inp, use_lex7=True)
     print('')
     print('lex7 = [{0}]'.format(', '.join(['"' + x + '"' for x in h.raw_lex7])))
     print('lex6 = [{0}]'.format(', '.join(['"' + x + '"' for x in h.raw_lex6])))