[Hejes-devel] [1463] spell.py: juhhal etc.

hejes-devel at nytud.hu hejes-devel at nytud.hu
Wed Sep 2 20:52:19 CEST 2015


Revision: 1463
Author:   mihaltz
Date:     2015-09-02 20:52:19 +0200 (Wed, 02 Sep 2015)
Log Message:
-----------
spell.py: juhhal etc. works for akh12

Modified Paths:
--------------
    trunk/web2py/applications/helyesiras_webdev/modules/spell.py
    trunk/web2py/applications/helyesiras_webdev/modules/spell_helpers.py

Modified: trunk/web2py/applications/helyesiras_webdev/modules/spell.py
===================================================================
--- trunk/web2py/applications/helyesiras_webdev/modules/spell.py	2015-09-02 18:40:55 UTC (rev 1462)
+++ trunk/web2py/applications/helyesiras_webdev/modules/spell.py	2015-09-02 18:52:19 UTC (rev 1463)
@@ -23,7 +23,7 @@
 
 from MemcacheHelper import memcachememoize
 from egybekulon2_humor import StemmingAnalysis, HumorAna, Morph
-from spell_helpers import load_spell_exc_dict, init_pyhunspell
+from spell_helpers import load_spell_exc_dict, init_pyhunspell, find_first
 
 USRINP_REGEXP = re.compile( unicode( r'^[0-9a-zA-ZáéíóöőúüűÁÉÍÓÖŐÚÜŰ \-\.\n\t%]*$', 'utf8' ) )
 """Regexp to check if user input contains only allowed characters. Use it on a Unicode object."""
@@ -178,6 +178,37 @@
   return [(x[0], [unicode(y, 'utf8', 'replace') for y in x[1]]) for x in ret]
 
 
+def patch_akh12(inp, utok, hanas):
+  """Override the AkH12 hunspell decision for one token when the Humor analyses justify it.
+  Currently implemented: accept forms whose stem ends in 'h' and whose -val/-vá suffix starts with 'h'.
+  :param inp: 1-element list: [(correct, [suggestions])] (unicode)
+  :param utok: unicode string, the token in question (not read by the checks implemented so far)
+  :param hanas: list of HumorAna objects, analyses of utok (utf8 strings!)
+  :return: [(True, [])] if the override applies, otherwise inp itself, unchanged
+  """
+  # Stem ending in 'h' + -val/-vel or -vá/-vé: 'dühhel' and 'dühvel' are both correct.
+  # hunspell accepts 'dühvel' but rejects 'dühhel', so the latter is accepted here.
+  if len(hanas) > 0: # Humor returned at least one analysis
+    # only the first analysis whose pos is FN or MN is examined (i is None if there is none)
+    i, ana = find_first(hanas, lambda ana: ana.pos in ['FN', 'MN'])
+    if i is not None:
+      morphs = ana.morphs
+      # if the stem ends with 'h' and the last morph is -val or -vá whose 'v' assimilated into an 'h':
+      if len(morphs) >= 2 \
+        and morphs[-1].tag in ['INS', 'FAC'] \
+        and morphs[-1].lex.startswith('h') \
+        and morphs[-2].lex.endswith('h'):
+        # report the token as correct with no suggestions; nothing else to check
+        return [(True, [])]
+
+  # TODO (not implemented): N_sz + -szerű: only 'észszerű' is correct (*ésszerű)
+
+  # TODO (not implemented): N_tt + -val/vel, -vá/vé, -tól/től: only 'Bernadett-tel' is correct (*Bernadettel)
+
+  # default: no override applies, keep the decision that was passed in
+  return inp
+
+
 def get_productive_tips(tok, hanas):
   """Returns unicode string with tips on correct language usage.
      Returns None if no tips can be associated.
@@ -306,13 +337,14 @@
       ret.append(res)
       continue # no need for other knowledge sources for this token
 
+    # call Humor for stemming and morph. analysis
+    hanas = StemmingAnalysis(tok8).getAnas()
+
     # call hunspell with AkH11 and AkH12 setups (returned: 1-element lists)
     akh11 = check_with_hunspell(PYHUNSPELL_AKH11, SPELL_EXC_DICT_AKH11, [utok])
     akh12 = check_with_hunspell(PYHUNSPELL_AKH12, SPELL_EXC_DICT_AKH12, [utok])
+    akh12 = patch_akh12(akh12, utok, hanas)
 
-    # call Humor for stemming and morph. analysis
-    hanas = StemmingAnalysis(tok8).getAnas()
-
     # get dictionary-based tips
     dtips = get_dictionary_tips(tok8, hanas, db)
 
@@ -395,6 +427,13 @@
     pprint.pprint(h)
     print('')
 
+    hu = [StemmingAnalysis(x).getAnas() for x in tinp]
+    print('Humor:')
+    for anas in hu:
+      for ana in anas:
+        print(ana)
+    print('')
+
   return
 
 

Modified: trunk/web2py/applications/helyesiras_webdev/modules/spell_helpers.py
===================================================================
--- trunk/web2py/applications/helyesiras_webdev/modules/spell_helpers.py	2015-09-02 18:40:55 UTC (rev 1462)
+++ trunk/web2py/applications/helyesiras_webdev/modules/spell_helpers.py	2015-09-02 18:52:19 UTC (rev 1463)
@@ -51,3 +51,16 @@
       else:
         sys.stderr.write('Error in file {0}: "{1}"'.format(customdictfile, line))
   return hs
+
+
+def find_first(l, f):
+  """Return the index and value of the first element of l for which f gives a truthy result.
+  :param l: list (it is only iterated with enumerate(), front to back)
+  :param f: function, 1 argument; its result is tested for truthiness, not compared to True
+  :return: (i, x) where i is the smallest index with 0<=i<len(l) and f(l[i]) truthy, and x is l[i],
+  or (None, None) if no such i exists (this includes an empty l)
+  """
+  for i, x in enumerate(l):
+    if f(x):
+      return (i, x)
+  return (None, None)
\ No newline at end of file




More information about the Hejes-devel mailing list