[Hejes-devel] [1463] spell.py: juhhal etc.
hejes-devel at nytud.hu
hejes-devel at nytud.hu
Wed Sep 2 20:52:19 CEST 2015
Revision: 1463
Author: mihaltz
Date: 2015-09-02 20:52:19 +0200 (Wed, 02 Sep 2015)
Log Message:
-----------
spell.py: juhhal etc. works for akh12
Modified Paths:
--------------
trunk/web2py/applications/helyesiras_webdev/modules/spell.py
trunk/web2py/applications/helyesiras_webdev/modules/spell_helpers.py
Modified: trunk/web2py/applications/helyesiras_webdev/modules/spell.py
===================================================================
--- trunk/web2py/applications/helyesiras_webdev/modules/spell.py 2015-09-02 18:40:55 UTC (rev 1462)
+++ trunk/web2py/applications/helyesiras_webdev/modules/spell.py 2015-09-02 18:52:19 UTC (rev 1463)
@@ -23,7 +23,7 @@
from MemcacheHelper import memcachememoize
from egybekulon2_humor import StemmingAnalysis, HumorAna, Morph
-from spell_helpers import load_spell_exc_dict, init_pyhunspell
+from spell_helpers import load_spell_exc_dict, init_pyhunspell, find_first
USRINP_REGEXP = re.compile( unicode( r'^[0-9a-zA-ZáéíóöőúüűÁÉÍÓÖŐÚÜŰ \-\.\n\t%]*$', 'utf8' ) )
"""Regexp to check if user input contains only allowed characters. Use it on a Unicode object."""
@@ -178,6 +178,37 @@
return [(x[0], [unicode(y, 'utf8', 'replace') for y in x[1]]) for x in ret]
+def patch_akh12(inp, utok, hanas):
+ """Override the AkH12 hunspell verdict for a token when Humor analyses justify it.
+ Returns inp unchanged unless the one special case handled below applies.
+ :param inp: 1-element list: [(correct, [suggestions])] (unicode)
+ :param utok: unicode string, the token in question (not inspected by the current rules)
+ :param hanas: list of HumorAna objects, analyses of utok (utf8 strings!)
+ :return: [(correct, [suggestions])] (unicode); [(True, [])] when the verdict is overridden
+ """
+ # N_h + -val/vel, -vá/vé: dühhel and dühvel are both correct
+ # dühvel is accepted by hunspell, dühhel is not -- so accept it here
+ if len(hanas) > 0: # Humor returned at least one analysis
+ # take the first analysis whose pos is FN or MN (presumably noun/adjective -- confirm)
+ i, ana = find_first(hanas, lambda ana: ana.pos in ['FN', 'MN'])
+ if i is not None:
+ morphs = ana.morphs
+ # if the last morph is tagged INS or FAC (-val or -vá) with its 'v' assimilated into an 'h', right after a morph ending in 'h':
+ if len(morphs) >= 2 \
+ and morphs[-1].tag in ['INS', 'FAC'] \
+ and morphs[-1].lex.startswith('h') \
+ and morphs[-2].lex.endswith('h'):
+ # then report the token as correct with no suggestions; nothing else to check
+ return [(True, [])]
+
+ # TODO: N_sz + -szerű: only észszerű is correct (*ésszerű) -- no code for this case yet
+
+ # TODO: N_tt + -val/vel, -vá/vé, -tól/től: only Bernadett-tel is correct (*Bernadettel) -- no code for this case yet
+
+ # default: keep the verdict that was passed in
+ return inp
+
+
def get_productive_tips(tok, hanas):
"""Returns unicode string with tips on correct language usage.
Returns None if no tips can be associated.
@@ -306,13 +337,14 @@
ret.append(res)
continue # no need for other knowledge sources for this token
+ # call Humor for stemming and morph. analysis
+ hanas = StemmingAnalysis(tok8).getAnas()
+
# call hunspell with AkH11 and AkH12 setups (returned: 1-element lists)
akh11 = check_with_hunspell(PYHUNSPELL_AKH11, SPELL_EXC_DICT_AKH11, [utok])
akh12 = check_with_hunspell(PYHUNSPELL_AKH12, SPELL_EXC_DICT_AKH12, [utok])
+ akh12 = patch_akh12(akh12, utok, hanas)
- # call Humor for stemming and morph. analysis
- hanas = StemmingAnalysis(tok8).getAnas()
-
# get dictionary-based tips
dtips = get_dictionary_tips(tok8, hanas, db)
@@ -395,6 +427,13 @@
pprint.pprint(h)
print('')
+ hu = [StemmingAnalysis(x).getAnas() for x in tinp]
+ print('Humor:')
+ for anas in hu:
+ for ana in anas:
+ print(ana)
+ print('')
+
return
Modified: trunk/web2py/applications/helyesiras_webdev/modules/spell_helpers.py
===================================================================
--- trunk/web2py/applications/helyesiras_webdev/modules/spell_helpers.py 2015-09-02 18:40:55 UTC (rev 1462)
+++ trunk/web2py/applications/helyesiras_webdev/modules/spell_helpers.py 2015-09-02 18:52:19 UTC (rev 1463)
@@ -51,3 +51,16 @@
else:
sys.stderr.write('Error in file {0}: "{1}"'.format(customdictfile, line))
return hs
+
+
+def find_first(l, f):
+ """Find the first element of l for which f returns a true value.
+ :param l: list
+ :param f: function of 1 argument; its result is tested for truth
+ :return: (i, x) where i is the smallest index with 0<=i<len(l) for which f(l[i]) is true and x is l[i],
+ or (None, None) if no such i exists (including when l is empty)
+ """
+ for i, x in enumerate(l):
+ if f(x):
+ return (i, x)
+ return (None, None)
\ No newline at end of file
More information about the Hejes-devel
mailing list