[Hejes-devel] [1468] spell.py cleanup
hejes-devel at nytud.hu
hejes-devel at nytud.hu
Thu Sep 3 01:27:20 CEST 2015
Revision: 1468
Author: mihaltz
Date: 2015-09-03 01:27:20 +0200 (Thu, 03 Sep 2015)
Log Message:
-----------
spell.py cleanup
Modified Paths:
--------------
trunk/web2py/applications/helyesiras_webdev/modules/spell.py
Modified: trunk/web2py/applications/helyesiras_webdev/modules/spell.py
===================================================================
--- trunk/web2py/applications/helyesiras_webdev/modules/spell.py 2015-09-02 23:07:03 UTC (rev 1467)
+++ trunk/web2py/applications/helyesiras_webdev/modules/spell.py 2015-09-02 23:27:20 UTC (rev 1468)
@@ -80,47 +80,6 @@
)
-def union_humor_hunspell(unicode_toks):
- """This is an experimental modification of union_humor_hunspell().
- Calls only hunspell. Suggestions are filtered: skip if contains space and any token is 1 char long, keep only up to TOPNSUGG suggestions.
- ALSO uses exception dictionary SPELL_EXC_DICT: if token is found, use value from there,
- regardless of either engines' outputs.
- NOTE: this differs from the humor and hunspell functions, which expect arrays of normal strings, not Unicode strings!
- Returns an array: [(token_is_correct, [list_of_suggestions]), ...]
- token_is_correct: True if the nth token was recognized, False otherwise
- [list_of_suggestions]: [] if token_is_correct==True, otherwise may be a list of suggested correct forms (Unicode strings), or [] if no suggestions available.
- Returns [None, <error message>] if some critical error occured.
- Returns [] if input was emtpy array or contained no meaningful tokens (e.g. only punctuation).
- """
- ret = []
- # convert input to normal strings (utf8 encoding)
- inp = [x.encode('utf8') for x in unicode_toks]
- # call the engine
- hunspout = call_hunspell_cmdline_ntok(inp)
- # process
- ret = hunspout
- for i, x in enumerate(hunspout):
- # override with exception dictionary (if applicable)
- if len(hunspout) == len(inp): # safety check
- e = SPELL_EXC_DICT.get(inp[i])
- if e != None:
- ret[i] = e
- continue
- # filter suggestions & convert them to unicode
- if ret[i][0] == False:
- tmp = []
- for s in ret[i][1]:
- t = unicode(s, 'utf8', 'replace')
- sx = t.split(' ')
- if len(sx) > 1 and len([x for x in sx if len(x) == 1]) > 0: # skip suggestion if contains space-delimited tokens and any token is only 1 char long (e.g. "u száj" for "uszáj")
- continue
- tmp.append(t)
- if len(tmp) == TOPNSUGG: # no more than TOPNSUGG
- break
- ret[i] = (False, tmp)
- return ret
-
-
def check_with_hunspell(hsobj, excdict, utoks):
"""
:param hsobj: an initialized hunspell.HunSpell object (see PYHUNSPELL_AKH11|12)
@@ -313,7 +272,6 @@
akh12_correct: s.a.
akh12_suggestions: s.a.
tips: {field: value, ...} or {} (unicode strings)
- Returns [None, <error message>] if some critical error occured.
Returns [] if input was emtpy array or contained no meaningful tokens (e.g. only punctuation).
"""
ret = []
More information about the Hejes-devel mailing list