[Hejes-devel] [1468] spell.py cleanup

hejes-devel at nytud.hu hejes-devel at nytud.hu
Thu Sep 3 01:27:20 CEST 2015


Revision: 1468
Author:   mihaltz
Date:     2015-09-03 01:27:20 +0200 (Thu, 03 Sep 2015)
Log Message:
-----------
spell.py cleanup

Modified Paths:
--------------
    trunk/web2py/applications/helyesiras_webdev/modules/spell.py

Modified: trunk/web2py/applications/helyesiras_webdev/modules/spell.py
===================================================================
--- trunk/web2py/applications/helyesiras_webdev/modules/spell.py	2015-09-02 23:07:03 UTC (rev 1467)
+++ trunk/web2py/applications/helyesiras_webdev/modules/spell.py	2015-09-02 23:27:20 UTC (rev 1468)
@@ -80,47 +80,6 @@
                                    )
 
 
-def union_humor_hunspell(unicode_toks):
-  """This is an experimental modification of union_humor_hunspell().
-     Calls only hunspell. Suggestions are filtered: skip if contains space and any token is 1 char long, keep only up to TOPNSUGG suggestions.
-     ALSO uses exception dictionary SPELL_EXC_DICT: if token is found, use value from there,
-     regardless of either engines' outputs.
-     NOTE: this differs from the humor and hunspell functions, which expect arrays of normal strings, not Unicode strings!
-     Returns an array: [(token_is_correct, [list_of_suggestions]), ...]
-     token_is_correct: True if the nth token was recognized, False otherwise
-     [list_of_suggestions]: [] if token_is_correct==True, otherwise may be a list of suggested correct forms (Unicode strings), or [] if no suggestions available.
-     Returns [None, <error message>] if some critical error occured.
-     Returns [] if input was emtpy array or contained no meaningful tokens (e.g. only punctuation).
-  """
-  ret = []
-  # convert input to normal strings (utf8 encoding)
-  inp = [x.encode('utf8') for x in unicode_toks]
-  # call the engine
-  hunspout = call_hunspell_cmdline_ntok(inp)
-  # process
-  ret = hunspout
-  for i, x in enumerate(hunspout):
-    # override with exception dictionary (if applicable)
-    if len(hunspout) == len(inp): # safety check
-      e = SPELL_EXC_DICT.get(inp[i])
-      if e != None:
-        ret[i] = e
-        continue
-    # filter suggestions & convert them to unicode
-    if ret[i][0] == False:
-      tmp = []
-      for s in ret[i][1]:
-        t = unicode(s, 'utf8', 'replace')
-        sx = t.split(' ')
-        if len(sx) > 1 and len([x for x in sx if len(x) == 1]) > 0: # skip suggestion if contains space-delimited tokens and any token is only 1 char long (e.g. "u száj" for "uszáj")
-          continue
-        tmp.append(t)
-        if len(tmp) == TOPNSUGG: # no more than TOPNSUGG
-          break
-      ret[i] = (False, tmp)
-  return ret
-
-
 def check_with_hunspell(hsobj, excdict, utoks):
   """
   :param hsobj: an initialized hunspell.HunSpell object (see PYHUNSPELL_AKH11|12)
@@ -313,7 +272,6 @@
        akh12_correct: s.a.
        akh12_suggestions: s.a.
        tips:  {field: value, ...} or {} (unicode strings)
-     Returns [None, <error message>] if some critical error occured.
      Returns [] if input was emtpy array or contained no meaningful tokens (e.g. only punctuation).
   """
   ret = []




More information about the Hejes-devel mailing list