[Hejes-devel] [816] Added ohanalyze.py which generates the DB lookup table source SQL

hejes-devel at nytud.hu hejes-devel at nytud.hu
Mon Jun 10 20:45:25 CEST 2013


Revision: 816
Author:   hussami
Date:     2013-06-10 20:45:25 +0200 (Mon, 10 Jun 2013)
Log Message:
-----------
Added ohanalyze.py which generates the DB lookup table source SQL

Modified Paths:
--------------
    trunk/web2py/applications/helyesiras_webdev/modules/egybekulon2.py

Added Paths:
-----------
    trunk/web2py/applications/helyesiras_webdev/modules/ohanalyze.py

Modified: trunk/web2py/applications/helyesiras_webdev/modules/egybekulon2.py
===================================================================
--- trunk/web2py/applications/helyesiras_webdev/modules/egybekulon2.py	2013-06-07 09:56:27 UTC (rev 815)
+++ trunk/web2py/applications/helyesiras_webdev/modules/egybekulon2.py	2013-06-10 18:45:25 UTC (rev 816)
@@ -290,6 +290,7 @@
     norm_strings = self.get_normalized_forms()
     try:
       sql = 'SELECT actual FROM `ohdict` WHERE norm IN ({0});'.format(','.join([escape_string(x) for x in norm_strings]))
+      print sql #xxx
       result = self.dbconn.executesql(sql, as_dict=True) # e.g. [{actual='alma fa'}, {actual='almafa'}]
     except:
       return False # return safely from any DB error

Added: trunk/web2py/applications/helyesiras_webdev/modules/ohanalyze.py
===================================================================
--- trunk/web2py/applications/helyesiras_webdev/modules/ohanalyze.py	                        (rev 0)
+++ trunk/web2py/applications/helyesiras_webdev/modules/ohanalyze.py	2013-06-10 18:45:25 UTC (rev 816)
@@ -0,0 +1,141 @@
+#!/usr/bin/env python
+"""Generate the SQL that builds the `incidences` lookup table from `ohdict`.
+
+Reads (id, actual, norm) rows from `ohdict`, splits every `actual` value into
+tokens and prints to stdout the statements that recreate the `incidences`
+table and insert one (term, dict_id, idx) row per token, where idx is the
+offset at which the token was found in the row's `norm` value.
+"""
+
+import MySQLdb
+import re
+
+
+class MySQLHandler(object):
+  """Small wrapper around one MySQLdb connection and its latest cursor."""
+
+  def __init__(self, verbose = 0):
+    """Param: verbose -- when > 0, execute() prints each statement it runs."""
+    self.connection = None
+    self.verbose = verbose
+    self.cursor = None
+    self.clear()
+
+  def clear(self):
+    """Close the connection, if any, and forget the current cursor."""
+    self.disconnect()
+    self.cursor = None
+
+  def connect(self, server, user, pwd, db):
+    """Open the connection; arguments are passed on to MySQLdb.connect()."""
+    self.connection = MySQLdb.connect(server, user, pwd, db)
+
+  def disconnect(self):
+    """Close the connection if one is open; does nothing otherwise."""
+    if self.connection is not None:
+      self.connection.close()
+    self.connection = None
+
+  def execute(self, str):
+    """Run one SQL statement on a new cursor, which is kept for fetchall().
+       Param: the SQL text
+    """
+    if self.verbose > 0:
+      print(str)
+
+    self.cursor = self.connection.cursor()
+    self.cursor.execute(str)
+
+  def fetchall(self):
+    """Return all rows of the statement last passed to execute()."""
+    return self.cursor.fetchall()
+
+
+class OHAnalyze(object):
+  """Prints the SQL that fills the `incidences` table from `ohdict` rows."""
+
+  def __init__(self):
+    """Connect to the `dbdict` database on localhost."""
+    self.db = MySQLHandler()
+    # NOTE(review): the credentials are hard-coded; consider reading them
+    # from configuration that is kept out of version control.
+    self.db.connect("localhost", "dbdicter", "dbdicter123", "dbdict")
+
+  def bye(self):
+    """Close the database connection."""
+    if self.db is not None:
+      self.db.disconnect()
+
+  def tokenize(self, s):
+    """Split a string into tokens
+       Param: the target string
+       Returns: the result list
+    """
+    # Separators are space, ';' and '-'. Adjacent separators produce empty
+    # tokens, which are kept.
+    # TODO: numeric tokens are returned unchanged; whether to map them to a
+    # "_NUMBER_" placeholder is still open.
+    return re.split(' |;|-', s)
+
+  def indexate(self, lst, norm):
+    """Finds an ordered set of strings in a normalized string
+       Param: the list and the normalized string
+       Returns: a list of (string, offset in norm) tuples
+       Raises: ValueError if a string cannot be found from the search position
+       Could be done differently, but it tests DB consistency somewhat.
+    """
+    mindex = 0
+    result = []
+    for l in lst:
+      ind = norm.find(l, mindex)
+      # Test the search result, not the start position: mindex is never -1.
+      if ind == -1:
+        # String exceptions are rejected by Python 2.6+; raise a real one.
+        raise ValueError('Problem: ' + norm)
+      result.append((l, ind))
+      # The next string has to start after the beginning of this match.
+      mindex = ind + 1
+
+    return result
+
+  def _quote(self, s):
+    """Return s as a single-quoted SQL string literal.
+       Backslashes and single quotes are escaped so that a term containing
+       them cannot break the generated statement.
+    """
+    return "'" + s.replace("\\", "\\\\").replace("'", "''") + "'"
+
+  def try1(self):
+    """Print the SQL that recreates `incidences` and fills it from the
+       `ohdict` rows with id < 20.
+    """
+    print("use dbdict;")
+    # "if exists" lets the generated script run before the table exists.
+    print("drop table if exists incidences;")
+    print("create table incidences(term varchar(100), dict_id int, " +
+      "idx int);")
+    query = "select id, actual, norm from ohdict where id < 20"
+    self.db.execute(query)
+    results = self.db.fetchall()
+
+    for row in results:
+      news = self.tokenize(row[1])
+      idval = int(row[0])
+      il = self.indexate(news, row[2])
+      for ne, nind in il:
+        # The column is named idx in the create statement above; `index`
+        # is not a column of the table and is a reserved word in MySQL.
+        print("insert into incidences(term, dict_id, idx) values(" +
+          self._quote(ne) + ", " + str(idval) + ", " + str(nind) + ");")
+
+
+if __name__ == '__main__':
+  oh = OHAnalyze()
+  try:
+    oh.try1()
+  finally:
+    oh.bye()
+
+###
+# NOTE: Number conversions!!!
+###


Property changes on: trunk/web2py/applications/helyesiras_webdev/modules/ohanalyze.py
___________________________________________________________________
Added: svn:executable
   + *




More information about the Hejes-devel mailing list