[Hejes-devel] [816] Added ohanalyze.py which generates the DB lookup table source SQL
hejes-devel at nytud.hu
hejes-devel at nytud.hu
Mon Jun 10 20:45:25 CEST 2013
Revision: 816
Author: hussami
Date: 2013-06-10 20:45:25 +0200 (Mon, 10 Jun 2013)
Log Message:
-----------
Added ohanalyze.py which generates the DB lookup table source SQL
Modified Paths:
--------------
trunk/web2py/applications/helyesiras_webdev/modules/egybekulon2.py
Added Paths:
-----------
trunk/web2py/applications/helyesiras_webdev/modules/ohanalyze.py
Modified: trunk/web2py/applications/helyesiras_webdev/modules/egybekulon2.py
===================================================================
--- trunk/web2py/applications/helyesiras_webdev/modules/egybekulon2.py 2013-06-07 09:56:27 UTC (rev 815)
+++ trunk/web2py/applications/helyesiras_webdev/modules/egybekulon2.py 2013-06-10 18:45:25 UTC (rev 816)
@@ -290,6 +290,7 @@
norm_strings = self.get_normalized_forms()
try:
sql = 'SELECT actual FROM `ohdict` WHERE norm IN ({0});'.format(','.join([escape_string(x) for x in norm_strings]))
+ print sql #xxx
result = self.dbconn.executesql(sql, as_dict=True) # e.g. [{actual='alma fa'}, {actual='almafa'}]
except:
return False # return safely from any DB error
Added: trunk/web2py/applications/helyesiras_webdev/modules/ohanalyze.py
===================================================================
--- trunk/web2py/applications/helyesiras_webdev/modules/ohanalyze.py (rev 0)
+++ trunk/web2py/applications/helyesiras_webdev/modules/ohanalyze.py 2013-06-10 18:45:25 UTC (rev 816)
@@ -0,0 +1,105 @@
+#!/usr/bin/env python
+
+import MySQLdb
+import re
+
class MySQLHandler(object):
    """Thin convenience wrapper around one MySQLdb connection and one cursor.

    Attributes:
        connection: the open MySQLdb connection, or None when disconnected.
        verbose: when greater than 0, every executed statement is printed.
        cursor: the cursor of the most recent execute() call, or None.
    """

    def __init__(self, verbose=0):
        """Create a disconnected handler.

        Param: verbose -- print each statement before executing it if > 0
        """
        self.connection = None
        self.verbose = verbose
        self.cursor = None
        self.clear()

    def clear(self):
        """Drop the connection (if any) and forget the current cursor."""
        self.disconnect()
        self.cursor = None

    def connect(self, server, user, pwd, db):
        """Open a connection with MySQLdb.connect(server, user, pwd, db)."""
        self.connection = MySQLdb.connect(server, user, pwd, db)

    def disconnect(self):
        """Close the cursor and the connection; a no-op when not connected."""
        if self.connection is None:
            return
        try:
            # A cursor is useless once its connection is gone, so release it
            # first instead of leaving a stale object behind.
            self._close_cursor()
        finally:
            self.connection.close()
            self.connection = None

    def execute(self, str):
        """Run one SQL statement on a fresh cursor.

        Param: str -- the SQL text (name kept for backward compatibility)
        Raises: AttributeError if connect() has not been called, plus
                whatever the underlying cursor raises for a bad statement.
        """
        if self.verbose > 0:
            print(str)

        # Release the cursor of the previous statement so repeated calls do
        # not accumulate open cursors on the connection.
        self._close_cursor()
        self.cursor = self.connection.cursor()
        self.cursor.execute(str)

    def fetchall(self):
        """Return all remaining rows of the last executed statement."""
        return self.cursor.fetchall()

    def _close_cursor(self):
        """Close and forget the current cursor, if there is one."""
        if self.cursor is not None:
            try:
                self.cursor.close()
            finally:
                self.cursor = None
+
+
class OHAnalyze(object):
    """Generate the SQL source of the `incidences` lookup table.

    Rows are read from the `ohdict` table (columns id, actual, norm); each
    `actual` value is split into tokens and every token is located inside the
    row's `norm` value.  The resulting SQL script is printed to stdout.
    """

    def __init__(self, server="localhost", user="dbdicter",
                 pwd="dbdicter123", db="dbdict"):
        """Connect to the dictionary database.

        Param: server, user, pwd, db -- connection parameters; the defaults
               are the values that used to be hard-coded here.
        NOTE(review): credentials in source control -- consider moving them
        to configuration.
        """
        self.db = MySQLHandler()
        self.db.connect(server, user, pwd, db)

    def bye(self):
        """Close the database connection, if there is one."""
        if self.db is not None:
            self.db.disconnect()

    def tokenize(self, s):
        """Split a string into tokens
        Param: the target string
        Returns: the result list

        The separators are space, ';' and '-'.  Adjacent separators yield
        empty-string tokens, exactly as re.split reports them.
        """
        # TODO: number conversion (replace all-digit tokens by "_NUMBER_")
        # is planned but not enabled yet.
        return re.split(' |;|-', s)

    def indexate(self, lst, norm):
        """Finds an ordered set of strings in a normalized string
        Param: the list and the normalized string
        Returns: a list of (token, index) tuples, in the order of the list
        Raises: ValueError if a token does not occur in `norm` after the
                previous token
        Could be done differently, but it tests DB consistency somewhat.
        """
        search_from = 0
        result = []
        for token in lst:
            ind = norm.find(token, search_from)
            # str.find reports a miss as -1; that is the value to test, not
            # the search offset.
            if ind == -1:
                raise ValueError('Problem: ' + norm)
            # TODO: convert numbers (see tokenize).
            result.append((token, ind))
            # Continue after the END of this match so the next token cannot
            # be found inside the current one (e.g. 'b' inside 'ab').
            search_from = ind + len(token)

        return result

    def try1(self):
        """Print an SQL script that recreates and fills `incidences`.

        Only rows with id < 20 are processed (presumably a development-time
        limit -- confirm before using on the full table).
        """
        print("use dbdict;")
        print("drop table incidences;")
        print("create table incidences(term varchar(100), dict_id int, "
              "idx int);")
        query = "select id, actual, norm from ohdict where id < 20"
        self.db.execute(query)
        results = self.db.fetchall()

        for row in results:
            dict_id = int(row[0])
            tokens = self.tokenize(row[1])
            for term, idx in self.indexate(tokens, row[2]):
                print(self._insert_statement(term, dict_id, idx))

    @staticmethod
    def _insert_statement(term, dict_id, idx):
        """Build one INSERT for `incidences` with the term safely quoted."""
        # Backslash and single quote would otherwise terminate or corrupt
        # the SQL string literal.
        quoted = term.replace("\\", "\\\\").replace("'", "''")
        # The column is named `idx` in the CREATE TABLE above; `index` does
        # not exist there and is a reserved word in MySQL.
        return ("insert into incidences(term, dict_id, idx) "
                "values('%s', %d, %d);" % (quoted, dict_id, idx))
+
+
# Script driver: print the generated SQL to stdout.
oh = OHAnalyze()
try:
    oh.try1()
finally:
    # Release the database connection even if try1() fails part-way.
    oh.bye()
+
+###
+# NOTE: Number conversions!!!
+###
Property changes on: trunk/web2py/applications/helyesiras_webdev/modules/ohanalyze.py
___________________________________________________________________
Added: svn:executable
+ *
More information about the Hejes-devel mailing list