[Hejes-devel] [842] added ek2 rule clusterer

Wed Jul 10 14:49:59 CEST 2013

Revision: 842
Author:   mihaltz
Date:     2013-07-10 14:49:59 +0200 (Wed, 10 Jul 2013)
Log Message:
-----------
added ek2 rule clusterer

Added Paths:
-----------
    trunk/misc/cluster_ek2_rules.py

Added: trunk/misc/cluster_ek2_rules.py
===================================================================

--- trunk/misc/cluster_ek2_rules.py	                        (rev 0)
+++ trunk/misc/cluster_ek2_rules.py	2013-07-10 12:49:59 UTC (rev 842)
@@ -0,0 +1,71 @@
+#!/usr/bin/env python
+# coding: utf8
+
+"""
+Print a clustering of egybekulon2 grammar rules
+
+Rules x and y are in the same cluster iff [n[0] for n in x.lhs] == [n[0] for n in y.lhs]
+
+"""
+
+import sys
+
+sys.path.append('../web2py/applications/helyesiras_webdev/modules')
+from egybekulon2_grammar import GrammarParser, GRM_FILE
+
+# Attribute names printRule looks up in each LHS node's value map; list order is the print order
+LHS_ATTR_NAMES = ['stem', 'wordform', 'ortho', 'match', 'sem', 'sep', 'ncomparts', 'nsylls', 'ntoks', 'join1', 'join2', 'join3', 'type', 'hasnesep', '63exception', 'prespart1', 'prespart2', '3idcons']
+# Attribute names printRule looks up in the RHS value map; list order is the print order
+RHS_ATTR_NAMES = ['sep', 'type', 'hasnesep', '63exception', 'prespart1', 'prespart2', '3idcons']
+
+
+def printRule(rule):
+  """Print a rule to stdout in the GRM_FILE's syntax"""
+  print('id: {0}'.format(rule.id))
+  #print('rule: {0} == {1}'.format(rule.lhs, rule.rhs))
+  lhs = []
+  for (node, valmap), opmap in zip(rule.lhs, rule.lhs_ops):
+    avm = []
+    for attr in [x for x in LHS_ATTR_NAMES if x in valmap.keys()]:
+      if len(valmap[attr]) > 1:
+        avm.append('{0}{1}[{2}]'.format(attr, opmap[attr], ', '.join(['"{0}"'.format(x) for x in valmap[attr]])))
+      else:
+        avm.append('{0}{1}"{2}"'.format(attr, opmap[attr], valmap[attr][0]))
+    lhs.append('{0}({1})'.format(node, ', '.join(avm)))
+  avm = []
+  for attr in [x for x in RHS_ATTR_NAMES if x in rule.rhs[1].keys()]:
+    if len(rule.rhs[1][attr]) > 1:
+      avm.append('{0}=[{1}]'.format(attr, ', '.join(['"{0}"'.format(x) for x in rule.rhs[1][attr]])))
+    else:
+      avm.append('{0}="{1}"'.format(attr, rule.rhs[1][attr][0]))
+  print('rule: {0} == {1}({2})'.format(' + '.join(lhs), rule.rhs[0], ', '.join(avm)))
+  print('comment: {0}'.format(rule.comment))
+  print('refs: {0}'.format(', '.join(rule.refs)))
+  print('kill: {0}'.format(', '.join(rule.kills)))
+  print('ex: {0}'.format(', '.join(["{0} = {1}".format(' + '.join(x[0]), x[1]) for x in rule.exs])))
+
+"""Parses and validates the default grammar file, prints stats to stdout."""
+parser = GrammarParser()
+sys.stderr.write("Parsing grammar file '{0}'...\n".format(GRM_FILE))
+rules = parser.parse()
+sys.stderr.write('{0} line(s) read\n'.format(parser.lcnt))
+sys.stderr.write('{0} rule(s) parsed\n'.format(len(parser.rules)))
+
+# Do the clustering
+clusters = {} # { 'LHS-Arg1+LHS-Arg2+...': [<Rule1>, <Rule2>...], ...}
+for rule in rules:
+  key = ' + '.join([n[0] for n in rule.lhs])
+  if key in clusters:
+    clusters[key].append(rule)
+  else:
+    clusters[key] = [rule]
+    
+# Dump the clusters
+for key in sorted(clusters.keys(), cmp=lambda x,y: cmp(len(clusters[x]), len(clusters[y])), reverse=True):
+  print('# Cluster: {0}\n# {1} rule(s)\n# =============\n'.format(key, len(clusters[key])))
+  for rule in clusters[key]:
+    #print(rule.id)
+    printRule(rule)
+    print('')
+  print('\n')
+


Property changes on: trunk/misc/cluster_ek2_rules.py
___________________________________________________________________
Added: svn:executable
   + *