[Hejes-devel] [842] added ek2 rule clusterer
hejes-devel at nytud.hu
hejes-devel at nytud.hu
Wed Jul 10 14:49:59 CEST 2013
Revision: 842
Author: mihaltz
Date: 2013-07-10 14:49:59 +0200 (Wed, 10 Jul 2013)
Log Message:
-----------
added ek2 rule clusterer
Added Paths:
-----------
trunk/misc/cluster_ek2_rules.py
Added: trunk/misc/cluster_ek2_rules.py
===================================================================
--- trunk/misc/cluster_ek2_rules.py (rev 0)
+++ trunk/misc/cluster_ek2_rules.py 2013-07-10 12:49:59 UTC (rev 842)
@@ -0,0 +1,71 @@
+#!/usr/bin/env python
+# coding: utf8
+
+"""
+Print a clustering of egybekulon2 grammar rules
+
+Rules x and y are in the same cluster iff [n[0] for n in x.lhs] == [n[0] for n in y.lhs]
+
+"""
+
+import sys
+
+sys.path.append('../web2py/applications/helyesiras_webdev/modules')
+from egybekulon2_grammar import GrammarParser, GRM_FILE
+
+
+LHS_ATTR_NAMES = ['stem', 'wordform', 'ortho', 'match', 'sem', 'sep', 'ncomparts', 'nsylls', 'ntoks', 'join1', 'join2', 'join3', 'type', 'hasnesep', '63exception', 'prespart1', 'prespart2', '3idcons']  # LHS attributes printRule emits, in this order
+
+RHS_ATTR_NAMES = ['sep', 'type', 'hasnesep', '63exception', 'prespart1', 'prespart2', '3idcons']  # RHS attributes printRule emits, in this order
+
+
+def printRule(rule):
+ """Print a rule to stdout in the GRM_FILE's syntax"""
+ print('id: {0}'.format(rule.id))
+ #print('rule: {0} == {1}'.format(rule.lhs, rule.rhs))
+ lhs = []
+ for (node, valmap), opmap in zip(rule.lhs, rule.lhs_ops):
+ avm = []
+ for attr in [x for x in LHS_ATTR_NAMES if x in valmap.keys()]:
+ if len(valmap[attr]) > 1:
+ avm.append('{0}{1}[{2}]'.format(attr, opmap[attr], ', '.join(['"{0}"'.format(x) for x in valmap[attr]])))
+ else:
+ avm.append('{0}{1}"{2}"'.format(attr, opmap[attr], valmap[attr][0]))
+ lhs.append('{0}({1})'.format(node, ', '.join(avm)))
+ avm = []
+ for attr in [x for x in RHS_ATTR_NAMES if x in rule.rhs[1].keys()]:
+ if len(rule.rhs[1][attr]) > 1:
+ avm.append('{0}=[{1}]'.format(attr, ', '.join(['"{0}"'.format(x) for x in rule.rhs[1][attr]])))
+ else:
+ avm.append('{0}="{1}"'.format(attr, rule.rhs[1][attr][0]))
+ print('rule: {0} == {1}({2})'.format(' + '.join(lhs), rule.rhs[0], ', '.join(avm)))
+ print('comment: {0}'.format(rule.comment))
+ print('refs: {0}'.format(', '.join(rule.refs)))
+ print('kill: {0}'.format(', '.join(rule.kills)))
+ print('ex: {0}'.format(', '.join(["{0} = {1}".format(' + '.join(x[0]), x[1]) for x in rule.exs])))
+
+"""Parses and validates the default grammar file, prints stats to stdout."""
+parser = GrammarParser()
+sys.stderr.write("Parsing grammar file '{0}'...\n".format(GRM_FILE))
+rules = parser.parse()
+sys.stderr.write('{0} line(s) read\n'.format(parser.lcnt))
+sys.stderr.write('{0} rule(s) parsed\n'.format(len(parser.rules)))
+
+# Do the clustering
+clusters = {} # { 'LHS-Arg1+LHS-Arg2+...': [<Rule1>, <Rule2>...], ...}
+for rule in rules:
+ key = ' + '.join([n[0] for n in rule.lhs])
+ if key in clusters:
+ clusters[key].append(rule)
+ else:
+ clusters[key] = [rule]
+
+# Dump the clusters
+for key in sorted(clusters.keys(), cmp=lambda x,y: cmp(len(clusters[x]), len(clusters[y])), reverse=True):
+ print('# Cluster: {0}\n# {1} rule(s)\n# =============\n'.format(key, len(clusters[key])))
+ for rule in clusters[key]:
+ #print(rule.id)
+ printRule(rule)
+ print('')
+ print('\n')
+
Property changes on: trunk/misc/cluster_ek2_rules.py
___________________________________________________________________
Added: svn:executable
+ *
More information about the Hejes-devel
mailing list