From ffc7b107e5bd5978295da61f64ea87b9ea3ad4d1 Mon Sep 17 00:00:00 2001
From: Matthew Fernandez <matthew.fernandez@nicta.com.au>
Date: Tue, 6 Oct 2015 17:15:48 +1100
Subject: [PATCH] misc: Add a Python module for dealing with Isabelle symbols.

---
 misc/pysymbols/README.md                | 27 +++++++
 misc/pysymbols/isasymbols/__init__.py   | 15 ++++
 misc/pysymbols/isasymbols/isasymbols.py | 99 +++++++++++++++++++++++++
 3 files changed, 141 insertions(+)
 create mode 100644 misc/pysymbols/README.md
 create mode 100644 misc/pysymbols/isasymbols/__init__.py
 create mode 100644 misc/pysymbols/isasymbols/isasymbols.py
diff --git a/misc/pysymbols/README.md b/misc/pysymbols/README.md
new file mode 100644
index 000000000..e55971113
--- /dev/null
+++ b/misc/pysymbols/README.md
@@ -0,0 +1,27 @@
+<!--
+ Copyright 2015, NICTA
+
+ This software may be distributed and modified according to the terms of
+ the BSD 2-Clause license. Note that NO WARRANTY is provided.
+ See "LICENSE_BSD2.txt" for details.
+
+ @TAG(NICTA_BSD)
+-->
+
+# Python Isabelle Symbols Module
+
+This directory contains Python functionality for translation between Isabelle's
+ascii representations (e.g. `\<forall>`) and its unicode representations (e.g.
+`∀`). You need to provide it with a copy of Isabelle's internal "symbols" file
+that it uses to form translation mappings.
+
+Example usage:
+
+```python
+import isasymbols
+
+t = isasymbols.make_translator('/path/to/symbols')
+print t.encode('\\<lbrakk>A; B\\<rbrakk> \\<Longrightarrow> A')
+```
+
+For anything more complicated, please consult the source.
diff --git a/misc/pysymbols/isasymbols/__init__.py b/misc/pysymbols/isasymbols/__init__.py
new file mode 100644
index 000000000..e068ced9c
--- /dev/null
+++ b/misc/pysymbols/isasymbols/__init__.py
@@ -0,0 +1,15 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+#
+# Copyright 2015, NICTA
+#
+# This software may be distributed and modified according to the terms of
+# the BSD 2-Clause license. Note that NO WARRANTY is provided.
+# See "LICENSE_BSD2.txt" for details.
+#
+# @TAG(NICTA_BSD)
+#
+
+from .isasymbols import IsaSymbolsException, Symbol, make_translator, \
+    Translator
diff --git a/misc/pysymbols/isasymbols/isasymbols.py b/misc/pysymbols/isasymbols/isasymbols.py
new file mode 100644
index 000000000..0f31ac74a
--- /dev/null
+++ b/misc/pysymbols/isasymbols/isasymbols.py
@@ -0,0 +1,99 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+#
+# Copyright 2015, NICTA
+#
+# This software may be distributed and modified according to the terms of
+# the BSD 2-Clause license. Note that NO WARRANTY is provided.
+# See "LICENSE_BSD2.txt" for details.
+#
+# @TAG(NICTA_BSD)
+#
+
+import codecs, collections, numbers, types
+
+class IsaSymbolsException(Exception):
+    pass
+
+class Symbol(object):
+    def __init__(self, ascii_text, code_point, group=None, font=None,
+            abbreviations=None):
+        assert isinstance(ascii_text, basestring)
+        assert isinstance(code_point, numbers.Number)
+        assert isinstance(group, (basestring, types.NoneType))
+        assert isinstance(font, (basestring, types.NoneType))
+        assert isinstance(abbreviations, (collections.Iterable,
+            types.NoneType))
+
+        self.ascii_text = ascii_text
+        self.code_point = code_point
+        self.group = group
+        self.font = font
+        self.abbreviations = abbreviations or []
+
+class Translator(object):
+    def __init__(self, symbols_text):
+        assert isinstance(symbols_text, basestring)
+
+        self.symbols = []
+
+        for number, line in enumerate(x.strip() for x in
+                symbols_text.split('\n')):
+
+            if line.startswith('#'):
+                # Comment
+                continue
+
+            if line == '':
+                # Blank line
+                continue
+
+            items = line.split()
+            if len(items) < 3 or len(items) % 2 == 0:
+                raise IsaSymbolsException('%d: unexpected line format' %
+                    number)
+
+            fields = {'ascii_text':items[0]}
+
+            for k, v in zip(*[iter(items[1:])] * 2):
+
+                if k == 'code:':
+                    try:
+                        code = int(v, 16)
+                    except ValueError:
+                        raise IsaSymbolsException('%d: invalid code field' %
+                            number)
+                    fields['code_point'] = code
+
+                elif k == 'group:':
+                    fields['group'] = v
+
+                elif k == 'font:':
+                    fields['font'] = v
+
+                elif k == 'abbrev:':
+                    if 'abbreviations' not in fields:
+                        fields['abbreviations'] = []
+                    fields['abbreviations'].append(v)
+
+                else:
+                    raise IsaSymbolsException('%d: unexpected field %s' %
+                        (number, k))
+
+            self.symbols.append(Symbol(**fields))
+
+    def encode(self, data):
+        for symbol in self.symbols:
+            data = data.replace(symbol.ascii_text, unichr(symbol.code_point))
+        return data
+
+    def decode(self, data):
+        for symbol in self.symbols:
+            data = data.replace(unichr(symbol.code_point), symbol.ascii_text)
+        return data
+
+def make_translator(symbols_file):
+    assert isinstance(symbols_file, basestring)
+    with codecs.open(symbols_file, 'r', 'utf-8') as f:
+        return Translator(f.read())