From ffc7b107e5bd5978295da61f64ea87b9ea3ad4d1 Mon Sep 17 00:00:00 2001 From: Matthew Fernandez Date: Tue, 6 Oct 2015 17:15:48 +1100 Subject: [PATCH] misc: Add a Python module for dealing with Isabelle symbols. --- misc/pysymbols/README.md | 27 +++++++ misc/pysymbols/isasymbols/__init__.py | 15 ++++ misc/pysymbols/isasymbols/isasymbols.py | 99 +++++++++++++++++++++++++ 3 files changed, 141 insertions(+) create mode 100644 misc/pysymbols/README.md create mode 100644 misc/pysymbols/isasymbols/__init__.py create mode 100644 misc/pysymbols/isasymbols/isasymbols.py diff --git a/misc/pysymbols/README.md b/misc/pysymbols/README.md new file mode 100644 index 000000000..e55971113 --- /dev/null +++ b/misc/pysymbols/README.md @@ -0,0 +1,27 @@ + + +# Python Isabelle Symbols Module + +This directory contains Python functionality for translation between Isabelle's +ascii representations (e.g. `\`) and its unicode representations (e.g. +`∀`). You need to provide it with a copy of Isabelle's internal "symbols" file +that it uses to form translation mappings. + +Example usage: + +```python +import isasymbols + +t = isasymbols.make_translator('/path/to/symbols') +print t.encode('\\A; B\\ \\ A') +``` + +For anything more complicated, please consult the source. diff --git a/misc/pysymbols/isasymbols/__init__.py b/misc/pysymbols/isasymbols/__init__.py new file mode 100644 index 000000000..e068ced9c --- /dev/null +++ b/misc/pysymbols/isasymbols/__init__.py @@ -0,0 +1,15 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +# +# Copyright 2015, NICTA +# +# This software may be distributed and modified according to the terms of +# the BSD 2-Clause license. Note that NO WARRANTY is provided. +# See "LICENSE_BSD2.txt" for details. +# +# @TAG(NICTA_BSD) +# + +from .isasymbols import IsaSymbolsException, Symbol, make_translator, \ + Translator diff --git a/misc/pysymbols/isasymbols/isasymbols.py b/misc/pysymbols/isasymbols/isasymbols.py new file mode 100644 index 000000000..0f31ac74a --- /dev/null +++ b/misc/pysymbols/isasymbols/isasymbols.py @@ -0,0 +1,99 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +# +# Copyright 2015, NICTA +# +# This software may be distributed and modified according to the terms of +# the BSD 2-Clause license. Note that NO WARRANTY is provided. +# See "LICENSE_BSD2.txt" for details. +# +# @TAG(NICTA_BSD) +# + +import codecs, collections, numbers, types + +class IsaSymbolsException(Exception): + pass + +class Symbol(object): + def __init__(self, ascii_text, code_point, group=None, font=None, + abbreviations=None): + assert isinstance(ascii_text, basestring) + assert isinstance(code_point, numbers.Number) + assert isinstance(group, (basestring, types.NoneType)) + assert isinstance(font, (basestring, types.NoneType)) + assert isinstance(abbreviations, (collections.Iterable, + types.NoneType)) + + self.ascii_text = ascii_text + self.code_point = code_point + self.group = group + self.font = font + self.abbreviations = abbreviations or [] + +class Translator(object): + def __init__(self, symbols_text): + assert isinstance(symbols_text, basestring) + + self.symbols = [] + + for number, line in enumerate(x.strip() for x in + symbols_text.split('\n')): + + if line.startswith('#'): + # Comment + continue + + if line == '': + # Blank line + continue + + items = line.split() + if len(items) < 3 or len(items) % 2 == 0: + raise IsaSymbolsException('%d: unexpected line format' % + number) + + fields = {'ascii_text':items[0]} + + for k, v in zip(*[iter(items[1:])] * 2): + + if k == 'code:': + try: + code = int(v, 16) + except ValueError: + raise IsaSymbolsException('%d: invalid code field' % + number) + fields['code_point'] = code + + elif k == 'group:': + fields['group'] = v + + elif k == 'font:': + fields['font'] = v + + elif k == 'abbrev:': + if 'abbreviations' not in fields: + fields['abbreviations'] = [] + fields['abbreviations'].append(v) + + else: + raise IsaSymbolsException('%d: unexpected field %s' % + (number, k)) + + self.symbols.append(Symbol(**fields)) + + def encode(self, data): + for symbol in self.symbols: + data = data.replace(symbol.ascii_text, unichr(symbol.code_point)) + return data + + def decode(self, data): + for symbol in self.symbols: + data = data.replace(unichr(symbol.code_point), symbol.ascii_text) + return data + +def make_translator(symbols_file): + assert isinstance(symbols_file, basestring) + with codecs.open(symbols_file, 'r', 'utf-8') as f: + return Translator(f.read())