229 lines
7.2 KiB
Python
229 lines
7.2 KiB
Python
#
|
|
# Copyright 2014, NICTA
|
|
#
|
|
# This software may be distributed and modified according to the terms of
|
|
# the BSD 2-Clause license. Note that NO WARRANTY is provided.
|
|
# See "LICENSE_BSD2.txt" for details.
|
|
#
|
|
# @TAG(NICTA_BSD)
|
|
#
|
|
|
|
from __future__ import print_function
|
|
|
|
|
|
class BracedString:
    """A string split into components based on delimiters (usually braces).

    When l occurs in the string, create a new component whose contents are
    the rest of the string until the matching r.

    When l = ( and r = ), this has the approximate behavior of splitting
    the string into the components of a Haskell function application,
    where each individual component, if not containing the delimiters, can
    be split on white space to determine the arguments of the function.

    This behaves like a str for the operations defined here (comparison,
    hashing, iteration, indexing, length, startswith/endswith), except for
    split, map, and discard_enclosing_braces.

    Invariant: a component either has no delimiters, or is surrounded by
    delimiters.

    Attributes:
        s: the underlying plain string.
        l, r: the opening and closing delimiter (single characters; the
            parser in _get_bits compares them against individual
            characters of s).
        bits: list of plain-string components whose concatenation is s.
    """

    def __init__(self, s, l, r, bits=None):
        """Wrap `s`, using `l` and `r` as opening/closing delimiters.

        `bits` may supply already-computed components; when it is None the
        components are computed from `s`.
        """
        if bits is None:
            bits = self._get_bits(s, l, r)
        self.bits = bits
        self.s = s
        self.l = l
        self.r = r

    def _get_bits(self, s, l, r):
        """Return the list of top-level components of `s`.

        A component is either a maximal run of text outside any delimiter
        pair, or one outermost delimited group including its delimiters.
        An empty `s` yields an empty list.
        """
        nesting_depth = 0
        bits = ['']
        for c in s:
            if c == l:
                # An opener at depth 0 starts a new component, unless the
                # current one is still empty.
                if nesting_depth == 0 and bits[-1]:
                    bits.append('')
                nesting_depth = nesting_depth + 1
            bits[-1] = bits[-1] + c
            if c == r:
                nesting_depth = nesting_depth - 1
                # The closer that returns us to depth 0 ends the component.
                if nesting_depth == 0 and bits[-1]:
                    bits.append('')
        # Drop the empty placeholder left after a trailing group (or for
        # an empty input).
        if not bits[-1]:
            bits.pop(-1)
        return bits

    def __str__(self):
        return self.s

    def __repr__(self):
        # Re-parse the string so that a component list which disagrees
        # with the parser's result is made visible.
        check = BracedString(self.s, self.l, self.r)
        if check.bits == self.bits:
            return 'B%s%s: %r' % (self.l, self.r, self.s)
        else:
            return 'Broken Braced: %r, %r, %r' % (self.s, self.bits,
                                                  check.bits)

    def __add__(self, other):
        """Concatenate with another BracedString or with a plain string.

        For two BracedStrings the component lists are joined directly; the
        touching components are fused into one when neither of them is a
        delimited group. For any other operand the concatenated string is
        parsed again from scratch.
        """
        if isinstance(other, BracedString):
            if not self.bits or not other.bits:
                # One side has no components (empty string), so there is
                # no boundary component to inspect or fuse.
                bits = self.bits + other.bits
            elif (self.bits[-1].startswith(self.l)
                    or other.bits[0].startswith(self.l)):
                bits = self.bits + other.bits
            else:
                bits = self.bits[:-1] + \
                    [self.bits[-1] + other.bits[0]] + \
                    other.bits[1:]
            return BracedString(self.s + other.s, self.l, self.r, bits)

        return BracedString(self.s + other, self.l, self.r)

    def __eq__(self, other):
        return other == self.s

    def __ne__(self, other):
        return other != self.s

    def __hash__(self):
        # Equality is defined by the underlying string, so hashing must
        # be too; without this, defining __eq__ leaves instances
        # unhashable on Python 3.
        return hash(self.s)

    def __iter__(self):
        return iter(self.s)

    def __getitem__(self, n):
        return self.s[n]

    def __getslice__(self, i, j):
        # Only invoked implicitly by Python 2. Plain slicing is used
        # because Python 3 strings have no __getslice__ to delegate to.
        return self.s[i:j]

    def __len__(self):
        return len(self.s)

    def split(self, str=None, num=-2, braces=False):
        """Split into multiple BracedStrings, using `str` as a delimiter, and
        into a maximum of `num` components.

        A negative `num` (the default) means no limit.

        If `braces` is true (defaults to false), braces will also count as a
        delimiter, and each braced component will become a single element of
        the output.

        Otherwise, each braced pair will not be split into a separate
        component, but splitting will ignore the contents inside the
        delimiter.

        Returns a list of BracedString objects.
        """
        if braces:
            unlimited = num < 0
            bits = []
            bbs = []
            for bit in self.bits:
                # Output slots still free; only meaningful with a limit.
                remaining = num + 1 - len(bits)
                if not unlimited and remaining == 0:
                    # Limit reached: glue everything else onto the last
                    # output element.
                    bits[-1] = bits[-1] + bit
                    bbs[-1].append(bit)
                elif bit.startswith(self.l):
                    bits.append(bit)
                    bbs.append([bit])
                else:
                    if unlimited:
                        n_bits = bit.split(str)
                    else:
                        # NOTE(review): `remaining` is passed as maxsplit,
                        # so this bit can yield one more piece than there
                        # are free slots. Left as-is because callers may
                        # depend on it.
                        n_bits = bit.split(str, remaining)
                    bits.extend(n_bits)
                    bbs.extend([[b] for b in n_bits])
        else:
            # `flat` is the original string, but with delimited substrings
            # replaced with just the delimiters.
            placeholder = self.l + self.r
            flat = ''
            internals = []
            for bit in self.bits:
                if bit.startswith(self.l):
                    flat = flat + placeholder
                    internals.append(bit)
                else:
                    flat = flat + bit
            # Reverse once so groups can be taken back in their original
            # order with a cheap pop() from the end.
            internals.reverse()

            # Split on the separator, secure in the knowledge that it
            # won't mess up things inside delimiters.
            bits = []
            bbs = []
            for chunk in flat.split(str, num):
                # Split on the placeholders inserted earlier; between each
                # pair of neighbouring pieces, put back the delimited
                # group that the placeholder stood for.
                pieces = chunk.split(placeholder)
                meshed = [pieces[0]]
                for piece in pieces[1:]:
                    meshed.append(internals.pop())
                    meshed.append(piece)
                # Remove empty strings.
                meshed = [m for m in meshed if m != '']
                bbs.append(meshed)
                bits.append(''.join(meshed))

        return [BracedString(bit, self.l, self.r, bbs[i])
                for i, bit in enumerate(bits)]

    def startswith(self, s):
        return self.s.startswith(s)

    def endswith(self, s):
        return self.s.endswith(s)

    def map(self, fn):
        """Apply a function to each delimited component of this string.

        The delimiters will not be passed to the function; they are put
        back around its result. Components outside delimiters are copied
        through unchanged. Returns a new BracedString.
        """
        new_bits = []

        for bit in self.bits:
            if bit.startswith(self.l):
                # Strip one delimiter character from each end.
                new_bits.append(self.l + fn(bit[1:-1]) + self.r)
            else:
                new_bits.append(bit)

        return BracedString(''.join(new_bits), self.l, self.r, new_bits)

    def discard_enclosing_braces(self):
        """If the string consists of one braced expression,
        discard the redundant enclosing braces. Otherwise
        return the string."""
        # Zero components (empty string) or several: nothing to strip.
        if len(self.bits) != 1:
            return self

        [bit] = self.bits

        if bit.startswith(self.l):
            return BracedString(bit[1:-1], self.l, self.r)
        else:
            return self
|
|
|
|
|
|
def clone(str, obj):
    """Re-wrap `str` as a BracedString using the delimiters of `obj`.

    When `obj` is not a BracedString, `str` is handed back unchanged.
    """
    if not isinstance(obj, BracedString):
        return str
    return BracedString(str.__str__(), obj.l, obj.r)
|
|
|
|
|
|
# Module-level alias: from here on the name `str` in this module refers to
# BracedString rather than the builtin.
# NOTE(review): presumably kept so importers can refer to the class as
# `<module>.str` -- confirm against callers before renaming or removing.
str = BracedString
|
|
|
|
if __name__ == '__main__':
    # Small manual demonstration of split() on a nested expression.
    sample = BracedString('a => b => c => (d => (e, f))', '(', ')')
    print(sample.split('=>'))
    print(sample.split(','))
    # Same separator, with an increasing cap on the number of splits.
    for limit in (1, 2, 3):
        print(limit, sample.split('=>', limit))
    print([piece.split() for piece in sample.split('=>')])
|