Isabelle_DOF/document-generator/converter/src/main/scala/com/logicalhacking/dof/converter/DofConverter.scala

311 lines
16 KiB
Scala

/**
* Copyright (c) 2018 The University of Sheffield. All rights reserved.
* 2018 The University of Paris-Sud. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* SPDX-License-Identifier: BSD-2-Clause
*/
package com.logicalhacking.dof.converter
import java.io.{ BufferedWriter, File, FileWriter }
import IoUtils._
import scala.util.matching.Regex
import scala.annotation.tailrec
object DofConverter {
val version = "0.0.3"
val sep=RAWTEXT("%\n")
def deMarkUpArgList(tokens: List[LaTeXToken]): List[LaTeXToken] = {
@tailrec
def deMarkUpArgListRec(acc:List[LaTeXToken], tokens: List[LaTeXToken]): List[LaTeXToken] = {
val (t,tail) = tokens match {
case CURLYOPEN :: COMMAND("""\isacharprime""") :: CURLYCLOSE :: CURLYOPEN :: COMMAND("""\isacharprime""") :: CURLYCLOSE :: tail
=> (RAWTEXT(""),tail)
case CURLYOPEN :: COMMAND("""\isachardoublequoteopen""") :: CURLYCLOSE :: tail => (RAWTEXT("""{"""),tail)
case CURLYOPEN :: COMMAND("""\isachardoublequoteclose""") :: CURLYCLOSE :: tail => (RAWTEXT("""}"""), tail)
case CURLYOPEN :: COMMAND("""\isacharunderscore""") :: CURLYCLOSE :: tail => (RAWTEXT("""_"""),tail)
case t :: tail => (t,tail)
case Nil => (RAWTEXT(""),Nil)
}
if (tokens == Nil) acc
else deMarkUpArgListRec(acc++List(t), tail)
}
deMarkUpArgListRec(Nil, tokens)
}
def deMarkUp(tokens: List[LaTeXToken]): List[LaTeXToken] = {
@tailrec
def deMarkupRec(out:List[LaTeXToken], tokens: List[LaTeXToken]): List[LaTeXToken] = {
val (t,tail) = tokens match {
case CURLYOPEN :: COMMAND("""\isacharcolon""") :: CURLYCLOSE :: tail => (RAWTEXT(""":"""),tail)
case CURLYOPEN :: COMMAND("""\isacharunderscore""") :: CURLYCLOSE :: tail => (RAWTEXT("""\_"""),tail)
case CURLYOPEN :: COMMAND("""\isadigit""") :: CURLYOPEN::n::CURLYCLOSE::CURLYCLOSE :: tail => (n,tail)
case CURLYOPEN :: COMMAND("""\isacharcomma""") :: CURLYCLOSE :: tail => (RAWTEXT(""","""),tail)
case COMMAND("""\isanewline""") :: tail => (RAWTEXT(""),tail)
case CURLYOPEN :: COMMAND("""\isachardot""") :: CURLYCLOSE :: tail => (RAWTEXT("""."""),tail)
case CURLYOPEN :: COMMAND("""\isacharsemicolon""") :: CURLYCLOSE :: tail => (RAWTEXT(""";"""),tail)
case CURLYOPEN :: COMMAND("""\isacharbackslash""") :: CURLYCLOSE :: tail => (RAWTEXT("""\"""),tail)
case CURLYOPEN :: COMMAND("""\isacharslash""") :: CURLYCLOSE :: tail => (RAWTEXT("""/"""),tail)
case CURLYOPEN :: COMMAND("""\isacharbraceleft""") :: CURLYCLOSE :: tail => (RAWTEXT("""{"""),tail)
case CURLYOPEN :: COMMAND("""\isacharbraceright""") :: CURLYCLOSE :: tail => (RAWTEXT("""}"""),tail)
case CURLYOPEN :: COMMAND("""\isacharparenleft""") :: CURLYCLOSE :: tail => (RAWTEXT("""("""),tail)
case CURLYOPEN :: COMMAND("""\isacharparenright""") :: CURLYCLOSE :: tail => (RAWTEXT(""")"""),tail)
case CURLYOPEN :: COMMAND("""\isacharequal""") :: CURLYCLOSE :: tail => (RAWTEXT("""="""),tail)
case CURLYOPEN :: COMMAND("""\isacharminus""") :: CURLYCLOSE :: tail => (RAWTEXT("""-"""),tail)
case CURLYOPEN :: COMMAND("""\isacharplus""") :: CURLYCLOSE :: tail => (RAWTEXT("""+"""),tail)
case CURLYOPEN :: COMMAND("""\isacharprime""") :: CURLYCLOSE :: tail => (RAWTEXT("""'"""),tail)
case VSPACE :: tail => (RAWTEXT(""" """),tail)
case t :: tail => (t,tail)
case Nil => (RAWTEXT(""" """),Nil)
}
if (tokens == Nil) out
else deMarkupRec(out++List(t), tail)
}
deMarkupRec(Nil, tokens)
}
def convertIsaDofCommand(cmd: String, tokens: List[LaTeXToken]): List[LaTeXToken] = {
@tailrec
def convertType(head: List[LaTeXToken], tail: List[LaTeXToken]): Tuple2[String,List[LaTeXToken]] = {
@tailrec
def split(head:List[LaTeXToken], tokens: List[LaTeXToken]):Tuple2[List[LaTeXToken], List[LaTeXToken]] = {
tokens match {
case CURLYOPEN::COMMAND("""\isacharcomma""")::CURLYCLOSE::tail => (head,tokens)
case CURLYCLOSE::COMMAND("""\isacharcomma""")::CURLYOPEN::tail => (head++(CURLYCLOSE::COMMAND("""\isacharcomma""")::CURLYOPEN::List()),tail)
case CURLYCLOSE::COMMAND("""\isacharbrackleft""")::CURLYOPEN::tail => (head++(CURLYCLOSE::COMMAND("""\isacharbrackleft""")::CURLYOPEN::List()),tail)
case BRACKETOPEN::tail => (head,BRACKETOPEN::tail)
case CURLYOPEN::COMMAND("""\isacharbrackright""")::CURLYCLOSE::tail => (head,tokens)
case t::tail => split(head++List(t), tail)
case t => (head,t)
}
}
tail match {
case CURLYOPEN::COMMAND("""\isacharcolon""")::CURLYCLOSE :: CURLYOPEN::COMMAND("""\isacharcolon""")::CURLYCLOSE :: tail => {
val (label, shead)= split(List(), head.reverse)
val (typ, stail) = split(List(), delSpace(tail))
val typstring = LaTeXLexer.toString(deMarkUpArgList(typ))
(typstring,(shead.reverse)++List(RAWTEXT("""label={"""))++(label.reverse)++List(RAWTEXT("""}, type={"""))++typ++List(RAWTEXT("""}"""))++stail)
}
case CURLYOPEN::COMMAND("""\isacharbrackright""")::CURLYCLOSE :: tail => {
val (label, shead)= split(List(), head.reverse)
("",(shead.reverse)++List(RAWTEXT("""label={"""))++(label.reverse)++List(RAWTEXT("""}"""))++List(RAWTEXT("""]"""))++delSpace(tail))
}
case t::tail => convertType(head++List(t), tail)
case Nil => {
val (label, shead)= split(List(), head.reverse)
("",(shead.reverse)++List(RAWTEXT("""label={"""))++(label.reverse)++List(RAWTEXT("""}"""))++List(RAWTEXT(""","""))++delSpace(tail))
}
}
}
@tailrec
def delSpace(tokens: List[LaTeXToken]): List[LaTeXToken] = {
tokens match {
case VSPACE :: tail => delSpace(tail)
case COMMAND("""\isanewline""")::tail => delSpace(tail)
case COMMAND("""\newline""")::tail => delSpace(tail)
case RAWTEXT(""" """)::tail => delSpace(tail)
case RAWTEXT("")::tail => delSpace(tail)
case RAWTEXT("\n")::tail => delSpace(tail)
case RAWTEXT("\t")::tail => delSpace(tail)
case VBACKSLASH::tail => delSpace(tail)
case tokens => tokens
}
}
def backSpace(tokens: List[LaTeXToken]): List[LaTeXToken] = (delSpace(tokens.reverse)).reverse
def trim(tokens: List[LaTeXToken]): List[LaTeXToken] = delSpace(backSpace(tokens))
def parseIsaDofCmd(args: List[LaTeXToken], tokens: List[LaTeXToken]): Tuple3[String,List[LaTeXToken], List[LaTeXToken]] = {
(args, tokens) match {
case (args, COMMAND("""\isamarkupfalse""") :: tail) => parseIsaDofCmd(args, tail)
case (args, CURLYOPEN :: COMMAND("""\isachardoublequoteopen""") :: CURLYCLOSE :: CURLYOPEN :: COMMAND("""\isacharbrackleft""") :: CURLYCLOSE :: tail)
=> parseIsaDofCmd(trim(args) ++ List(CURLYOPEN), tail)
case (args, CURLYOPEN :: COMMAND("""\isacharbrackright""") :: CURLYCLOSE :: CURLYOPEN :: COMMAND("""\isachardoublequoteclose""") :: CURLYCLOSE :: tail)
=> parseIsaDofCmd(trim(args) ++ List(CURLYCLOSE), delSpace(tail))
case (args, CURLYOPEN :: COMMAND("""\isacharbrackleft""") :: CURLYCLOSE :: tail) => parseIsaDofCmd(trim(args) ++List(sep) ++ List(BRACKETOPEN), tail)
case (args, CURLYOPEN :: COMMAND("""\isacharbrackright""") :: CURLYCLOSE :: tail) => {
val (typ,arglist) = convertType(List(), args)
val (_, t1, t2) = parseIsaDofCmd(deMarkUpArgList(arglist)++List(BRACKETCLOSE,sep), tail)
(typ,t1,t2)
}
case (args, CURLYOPEN :: COMMAND("""\isacharverbatimopen""") :: CURLYCLOSE ::tail) => parseIsaDofCmd(trim(args) ++ List(CURLYOPEN), delSpace(tail))
case (args, CURLYOPEN :: COMMAND("""\isacharverbatimclose""") :: CURLYCLOSE :: tail) => ("",deMarkUp(trim(args) ++ List(CURLYCLOSE)), sep::delSpace(tail))
case (args, CURLYOPEN :: COMMAND("""\isacartoucheopen""") :: CURLYCLOSE ::tail) => parseIsaDofCmd(trim(args) ++ List(CURLYOPEN), delSpace(tail))
case (args, CURLYOPEN :: COMMAND("""\isacartoucheclose""") :: CURLYCLOSE :: tail) => ("",deMarkUp(trim(args) ++ List(CURLYCLOSE)), sep::delSpace(tail))
case (args, t :: tail) => parseIsaDofCmd(args ++ List(t), tail)
case (args, Nil) => ("",deMarkUp(args), Nil)
}
}
val (pre:List[LaTeXToken], tail:List[LaTeXToken]) = {
val (typ,sectionArgs, tail) = parseIsaDofCmd(Nil, tokens)
val dofcmd = if (normalize(cmd) == normalize(typ)) normalize(cmd)
else normalize(cmd)+normalize(typ)
(sep::COMMAND("""\isaDof"""+dofcmd) :: sectionArgs,tail)
}
backSpace(pre) ++ convertLaTeXTokenStream(delSpace(tail))
}
def normalize(s:String) = ((s.split("_")).map(x => x.capitalize)).reduceLeft(_+_)
def convertLaTeXTokenStream(tokens: List[LaTeXToken]): List[LaTeXToken] = {
@tailrec
def convertLaTeXTokenStreamRec(acc: List[LaTeXToken], tokens: List[LaTeXToken]): List[LaTeXToken] = {
val (res, tail, rec) = tokens match {
case Nil => (Nil, Nil, false)
case COMMAND("""\isacommand""") :: CURLYOPEN :: RAWTEXT(cmd) :: CURLYOPEN
:: COMMAND("""\isacharasterisk""") :: CURLYCLOSE :: CURLYCLOSE :: ts => (convertIsaDofCommand(cmd, ts), Nil, false)
case COMMAND("""\isacommand""") :: CURLYOPEN :: RAWTEXT(cmd0) ::CURLYOPEN :: COMMAND("""\isacharunderscore""")::CURLYCLOSE::RAWTEXT(cmd1) :: CURLYOPEN
:: COMMAND("""\isacharasterisk""") :: CURLYCLOSE :: CURLYCLOSE :: ts => (convertIsaDofCommand(cmd0 + "_" + cmd1, ts), Nil, false)
case COMMAND("""\isacommand""") :: CURLYOPEN :: RAWTEXT(cmd0) ::CURLYOPEN :: COMMAND("""\isacharunderscore""")::CURLYCLOSE::RAWTEXT(cmd1) ::CURLYOPEN :: COMMAND("""\isacharunderscore""")::CURLYCLOSE::RAWTEXT(cmd2) :: CURLYOPEN
:: COMMAND("""\isacharasterisk""") :: CURLYCLOSE :: CURLYCLOSE :: ts => (convertIsaDofCommand(cmd0 + "_" + cmd1 + "_" + cmd2, ts), Nil, false)
case COMMAND("""\isacommand""") :: CURLYOPEN :: RAWTEXT(cmd0) ::
CURLYOPEN :: COMMAND("""\isacharunderscore""")::CURLYCLOSE::RAWTEXT(cmd1) ::
CURLYOPEN :: COMMAND("""\isacharunderscore""")::CURLYCLOSE::RAWTEXT(cmd2) ::
CURLYOPEN :: COMMAND("""\isacharunderscore""")::CURLYCLOSE::RAWTEXT(cmd3) ::
CURLYOPEN :: COMMAND("""\isacharasterisk""") :: CURLYCLOSE :: CURLYCLOSE :: ts => (convertIsaDofCommand(cmd0 + "_" + cmd1 + "_" + cmd2 + "_" + cmd3, ts), Nil, false)
case COMMAND("""\isacommand""") :: CURLYOPEN :: RAWTEXT(cmd0) ::
CURLYOPEN :: COMMAND("""\isacharunderscore""")::CURLYCLOSE::RAWTEXT(cmd1) ::
CURLYOPEN :: COMMAND("""\isacharunderscore""")::CURLYCLOSE::RAWTEXT(cmd2) ::
CURLYOPEN :: COMMAND("""\isacharunderscore""")::CURLYCLOSE::RAWTEXT(cmd3) ::
CURLYOPEN :: COMMAND("""\isacharunderscore""")::CURLYCLOSE::RAWTEXT(cmd4) ::
CURLYOPEN :: COMMAND("""\isacharasterisk""") :: CURLYCLOSE :: CURLYCLOSE :: ts => (convertIsaDofCommand(cmd0 + "_" + cmd1 + "_" + cmd2 + "_" + cmd3 + "_" + cmd4, ts), Nil, false)
case t :: ts => (t::Nil, ts, true)
}
if (! rec) acc++res
else convertLaTeXTokenStreamRec(acc++res, tail)
}
convertLaTeXTokenStreamRec(Nil, tokens)
}
def convertLaTeX(string: String): Either[LaTeXLexerError, String] = {
LaTeXLexer(string) match {
case Left(err) => Left(err)
case Right(tokens) => Right(LaTeXLexer.toString(convertLaTeXTokenStream(tokens)))
}
}
def convertFile(f: File): Option[(String, LaTeXLexerError)] = {
val texFileName = f.getAbsolutePath()
println("DOF Converter "+version+": converting " + texFileName)
f.renameTo(new File(texFileName + ".orig"))
using(io.Source.fromFile(texFileName + ".orig")) {
inputFile =>
using(new BufferedWriter(new FileWriter(new File(texFileName), true))) {
outputFile =>
outputFile.write("% This file was modified by the DOF LaTeX converter, version " + version + "\n")
val input = inputFile.getLines.reduceLeft(_ + "\n" + _)
convertLaTeX(input) match {
case Left(err) => Some((texFileName, err))
case Right(output) => {
outputFile.write(output)
None
}
}
}
}
}
def processArgs(args: List[String]): Option[List[String]] = {
def printVersion() = {
println("DOF LaTeX converter version " + version)
}
def printUsage() = {
println("Usage:")
println(" scala dof_latex_converter.jar [OPTIONS] [directory ...]")
println("")
println("Options:")
println(" --version, -v print version and exit")
println(" --help, -h print usage inforamtion and exit")
}
args match {
case Nil => Some(List[String]())
case "-v" :: Nil =>
printVersion(); None
case "--version" :: Nil =>
printVersion(); None
case "-h" :: tail =>
printUsage(); None
case "--help" :: tail =>
printUsage(); None
case file :: tail => processArgs(tail) match {
case Some(files) => Some(file :: files)
case None => None
}
case _ => printUsage(); None
}
}
def main(args: Array[String]): Unit = {
val directories = processArgs(args.toList) match {
case None =>
System.exit(1); List[String]()
case Some(Nil) => List[String](".")
case Some(l) => l
}
val emptyTexFiles = directories.map(dir => recursiveListFiles(new File(dir), new Regex("\\.tex$")).filter(_.length() == 0)).flatten
val nonEmptyTexFiles = directories.map(dir => recursiveListFiles(new File(dir), new Regex("\\.tex$")).filterNot(_.length() == 0)).flatten
if (! emptyTexFiles.isEmpty) {
println()
println("DOF LaTeX converter warning(s):")
println("=============================")
println(" Empty LATeX files found, Isabelle build most likely failed!")
emptyTexFiles.map { case (file:File) => println(" "+ file + " is empty") }
}
if (nonEmptyTexFiles.isEmpty) {
println()
println("DOF LaTeX converter error(s):")
println("=============================")
println(" No valid LaTeX files found")
System.exit(1)
}
val errors = nonEmptyTexFiles.map(file => convertFile(file)).flatten
if (!errors.isEmpty) {
println()
println("DOF LaTeX converter error(s):")
println("=============================")
errors.map { case (file: String, err: LaTeXLexerError) => println(" " + file + ": " + err) }
System.exit(1)
}
System.exit(0)
}
}