su4sml/lib/fxp/src/Unicode/encoding.sml

90 lines
2.6 KiB
Standard ML

signature Encoding =
sig
datatype Encoding =
NOENC | ASCII | EBCDIC | LATIN1
| UCS4B | UCS4L | UCS4SB | UCS4SL
| UCS2B | UCS2L | UTF16B | UTF16L
| UTF8
val UCS2 : Encoding
val UCS4 : Encoding
val UTF16 : Encoding
val encodingName : Encoding -> string
val isEncoding : string -> Encoding
val switchEncoding : Encoding * Encoding -> Encoding
end
structure Encoding : Encoding =
struct
open StringDict
datatype Encoding =
NOENC | ASCII | EBCDIC | LATIN1
| UCS4B | UCS4L | UCS4SB | UCS4SL
| UCS2B | UCS2L | UTF16B | UTF16L
| UTF8
val UCS2 = UCS2B
val UCS4 = UCS4B
val UTF16 = UTF16B
fun encodingName enc =
case enc
of NOENC => "NONE"
| ASCII => "ASCII"
| EBCDIC => "EBCDIC"
| LATIN1 => "ISO-8859-1"
| UCS2B => "UCS-2"
| UCS2L => "UCS-2"
| UCS4B => "UCS-4"
| UCS4L => "UCS-4"
| UCS4SB => "UCS-4"
| UCS4SL => "UCS-4"
| UTF8 => "UTF-8"
| UTF16B => "UTF-16"
| UTF16L => "UTF-16"
val encDict = makeDict("encoding",6,NOENC)
val encAliases =
[(ASCII,["ANSI_X3.4-1968","ANSI_X3.4-1986","ASCII","US-ASCII","US",
"ISO646-US","ISO-IR-6","ISO_646.IRV:1991","IBM367","CP367"]),
(EBCDIC,["EBCDIC"]),
(LATIN1,["ISO_8859-1:1987","ISO-8859-1","ISO_8859-1",
"ISO-IR-100","CP819","IBM819","L1","LATIN1"]),
(UCS2,["UCS-2","ISO-10646-UCS-2"]),
(UCS4,["UCS-4","ISO-10646-UCS-4"]),
(UTF16,["UTF-16"]),
(UTF8,["UTF-8"])
]
val _ = app (fn (x,ys) => app (fn y => setByKey(encDict,y,x)) ys) encAliases
fun isEncoding name = getByKey(encDict,name)
fun compatAscii new =
case new
of ASCII => new
| LATIN1 => new
| UTF8 => new
| _ => NOENC
fun compatUcs4 (old,new) =
if new=UCS4 then old else NOENC
fun switchEncoding(old,new) =
case old
of NOENC => NOENC
| ASCII => compatAscii new
| EBCDIC => if new=EBCDIC then new else NOENC
| LATIN1 => compatAscii new
| UCS4B => compatUcs4(old,new)
| UCS4L => compatUcs4(old,new)
| UCS4SB => compatUcs4(old,new)
| UCS4SL => compatUcs4(old,new)
| UTF16B => if new=UTF16 then old else if new=UCS2 then UCS2B else NOENC
| UTF16L => if new=UTF16 then old else if new=UCS2 then UCS2L else NOENC
| UCS2B => if new=UCS2 then old else if new=UTF16 then UTF16B else NOENC
| UCS2L => if new=UCS2 then old else if new=UTF16 then UTF16L else NOENC
| UTF8 => compatAscii new
end