90 lines
2.6 KiB
Standard ML
90 lines
2.6 KiB
Standard ML
|
|
|
|
signature Encoding =
|
|
sig
|
|
datatype Encoding =
|
|
NOENC | ASCII | EBCDIC | LATIN1
|
|
| UCS4B | UCS4L | UCS4SB | UCS4SL
|
|
| UCS2B | UCS2L | UTF16B | UTF16L
|
|
| UTF8
|
|
|
|
val UCS2 : Encoding
|
|
val UCS4 : Encoding
|
|
val UTF16 : Encoding
|
|
|
|
val encodingName : Encoding -> string
|
|
val isEncoding : string -> Encoding
|
|
val switchEncoding : Encoding * Encoding -> Encoding
|
|
end
|
|
|
|
structure Encoding : Encoding =
|
|
struct
|
|
open StringDict
|
|
|
|
datatype Encoding =
|
|
NOENC | ASCII | EBCDIC | LATIN1
|
|
| UCS4B | UCS4L | UCS4SB | UCS4SL
|
|
| UCS2B | UCS2L | UTF16B | UTF16L
|
|
| UTF8
|
|
|
|
val UCS2 = UCS2B
|
|
val UCS4 = UCS4B
|
|
val UTF16 = UTF16B
|
|
|
|
fun encodingName enc =
|
|
case enc
|
|
of NOENC => "NONE"
|
|
| ASCII => "ASCII"
|
|
| EBCDIC => "EBCDIC"
|
|
| LATIN1 => "ISO-8859-1"
|
|
| UCS2B => "UCS-2"
|
|
| UCS2L => "UCS-2"
|
|
| UCS4B => "UCS-4"
|
|
| UCS4L => "UCS-4"
|
|
| UCS4SB => "UCS-4"
|
|
| UCS4SL => "UCS-4"
|
|
| UTF8 => "UTF-8"
|
|
| UTF16B => "UTF-16"
|
|
| UTF16L => "UTF-16"
|
|
|
|
val encDict = makeDict("encoding",6,NOENC)
|
|
val encAliases =
|
|
[(ASCII,["ANSI_X3.4-1968","ANSI_X3.4-1986","ASCII","US-ASCII","US",
|
|
"ISO646-US","ISO-IR-6","ISO_646.IRV:1991","IBM367","CP367"]),
|
|
(EBCDIC,["EBCDIC"]),
|
|
(LATIN1,["ISO_8859-1:1987","ISO-8859-1","ISO_8859-1",
|
|
"ISO-IR-100","CP819","IBM819","L1","LATIN1"]),
|
|
(UCS2,["UCS-2","ISO-10646-UCS-2"]),
|
|
(UCS4,["UCS-4","ISO-10646-UCS-4"]),
|
|
(UTF16,["UTF-16"]),
|
|
(UTF8,["UTF-8"])
|
|
]
|
|
val _ = app (fn (x,ys) => app (fn y => setByKey(encDict,y,x)) ys) encAliases
|
|
fun isEncoding name = getByKey(encDict,name)
|
|
|
|
fun compatAscii new =
|
|
case new
|
|
of ASCII => new
|
|
| LATIN1 => new
|
|
| UTF8 => new
|
|
| _ => NOENC
|
|
fun compatUcs4 (old,new) =
|
|
if new=UCS4 then old else NOENC
|
|
|
|
fun switchEncoding(old,new) =
|
|
case old
|
|
of NOENC => NOENC
|
|
| ASCII => compatAscii new
|
|
| EBCDIC => if new=EBCDIC then new else NOENC
|
|
| LATIN1 => compatAscii new
|
|
| UCS4B => compatUcs4(old,new)
|
|
| UCS4L => compatUcs4(old,new)
|
|
| UCS4SB => compatUcs4(old,new)
|
|
| UCS4SL => compatUcs4(old,new)
|
|
| UTF16B => if new=UTF16 then old else if new=UCS2 then UCS2B else NOENC
|
|
| UTF16L => if new=UTF16 then old else if new=UCS2 then UCS2L else NOENC
|
|
| UCS2B => if new=UCS2 then old else if new=UTF16 then UTF16B else NOENC
|
|
| UCS2L => if new=UCS2 then old else if new=UTF16 then UTF16L else NOENC
|
|
| UTF8 => compatAscii new
|
|
end
|