package org.apache.tomcat.util.buf;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CharsetEncoder;
import java.util.Locale;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
public class CharsetCache {
static final String[] INITIAL_CHARSETS = new String[] { "iso-8859-1", "utf-8" };
static final String[] LAZY_CHARSETS = new String[] {
"037", "1006", "1025", "1026", "1046", "1047", "1089", "1097", "1098", "1112", "1122", "1123", "1124",
"1140", "1141", "1142", "1143", "1144", "1145", "1146", "1147", "1148", "1149", "1166", "1364", "1381",
"1383", "273", "277", "278", "280", "284", "285", "290", "297", "300", "33722", "420", "424", "437", "500",
"5601", "646", "737", "775", "813", "834", "838", "850", "852", "855", "856", "857", "858", "860", "861",
"862", "863", "864", "865", "866", "868", "869", "870", "871", "874", "875", "8859_13", "8859_15", "8859_2",
"8859_3", "8859_4", "8859_5", "8859_6", "8859_7", "8859_8", "8859_9", "912", "913", "914", "915", "916",
"918", "920", "921", "922", "923", "930", "933", "935", "937", "939", "942", "942c", "943", "943c", "948",
"949", "949c", "950", "964", "970", "ansi-1251", "ansi_x3.4-1968", "ansi_x3.4-1986", "arabic", "ascii",
"ascii7", "asmo-708", "big5", "big5-hkscs", "big5-hkscs", "big5-hkscs-2001", "big5-hkscs:unicode3.0",
"big5_hkscs", "big5_hkscs_2001", "big5_solaris", "big5hk", "big5hk-2001", "big5hkscs", "big5hkscs-2001",
"ccsid00858", "ccsid01140", "ccsid01141", "ccsid01142", "ccsid01143", "ccsid01144", "ccsid01145",
"ccsid01146", "ccsid01147", "ccsid01148", "ccsid01149", "cesu-8", "cesu8", "cns11643", "compound_text",
"cp-ar", "cp-gr", "cp-is", "cp00858", "cp01140", "cp01141", "cp01142", "cp01143", "cp01144", "cp01145",
"cp01146", "cp01147", "cp01148", "cp01149", "cp037", "cp1006", "cp1025", "cp1026", "cp1046", "cp1047",
"cp1089", "cp1097", "cp1098", "cp1112", "cp1122", "cp1123", "cp1124", "cp1140", "cp1141", "cp1142",
"cp1143", "cp1144", "cp1145", "cp1146", "cp1147", "cp1148", "cp1149", "cp1166", "cp1250", "cp1251",
"cp1252", "cp1253", "cp1254", "cp1255", "cp1256", "cp1257", "cp1258", "cp1364", "cp1381", "cp1383", "cp273",
"cp277", "cp278", "cp280", "cp284", "cp285", "cp290", "cp297", "cp300", "cp33722", "cp367", "cp420",
"cp424", "cp437", "cp500", "cp50220", "cp50221", "cp5346", "cp5347", "cp5348", "cp5349", "cp5350", "cp5353",
"cp737", "cp775", "cp813", "cp833", "cp834", "cp838", "cp850", "cp852", "cp855", "cp856", "cp857", "cp858",
"cp860", "cp861", "cp862", "cp863", "cp864", "cp865", "cp866", "cp868", "cp869", "cp870", "cp871", "cp874",
"cp875", "cp912", "cp913", "cp914", "cp915", "cp916", "cp918", "cp920", "cp921", "cp922", "cp923", "cp930",
"cp933", "cp935", "cp936", "cp937", "cp939", "cp942", "cp942c", "cp943", "cp943c", "cp948", "cp949",
"cp949c", "cp950", "cp964", "cp970", "cpibm284", "cpibm285", "cpibm297", "cpibm37", "cs-ebcdic-cp-ca",
"cs-ebcdic-cp-nl", "cs-ebcdic-cp-us", "cs-ebcdic-cp-wt", "csascii", "csbig5", "cscesu-8", "cseuckr",
"cseucpkdfmtjapanese", "cshalfwidthkatakana", "csibm037", "csibm278", "csibm284", "csibm285", "csibm290",
"csibm297", "csibm420", "csibm424", "csibm500", "csibm857", "csibm860", "csibm861", "csibm862", "csibm863",
"csibm864", "csibm865", "csibm866", "csibm868", "csibm869", "csibm870", "csibm871", "csiso153gost1976874",
"csiso159jisx02121990", "csiso2022cn", "csiso2022jp", "csiso2022jp2", "csiso2022kr", "csiso87jisx0208",
"csisolatin0", "csisolatin2", "csisolatin3", "csisolatin4", "csisolatin5", "csisolatin9",
"csisolatinarabic", "csisolatincyrillic", "csisolatingreek", "csisolatinhebrew", "csjisencoding", "cskoi8r",
"cspc850multilingual", "cspc862latinhebrew", "cspc8codepage437", "cspcp852", "cspcp855", "csshiftjis",
"cswindows31j", "cyrillic", "default", "ebcdic-cp-ar1", "ebcdic-cp-ar2", "ebcdic-cp-bh", "ebcdic-cp-ca",
"ebcdic-cp-ch", "ebcdic-cp-fr", "ebcdic-cp-gb", "ebcdic-cp-he", "ebcdic-cp-is", "ebcdic-cp-nl",
"ebcdic-cp-roece", "ebcdic-cp-se", "ebcdic-cp-us", "ebcdic-cp-wt", "ebcdic-cp-yu", "ebcdic-de-273+euro",
"ebcdic-dk-277+euro", "ebcdic-es-284+euro", "ebcdic-fi-278+euro", "ebcdic-fr-277+euro", "ebcdic-gb",
"ebcdic-gb-285+euro", "ebcdic-international-500+euro", "ebcdic-it-280+euro", "ebcdic-jp-kana",
"ebcdic-no-277+euro", "ebcdic-s-871+euro", "ebcdic-se-278+euro", "ebcdic-sv", "ebcdic-us-037+euro",
"ecma-114", "ecma-118", "elot_928", "euc-cn", "euc-jp", "euc-jp-linux", "euc-kr", "euc-tw", "euc_cn",
"euc_jp", "euc_jp_linux", "euc_jp_solaris", "euc_kr", "euc_tw", "euccn", "eucjis", "eucjp", "eucjp-open",
"euckr", "euctw", "extended_unix_code_packed_format_for_japanese", "gb18030", "gb18030-2000", "gb2312",
"gb2312", "gb2312-1980", "gb2312-80", "gbk", "greek", "greek8", "hebrew", "ibm-037", "ibm-1006", "ibm-1025",
"ibm-1026", "ibm-1046", "ibm-1047", "ibm-1089", "ibm-1097", "ibm-1098", "ibm-1112", "ibm-1122", "ibm-1123",
"ibm-1124", "ibm-1166", "ibm-1364", "ibm-1381", "ibm-1383", "ibm-273", "ibm-277", "ibm-278", "ibm-280",
"ibm-284", "ibm-285", "ibm-290", "ibm-297", "ibm-300", "ibm-33722", "ibm-33722_vascii_vpua", "ibm-37",
"ibm-420", "ibm-424", "ibm-437", "ibm-500", "ibm-5050", "ibm-737", "ibm-775", "ibm-813", "ibm-833",
"ibm-834", "ibm-838", "ibm-850", "ibm-852", "ibm-855", "ibm-856", "ibm-857", "ibm-860", "ibm-861",
"ibm-862", "ibm-863", "ibm-864", "ibm-865", "ibm-866", "ibm-868", "ibm-869", "ibm-870", "ibm-871",
"ibm-874", "ibm-875", "ibm-912", "ibm-913", "ibm-914", "ibm-915", "ibm-916", "ibm-918", "ibm-920",
"ibm-921", "ibm-922", "ibm-923", "ibm-930", "ibm-933", "ibm-935", "ibm-937", "ibm-939", "ibm-942",
"ibm-942c", "ibm-943", "ibm-943c", "ibm-948", "ibm-949", "ibm-949c", "ibm-950", "ibm-964", "ibm-970",
"ibm-euckr", "ibm-thai", "ibm00858", "ibm01140", "ibm01141", "ibm01142", "ibm01143", "ibm01144", "ibm01145",
"ibm01146", "ibm01147", "ibm01148", "ibm01149", "ibm037", "ibm037", "ibm1006", "ibm1025", "ibm1026",
"ibm1026", "ibm1046", "ibm1047", "ibm1089", "ibm1097", "ibm1098", "ibm1112", "ibm1122", "ibm1123",
"ibm1124", "ibm1166", "ibm1364", "ibm1381", "ibm1383", "ibm273", "ibm273", "ibm277", "ibm277", "ibm278",
"ibm278", "ibm280", "ibm280", "ibm284", "ibm284", "ibm285", "ibm285", "ibm290", "ibm290", "ibm297",
"ibm297", "ibm300", "ibm33722", "ibm367", "ibm420", "ibm420", "ibm424", "ibm424", "ibm437", "ibm437",
"ibm500", "ibm500", "ibm737", "ibm775", "ibm775", "ibm813", "ibm833", "ibm834", "ibm838", "ibm850",
"ibm850", "ibm852", "ibm852", "ibm855", "ibm855", "ibm856", "ibm857", "ibm857", "ibm860", "ibm860",
"ibm861", "ibm861", "ibm862", "ibm862", "ibm863", "ibm863", "ibm864", "ibm864", "ibm865", "ibm865",
"ibm866", "ibm866", "ibm868", "ibm868", "ibm869", "ibm869", "ibm870", "ibm870", "ibm871", "ibm871",
"ibm874", "ibm875", "ibm912", "ibm913", "ibm914", "ibm915", "ibm916", "ibm918", "ibm920", "ibm921",
"ibm922", "ibm923", "ibm930", "ibm933", "ibm935", "ibm937", "ibm939", "ibm942", "ibm942c", "ibm943",
"ibm943c", "ibm948", "ibm949", "ibm949c", "ibm950", "ibm964", "ibm970", "iscii", "iscii91",
"iso-10646-ucs-2", "iso-2022-cn", "iso-2022-cn-cns", "iso-2022-cn-gb", "iso-2022-jp", "iso-2022-jp-2",
"iso-2022-kr", "iso-8859-11", "iso-8859-13", "iso-8859-15", "iso-8859-15", "iso-8859-2", "iso-8859-3",
"iso-8859-4", "iso-8859-5", "iso-8859-6", "iso-8859-7", "iso-8859-8", "iso-8859-9", "iso-ir-101",
"iso-ir-109", "iso-ir-110", "iso-ir-126", "iso-ir-127", "iso-ir-138", "iso-ir-144", "iso-ir-148",
"iso-ir-153", "iso-ir-159", "iso-ir-6", "iso-ir-87", "iso2022cn", "iso2022cn_cns", "iso2022cn_gb",
"iso2022jp", "iso2022jp2", "iso2022kr", "iso646-us", "iso8859-13", "iso8859-15", "iso8859-2", "iso8859-3",
"iso8859-4", "iso8859-5", "iso8859-6", "iso8859-7", "iso8859-8", "iso8859-9", "iso8859_11", "iso8859_13",
"iso8859_15", "iso8859_15_fdis", "iso8859_2", "iso8859_3", "iso8859_4", "iso8859_5", "iso8859_6",
"iso8859_7", "iso8859_8", "iso8859_9", "iso_646.irv:1983", "iso_646.irv:1991", "iso_8859-13", "iso_8859-15",
"iso_8859-2", "iso_8859-2:1987", "iso_8859-3", "iso_8859-3:1988", "iso_8859-4", "iso_8859-4:1988",
"iso_8859-5", "iso_8859-5:1988", "iso_8859-6", "iso_8859-6:1987", "iso_8859-7", "iso_8859-7:1987",
"iso_8859-8", "iso_8859-8:1988", "iso_8859-9", "iso_8859-9:1989", "jis", "jis0201", "jis0208", "jis0212",
"jis_c6226-1983", "jis_encoding", "jis_x0201", "jis_x0201", "jis_x0208-1983", "jis_x0212-1990",
"jis_x0212-1990", "jisautodetect", "johab", "koi8", "koi8-r", "koi8-u", "koi8_r", "koi8_u",
"ks_c_5601-1987", "ksc5601", "ksc5601-1987", "ksc5601-1992", "ksc5601_1987", "ksc5601_1992", "ksc_5601",
"l2", "l3", "l4", "l5", "l9", "latin0", "latin2", "latin3", "latin4", "latin5", "latin9", "macarabic",
"maccentraleurope", "maccroatian", "maccyrillic", "macdingbat", "macgreek", "machebrew", "maciceland",
"macroman", "macromania", "macsymbol", "macthai", "macturkish", "macukraine", "ms-874", "ms1361", "ms50220",
"ms50221", "ms874", "ms932", "ms936", "ms949", "ms950", "ms950_hkscs", "ms950_hkscs_xp", "ms_936", "ms_949",
"ms_kanji", "pc-multilingual-850+euro", "pck", "shift-jis", "shift_jis", "shift_jis", "sjis",
"st_sev_358-88", "sun_eu_greek", "tis-620", "tis620", "tis620.2533", "unicode", "unicodebig",
"unicodebigunmarked", "unicodelittle", "unicodelittleunmarked", "us", "us-ascii", "utf-16", "utf-16be",
"utf-16le", "utf-32", "utf-32be", "utf-32be-bom", "utf-32le", "utf-32le-bom", "utf16", "utf32", "utf_16",
"utf_16be", "utf_16le", "utf_32", "utf_32be", "utf_32be_bom", "utf_32le", "utf_32le_bom", "windows-1250",
"windows-1251", "windows-1252", "windows-1253", "windows-1254", "windows-1255", "windows-1256",
"windows-1257", "windows-1258", "windows-31j", "windows-437", "windows-874", "windows-932", "windows-936",
"windows-949", "windows-950", "windows-iso2022jp", "windows949", "x-big5-hkscs-2001", "x-big5-solaris",
"x-compound-text", "x-compound_text", "x-euc-cn", "x-euc-jp", "x-euc-jp-linux", "x-euc-tw", "x-eucjp",
"x-eucjp-open", "x-ibm1006", "x-ibm1025", "x-ibm1046", "x-ibm1097", "x-ibm1098", "x-ibm1112", "x-ibm1122",
"x-ibm1123", "x-ibm1124", "x-ibm1166", "x-ibm1364", "x-ibm1381", "x-ibm1383", "x-ibm300", "x-ibm33722",
"x-ibm737", "x-ibm833", "x-ibm834", "x-ibm856", "x-ibm874", "x-ibm875", "x-ibm921", "x-ibm922", "x-ibm930",
"x-ibm933", "x-ibm935", "x-ibm937", "x-ibm939", "x-ibm942", "x-ibm942c", "x-ibm943", "x-ibm943c",
"x-ibm948", "x-ibm949", "x-ibm949c", "x-ibm950", "x-ibm964", "x-ibm970", "x-iscii91", "x-iso-2022-cn-cns",
"x-iso-2022-cn-gb", "x-iso-8859-11", "x-jis0208", "x-jisautodetect", "x-johab", "x-macarabic",
"x-maccentraleurope", "x-maccroatian", "x-maccyrillic", "x-macdingbat", "x-macgreek", "x-machebrew",
"x-maciceland", "x-macroman", "x-macromania", "x-macsymbol", "x-macthai", "x-macturkish", "x-macukraine",
"x-ms932_0213", "x-ms950-hkscs", "x-ms950-hkscs-xp", "x-mswin-936", "x-pck", "x-sjis", "x-sjis_0213",
"x-utf-16be", "x-utf-16le", "x-utf-16le-bom", "x-utf-32be", "x-utf-32be-bom", "x-utf-32le",
"x-utf-32le-bom", "x-windows-50220", "x-windows-50221", "x-windows-874", "x-windows-949", "x-windows-950",
"x-windows-iso2022jp", "x0201", "x0208", "x0212", "x11-compound_text",
"csiso885915", "csiso885916", "iso-8859-16", "iso-ir-226", "iso_8859-16", "iso_8859-16:2001", "l10",
"latin-9", "latin10", "ms932-0213", "ms932:2004", "ms932_0213", "shift_jis:2004", "shift_jis_0213:2004",
"sjis-0213", "sjis:2004", "sjis_0213", "sjis_0213:2004", "windows-932-0213", "windows-932:2004",
"932", "cp932", "cpeuccn", "ibm-1252", "ibm-932", "ibm-euccn", "ibm1252", "ibm932", "ibmeuccn", "x-ibm932",
"1129", "cp1129", "ibm-1129", "ibm-euctw", "ibm1129", "x-ibm1129",
"29626c", "833", "cp29626c", "ibm-1140", "ibm-1141", "ibm-1142", "ibm-1143", "ibm-1144", "ibm-1145",
"ibm-1146", "ibm-1147", "ibm-1148", "ibm-1149", "ibm-29626c", "ibm-858", "ibm-eucjp", "ibm1140", "ibm1141",
"ibm1142", "ibm1143", "ibm1144", "ibm1145", "ibm1146", "ibm1147", "ibm1148", "ibm1149", "ibm29626c",
"ibm858", "x-ibm29626c"
};
private static final Charset DUMMY_CHARSET = new DummyCharset("Dummy", null);
private ConcurrentMap<String,Charset> cache = new ConcurrentHashMap<>();
public CharsetCache() {
for (String charsetName : INITIAL_CHARSETS) {
Charset charset = Charset.forName(charsetName);
addToCache(charsetName, charset);
}
for (String charsetName : LAZY_CHARSETS) {
addToCache(charsetName, DUMMY_CHARSET);
}
}
private void addToCache(String name, Charset charset) {
cache.put(name, charset);
for (String alias : charset.aliases()) {
cache.put(alias.toLowerCase(Locale.ENGLISH), charset);
}
}
public Charset getCharset(String charsetName) {
String lcCharsetName = charsetName.toLowerCase(Locale.ENGLISH);
Charset result = cache.get(lcCharsetName);
if (result == DUMMY_CHARSET) {
Charset charset = Charset.forName(lcCharsetName);
if (charset == null) {
cache.remove(lcCharsetName);
result = null;
} else {
addToCache(lcCharsetName, charset);
result = charset;
}
}
return result;
}
private static class DummyCharset extends Charset {
protected DummyCharset(String canonicalName, String[] aliases) {
super(canonicalName, aliases);
}
@Override
public boolean contains(Charset cs) {
return false;
}
@Override
public CharsetDecoder newDecoder() {
return null;
}
@Override
public CharsetEncoder newEncoder() {
return null;
}
}
}