package org.jruby.runtime.encoding;
import org.jcodings.Encoding;
import org.jcodings.EncodingDB;
import org.jcodings.EncodingDB.Entry;
import org.jcodings.specific.ASCIIEncoding;
import org.jcodings.specific.ISO8859_16Encoding;
import org.jcodings.spi.ISO_8859_16;
import org.jcodings.util.CaseInsensitiveBytesHash;
import org.jcodings.util.Hash.HashEntryIterator;
import org.jruby.Ruby;
import org.jruby.RubyEncoding;
import org.jruby.javasupport.Java;
import org.jruby.platform.Platform;
import org.jruby.runtime.builtin.IRubyObject;
import org.jruby.util.ByteList;
import java.io.Console;
import java.lang.reflect.Field;
import java.nio.charset.Charset;
import java.nio.charset.UnsupportedCharsetException;
import java.util.Arrays;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.jcodings.specific.USASCIIEncoding;
import org.jruby.RubyFixnum;
import org.jruby.RubyString;
import org.jruby.ext.nkf.RubyNKF;
import org.jruby.util.SafePropertyAccessor;
import org.jruby.util.io.EncodingUtils;
public final class EncodingService {
/** Encoding entries, taken from {@link EncodingDB#getEncodings()} in the constructor. */
private final CaseInsensitiveBytesHash<Entry> encodings;
/** Alias entries, taken from {@link EncodingDB#getAliases()} in the constructor. */
private final CaseInsensitiveBytesHash<Entry> aliases;
/** Ruby-level encoding objects indexed by {@code Entry.getIndex()}; filled by {@link #defineEncodings()}. */
private final IRubyObject[] encodingList;
/** Cache of Ruby-level encodings indexed by {@code Encoding.getIndex()}; grown on demand by loadEncodingEntry. */
private RubyEncoding[] encodingIndex = new RubyEncoding[4];
/** Runtime this service belongs to. */
private final Ruby runtime;
/** Encoding registered under the name "ASCII-8BIT". */
private final Encoding ascii8bit;
/** Encoding matching the JVM default charset, or {@link #ascii8bit} when no entry matches its name. */
private final Encoding javaDefault;
// Names of the special pseudo-encodings recognised by SpecialEncoding.valueOf(ByteList).
private static final ByteList LOCALE_BL = ByteList.create("locale");
private static final ByteList EXTERNAL_BL = ByteList.create("external");
private static final ByteList INTERNAL_BL = ByteList.create("internal");
private static final ByteList FILESYSTEM_BL = ByteList.create("filesystem");
/** Matches "MS" followed by digits; getWindowsFilesystemEncoding retries such names with a "CP" prefix. */
private static final Pattern MS_CP_PATTERN = Pattern.compile("^MS([0-9]+)$");
/**
 * Creates the encoding service for the given runtime.
 *
 * <p>Captures the encoding and alias tables from {@link EncodingDB}, resolves the
 * "ASCII-8BIT" encoding and the encoding matching the JVM default charset (falling back
 * to ASCII-8BIT when the charset name is unknown), and allocates the encoding list with
 * one slot per known encoding. The list itself is populated later by
 * {@link #defineEncodings()}.
 *
 * @param runtime the runtime this service belongs to
 */
public EncodingService(Ruby runtime) {
    this.runtime = runtime;
    encodings = EncodingDB.getEncodings();
    aliases = EncodingDB.getAliases();

    // Build the lookup keys with ByteList.plain, as getEncodingFromString does, rather than
    // String.getBytes(), whose result depends on the platform default charset.
    ascii8bit = encodings.get(ByteList.plain("ASCII-8BIT")).getEncoding();

    String javaDefaultCharset = Charset.defaultCharset().name();
    Entry javaDefaultEntry = findEncodingOrAliasEntry(ByteList.plain(javaDefaultCharset));
    javaDefault = javaDefaultEntry == null ? ascii8bit : javaDefaultEntry.getEncoding();

    encodingList = new IRubyObject[encodings.size()];
}
/**
 * Determines the encoding used by the attached console on Windows.
 *
 * <p>Reads the {@code cs} field declared on {@link Console} reflectively and loads the
 * encoding named after that charset. The lookup is best effort: any failure is ignored.
 *
 * @return the console encoding, or {@code null} when not on Windows, when there is no
 *         console, when the charset name is not a known encoding, or when the lookup fails
 */
public Encoding getConsoleEncoding() {
    if (Platform.IS_WINDOWS) {
        try {
            final Console console = System.console();
            if (console == null) return null;

            final Field charsetField = Console.class.getDeclaredField("cs");
            Java.trySetAccessible(charsetField);
            final Charset consoleCharset = (Charset) charsetField.get(console);
            return loadEncoding(ByteList.create(consoleCharset.name()));
        } catch (Throwable ignored) {
            // best effort only: fall through and report "no console encoding"
        }
    }
    return null;
}
/**
 * @return the shared {@link USASCIIEncoding} instance
 */
public Encoding getUSAsciiEncoding() {
return USASCIIEncoding.INSTANCE;
}
/**
 * @return the encoding that was registered under "ASCII-8BIT" when this service was created
 */
public Encoding getAscii8bitEncoding() {
return ascii8bit;
}
/**
 * Resolves the "filesystem" special encoding for this service's runtime, which
 * SpecialEncoding maps to {@code runtime.getDefaultFilesystemEncoding()}.
 *
 * @return the runtime's default filesystem encoding
 */
public Encoding getFileSystemEncoding() {
return SpecialEncoding.FILESYSTEM.toEncoding(runtime);
}
/**
 * @return the encoding table held by this service (the same instance, not a copy)
 */
public CaseInsensitiveBytesHash<Entry> getEncodings() {
return encodings;
}
/**
 * @return the alias table held by this service (the same instance, not a copy)
 */
public CaseInsensitiveBytesHash<Entry> getAliases() {
return aliases;
}
/**
 * Looks up an encoding entry by name in the encoding table only (aliases are not consulted).
 *
 * @param bytes the encoding name; only the region from {@code getBegin()} spanning
 *              {@code getRealSize()} bytes is used
 * @return whatever the encoding table returns for that name
 */
public Entry findEncodingEntry(ByteList bytes) {
    final byte[] raw = bytes.getUnsafeBytes();
    final int start = bytes.getBegin();
    final int end = start + bytes.getRealSize();
    return encodings.get(raw, start, end);
}

/**
 * Looks up an encoding entry by name in the encoding table only (aliases are not consulted).
 *
 * @param bytes the complete encoding name
 * @return whatever the encoding table returns for that name
 */
public Entry findEncodingEntry(byte[] bytes) {
    final Entry found = encodings.get(bytes);
    return found;
}
/**
 * Looks up an entry by name in the alias table only.
 *
 * @param bytes the alias name; only the region from {@code getBegin()} spanning
 *              {@code getRealSize()} bytes is used
 * @return whatever the alias table returns for that name
 */
public Entry findAliasEntry(ByteList bytes) {
    final byte[] raw = bytes.getUnsafeBytes();
    final int start = bytes.getBegin();
    final int end = start + bytes.getRealSize();
    return aliases.get(raw, start, end);
}

/**
 * Looks up an entry by name in the alias table only.
 *
 * @param bytes the complete alias name
 * @return whatever the alias table returns for that name
 */
public Entry findAliasEntry(byte[] bytes) {
    final Entry found = aliases.get(bytes);
    return found;
}
/**
 * Looks up an entry by name, trying the encoding table first and the alias table second.
 *
 * @param bytes the name to look up
 * @return the encoding entry if one was found, otherwise the result of the alias lookup
 */
public Entry findEncodingOrAliasEntry(ByteList bytes) {
    final Entry direct = findEncodingEntry(bytes);
    if (direct != null) {
        return direct;
    }
    return findAliasEntry(bytes);
}

/**
 * Looks up an entry by name, trying the encoding table first and the alias table second.
 *
 * @param bytes the name to look up
 * @return the encoding entry if one was found, otherwise the result of the alias lookup
 */
public Entry findEncodingOrAliasEntry(byte[] bytes) {
    final Entry direct = findEncodingEntry(bytes);
    if (direct != null) {
        return direct;
    }
    return findAliasEntry(bytes);
}
/** Name of the JVM default charset, computed on first use by {@link #getLocaleEncoding()}. */
private static ByteList defaultCharsetName;

/**
 * Determines the locale encoding.
 *
 * <p>The console encoding (see {@link #getConsoleEncoding()}) wins when available.
 * Otherwise the name of the JVM default charset is looked up among encodings and aliases.
 *
 * @return the console encoding, else the encoding matching the JVM default charset, else
 *         {@link ASCIIEncoding#INSTANCE} when that charset name is unknown
 */
public Encoding getLocaleEncoding() {
    final Encoding consoleEncoding = getConsoleEncoding();
    if (consoleEncoding != null) {
        return consoleEncoding;
    }

    ByteList encName = defaultCharsetName;
    if (encName == null) {
        // Build the name with ByteList.plain, as getEncodingFromString does, rather than
        // String.getBytes(), whose result depends on the platform default charset.
        encName = new ByteList(ByteList.plain(Charset.defaultCharset().name()), false);
        defaultCharsetName = encName;
    }

    final Entry entry = findEncodingOrAliasEntry(encName);
    return entry == null ? ASCIIEncoding.INSTANCE : entry.getEncoding();
}
/**
 * @return the internal array of Ruby-level encoding objects (the same array, not a copy),
 *         indexed by {@code Entry.getIndex()}
 */
public IRubyObject[] getEncodingList() {
return encodingList;
}
/**
 * Finds an encoding by name or alias and registers its Ruby-level object in the index cache.
 *
 * @param name the encoding name or alias
 * @return the matching encoding, or {@code null} when the name is unknown
 */
public Encoding loadEncoding(ByteList name) {
    final Entry found = findEncodingOrAliasEntry(name);
    if (found != null) {
        loadEncodingEntry(found);
        return found.getEncoding();
    }
    return null;
}
/**
 * Stores the Ruby-level encoding for {@code entry} in the index cache, growing the cache
 * when the encoding's index does not fit yet.
 *
 * @param entry the entry whose encoding should be cached
 * @return the Ruby-level encoding taken from the encoding list slot for {@code entry}
 */
private RubyEncoding loadEncodingEntry(final Entry entry) {
    final int slot = entry.getEncoding().getIndex();

    RubyEncoding[] cache = this.encodingIndex;
    if (slot >= cache.length) {
        // leave a little headroom so consecutive indices do not each trigger a copy
        cache = Arrays.copyOf(cache, slot + 4);
        this.encodingIndex = cache;
    }

    final RubyEncoding rubyEncoding = (RubyEncoding) encodingList[entry.getIndex()];
    cache[slot] = rubyEncoding;
    return rubyEncoding;
}
/**
 * Returns the Ruby-level object for an encoding, using the index cache when possible.
 *
 * <p>On a cache miss the encoding's name is looked up among encodings and aliases and the
 * result is cached via loadEncodingEntry.
 *
 * @param enc the encoding to wrap
 * @return the Ruby-level encoding object
 */
public RubyEncoding getEncoding(Encoding enc) {
    final int slot = enc.getIndex();
    final RubyEncoding[] cache = this.encodingIndex;

    if (slot < cache.length) {
        final RubyEncoding cached = cache[slot];
        if (cached != null) {
            return cached;
        }
    }

    return loadEncodingEntry(findEncodingOrAliasEntry(enc.getName()));
}
/**
 * Creates a Ruby-level encoding object for every entry in the encoding table, stores it in
 * the encoding list at the entry's index, and defines a constant for each name that
 * {@code EncodingUtils.encodingNames} derives from the entry's key.
 */
public void defineEncodings() {
    for (HashEntryIterator iterator = encodings.entryIterator(); iterator.hasNext(); ) {
        final CaseInsensitiveBytesHash.CaseInsensitiveBytesHashEntry<Entry> hashEntry =
                (CaseInsensitiveBytesHash.CaseInsensitiveBytesHashEntry<Entry>) iterator.next();
        final Entry dbEntry = hashEntry.value;

        final RubyEncoding rubyEncoding = RubyEncoding.newEncoding(
                runtime, hashEntry.bytes, hashEntry.p, hashEntry.end, dbEntry.isDummy());
        encodingList[dbEntry.getIndex()] = rubyEncoding;

        for (String constName : EncodingUtils.encodingNames(hashEntry.bytes, hashEntry.p, hashEntry.end)) {
            defineEncodingConstant(runtime, rubyEncoding, constName);
        }
    }
}
/**
 * Defines a constant for each name that {@code EncodingUtils.encodingNames} derives from
 * every alias key, pointing at the encoding list element for the aliased entry.
 */
public void defineAliases() {
    for (HashEntryIterator iterator = aliases.entryIterator(); iterator.hasNext(); ) {
        final CaseInsensitiveBytesHash.CaseInsensitiveBytesHashEntry<Entry> aliasEntry =
                (CaseInsensitiveBytesHash.CaseInsensitiveBytesHashEntry<Entry>) iterator.next();
        final Entry target = aliasEntry.value;

        for (String constName : EncodingUtils.encodingNames(aliasEntry.bytes, aliasEntry.p, aliasEntry.end)) {
            final RubyEncoding rubyEncoding = (RubyEncoding) encodingList[target.getIndex()];
            defineEncodingConstant(runtime, rubyEncoding, constName);
        }
    }
}
/**
 * Defines {@code constName} on the object returned by {@code runtime.getEncoding()},
 * with {@code encoding} as its value.
 *
 * @param runtime   the runtime whose {@code getEncoding()} result receives the constant
 * @param encoding  the value of the constant
 * @param constName the constant name
 */
private void defineEncodingConstant(Ruby runtime, RubyEncoding encoding, String constName) {
runtime.getEncoding().defineConstant(constName, encoding);
}
/**
 * Returns the runtime's default external encoding as a Ruby object.
 *
 * <p>When the runtime has none set, "US-ASCII" is loaded, stored on the runtime as the
 * default external encoding, and returned.
 *
 * @return the Ruby-level default external encoding
 */
public IRubyObject getDefaultExternal() {
    Encoding external = runtime.getDefaultExternalEncoding();
    if (external != null) {
        return getEncoding(external);
    }

    external = runtime.getEncodingService().loadEncoding(ByteList.create("US-ASCII"));
    runtime.setDefaultExternalEncoding(external);
    return getEncoding(external);
}
/**
 * Returns the runtime's default internal encoding as a Ruby object.
 *
 * @return the Ruby-level default internal encoding, or nil when the runtime has none
 */
public IRubyObject getDefaultInternal() {
    final Encoding internal = runtime.getDefaultInternalEncoding();
    return convertEncodingToRubyEncoding(internal);
}
/**
 * Wraps an encoding as a Ruby object.
 *
 * @param defaultEncoding the encoding to wrap, may be {@code null}
 * @return the Ruby-level encoding, or the runtime's nil when {@code defaultEncoding} is {@code null}
 */
public IRubyObject convertEncodingToRubyEncoding(Encoding defaultEncoding) {
    if (defaultEncoding == null) {
        return runtime.getNil();
    }
    return getEncoding(defaultEncoding);
}
/**
 * Finds the Ruby-level encoding for a name, consulting the encoding table only.
 *
 * @param bytes the encoding name
 * @return the Ruby-level encoding for that name, or the one for
 *         {@link ASCIIEncoding#INSTANCE} when the name is not in the encoding table
 */
public IRubyObject findEncodingObject(byte[] bytes) {
    final Entry entry = findEncodingEntry(bytes);
    final Encoding resolved = entry == null ? ASCIIEncoding.INSTANCE : entry.getEncoding();
    return convertEncodingToRubyEncoding(resolved);
}
/**
 * @return the encoding resolved in the constructor from the JVM default charset name
 *         (ASCII-8BIT when that name matched no encoding or alias)
 */
public Encoding getJavaDefault() {
return javaDefault;
}
/**
 * Resolves an encoding from a Ruby object; delegates to getEncodingFromObjectCommon with
 * {@code error} set to {@code true}, so an unknown encoding name raises.
 *
 * @param arg the object to resolve, may be {@code null}
 * @return the resolved encoding, or {@code null} when the common lookup yields none
 */
public Encoding getEncodingFromObject(IRubyObject arg) {
return getEncodingFromObjectCommon(arg, true);
}
/**
 * Resolves an encoding from a Ruby object; delegates to getEncodingFromObjectCommon with
 * {@code error} set to {@code false}, so an unknown encoding name yields {@code null}.
 *
 * @param arg the object to resolve, may be {@code null}
 * @return the resolved encoding, or {@code null} when the common lookup yields none
 */
public Encoding getEncodingFromObjectNoError(IRubyObject arg) {
return getEncodingFromObjectCommon(arg, false);
}
/**
 * Resolves an encoding from a Ruby object.
 *
 * <p>Resolution order: a {@code RubyEncoding} yields its own encoding; a {@code RubyFixnum}
 * that maps to a name in {@code RubyNKF.NKFCharsetMap} is resolved through that name;
 * otherwise the object is checked for string-ness and looked up by name.
 *
 * @param arg   the object to resolve, may be {@code null}
 * @param error whether an unknown encoding name raises instead of yielding {@code null}
 * @return the resolved encoding, or {@code null} when {@code arg} is {@code null}, is not
 *         string-like, or is a string in a non-ASCII-compatible encoding
 */
private Encoding getEncodingFromObjectCommon(IRubyObject arg, boolean error) {
    if (arg == null) return null;

    if (arg instanceof RubyEncoding) {
        return ((RubyEncoding) arg).getEncoding();
    }

    if (arg instanceof RubyFixnum) {
        final int nkfId = (int) arg.convertToInteger().getLongValue();
        final String nkfName = RubyNKF.NKFCharsetMap.get(nkfId);
        if (nkfName != null) {
            return getEncodingFromNKFName(nkfName);
        }
        // unmapped fixnums fall through to the string check below
    }

    final IRubyObject converted = arg.checkStringType();
    if (converted.isNil()) return null;

    final RubyString nameString = (RubyString) converted;
    if (!nameString.getEncoding().isAsciiCompatible()) return null;

    return findEncodingCommon(nameString.getByteList(), error);
}
/**
 * Scans the encoding table for the first entry whose encoding class string equals {@code name}.
 *
 * @param name the name to compare against {@code Entry.getEncodingClass()}
 * @return the matching entry's encoding, or {@code null} when no entry matches
 */
private Encoding getEncodingFromNKFName(final String name) {
    for (HashEntryIterator iterator = encodings.entryIterator(); iterator.hasNext(); ) {
        @SuppressWarnings("unchecked")
        final CaseInsensitiveBytesHash.CaseInsensitiveBytesHashEntry<Entry> hashEntry =
                (CaseInsensitiveBytesHash.CaseInsensitiveBytesHashEntry<Entry>) iterator.next();
        final Entry candidate = hashEntry.value;
        if (candidate.getEncodingClass().equals(name)) {
            return candidate.getEncoding();
        }
    }
    return null;
}
/**
 * Resolves an encoding from a Java string name.
 *
 * <p>The special names "locale", "external", "internal" and "filesystem" are resolved
 * against the runtime; any other name is looked up among encodings and aliases.
 *
 * @param string the encoding name, may be {@code null}
 * @return {@code null} for a {@code null} name, otherwise the resolved encoding (which for
 *         a special name is whatever the runtime currently reports)
 * @throws org.jruby.exceptions.RaiseException (ArgumentError) when the name is unknown
 */
public Encoding getEncodingFromString(String string) {
    if (string == null) return null;

    final ByteList name = new ByteList(ByteList.plain(string), false);
    checkAsciiEncodingName(name);

    final SpecialEncoding special = SpecialEncoding.valueOf(name);
    return special == null ? findEncodingWithError(name) : special.toEncoding(runtime);
}
/**
 * Finds an encoding by name; delegates to findEncodingCommon with {@code error} set to
 * {@code true}, so an unknown name raises.
 *
 * @param str object converted to a string and used as the encoding name
 * @return the resolved encoding
 */
public Encoding findEncoding(IRubyObject str) {
return findEncodingCommon(str, true);
}
/**
 * Finds an encoding by name; delegates to findEncodingCommon with {@code error} set to
 * {@code false}, so an unknown name yields {@code null}.
 *
 * @param str object converted to a string and used as the encoding name
 * @return the resolved encoding, or {@code null} when the name is unknown
 */
public Encoding findEncodingNoError(IRubyObject str) {
return findEncodingCommon(str, false);
}
/**
 * Finds an encoding by name; delegates to findEncodingCommon with {@code error} set to
 * {@code false}, so an unknown name yields {@code null}.
 *
 * @param str the encoding name
 * @return the resolved encoding, or {@code null} when the name is unknown
 */
public Encoding findEncodingNoError(ByteList str) {
return findEncodingCommon(str, false);
}
/**
 * Converts {@code str} to a string and resolves its bytes as an encoding name.
 *
 * @param str   object converted to a string and used as the encoding name
 * @param error whether an unknown name raises instead of yielding {@code null}
 * @return the resolved encoding, or {@code null} when unknown and {@code error} is {@code false}
 */
private Encoding findEncodingCommon(IRubyObject str, boolean error) {
    return findEncodingCommon(str.convertToString().getByteList(), error);
}

/**
 * Resolves an encoding name.
 *
 * <p>Special names ("locale", "external", "internal", "filesystem") are resolved against
 * the runtime, substituting {@link ASCIIEncoding#INSTANCE} when the runtime reports none.
 * Other names are looked up among encodings and aliases.
 *
 * @param name  the encoding name
 * @param error whether an unknown name raises instead of yielding {@code null}
 * @return the resolved encoding, or {@code null} when unknown and {@code error} is {@code false}
 */
private Encoding findEncodingCommon(ByteList name, boolean error) {
    checkAsciiEncodingName(name);

    final SpecialEncoding special = SpecialEncoding.valueOf(name);
    if (special != null) {
        final Encoding resolved = special.toEncoding(runtime);
        return resolved != null ? resolved : ASCIIEncoding.INSTANCE;
    }

    if (error) {
        return findEncodingWithError(name);
    }

    final Entry found = findEncodingOrAliasEntry(name);
    return found == null ? null : found.getEncoding();
}
/**
 * Finds the database entry for an encoding name.
 *
 * <p>Special names are resolved against the runtime first and the resulting encoding's
 * name is looked up in the encoding table; other names are looked up among encodings and
 * aliases and raise when unknown.
 *
 * @param str object converted to a string and used as the encoding name
 * @return the entry; may be {@code null} for a special name (for example when the runtime
 *         reports no encoding for it)
 */
public Entry findEntry(IRubyObject str) {
    final ByteList name = str.convertToString().getByteList();
    checkAsciiEncodingName(name);

    final SpecialEncoding special = SpecialEncoding.valueOf(name);
    if (special == null) {
        return findEntryWithError(name);
    }
    return findEntryFromEncoding(special.toEncoding(runtime));
}
/**
 * Converts an object to a Ruby-level encoding.
 *
 * @param str a {@code RubyEncoding} (returned as is) or an object naming an encoding
 * @return the Ruby-level encoding from the encoding list, or nil when {@link #findEntry}
 *         yields no entry
 */
public IRubyObject rubyEncodingFromObject(IRubyObject str) {
    if (str instanceof RubyEncoding) return str;

    final Entry entry = findEntry(str);
    if (entry != null) {
        return getEncodingList()[entry.getIndex()];
    }
    return runtime.getNil();
}
/**
 * Maps an encoding to a {@link Charset}.
 *
 * <p>{@link ASCIIEncoding#INSTANCE} maps to {@code RubyEncoding.ISO} and
 * {@link ISO8859_16Encoding#INSTANCE} to {@code ISO_8859_16.INSTANCE}; everything else is
 * delegated to {@code EncodingUtils.charsetForEncoding}.
 *
 * @param encoding the encoding to map
 * @return the corresponding charset
 * @throws org.jruby.exceptions.RaiseException (EncodingCompatibilityError) when the
 *         delegate reports an unsupported charset
 */
public Charset charsetForEncoding(Encoding encoding) {
    if (encoding == ASCIIEncoding.INSTANCE) return RubyEncoding.ISO;
    if (encoding == ISO8859_16Encoding.INSTANCE) return ISO_8859_16.INSTANCE;

    try {
        return EncodingUtils.charsetForEncoding(encoding);
    } catch (UnsupportedCharsetException uce) {
        final String message =
                "no java.nio.charset.Charset found for encoding `" + encoding.toString() + "'";
        throw runtime.newEncodingCompatibilityError(message);
    }
}
/**
 * Rejects encoding names whose own encoding is not ASCII-compatible.
 *
 * @param name the encoding name to validate
 * @throws org.jruby.exceptions.RaiseException (ArgumentError) when the name's encoding is
 *         not ASCII-compatible
 */
private void checkAsciiEncodingName(ByteList name) {
    final boolean asciiCompatible = name.getEncoding().isAsciiCompatible();
    if (asciiCompatible) return;

    throw runtime.newArgumentError("invalid name encoding (non ASCII)");
}
/**
 * Determines the filesystem encoding from the {@code file.encoding} system property
 * (treated as "UTF-8" when unset).
 *
 * <p>If the property value is not a known encoding and has the form {@code MS<digits>},
 * the lookup is retried as {@code CP<digits>}. When both lookups fail a warning is issued
 * and the runtime's default external encoding is used.
 *
 * @param ruby the runtime used for the warning and for the fallback encoding
 * @return the resolved filesystem encoding
 */
public Encoding getWindowsFilesystemEncoding(Ruby ruby) {
    final String encoding = SafePropertyAccessor.getProperty("file.encoding", "UTF-8");

    Encoding resolved = loadEncoding(ByteList.create(encoding));
    if (resolved != null) return resolved;

    final Matcher msCodePage = MS_CP_PATTERN.matcher(encoding);
    if (msCodePage.find()) {
        resolved = loadEncoding(ByteList.create("CP" + msCodePage.group(1)));
        if (resolved != null) return resolved;
    }

    ruby.getWarnings().warn("unrecognized system encoding \"" + encoding + "\", using default external");
    return ruby.getDefaultExternalEncoding();
}
/**
 * The pseudo-encoding names "locale", "external", "internal" and "filesystem", each of
 * which is resolved against a runtime rather than looked up in the encoding tables.
 */
private enum SpecialEncoding {
    LOCALE, EXTERNAL, INTERNAL, FILESYSTEM;

    /**
     * Maps a name to its special encoding, ignoring case.
     *
     * @param name the candidate name
     * @return the matching constant, or {@code null} when the name is not special
     */
    public static SpecialEncoding valueOf(ByteList name) {
        if (name.caseInsensitiveCmp(LOCALE_BL) == 0) return LOCALE;
        if (name.caseInsensitiveCmp(EXTERNAL_BL) == 0) return EXTERNAL;
        if (name.caseInsensitiveCmp(INTERNAL_BL) == 0) return INTERNAL;
        if (name.caseInsensitiveCmp(FILESYSTEM_BL) == 0) return FILESYSTEM;
        return null;
    }

    /**
     * Resolves this special encoding against the given runtime.
     *
     * @param runtime the runtime to query
     * @return the locale encoding from the runtime's encoding service, or the runtime's
     *         default external, internal or filesystem encoding
     */
    public Encoding toEncoding(Ruby runtime) {
        if (this == LOCALE) {
            return runtime.getEncodingService().getLocaleEncoding();
        }
        if (this == EXTERNAL) {
            return runtime.getDefaultExternalEncoding();
        }
        if (this == INTERNAL) {
            return runtime.getDefaultInternalEncoding();
        }
        if (this == FILESYSTEM) {
            return runtime.getDefaultFilesystemEncoding();
        }
        throw new AssertionError("invalid SpecialEncoding: " + this);
    }
}
/**
 * Finds an encoding by name or alias, raising when the name is unknown.
 *
 * @param name the encoding name or alias
 * @return the matching encoding
 * @throws org.jruby.exceptions.RaiseException (ArgumentError) when the name is unknown
 */
public Encoding findEncodingWithError(ByteList name) {
    final Entry entry = findEntryWithError(name);
    return entry.getEncoding();
}
/**
 * Finds an entry by name or alias, raising when the name is unknown.
 *
 * @param name the encoding name or alias
 * @return the matching entry, never {@code null}
 * @throws org.jruby.exceptions.RaiseException (ArgumentError) when the name is unknown
 */
private Entry findEntryWithError(ByteList name) {
    final Entry found = findEncodingOrAliasEntry(name);
    if (found != null) {
        return found;
    }
    throw runtime.newArgumentError("unknown encoding name - " + name);
}
/**
 * Looks up the encoding-table entry for an encoding by its name.
 *
 * @param e the encoding, may be {@code null}
 * @return the entry for the encoding's name, or {@code null} when {@code e} is {@code null}
 */
private Entry findEntryFromEncoding(Encoding e) {
    return e == null ? null : findEncodingEntry(e.getName());
}
/**
 * Returns the filesystem encoding; the {@code runtime} argument is ignored and the call
 * is forwarded to {@link #getFileSystemEncoding()}.
 *
 * @param runtime unused
 * @return the result of {@link #getFileSystemEncoding()}
 * @deprecated use {@link #getFileSystemEncoding()} instead
 */
@Deprecated
public Encoding getFileSystemEncoding(Ruby runtime) {
return getFileSystemEncoding();
}
}