package org.codehaus.plexus.util.xml;
/*
* Copyright The Codehaus Foundation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.StringWriter;
import java.io.Writer;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/*
* Character stream that handles (or at least attempts to) all the necessary voodoo to figure out the charset encoding of
* the XML document written to the stream.
* Author: Herve Boutemy. Version: $Id$. Since: 1.4.4.
*/
/**
 * Character stream that handles (or at least attempts to) all the necessary voodoo to figure out the charset
 * encoding of the XML document written to the stream.
 * <p>
 * Characters written to this stream are first buffered in memory (at most {@code BUFFER_SIZE} characters) until an
 * encoding can be chosen: either the one declared in the XML prolog, or UTF-8 when there is no prolog, no
 * encoding declaration, or the prolog does not end within the buffer. Once the encoding is chosen, the buffered
 * characters are written to the underlying stream through an {@link OutputStreamWriter} and every following write
 * is delegated to it.
 *
 * @author <a href="mailto:hboutemy@codehaus.org">Herve Boutemy</a>
 * @version $Id$
 * @since 1.4.4
 */
public class XmlStreamWriter
    extends Writer
{
    /** Maximum number of characters buffered while looking for the end of the XML prolog. */
    private static final int BUFFER_SIZE = 4096;

    /** Encoding used when none can be extracted from the XML prolog. */
    private static final String DEFAULT_ENCODING = "UTF-8";

    /** Pattern used to locate the encoding declaration inside the XML prolog (shared with {@code XmlReader}). */
    static final Pattern ENCODING_PATTERN = XmlReader.ENCODING_PATTERN;

    /** Buffer for the first characters written; {@code null} once they have been handed over to {@link #writer}. */
    private StringWriter xmlPrologWriter = new StringWriter( BUFFER_SIZE );

    /** Underlying byte stream. */
    private OutputStream out;

    /** Encoding writer over {@link #out}; {@code null} until the encoding has been chosen. */
    private Writer writer;

    /** Chosen encoding; {@code null} until detection has completed. */
    private String encoding;

    /**
     * Creates a writer that encodes XML content to the given stream.
     *
     * @param out the stream receiving the encoded bytes
     */
    public XmlStreamWriter( OutputStream out )
    {
        this.out = out;
    }

    /**
     * Creates a writer that encodes XML content to the given file.
     *
     * @param file the file to write to
     * @throws FileNotFoundException if the file cannot be opened for writing
     */
    public XmlStreamWriter( File file )
        throws FileNotFoundException
    {
        this( new FileOutputStream( file ) );
    }

    /**
     * Returns the encoding chosen for the stream.
     *
     * @return the encoding, or {@code null} if it has not been determined yet
     */
    public String getEncoding()
    {
        return encoding;
    }

    /**
     * Closes the stream. If no encoding has been chosen yet, the buffered characters are written using UTF-8
     * before the underlying stream is closed.
     *
     * @throws IOException if writing the buffered characters or closing the underlying writer fails
     */
    public void close()
        throws IOException
    {
        if ( writer == null )
        {
            encoding = DEFAULT_ENCODING;
            writer = new OutputStreamWriter( out, encoding );
            writer.write( xmlPrologWriter.toString() );
            // The buffer has been handed over: drop it so that a write() issued after close() reaches the
            // closed writer and fails, instead of being silently buffered and the prolog emitted a second time.
            xmlPrologWriter = null;
        }
        writer.close();
    }

    /**
     * Flushes the underlying writer. Does nothing while the encoding is still undetermined, since the buffered
     * characters cannot be encoded yet.
     *
     * @throws IOException if flushing the underlying writer fails
     */
    public void flush()
        throws IOException
    {
        if ( writer != null )
        {
            writer.flush();
        }
    }

    /**
     * Buffers the given characters (up to the buffer capacity) and tries to choose the encoding from what has been
     * buffered so far. When an encoding is chosen, the encoding writer is opened, the buffer is written to it and
     * any characters of this call that did not fit in the buffer are written right after.
     *
     * @param cbuf characters being written
     * @param off offset of the first character to write
     * @param len number of characters to write
     * @throws IOException if the chosen encoding is not supported or writing fails
     */
    private void detectEncoding( char[] cbuf, int off, int len )
        throws IOException
    {
        int size = len;
        StringBuffer xmlProlog = xmlPrologWriter.getBuffer();
        if ( xmlProlog.length() + len > BUFFER_SIZE )
        {
            // only buffer what still fits; the remainder is written directly once the writer exists
            size = BUFFER_SIZE - xmlProlog.length();
        }
        xmlPrologWriter.write( cbuf, off, size );

        // try to determine encoding
        if ( xmlProlog.length() >= 5 )
        {
            if ( xmlProlog.substring( 0, 5 ).equals( "<?xml" ) )
            {
                // try to extract encoding from XML prolog
                int xmlPrologEnd = xmlProlog.indexOf( "?>" );
                if ( xmlPrologEnd > 0 )
                {
                    // ok, full XML prolog written: let's extract encoding
                    Matcher m = ENCODING_PATTERN.matcher( xmlProlog.substring( 0, xmlPrologEnd ) );
                    if ( m.find() )
                    {
                        encoding = m.group( 1 ).toUpperCase( Locale.ENGLISH );
                        // drop the first and last character of the match (presumably the surrounding quotes)
                        encoding = encoding.substring( 1, encoding.length() - 1 );
                    }
                    else
                    {
                        // no encoding found in XML prolog: using default encoding
                        encoding = DEFAULT_ENCODING;
                    }
                }
                else
                {
                    if ( xmlProlog.length() >= BUFFER_SIZE )
                    {
                        // no encoding found in first characters: using default encoding
                        encoding = DEFAULT_ENCODING;
                    }
                }
            }
            else
            {
                // no XML prolog: using default encoding
                encoding = DEFAULT_ENCODING;
            }

            if ( encoding != null )
            {
                // encoding has been chosen: let's do it.
                // Open the writer BEFORE discarding the buffer: the constructor throws when the declared
                // encoding is unsupported, and the stream must then stay in a state where close() can still
                // flush the buffered characters and close the underlying stream.
                Writer encodedWriter = new OutputStreamWriter( out, encoding );
                xmlPrologWriter = null;
                writer = encodedWriter;
                writer.write( xmlProlog.toString() );
                if ( len > size )
                {
                    writer.write( cbuf, off + size, len - size );
                }
            }
        }
    }

    /**
     * Writes characters, buffering them while the encoding is undetermined and delegating to the encoding writer
     * afterwards.
     *
     * @param cbuf characters to write
     * @param off offset of the first character to write
     * @param len number of characters to write
     * @throws IOException if the chosen encoding is not supported or the underlying writer fails
     */
    public void write( char[] cbuf, int off, int len )
        throws IOException
    {
        if ( xmlPrologWriter != null )
        {
            detectEncoding( cbuf, off, len );
        }
        else
        {
            writer.write( cbuf, off, len );
        }
    }
}