/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/* $Id: XMLWhiteSpaceHandler.java 1679646 2015-05-15 21:09:32Z adelmelle $ */

package org.apache.fop.fo;

import java.util.List;
import java.util.Stack;

import org.apache.fop.fo.flow.Block;
import org.apache.fop.fo.flow.Float;
import org.apache.fop.util.CharUtilities;

Class encapsulating the functionality for white-space-handling during refinement stage. The handleWhiteSpace() methods are called during FOTree-building and marker-cloning:
  • from FObjMixed.addChildNode()
  • from FObjMixed.endOfNode()
  • from FObjMixed.handleWhiteSpaceFor()

Each time one of the variants is called, white-space is handled for all FOText or Character nodes that were added:
  • either prior to newChild (and after the previous non-text child node)
  • or, if newChild is null, after the previous non-text child

The iteration always starts at firstTextNode, goes on until the last text-node is reached, and deals only with FOText or Character nodes.
Note: if the method is called from an inline's endOfNode(), there is too little context to decide whether trailing white-space may be removed, so the pending inline is stored in a List, together with an iterator for which the next() method returns the first in the trailing sequence of white-space characters. This List is processed again at the end of the ancestor block.
/** * Class encapsulating the functionality for white-space-handling * during refinement stage. * The <code>handleWhiteSpace()</code> methods are called during * FOTree-building and marker-cloning: * <br> * <ul> * <li> from <code>FObjMixed.addChildNode()</code></li> * <li> from <code>FObjMixed.endOfNode()</code></li> * <li> from <code>FObjMixed.handleWhiteSpaceFor()</code></li> * </ul> * <br> * Each time one of the variants is called, white-space is handled * for all <code>FOText</code> or <code>Character</code> nodes that * were added: * <br> * <ul> * <li> either prior to <code>newChild</code> (and after the previous * non-text child node)</li> * <li> or, if <code>newChild</code> is <code>null</code>, * after the previous non-text child</li> * </ul> * <br> * The iteration always starts at <code>firstTextNode</code>, * goes on until the last text-node is reached, and deals only * with <code>FOText</code> or <code>Character</code> nodes. * <br> * <em>Note</em>: if the method is called from an inline's endOfNode(), * there is too little context to decide whether trailing * white-space may be removed, so the pending inline is stored * in a List, together with an iterator for which the next() * method returns the first in the trailing sequence of white- * space characters. This List is processed again at the end * of the ancestor block. */
public class XMLWhiteSpaceHandler {
True if we are in a run of white space
/** True if we are in a run of white space */
private boolean inWhiteSpace;
True if the last char was a linefeed
/** True if the last char was a linefeed */
private boolean afterLinefeed = true;
Counter, increased every time a non-white-space is encountered
/** Counter, increased every time a non-white-space is encountered */
private int nonWhiteSpaceCount; private int linefeedTreatment; private int whiteSpaceTreatment; private int whiteSpaceCollapse; private boolean endOfBlock; private boolean nextChildIsBlockLevel; private RecursiveCharIterator charIter; private List pendingInlines; private Stack nestedBlockStack = new java.util.Stack<Block>(); private CharIterator firstWhiteSpaceInSeq;
Handle white-space for the fo that is passed in, starting at firstTextNode
Params:
  • fo – the FO for which to handle white-space
  • firstTextNode – the node at which to start
  • nextChild – the node that will be added to the list after firstTextNode
/** * Handle white-space for the fo that is passed in, starting at * firstTextNode * @param fo the FO for which to handle white-space * @param firstTextNode the node at which to start * @param nextChild the node that will be added to the list * after firstTextNode */
public void handleWhiteSpace(FObjMixed fo, FONode firstTextNode, FONode nextChild) { Block currentBlock = null; int foId = fo.getNameId(); /* set the current block */ switch (foId) { case Constants.FO_BLOCK: currentBlock = (Block) fo; if (nestedBlockStack.empty() || fo != nestedBlockStack.peek()) { if (nextChild != null) { /* if already in a block, push the current block * onto the stack of nested blocks */ nestedBlockStack.push(currentBlock); } } else { if (nextChild == null) { nestedBlockStack.pop(); } } break; case Constants.FO_RETRIEVE_MARKER: /* look for the nearest block ancestor, if any */ FONode ancestor = fo; do { ancestor = ancestor.getParent(); } while (ancestor.getNameId() != Constants.FO_BLOCK && ancestor.getNameId() != Constants.FO_STATIC_CONTENT); if (ancestor.getNameId() == Constants.FO_BLOCK) { currentBlock = (Block) ancestor; nestedBlockStack.push(currentBlock); } break; default: if (!nestedBlockStack.empty()) { currentBlock = (Block) nestedBlockStack.peek(); } } if (currentBlock != null) { linefeedTreatment = currentBlock.getLinefeedTreatment(); whiteSpaceCollapse = currentBlock.getWhitespaceCollapse(); whiteSpaceTreatment = currentBlock.getWhitespaceTreatment(); } else { linefeedTreatment = Constants.EN_TREAT_AS_SPACE; whiteSpaceCollapse = Constants.EN_TRUE; whiteSpaceTreatment = Constants.EN_IGNORE_IF_SURROUNDING_LINEFEED; } endOfBlock = (nextChild == null && fo == currentBlock); if (firstTextNode == null) { //no text means no white-space to handle; return early afterLinefeed = (fo == currentBlock && fo.firstChild == null); nonWhiteSpaceCount = 0; if (endOfBlock) { handlePendingInlines(); } return; } charIter = new RecursiveCharIterator(fo, firstTextNode); inWhiteSpace = false; if (firstTextNode.siblings != null && firstTextNode.siblings[0] != null && firstTextNode.siblings[0].getNameId() == Constants.FO_FLOAT) { inWhiteSpace = ((Float) firstTextNode.siblings[0]).getInWhiteSpace(); } if (fo == currentBlock || currentBlock == null || (foId == 
Constants.FO_RETRIEVE_MARKER && fo.getParent() == currentBlock)) { if (firstTextNode == fo.firstChild) { afterLinefeed = true; } else { int previousChildId = firstTextNode.siblings[0].getNameId(); afterLinefeed = (previousChildId == Constants.FO_BLOCK || previousChildId == Constants.FO_TABLE_AND_CAPTION || previousChildId == Constants.FO_TABLE || previousChildId == Constants.FO_LIST_BLOCK || previousChildId == Constants.FO_BLOCK_CONTAINER); } } if (foId == Constants.FO_WRAPPER) { FONode parent = fo.parent; int parentId = parent.getNameId(); while (parentId == Constants.FO_WRAPPER) { parent = parent.parent; parentId = parent.getNameId(); } if (parentId == Constants.FO_FLOW || parentId == Constants.FO_STATIC_CONTENT || parentId == Constants.FO_BLOCK_CONTAINER || parentId == Constants.FO_TABLE_CELL) { endOfBlock = (nextChild == null); } } if (nextChild != null) { int nextChildId = nextChild.getNameId(); nextChildIsBlockLevel = ( nextChildId == Constants.FO_BLOCK || nextChildId == Constants.FO_TABLE_AND_CAPTION || nextChildId == Constants.FO_TABLE || nextChildId == Constants.FO_LIST_BLOCK || nextChildId == Constants.FO_BLOCK_CONTAINER); } else { nextChildIsBlockLevel = false; } handleWhiteSpace(); if (fo == currentBlock && (endOfBlock || nextChildIsBlockLevel)) { handlePendingInlines(); } if (nextChild == null) { if (fo != currentBlock) { /* current FO is not a block, and is about to end */ if (nonWhiteSpaceCount > 0 && pendingInlines != null) { /* there is non-white-space text between the pending * inline(s) and the end of the non-block node; * clear list of pending inlines */ pendingInlines.clear(); } if (inWhiteSpace) { /* means there is at least one trailing space in the inline FO that is about to end */ addPendingInline(); } } else { /* end of block: clear the references and pop the * nested block stack */ if (!nestedBlockStack.empty()) { nestedBlockStack.pop(); } charIter = null; firstWhiteSpaceInSeq = null; } } if (nextChild instanceof Float) { ((Float) 
nextChild).setInWhiteSpace(inWhiteSpace); } }
Reset the handler, release all references
/** * Reset the handler, release all references */
protected final void reset() { if (pendingInlines != null) { pendingInlines.clear(); } nestedBlockStack.clear(); charIter = null; firstWhiteSpaceInSeq = null; }
Handle white-space for the fo that is passed in, starting at firstTextNode (when a nested FO is encountered)
Params:
  • fo – the FO for which to handle white-space
  • firstTextNode – the node at which to start
/** * Handle white-space for the fo that is passed in, starting at * firstTextNode (when a nested FO is encountered) * @param fo the FO for which to handle white-space * @param firstTextNode the node at which to start */
public void handleWhiteSpace(FObjMixed fo, FONode firstTextNode) { handleWhiteSpace(fo, firstTextNode, null); } private void handleWhiteSpace() { EOLchecker lfCheck = new EOLchecker(charIter); nonWhiteSpaceCount = 0; while (charIter.hasNext()) { if (!inWhiteSpace) { firstWhiteSpaceInSeq = charIter.mark(); } char currentChar = charIter.nextChar(); int currentCharClass = CharUtilities.classOf(currentChar); if (currentCharClass == CharUtilities.LINEFEED && linefeedTreatment == Constants.EN_TREAT_AS_SPACE) { // if we have a linefeed and it is supposed to be treated // like a space, that's what we do and continue currentChar = '\u0020'; charIter.replaceChar('\u0020'); currentCharClass = CharUtilities.classOf(currentChar); } switch (CharUtilities.classOf(currentChar)) { case CharUtilities.XMLWHITESPACE: // Some kind of whitespace character, except linefeed. if (inWhiteSpace && whiteSpaceCollapse == Constants.EN_TRUE) { // We are in a run of whitespace and should collapse // Just delete the char charIter.remove(); } else { // Do the white space treatment here boolean bIgnore = false; switch (whiteSpaceTreatment) { case Constants.EN_IGNORE: bIgnore = true; break; case Constants.EN_IGNORE_IF_BEFORE_LINEFEED: bIgnore = lfCheck.beforeLinefeed(); break; case Constants.EN_IGNORE_IF_SURROUNDING_LINEFEED: bIgnore = afterLinefeed || lfCheck.beforeLinefeed(); break; case Constants.EN_IGNORE_IF_AFTER_LINEFEED: bIgnore = afterLinefeed; break; case Constants.EN_PRESERVE: //nothing to do now, replacement takes place later break; default: //nop } // Handle ignore and replacement if (bIgnore) { charIter.remove(); } else { // this is to retain a single space between words inWhiteSpace = true; if (currentChar != '\u0020') { charIter.replaceChar('\u0020'); } } } break; case CharUtilities.LINEFEED: // A linefeed switch (linefeedTreatment) { case Constants.EN_IGNORE: charIter.remove(); break; case Constants.EN_TREAT_AS_ZERO_WIDTH_SPACE: charIter.replaceChar(CharUtilities.ZERO_WIDTH_SPACE); 
inWhiteSpace = false; break; case Constants.EN_PRESERVE: lfCheck.reset(); inWhiteSpace = false; afterLinefeed = true; // for following whitespace break; default: //nop } break; case CharUtilities.EOT: // A "boundary" objects such as non-character inline // or nested block object was encountered. (? can't happen) // If any whitespace run in progress, finish it. // FALL THROUGH default: // Any other character inWhiteSpace = false; afterLinefeed = false; nonWhiteSpaceCount++; lfCheck.reset(); break; } } } private void addPendingInline() { if (pendingInlines == null) { pendingInlines = new java.util.ArrayList(5); } pendingInlines.add(new PendingInline(firstWhiteSpaceInSeq)); } private void handlePendingInlines() { if (!(pendingInlines == null || pendingInlines.isEmpty())) { if (nonWhiteSpaceCount == 0) { /* handle white-space for all pending inlines*/ PendingInline p; for (int i = pendingInlines.size(); --i >= 0;) { p = (PendingInline)pendingInlines.get(i); charIter = (RecursiveCharIterator)p.firstTrailingWhiteSpace; handleWhiteSpace(); pendingInlines.remove(p); } } else { /* there is non-white-space text between the pending * inline(s) and the end of the block; * clear list of pending inlines */ pendingInlines.clear(); } } }
Helper class, used during white-space handling to look ahead, and see if the next character is a linefeed (or if there will be an equivalent effect during layout, i.e. end-of-block or the following child is a block-level FO)
/** * Helper class, used during white-space handling to look ahead, and * see if the next character is a linefeed (or if there will be * an equivalent effect during layout, i.e. end-of-block or * the following child is a block-level FO) */
private class EOLchecker { private boolean nextIsEOL; private RecursiveCharIterator charIter; EOLchecker(CharIterator charIter) { this.charIter = (RecursiveCharIterator) charIter; } boolean beforeLinefeed() { if (!nextIsEOL) { CharIterator lfIter = charIter.mark(); while (lfIter.hasNext()) { int charClass = CharUtilities.classOf(lfIter.nextChar()); if (charClass == CharUtilities.LINEFEED) { if (linefeedTreatment == Constants.EN_PRESERVE) { nextIsEOL = true; return nextIsEOL; } } else if (charClass != CharUtilities.XMLWHITESPACE) { return nextIsEOL; } } // No more characters == end of text run // means EOL if there either is a nested block to be added, // or if this is the last text node in the current block nextIsEOL = nextChildIsBlockLevel || endOfBlock; } return nextIsEOL; } void reset() { nextIsEOL = false; } }
Helper class to store unfinished inline nodes together with an iterator that starts at the first white-space character in the sequence of trailing white-space
/** * Helper class to store unfinished inline nodes together * with an iterator that starts at the first white-space * character in the sequence of trailing white-space */
private class PendingInline { protected CharIterator firstTrailingWhiteSpace; PendingInline(CharIterator firstTrailingWhiteSpace) { this.firstTrailingWhiteSpace = firstTrailingWhiteSpace; } } }