/*
* Copyright (c) 2013, 2017, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Oracle designates this
* particular file as subject to the "Classpath" exception as provided
* by Oracle in the LICENSE file that accompanied this code.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package com.oracle.objectfile.macho;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Deque;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import com.oracle.objectfile.BuildDependency;
import com.oracle.objectfile.LayoutDecision;
import com.oracle.objectfile.LayoutDecisionMap;
import com.oracle.objectfile.ObjectFile;
import com.oracle.objectfile.ObjectFile.Element;
import com.oracle.objectfile.io.AssemblyBuffer;
import com.oracle.objectfile.io.OutputAssembler;
import com.oracle.objectfile.macho.MachOObjectFile.LinkEditSegment64Command;
/**
 * This element implements a prefix trie mapping symbol names (strings) to their definitions. It's a
 * nicer analogue of the ELF hash section. This implementation seems to work, but it is currently
 * NOT used, since we avoid "new-style" Mach-O features and instead generate only "classic" Mach-O
 * files.
 */
class ExportTrieElement extends MachOObjectFile.LinkEditElement {

    /**
     * The object file this element belongs to. Note that {@code LinkEditElement} is an inner class
     * of {@link MachOObjectFile}, so this is also the enclosing instance passed via the
     * qualified-super call in the constructor.
     */
    private final MachOObjectFile owner;

    ExportTrieElement(String name, MachOObjectFile owner) {
        // Qualified-super syntax: supply 'owner' as the enclosing instance of the
        // LinkEditElement inner class.
        owner.super(name, owner.getLinkEditSegment());
        this.owner = owner;
        // The root node carries the empty suffix; all symbol strings hang beneath it.
        root = new TrieNode();
        root.suffix = "";
    }

    @Override
    public Iterable<BuildDependency> getDependencies(Map<Element, LayoutDecisionMap> decisions) {
        // No special layout dependencies beyond the defaults (content before size, etc.).
        return ObjectFile.defaultDependencies(decisions, this);
    }

    // Running count of all TrieNode instances ever created, maintained by TrieNode's
    // instance initializer. Used only as a sanity check at the end of writeRoot().
    int totalNodesInTree;
    // Root of the trie; suffix "" (set in the constructor), never has a parent or terminal.
    TrieNode root;

    /**
     * Terminal (payload) record attached to a trie node whose root-to-node path spells a complete
     * exported symbol name. Serialized as one flags byte followed by the LEB128-encoded address.
     */
    class TrieTerminal {
        // NOTE(review): dyld's export-trie format describes the flags as a ULEB128 value;
        // here a single raw byte is written. Harmless for small flag values, but confirm
        // against the loader before enabling this element.
        byte terminalFlags;
        // Address of the symbol's definition (vaddr of its section plus its offset; see
        // getOrDecideContent()).
        long address;

        public void write(OutputAssembler oa) {
            oa.writeByte(terminalFlags);
            oa.writeLEB128(address);
        }
    }

    /** A node in the prefix trie. Each node stores the suffix it contributes to the path. */
    class TrieNode {
        // Instance initializer: every construction path (both constructors) bumps the
        // global node count used for the sanity check in writeRoot().
        {
            ++totalNodesInTree;
        }

        // Non-null iff the path to this node is a complete exported symbol name.
        TrieTerminal terminal;
        // The string fragment this node appends to its parent's path; null only transiently
        // (the bare constructor sets it to null and callers assign it afterwards).
        String suffix;
        List<TrieNode> children = new ArrayList<>();
        // Parent link; null for the root. Used by value() to reconstruct the full string.
        TrieNode parent;

        /**
         * Exact encoded size of this node's terminal record: one flags byte plus the LEB128
         * length of the address, or zero if there is no terminal.
         */
        private int encodedTerminalSize() {
            return (terminal == null) ? 0 : 1 + MachOObjectFile.encodedLengthLEB128(terminal.address);
        }

        /**
         * Pessimistic size of the child record for child {@code childNum}: its
         * NUL-terminated suffix string plus the largest possible ULEB128 offset.
         */
        private int worstCaseChildRecordSize(int childNum) {
            // our worst-case ULEB128 is a 64-bit number,
            // requiring 10 groups of 7 bits (9 * 7 is 63, one remaining)
            String childSuffix = children.get(childNum).suffix;
            int nullTerminatorSize = 1;
            return (childSuffix == null ? 0 : childSuffix.length()) + nullTerminatorSize + 10;
        }

        /** Pessimistic upper bound on this node's full encoded record size. */
        private int worstCaseNodeRecordSize() {
            /*
             * It's the length bytes plus encoded terminal size plus child-count byte plus
             * worst-case child records.
             */
            int childRecords = 0;
            for (int i = 0; i < children.size(); ++i) {
                childRecords += worstCaseChildRecordSize(i);
            }
            return 1 + encodedTerminalSize() + 1 + childRecords;
        }

        /**
         * Serializes the whole trie, breadth-first, into {@code out}. Child offsets are not
         * known when a parent record is written, so a gap sized by a pessimistic upper
         * bound on the child's offset is left and patched once the child's actual position
         * is known (ULEB128 tolerates a trailing zero byte in an oversized gap).
         */
        void writeRoot(OutputAssembler out) {
            assert this == root;
            // BFS work queue and a parallel queue of each node's offset upper bound.
            Deque<TrieNode> nodes = new ArrayDeque<>();
            Deque<Integer> offsetUpperBounds = new ArrayDeque<>();
            /*
             * Invariant the union of the keysets of these two maps is the set of all nodes pulled
             * from the queue. As we add to the queue we take from the first map. As we take from
             * the queue, we move nodes into the second map.
             */
            Map<TrieNode, Integer> upperBoundsByNode = new HashMap<>();
            Map<TrieNode, Integer> offsetsByWrittenNode = new HashMap<>();
            // also remember where we've left a space for child pointers.
            Map<TrieNode, Integer> childPointerOffsets = new HashMap<>();
            Map<TrieNode, Integer> childPointerGapSizes = new HashMap<>();
            nodes.addLast(root);
            offsetUpperBounds.addLast(0);
            upperBoundsByNode.put(root, 0);
            while (!nodes.isEmpty()) {
                TrieNode node = nodes.removeFirst();
                int ourUpperBound = offsetUpperBounds.removeFirst();
                upperBoundsByNode.remove(node);
                int nodeStartPos = out.pos();
                // The pessimistic bound must never be exceeded by the real position.
                assert ourUpperBound >= nodeStartPos;
                out.skip(1); // placeholder for terminal size
                int terminalStartPos = out.pos();
                if (node.terminal != null) {
                    node.terminal.write(out);
                }
                // go back and write the terminal size
                int terminalSize = out.pos() - terminalStartPos;
                out.pushSeek(nodeStartPos);
                out.writeByte((byte) terminalSize);
                out.pop();
                // store into the 'written nodes' map, to restore invariant
                offsetsByWrittenNode.put(node, nodeStartPos);
                /*
                 * For each child, write its suffix followed by its offset in the table. PROBLEM:
                 * how do we know what offsets we'll be writing these children at? Solution: 1.
                 * write nodes breadth-first -- KEEP A QUEUE. 2. when we enqueue something, compute
                 * a pessimistic upper bound on its offset in the table. This depends on the space
                 * taken by nodes earlier in the queue. So we can compute one upper bound from the
                 * last one, albeit drifting in accuracy.
                 *
                 * By computing an upper bound, we can be sure we leave enough space for the ULEB
                 * encoding of the eventual offset. Occasionally we'll leave an extra byte, because
                 * of the pessimistic nature of the upper bound. It takes a big drift just to cause
                 * one unnecessary byte of padding, so this isn't a big hit.
                 *
                 * If we go back and find that our ULEB128-encoded length doesn't fill up all the
                 * bytes we allocated for it, does it break things? NO, I don't think so. The ULEBs
                 * come at the end of a record, where an empty zero byte is harmless, because the
                 * next element will be reached by seeking to an explicit offset. In the case of the
                 * Terminal flags/addr, we do need to get their ULEB-encoded sizes right, but we
                 * don't have to pre-pad those; the terminal size field is a single byte which we
                 * patch up immediately after writing them, so we always know exactly how much space
                 * they take. By contrast, the problem with node records is that there might be a
                 * lot of intervening records of unknown size, before the referenced record, so we
                 * need to patch up its offset much later.
                 */
                // write the child count byte
                assert node.children.size() <= 255;
                out.writeByte((byte) node.children.size());
                for (TrieNode child : node.children) {
                    nodes.addLast(child);
                    /*
                     * What's the upper bound on its offset? We're still to write
                     *
                     * - the current node; if queue is empty, the current is the one that matters
                     *
                     * - all preceding nodes in the queue! use the upper bound of the last one to
                     * compute an upper bound for this one
                     */
                    int childUpperBound;
                    if (offsetUpperBounds.size() == 0) {
                        childUpperBound = nodeStartPos + node.worstCaseNodeRecordSize();
                    } else {
                        // NOTE(review): at this point 'child' has already been added to 'nodes',
                        // so nodes.peekLast() is the child itself, not the previously-queued node
                        // the comment above refers to. Confirm this still yields a valid upper
                        // bound (the assert 'ourUpperBound >= nodeStartPos' above would catch a
                        // violation at write time).
                        childUpperBound = offsetUpperBounds.peekLast() + nodes.peekLast().worstCaseNodeRecordSize();
                    }
                    offsetUpperBounds.addLast(childUpperBound);
                    upperBoundsByNode.put(child, childUpperBound);
                }
                /* Now write our current node's child records (leaving gaps for the offsets). */
                for (TrieNode child : node.children) {
                    out.writeString(child.suffix);
                    int childPointerPos = out.pos();
                    // Size the gap by the upper bound, so the real (smaller) offset always fits.
                    int gapSize = MachOObjectFile.encodedLengthLEB128(upperBoundsByNode.get(child));
                    out.skip(gapSize);
                    childPointerOffsets.put(child, childPointerPos);
                    childPointerGapSizes.put(child, gapSize);
                }
                /*
                 * Now go back and fill in the parent->child pointer for this node -- unless it's
                 * the root node, when it won't have one.
                 */
                if (node != root) {
                    int pointerOffset = childPointerOffsets.get(node);
                    int gapSize = childPointerGapSizes.get(node);
                    assert MachOObjectFile.encodedLengthLEB128(nodeStartPos) <= gapSize;
                    out.pushSeek(pointerOffset);
                    out.writeLEB128(nodeStartPos);
                    out.pop();
                    childPointerOffsets.remove(node);
                    childPointerGapSizes.remove(node);
                }
            }
            // assert that various maps are empty
            assert childPointerOffsets.isEmpty();
            assert childPointerGapSizes.isEmpty();
            assert nodes.isEmpty();
            assert offsetUpperBounds.isEmpty();
            assert upperBoundsByNode.isEmpty();
            assert offsetsByWrittenNode.size() == totalNodesInTree;
        }

        TrieNode() {
            // make root node
            suffix = null;
            terminal = null;
            parent = null;
        }

        /** Creates a child node, linking it into {@code parent}'s child list. Suffix is set by the caller. */
        TrieNode(TrieNode parent, TrieTerminal term) {
            parent.children.add(this);
            this.terminal = term;
            this.parent = parent;
        }

        /**
         * Descends the trie following matching child suffixes and returns the deepest node whose
         * path is a prefix of {@code s}. {@code startPos} is how many characters of {@code s} the
         * path to 'this' has already consumed.
         */
        TrieNode findLongestPrefixNode(String s, int startPos) {
            for (TrieNode child : children) {
                assert child.suffix != null;
                if (s.substring(startPos).startsWith(child.suffix)) {
                    /* recurse */
                    return child.findLongestPrefixNode(s, startPos + child.suffix.length());
                }
            }
            // else we're it
            return this;
        }

        /** Reconstructs the full string spelled by the path from the root to this node. */
        String value() {
            List<TrieNode> ancestors = new ArrayList<>();
            TrieNode cur = this;
            do {
                ancestors.add(cur);
            } while ((cur = cur.parent) != null);
            // concatenate our ancestors in reverse order
            StringBuilder sb = new StringBuilder();
            for (int i = ancestors.size() - 1; i >= 0; --i) {
                String ancestorSuffix = ancestors.get(i).suffix;
                // the root's suffix may be null (bare constructor) -- treat it as empty
                sb.append(ancestorSuffix == null ? "" : ancestorSuffix);
            }
            return sb.toString();
        }
    }

    /** Returns the length of the longest common prefix of {@code s1} and {@code s2}. */
    int commonPrefixLength(String s1, String s2) {
        int i = 0;
        while (s1.length() > i && s2.length() > i && s1.charAt(i) == s2.charAt(i)) {
            ++i;
        }
        return i;
    }

    /**
     * Inserts symbol {@code s} with address {@code addr} into the trie. If the insertion point
     * shares characters with an existing child's suffix, that child is split at the shared prefix
     * first; then a terminal is created either on a new leaf carrying the remaining characters or
     * directly on the insertion node when {@code s} is fully consumed.
     */
    private void addSymbol(String s, long addr) {
        TrieNode longestPrefixNode = root.findLongestPrefixNode(s, 0);
        // we *might* have a duplicate in the sense of one symbol being a prefix of another...
        String longestPrefixValue = longestPrefixNode.value();
        if (longestPrefixValue.equals(s)) {
            // but the precise symbol should not exist already
            assert longestPrefixNode.terminal == null;
        }
        // if we share some more characters with a child of the longest prefix, split it
        TrieNode childWithPrefix = null;
        int childPrefixLength = 0;
        int childPosition = 0;
        for (TrieNode child : longestPrefixNode.children) {
            childPrefixLength = commonPrefixLength(child.suffix, s.substring(longestPrefixValue.length()));
            if (childPrefixLength > 0) {
                // assert that there's at most one such
                assert childWithPrefix == null;
                childWithPrefix = child;
                break; // so that childPosition points to correct child pos
            }
            ++childPosition;
        }
        // to be safe-ish...
        if (childWithPrefix == null) {
            childPosition = -1;
        }
        TrieNode nodeToInsertAt;
        // if we got a child, split that child...
        if (childWithPrefix != null) {
            assert childPrefixLength > 0;
            // the shared prefix becomes a new interior node; the remainder stays on the old child
            String suffixToInsert = childWithPrefix.suffix.substring(0, childPrefixLength);
            String newChildSuffix = childWithPrefix.suffix.substring(childPrefixLength);
            // remove before we make any changes to the child array, to keep offset valid
            assert longestPrefixNode.children.indexOf(childWithPrefix) == childPosition;
            assert longestPrefixNode.children.lastIndexOf(childWithPrefix) == childPosition;
            longestPrefixNode.children.remove(childPosition);
            assert longestPrefixNode.children.indexOf(childWithPrefix) == -1;
            // constructor will add the new child to longestPrefixNode's children array
            TrieNode newChild = new TrieNode(longestPrefixNode, null);
            // ... but the new node has no children
            assert newChild.children.size() == 0;
            // ... so move the old subtree down underneath the new child
            childWithPrefix.suffix = newChildSuffix;
            childWithPrefix.parent = newChild;
            newChild.children.add(childWithPrefix);
            assert newChild.children.size() == 1;
            // ... and set its suffix
            newChild.suffix = suffixToInsert;
            nodeToInsertAt = newChild;
        } else {
            // add a new child right where we are
            nodeToInsertAt = longestPrefixNode;
        }
        // if we have a nonempty prefix beyond our insertion node,
        // we create a child of that node and create the terminal
        // there; otherwise, we create the terminal right at the
        // node
        TrieNode whereToCreateTerminal;
        String stringToInsertAt = nodeToInsertAt.value();
        if (s.length() > stringToInsertAt.length()) {
            whereToCreateTerminal = new TrieNode(nodeToInsertAt, null);
            whereToCreateTerminal.suffix = s.substring(stringToInsertAt.length());
        } else {
            whereToCreateTerminal = nodeToInsertAt;
        }
        assert whereToCreateTerminal.terminal == null;
        whereToCreateTerminal.terminal = new TrieTerminal();
        whereToCreateTerminal.terminal.address = addr;
        whereToCreateTerminal.terminal.terminalFlags = 0;
    }

    /**
     * Builds the trie from all external, defined symbols in the linkedit segment's symtab, then
     * serializes it and returns the resulting bytes.
     */
    @Override
    public byte[] getOrDecideContent(Map<Element, LayoutDecisionMap> alreadyDecided, byte[] contentHint) {
        /* We need to build a prefix tree out of our exported symbols. */
        for (MachOSymtab.Entry ent : ((LinkEditSegment64Command) owner.getLinkEditSegment()).getSymtab().getSortedEntries()) {
            if (ent.isExternal() && ent.isDefined()) {
                // FIXME: do we really want the vaddr?
                // NOTE(review): the decided VADDR is cast to int before the offset is added,
                // which truncates addresses that don't fit in 32 bits -- confirm intended.
                long symbolVaddr = (int) alreadyDecided.get(ent.getDefinedSection()).getDecidedValue(LayoutDecision.Kind.VADDR) + ent.getDefinedOffset();
                addSymbol(ent.getNameInObject(), symbolVaddr);
            }
        }
        OutputAssembler oa = AssemblyBuffer.createOutputAssembler(getOwner().getByteOrder());
        root.writeRoot(oa);
        return oa.getBlob();
    }

    /** Size is simply the length of the already-decided content blob. */
    @Override
    public int getOrDecideSize(Map<Element, LayoutDecisionMap> alreadyDecided, int sizeHint) {
        return ((byte[]) alreadyDecided.get(this).getDecidedValue(LayoutDecision.Kind.CONTENT)).length;
    }
}