I am trying to split a large XML file (500mb) using jDOM (I know I should probably try SAX but ...) but I am getting org.jdom.IllegalAddException: The Content already has an existing parent "root" exception as shown in the code below.
sample xml and code are below. I believe all index checks and other trivial stuff is correct.
Thanks!!!
first of all sorry for the large amount of code.
<root>
<metadata><md1>...</md1><md2>...</md2><metadata>
<someOtherInfo><soi_1>...</soi_1></someOtherInfo>
<collection>
<item id="1">...</item><item id="2">...</item><item id="2">...</item>
</collection>
</root>
split() {
final String[] nodeNames = XmlUtils.getNodeNames(elementXpath); // returns {root, collection, item}
// creates tree of
//<root>
// <metadata><md1>...</md1><md2>...</md2><metadata>
// <someOtherInfo><soi_1>...</soi_1></someOtherInfo>
// <collection>
final Element originalDestination = importNodes(sourceDocument, nodeNames);
Element destination = null;
// traverses to "collection" element
Element source = sourceDocument.getRootElement();
for (int tempCount = 1; tempCount < nodeNames.length - 1; ++tempCount) {
source = source.getChild(nodeNames[tempCount]);
}
// get all "collection/item" elements
for (Object obj : source.getChildren(nodeNames[nodeNames.length - 1])) {
// makes sure that each out file has batchSize no of elements
if (groupCount % batchSize == 0) {
if (destination != null) {
// traverse and go back up to the root
Element root = destination;
while (root.getParentElement() != null) {
root = root.getParentElement();
}
// this is where I get -- org.jdom.IllegalAddException: The Content already has an existing parent "root" -- exception
final Document destDocument = new Document(destination);
// write file to disk and reset counters
} else {
// create complete clone of originalDestination so that even its parents are cloned
destination = createClone(originalDestination, nodeNames);
}
}
// add this "item" element to destination "collection" element
final Element element = (Element) obj;
destination.addContent(((Element) element.clone()));
count++;
groupCount++;
}
if (groupCount > 0) {
// write remaining "items" to file
}
}
private Element createClone(final Element source, final String[] nodeNames) {
Element destination = source;
while (destination.getParentElement() != null) {
destination = destination.getParentElement();
}
destination = (Element) destination.clone();
for (int tempCount = 1; tempCount < nodeNames.length - 1; ++tempCount) {
destination = destination.getChild(nodeNames[tempCount]);
}
return destination;
}
private Element importNodes(final Document document,
final String[] nodeNames) {
Element source = document.getRootElement();
if (!source.getName().equals(nodeNames[0])) {
return null;
}
Element destination = null;
for (int count = 0; count < (nodeNames.length - 1); count++) {
if (count > 0) {
source = source.getChild(nodeNames[count]);
}
final Element child = new Element(nodeNames[count]);
if (destination != null) {
destination.setContent(child);
}
destination = child;
// copy attributes -- don't want to clone here since this is one of the ancestors of "item"
for (Object objAttb : source.getAttributes()) {
Attribute attb = (Attribute) objAttb;
destination.setAttribute(attb.getName(), attb.getValue());
}
// this is for <metadata> and <soneInfo> elements
for (Object obj : source.getChildren()) {
final Element childToClone = (Element) obj;
if (!childToClone.getName().equals(nodeNames[count + 1])
&& (ignoreWhiteSpaceNodes ? !childToClone.getName()
.equals("#text") : true)) {
final Element clone = (Element) childToClone.clone();
destination.addContent(clone);
}
}
}
return destination;
}
You just need to detach() the element from its parent before you insert it in another document.