001/* $Id: NodeCreateRule.java 992060 2010-09-02 19:09:47Z simonetripodi $
002 *
003 * Licensed to the Apache Software Foundation (ASF) under one or more
004 * contributor license agreements.  See the NOTICE file distributed with
005 * this work for additional information regarding copyright ownership.
006 * The ASF licenses this file to You under the Apache License, Version 2.0
007 * (the "License"); you may not use this file except in compliance with
008 * the License.  You may obtain a copy of the License at
009 *
010 *      http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018
019
020package org.apache.commons.digester;
021
022
023import javax.xml.parsers.DocumentBuilder;
024import javax.xml.parsers.DocumentBuilderFactory;
025import javax.xml.parsers.ParserConfigurationException;
026
027import org.w3c.dom.Attr;
028import org.w3c.dom.DOMException;
029import org.w3c.dom.Document;
030import org.w3c.dom.Element;
031import org.w3c.dom.Node;
032import org.xml.sax.Attributes;
033import org.xml.sax.ContentHandler;
034import org.xml.sax.SAXException;
035import org.xml.sax.helpers.DefaultHandler;
036
037
038/**
039 * A rule implementation that creates a DOM
040 * {@link org.w3c.dom.Node Node} containing the XML at the element that matched
041 * the rule. Two concrete types of nodes can be created by this rule:
042 * <ul>
043 *   <li>the default is to create an {@link org.w3c.dom.Element Element} node.
044 *   The created element will correspond to the element that matched the rule,
045 *   containing all XML content underneath that element.</li>
046 *   <li>alternatively, this rule can create nodes of type
047 *   {@link org.w3c.dom.DocumentFragment DocumentFragment}, which will contain
048 *   only the XML content under the element the rule was trigged on.</li>
049 * </ul>
050 * The created node will be normalized, meaning it will not contain text nodes 
051 * that only contain white space characters.
052 * 
053
054 * 
055 * <p>The created <code>Node</code> will be pushed on Digester's object stack
056 * when done. To use it in the context of another DOM
057 * {@link org.w3c.dom.Document Document}, it must be imported first, using the
058 * Document method
059 * {@link org.w3c.dom.Document#importNode(org.w3c.dom.Node, boolean) importNode()}.
060 * </p>
061 *
062 * <p><strong>Important Note:</strong> This is implemented by replacing the SAX
063 * {@link org.xml.sax.ContentHandler ContentHandler} in the parser used by 
064 * Digester, and resetting it when the matched element is closed. As a side 
065 * effect, rules that would match XML nodes under the element that matches 
066 * a <code>NodeCreateRule</code> will never be triggered by Digester, which 
067 * usually is the behavior one would expect.</p>
068 * 
069 * <p><strong>Note</strong> that the current implementation does not set the namespace prefixes
070 * in the exported nodes. The (usually more important) namespace URIs are set,
071 * of course.</p>
072 *
073 * @since Digester 1.4
074 */
075
076public class NodeCreateRule extends Rule {
077
078
079    // ---------------------------------------------------------- Inner Classes
080
081
082    /**
083     * The SAX content handler that does all the actual work of assembling the 
084     * DOM node tree from the SAX events.
085     */
086    private class NodeBuilder
087        extends DefaultHandler {
088
089
090        // ------------------------------------------------------- Constructors
091
092
093        /**
094         * Constructor.
095         * 
096         * <p>Stores the content handler currently used by Digester so it can 
097         * be reset when done, and initializes the DOM objects needed to 
098         * build the node.</p>
099         * 
100         * @param doc the document to use to create nodes
101         * @param root the root node
102         * @throws ParserConfigurationException if the DocumentBuilderFactory 
103         *   could not be instantiated
104         * @throws SAXException if the XMLReader could not be instantiated by 
105         *   Digester (should not happen)
106         */
107        public NodeBuilder(Document doc, Node root)
108            throws ParserConfigurationException, SAXException {
109
110            this.doc = doc;
111            this.root = root;
112            this.top = root;
113            
114            oldContentHandler = digester.getCustomContentHandler();
115
116        }
117
118
119        // ------------------------------------------------- Instance Variables
120
121
122        /**
123         * The content handler used by Digester before it was set to this 
124         * content handler.
125         */
126        protected ContentHandler oldContentHandler = null;
127
128
129        /**
130         * Depth of the current node, relative to the element where the content
131         * handler was put into action.
132         */
133        protected int depth = 0;
134
135
136        /**
137         * A DOM Document used to create the various Node instances.
138         */
139        protected Document doc = null;
140
141
142        /**
143         * The DOM node that will be pushed on Digester's stack.
144         */
145        protected Node root = null;
146
147
148        /**
149         * The current top DOM mode.
150         */
151        protected Node top = null;
152
153        /**
154         * The text content of the current top DOM node.
155         */
156        protected StringBuffer topText = new StringBuffer();
157
158
159        // --------------------------------------------- Helper Methods
160
161        /**
162         * Appends a {@link org.w3c.dom.Text Text} node to the current node
163         * if the content reported by the parser is not purely whitespace.
164         */
165        private void addTextIfPresent() throws SAXException {
166            if (topText.length() > 0) {
167                String str = topText.toString();
168                topText.setLength(0);
169
170                if (str.trim().length() > 0) {
171                    // The contained text is not *pure* whitespace, so create
172                    // a text node to hold it. Note that the "untrimmed" text
173                    // is stored in the node.
174                    try {
175                        top.appendChild(doc.createTextNode(str));
176                    } catch (DOMException e) {
177                        throw new SAXException(e.getMessage());
178                    }
179                }
180            }
181        }
182
183        // --------------------------------------------- ContentHandler Methods
184
185
186        /**
187         * Handle notification about text embedded within the current node.
188         * <p>
189         * An xml parser calls this when text is found. We need to ensure that this
190         * text gets attached to the new Node we are creating - except in the case
191         * where the only text in the node is whitespace.
192         * <p>
193         * There is a catch, however. According to the sax specification, a parser
194         * does not need to pass all of the text content of a node in one go; it can
195         * make multiple calls passing part of the data on each call. In particular,
196         * when the body of an element includes xml entity-references, at least some
197         * parsers make a separate call to this method to pass just the entity content.
198         * <p>
199         * In this method, we therefore just append the provided text to a
200         * "current text" buffer. When the element end is found, or a child element
201         * is found then we can check whether we have all-whitespace. See method
202         * addTextIfPresent. 
203         * 
204         * @param ch the characters from the XML document
205         * @param start the start position in the array
206         * @param length the number of characters to read from the array
207         * @throws SAXException if the DOM implementation throws an exception
208         */
209        @Override
210        public void characters(char[] ch, int start, int length)
211            throws SAXException {
212
213            topText.append(ch, start, length);
214        }
215
216
217        /**
218         * Checks whether control needs to be returned to Digester.
219         * 
220         * @param namespaceURI the namespace URI
221         * @param localName the local name
222         * @param qName the qualified (prefixed) name
223         * @throws SAXException if the DOM implementation throws an exception
224         */
225        @Override
226        public void endElement(String namespaceURI, String localName,
227                               String qName)
228            throws SAXException {
229            
230            addTextIfPresent();
231
232            try {
233                if (depth == 0) {
234                    getDigester().setCustomContentHandler(oldContentHandler);
235                    getDigester().push(root);
236                    getDigester().endElement(namespaceURI, localName, qName);
237                }
238    
239                top = top.getParentNode();
240                depth--;
241            } catch (DOMException e) {
242                throw new SAXException(e.getMessage());
243            }
244
245        }
246
247
248        /**
249         * Adds a new
250         * {@link org.w3c.dom.ProcessingInstruction ProcessingInstruction} to 
251         * the current node.
252         * 
253         * @param target the processing instruction target
254         * @param data the processing instruction data, or null if none was 
255         *   supplied
256         * @throws SAXException if the DOM implementation throws an exception
257         */
258        @Override
259        public void processingInstruction(String target, String data)
260            throws SAXException {
261            
262            try {
263                top.appendChild(doc.createProcessingInstruction(target, data));
264            } catch (DOMException e) {
265                throw new SAXException(e.getMessage());
266            }
267
268        }
269
270
271        /**
272         * Adds a new child {@link org.w3c.dom.Element Element} to the current
273         * node.
274         * 
275         * @param namespaceURI the namespace URI
276         * @param localName the local name
277         * @param qName the qualified (prefixed) name
278         * @param atts the list of attributes
279         * @throws SAXException if the DOM implementation throws an exception
280         */
281        @Override
282        public void startElement(String namespaceURI, String localName,
283                                 String qName, Attributes atts)
284            throws SAXException {
285
286            addTextIfPresent();
287
288            try {
289                Node previousTop = top;
290                if ((localName == null) || (localName.length() == 0)) { 
291                    top = doc.createElement(qName);
292                } else {
293                    top = doc.createElementNS(namespaceURI, localName);
294                }
295                for (int i = 0; i < atts.getLength(); i++) {
296                    Attr attr = null;
297                    if ((atts.getLocalName(i) == null) ||
298                        (atts.getLocalName(i).length() == 0)) {
299                        attr = doc.createAttribute(atts.getQName(i));
300                        attr.setNodeValue(atts.getValue(i));
301                        ((Element)top).setAttributeNode(attr);
302                    } else {
303                        attr = doc.createAttributeNS(atts.getURI(i),
304                                                     atts.getLocalName(i));
305                        attr.setNodeValue(atts.getValue(i));
306                        ((Element)top).setAttributeNodeNS(attr);
307                    }
308                }
309                previousTop.appendChild(top);
310                depth++;
311            } catch (DOMException e) {
312                throw new SAXException(e.getMessage());
313            }
314
315        }
316
317    }
318
319
320    // ----------------------------------------------------------- Constructors
321
322
323    /**
324     * Default constructor. Creates an instance of this rule that will create a
325     * DOM {@link org.w3c.dom.Element Element}.
326     */
327    public NodeCreateRule() throws ParserConfigurationException {
328
329        this(Node.ELEMENT_NODE);
330
331    }
332
333
334    /**
335     * Constructor. Creates an instance of this rule that will create a DOM
336     * {@link org.w3c.dom.Element Element}, but lets you specify the JAXP 
337     * <code>DocumentBuilder</code> that should be used when constructing the
338     * node tree.
339     * 
340     * @param documentBuilder the JAXP <code>DocumentBuilder</code> to use
341     */
342    public NodeCreateRule(DocumentBuilder documentBuilder) {
343
344        this(Node.ELEMENT_NODE, documentBuilder);
345
346    }
347
348
349    /**
350     * Constructor. Creates an instance of this rule that will create either a 
351     * DOM {@link org.w3c.dom.Element Element} or a DOM 
352     * {@link org.w3c.dom.DocumentFragment DocumentFragment}, depending on the
353     * value of the <code>nodeType</code> parameter.
354     * 
355     * @param nodeType the type of node to create, which can be either
356     *   {@link org.w3c.dom.Node#ELEMENT_NODE Node.ELEMENT_NODE} or 
357     *   {@link org.w3c.dom.Node#DOCUMENT_FRAGMENT_NODE Node.DOCUMENT_FRAGMENT_NODE}
358     * @throws IllegalArgumentException if the node type is not supported
359     */
360    public NodeCreateRule(int nodeType) throws ParserConfigurationException {
361
362        this(nodeType,
363             DocumentBuilderFactory.newInstance().newDocumentBuilder());
364
365    }
366
367
368    /**
369     * Constructor. Creates an instance of this rule that will create either a 
370     * DOM {@link org.w3c.dom.Element Element} or a DOM 
371     * {@link org.w3c.dom.DocumentFragment DocumentFragment}, depending on the
372     * value of the <code>nodeType</code> parameter. This constructor lets you
373     * specify the JAXP <code>DocumentBuilder</code> that should be used when
374     * constructing the node tree.
375     * 
376     * @param nodeType the type of node to create, which can be either
377     *   {@link org.w3c.dom.Node#ELEMENT_NODE Node.ELEMENT_NODE} or 
378     *   {@link org.w3c.dom.Node#DOCUMENT_FRAGMENT_NODE Node.DOCUMENT_FRAGMENT_NODE}
379     * @param documentBuilder the JAXP <code>DocumentBuilder</code> to use
380     * @throws IllegalArgumentException if the node type is not supported
381     */
382    public NodeCreateRule(int nodeType, DocumentBuilder documentBuilder) {
383
384        if (!((nodeType == Node.DOCUMENT_FRAGMENT_NODE) ||
385              (nodeType == Node.ELEMENT_NODE))) {
386            throw new IllegalArgumentException(
387                "Can only create nodes of type DocumentFragment and Element");
388        }
389        this.nodeType = nodeType;
390        this.documentBuilder = documentBuilder;
391
392    }
393
394
395    // ----------------------------------------------------- Instance Variables
396
397
398    /**
399     * The JAXP <code>DocumentBuilder</code> to use.
400     */
401    private DocumentBuilder documentBuilder = null;
402
403
404    /**
405     * The type of the node that should be created. Must be one of the
406     * constants defined in {@link org.w3c.dom.Node Node}, but currently only
407     * {@link org.w3c.dom.Node#ELEMENT_NODE Node.ELEMENT_NODE} and 
408     * {@link org.w3c.dom.Node#DOCUMENT_FRAGMENT_NODE Node.DOCUMENT_FRAGMENT_NODE}
409     * are allowed values.
410     */
411    private int nodeType = Node.ELEMENT_NODE;
412
413
414    // ----------------------------------------------------------- Rule Methods
415
416
417    /**
418     * When this method fires, the digester is told to forward all SAX
419     * ContentHandler events to the builder object, resulting in a DOM being
420     * built instead of normal digester rule-handling occurring. When the
421     * end of the current xml element is encountered, the original content 
422     * handler is restored (expected to be NULL, allowing normal Digester
423     * operations to continue).
424     * 
425     * @param namespaceURI the namespace URI of the matching element, or an 
426     *   empty string if the parser is not namespace aware or the element has
427     *   no namespace
428     * @param name the local name if the parser is namespace aware, or just 
429     *   the element name otherwise
430     * @param attributes The attribute list of this element
431     * @throws Exception indicates a JAXP configuration problem
432     */
433    @Override
434    public void begin(String namespaceURI, String name, Attributes attributes)
435        throws Exception {
436
437        Document doc = documentBuilder.newDocument();
438        NodeBuilder builder = null;
439        if (nodeType == Node.ELEMENT_NODE) {
440            Element element = null;
441            if (getDigester().getNamespaceAware()) {
442                element =
443                    doc.createElementNS(namespaceURI, name);
444                for (int i = 0; i < attributes.getLength(); i++) {
445                    element.setAttributeNS(attributes.getURI(i),
446                                           attributes.getQName(i),
447                                           attributes.getValue(i));
448                }
449            } else {
450                element = doc.createElement(name);
451                for (int i = 0; i < attributes.getLength(); i++) {
452                    element.setAttribute(attributes.getQName(i),
453                                         attributes.getValue(i));
454                }
455            }
456            builder = new NodeBuilder(doc, element);
457        } else {
458            builder = new NodeBuilder(doc, doc.createDocumentFragment());
459        }
460        // the NodeBuilder constructor has already saved the original
461        // value of the digester's custom content handler (expected to
462        // be null, but we save it just in case). So now we just
463        // need to tell the digester to forward events to the builder.
464        getDigester().setCustomContentHandler(builder);
465    }
466
467
468    /**
469     * Pop the Node off the top of the stack.
470     */
471    @Override
472    public void end() throws Exception {
473
474        digester.pop();
475
476    }
477
478
479}