001package biweekly.util;
002
003import java.io.BufferedInputStream;
004import java.io.File;
005import java.io.FileInputStream;
006import java.io.IOException;
007import java.io.InputStream;
008import java.io.Reader;
009import java.io.StringReader;
010import java.io.StringWriter;
011import java.io.Writer;
012import java.util.ArrayList;
013import java.util.HashMap;
014import java.util.List;
015import java.util.Map;
016
017import javax.xml.namespace.QName;
018import javax.xml.parsers.DocumentBuilder;
019import javax.xml.parsers.DocumentBuilderFactory;
020import javax.xml.parsers.ParserConfigurationException;
021import javax.xml.transform.OutputKeys;
022import javax.xml.transform.Transformer;
023import javax.xml.transform.TransformerConfigurationException;
024import javax.xml.transform.TransformerException;
025import javax.xml.transform.TransformerFactory;
026import javax.xml.transform.TransformerFactoryConfigurationError;
027import javax.xml.transform.dom.DOMSource;
028import javax.xml.transform.stream.StreamResult;
029
030import org.w3c.dom.Document;
031import org.w3c.dom.Element;
032import org.w3c.dom.Node;
033import org.w3c.dom.NodeList;
034import org.xml.sax.InputSource;
035import org.xml.sax.SAXException;
036
037/*
038 Copyright (c) 2013-2016, Michael Angstadt
039 All rights reserved.
040
041 Redistribution and use in source and binary forms, with or without
042 modification, are permitted provided that the following conditions are met: 
043
044 1. Redistributions of source code must retain the above copyright notice, this
045 list of conditions and the following disclaimer. 
046 2. Redistributions in binary form must reproduce the above copyright notice,
047 this list of conditions and the following disclaimer in the documentation
048 and/or other materials provided with the distribution. 
049
050 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
051 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
052 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
053 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
054 ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
055 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
056 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
057 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
058 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
059 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
060
061 The views and conclusions contained in the software and documentation are those
062 of the authors and should not be interpreted as representing official policies, 
063 either expressed or implied, of the FreeBSD Project.
064 */
065
/**
 * Generic XML utility methods for creating, parsing, serializing and
 * inspecting DOM trees.
 * @author Michael Angstadt
 */
public final class XmlUtils {
        /**
         * Creates a new, empty XML document using a namespace-aware document
         * builder.
         * @return the XML document
         */
        public static Document createDocument() {
                try {
                        DocumentBuilderFactory fact = DocumentBuilderFactory.newInstance();
                        fact.setNamespaceAware(true);
                        DocumentBuilder db = fact.newDocumentBuilder();
                        return db.newDocument();
                } catch (ParserConfigurationException e) {
                        //will probably never be thrown because we're not doing anything fancy with the configuration
                        throw new RuntimeException(e);
                }
        }

        /**
         * Parses an XML string into a DOM.
         * @param xml the XML string
         * @return the parsed DOM
         * @throws SAXException if the string is not valid XML
         */
        public static Document toDocument(String xml) throws SAXException {
                try {
                        return toDocument(new StringReader(xml));
                } catch (IOException e) {
                        //should never be thrown because we're reading from an in-memory string
                        throw new RuntimeException(e);
                }
        }

        /**
         * Parses an XML document from a file. The file is opened through a
         * buffered stream, which is always closed before this method returns.
         * @param file the file
         * @return the parsed DOM
         * @throws SAXException if the XML is not valid
         * @throws IOException if there is a problem reading from the file
         */
        public static Document toDocument(File file) throws SAXException, IOException {
                InputStream in = new BufferedInputStream(new FileInputStream(file));
                try {
                        return XmlUtils.toDocument(in);
                } finally {
                        in.close();
                }
        }

        /**
         * Parses an XML document from an input stream. The stream is not closed
         * by this method.
         * @param in the input stream
         * @return the parsed DOM
         * @throws SAXException if the XML is not valid
         * @throws IOException if there is a problem reading from the input stream
         */
        public static Document toDocument(InputStream in) throws SAXException, IOException {
                return toDocument(new InputSource(in));
        }

        /**
         * <p>
         * Parses an XML document from a reader.
         * </p>
         * <p>
         * Note that use of this method is discouraged. It ignores the character
         * encoding that is defined within the XML document itself, and should only
         * be used if the encoding is undefined or if the encoding needs to be
         * ignored for whatever reason. The {@link #toDocument(InputStream)} method
         * should be used instead, since it takes the XML document's character
         * encoding into account when parsing.
         * </p>
         * @param reader the reader
         * @return the parsed DOM
         * @throws SAXException if the XML is not valid
         * @throws IOException if there is a problem reading from the reader
         * @see <a
         * href="http://stackoverflow.com/q/3482494/13379">http://stackoverflow.com/q/3482494/13379</a>
         */
        public static Document toDocument(Reader reader) throws SAXException, IOException {
                return toDocument(new InputSource(reader));
        }

        /**
         * Parses an XML document from an input source. The parser is
         * namespace-aware, discards comments, and is configured with
         * {@link #applyXXEProtection(DocumentBuilderFactory)}.
         * @param in the input source
         * @return the parsed DOM
         * @throws SAXException if the XML is not valid
         * @throws IOException if there is a problem reading from the source
         */
        private static Document toDocument(InputSource in) throws SAXException, IOException {
                DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
                factory.setNamespaceAware(true);
                factory.setIgnoringComments(true);
                applyXXEProtection(factory);

                DocumentBuilder builder;
                try {
                        builder = factory.newDocumentBuilder();
                } catch (ParserConfigurationException e) {
                        //should never be thrown because we're not doing anything fancy with the configuration
                        throw new RuntimeException(e);
                }

                return builder.parse(in);
        }

        /**
         * Configures a {@link DocumentBuilderFactory} to protect it against XML
         * External Entity attacks. Features that the local XML engine does not
         * recognize are skipped.
         * @param factory the factory
         * @see <a href=
         * "https://www.owasp.org/index.php/XML_External_Entity_%28XXE%29_Prevention_Cheat_Sheet#Java">
         * XXE Cheat Sheet</a>
         */
        public static void applyXXEProtection(DocumentBuilderFactory factory) {
                Map<String, Boolean> features = new HashMap<String, Boolean>();
                features.put("http://apache.org/xml/features/disallow-doctype-decl", true);
                features.put("http://xml.org/sax/features/external-general-entities", false);
                features.put("http://xml.org/sax/features/external-parameter-entities", false);
                features.put("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);

                for (Map.Entry<String, Boolean> entry : features.entrySet()) {
                        String feature = entry.getKey();
                        Boolean value = entry.getValue();
                        try {
                                factory.setFeature(feature, value);
                        } catch (ParserConfigurationException e) {
                                //feature is not supported by the local XML engine, skip it
                        }
                }

                factory.setXIncludeAware(false);
                factory.setExpandEntityReferences(false);
        }

        /**
         * Configures a {@link TransformerFactory} to protect it against XML
         * External Entity attacks. Attributes that the local XML engine does not
         * recognize are skipped.
         * @param factory the factory
         * @see <a href=
         * "https://www.owasp.org/index.php/XML_External_Entity_%28XXE%29_Prevention_Cheat_Sheet#Java">
         * XXE Cheat Sheet</a>
         */
        public static void applyXXEProtection(TransformerFactory factory) {
                //@formatter:off
                String[] attributes = {
                        //XMLConstants.ACCESS_EXTERNAL_DTD (Java 7 only)
                        "http://javax.xml.XMLConstants/property/accessExternalDTD",

                        //XMLConstants.ACCESS_EXTERNAL_STYLESHEET (Java 7 only)
                        "http://javax.xml.XMLConstants/property/accessExternalStylesheet"
                };
                //@formatter:on

                for (String attribute : attributes) {
                        try {
                                //an empty value is assigned to each attribute
                                factory.setAttribute(attribute, "");
                        } catch (IllegalArgumentException e) {
                                //attribute is not supported by the local XML engine, skip it
                        }
                }
        }

        /**
         * Converts an XML node to a string using the transformer's default
         * output settings.
         * @param node the XML node
         * @return the string
         */
        public static String toString(Node node) {
                return toString(node, new HashMap<String, String>());
        }

        /**
         * Converts an XML node to a string.
         * @param node the XML node
         * @param prettyPrint true to pretty print (two-space indentation is
         * requested), false not to
         * @return the string
         */
        public static String toString(Node node, boolean prettyPrint) {
                Map<String, String> properties = new HashMap<String, String>();
                if (prettyPrint) {
                        properties.put(OutputKeys.INDENT, "yes");
                        properties.put("{http://xml.apache.org/xslt}indent-amount", "2");
                }
                return toString(node, properties);
        }

        /**
         * Converts an XML node to a string.
         * @param node the XML node
         * @param outputProperties the output properties to pass to the
         * transformer (unsupported properties are ignored)
         * @return the string
         */
        public static String toString(Node node, Map<String, String> outputProperties) {
                try {
                        StringWriter writer = new StringWriter();
                        toWriter(node, writer, outputProperties);
                        return writer.toString();
                } catch (TransformerException e) {
                        //should never be thrown because we're writing to string
                        throw new RuntimeException(e);
                }
        }

        /**
         * Writes an XML node to a writer using the transformer's default output
         * settings.
         * @param node the XML node
         * @param writer the writer
         * @throws TransformerException if there's a problem writing to the writer
         */
        public static void toWriter(Node node, Writer writer) throws TransformerException {
                toWriter(node, writer, new HashMap<String, String>());
        }

        /**
         * Writes an XML node to a writer.
         * @param node the XML node
         * @param writer the writer
         * @param outputProperties the output properties to pass to the
         * transformer (properties the transformer rejects are ignored)
         * @throws TransformerException if there's a problem writing to the writer
         * @throws RuntimeException if a transformer cannot be created
         */
        public static void toWriter(Node node, Writer writer, Map<String, String> outputProperties) throws TransformerException {
                Transformer transformer;
                try {
                        transformer = TransformerFactory.newInstance().newTransformer();
                } catch (TransformerConfigurationException e) {
                        /*
                         * Should never be thrown because no complex configuration is
                         * used. Fail loudly instead of returning without having
                         * written anything.
                         */
                        throw new RuntimeException(e);
                } catch (TransformerFactoryConfigurationError e) {
                        //the XML engine itself is misconfigured, nothing can be written
                        throw new RuntimeException(e);
                }

                for (Map.Entry<String, String> property : outputProperties.entrySet()) {
                        try {
                                transformer.setOutputProperty(property.getKey(), property.getValue());
                        } catch (IllegalArgumentException e) {
                                //ignore invalid output properties
                        }
                }

                DOMSource source = new DOMSource(node);
                StreamResult result = new StreamResult(writer);
                transformer.transform(source, result);
        }

        /**
         * Gets all the elements out of a {@link NodeList}, skipping every node
         * that is not an {@link Element}.
         * @param nodeList the node list
         * @return the elements, in the order they appear in the node list
         */
        public static List<Element> toElementList(NodeList nodeList) {
                List<Element> elements = new ArrayList<Element>();
                for (int i = 0; i < nodeList.getLength(); i++) {
                        Node node = nodeList.item(i);
                        if (node instanceof Element) {
                                elements.add((Element) node);
                        }
                }
                return elements;
        }

        /**
         * Gets the first child element of an element.
         * @param parent the parent element
         * @return the first child element or null if there are no child elements
         */
        public static Element getFirstChildElement(Element parent) {
                return getFirstChildElement((Node) parent);
        }

        /**
         * Gets the first child element of a node.
         * @param parent the node
         * @return the first child element or null if there are no child elements
         */
        private static Element getFirstChildElement(Node parent) {
                NodeList nodeList = parent.getChildNodes();
                for (int i = 0; i < nodeList.getLength(); i++) {
                        Node node = nodeList.item(i);
                        if (node instanceof Element) {
                                return (Element) node;
                        }
                }
                return null;
        }

        /**
         * Determines if a node has a particular qualified name. Both the
         * namespace URI and the local name must match. A node that is not in
         * any namespace matches a qualified name that has no namespace.
         * @param node the node
         * @param qname the qualified name
         * @return true if the node has the given qualified name, false if not
         */
        public static boolean hasQName(Node node, QName qname) {
                String nodeNamespace = node.getNamespaceURI();
                if (nodeNamespace == null) {
                        //DOM reports "no namespace" as null, while QName reports it as an empty string
                        nodeNamespace = "";
                }

                return qname.getNamespaceURI().equals(nodeNamespace) && qname.getLocalPart().equals(node.getLocalName());
        }

        private XmlUtils() {
                //hide
        }
}