001package ezvcard.util;
002
003import java.io.BufferedInputStream;
004import java.io.IOException;
005import java.io.InputStream;
006import java.io.Reader;
007import java.io.StringReader;
008import java.io.StringWriter;
009import java.io.UncheckedIOException;
010import java.io.Writer;
011import java.nio.file.Files;
012import java.nio.file.Path;
013import java.util.HashMap;
014import java.util.List;
015import java.util.Map;
016import java.util.stream.Collectors;
017import java.util.stream.IntStream;
018import java.util.stream.Stream;
019
020import javax.xml.namespace.QName;
021import javax.xml.parsers.DocumentBuilder;
022import javax.xml.parsers.DocumentBuilderFactory;
023import javax.xml.parsers.ParserConfigurationException;
024import javax.xml.transform.Transformer;
025import javax.xml.transform.TransformerConfigurationException;
026import javax.xml.transform.TransformerException;
027import javax.xml.transform.TransformerFactory;
028import javax.xml.transform.TransformerFactoryConfigurationError;
029import javax.xml.transform.dom.DOMSource;
030import javax.xml.transform.stream.StreamResult;
031
032import org.w3c.dom.Document;
033import org.w3c.dom.Element;
034import org.w3c.dom.NamedNodeMap;
035import org.w3c.dom.Node;
036import org.w3c.dom.NodeList;
037import org.xml.sax.InputSource;
038import org.xml.sax.SAXException;
039
040/*
041 Copyright (c) 2012-2026, Michael Angstadt
042 All rights reserved.
043
044 Redistribution and use in source and binary forms, with or without
045 modification, are permitted provided that the following conditions are met: 
046
047 1. Redistributions of source code must retain the above copyright notice, this
048 list of conditions and the following disclaimer. 
049 2. Redistributions in binary form must reproduce the above copyright notice,
050 this list of conditions and the following disclaimer in the documentation
051 and/or other materials provided with the distribution. 
052
053 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
054 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
055 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
056 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
057 ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
058 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
059 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
060 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
061 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
062 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
063
064 The views and conclusions contained in the software and documentation are those
065 of the authors and should not be interpreted as representing official policies, 
066 either expressed or implied, of the FreeBSD Project.
067 */
068
/**
 * Generic XML utility methods for creating, parsing, serializing and
 * navigating DOM trees. All methods are static; the class cannot be
 * instantiated.
 * @author Michael Angstadt
 */
public final class XmlUtils {
        /**
         * Creates a new, empty XML document.
         * @return the XML document
         */
        public static Document createDocument() {
                try {
                        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
                        DocumentBuilder builder = factory.newDocumentBuilder();
                        return builder.newDocument();
                } catch (ParserConfigurationException e) {
                        //should never be thrown because we're not doing anything fancy with the configuration
                        throw new RuntimeException(e);
                }
        }

        /**
         * Parses an XML string into a DOM.
         * @param xml the XML string
         * @return the parsed DOM
         * @throws SAXException if the string is not valid XML
         */
        public static Document toDocument(String xml) throws SAXException {
                try {
                        return toDocument(new StringReader(xml));
                } catch (IOException e) {
                        //should never be thrown because we're reading from a string
                        throw new UncheckedIOException(e);
                }
        }

        /**
         * Parses an XML document from a file.
         * @param file the file
         * @return the parsed DOM
         * @throws SAXException if the XML is not valid
         * @throws IOException if there is a problem reading from the file
         */
        public static Document toDocument(Path file) throws SAXException, IOException {
                try (InputStream in = new BufferedInputStream(Files.newInputStream(file))) {
                        return toDocument(in);
                }
        }

        /**
         * Parses an XML document from an input stream.
         * @param in the input stream
         * @return the parsed DOM
         * @throws SAXException if the XML is not valid
         * @throws IOException if there is a problem reading from the input stream
         */
        public static Document toDocument(InputStream in) throws SAXException, IOException {
                return toDocument(new InputSource(in));
        }

        /**
         * <p>
         * Parses an XML document from a reader.
         * </p>
         * <p>
         * Note that use of this method is discouraged. It ignores the character
         * encoding that is defined within the XML document itself, and should only
         * be used if the encoding is undefined or if the encoding needs to be
         * ignored for whatever reason. The {@link #toDocument(InputStream)} method
         * should be used instead, since it takes the XML document's character
         * encoding into account when parsing.
         * </p>
         * @param reader the reader
         * @return the parsed DOM
         * @throws SAXException if the XML is not valid
         * @throws IOException if there is a problem reading from the reader
         * @see <a href=
         * "http://stackoverflow.com/q/3482494/13379">http://stackoverflow.com/q/3482494/13379</a>
         */
        public static Document toDocument(Reader reader) throws SAXException, IOException {
                return toDocument(new InputSource(reader));
        }

        /**
         * Parses an XML document using a namespace-aware, comment-ignoring parser
         * that has XXE protection applied. All public {@code toDocument} overloads
         * funnel into this method.
         * @param in the source to parse
         * @return the parsed DOM
         * @throws SAXException if the XML is not valid
         * @throws IOException if there is a problem reading from the source
         */
        private static Document toDocument(InputSource in) throws SAXException, IOException {
                DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
                factory.setNamespaceAware(true);
                factory.setIgnoringComments(true);
                applyXXEProtection(factory);

                DocumentBuilder builder;
                try {
                        builder = factory.newDocumentBuilder();
                } catch (ParserConfigurationException e) {
                        //should never be thrown because we're not doing anything fancy with the configuration
                        throw new RuntimeException(e);
                }

                return builder.parse(in);
        }

        /**
         * Configures a {@link DocumentBuilderFactory} to protect it against XML
         * External Entity attacks. Features that the local XML engine does not
         * recognize are skipped.
         * @param factory the factory
         * @see <a href=
         * "https://www.owasp.org/index.php/XML_External_Entity_%28XXE%29_Prevention_Cheat_Sheet#Java">
         * XXE Cheat Sheet</a>
         */
        public static void applyXXEProtection(DocumentBuilderFactory factory) {
                setFeature(factory, "http://apache.org/xml/features/disallow-doctype-decl", true);
                setFeature(factory, "http://xml.org/sax/features/external-general-entities", false);
                setFeature(factory, "http://xml.org/sax/features/external-parameter-entities", false);
                setFeature(factory, "http://apache.org/xml/features/nonvalidating/load-external-dtd", false);

                factory.setXIncludeAware(false);
                factory.setExpandEntityReferences(false);
        }

        /**
         * Sets a feature on a {@link DocumentBuilderFactory}, doing nothing if the
         * factory rejects the feature.
         * @param factory the factory
         * @param feature the feature name
         * @param value the feature value
         */
        private static void setFeature(DocumentBuilderFactory factory, String feature, boolean value) {
                try {
                        factory.setFeature(feature, value);
                } catch (ParserConfigurationException ignored) {
                        //feature is not supported by the local XML engine, skip it
                }
        }

        /**
         * Configures a {@link TransformerFactory} to protect it against XML
         * External Entity attacks. Attributes that the local XML engine does not
         * recognize are skipped.
         * @param factory the factory
         * @see <a href=
         * "https://www.owasp.org/index.php/XML_External_Entity_%28XXE%29_Prevention_Cheat_Sheet#Java">
         * XXE Cheat Sheet</a>
         */
        public static void applyXXEProtection(TransformerFactory factory) {
                //XMLConstants.ACCESS_EXTERNAL_DTD (available since Java 7)
                setAttribute(factory, "http://javax.xml.XMLConstants/property/accessExternalDTD");

                //XMLConstants.ACCESS_EXTERNAL_SCHEMA (available since Java 7)
                setAttribute(factory, "http://javax.xml.XMLConstants/property/accessExternalSchema");

                //XMLConstants.ACCESS_EXTERNAL_STYLESHEET (available since Java 7)
                setAttribute(factory, "http://javax.xml.XMLConstants/property/accessExternalStylesheet");
        }

        /**
         * Sets an attribute on a {@link TransformerFactory} to the empty string,
         * doing nothing if the factory rejects the attribute.
         * @param factory the factory
         * @param name the attribute name
         */
        private static void setAttribute(TransformerFactory factory, String name) {
                try {
                        factory.setAttribute(name, "");
                } catch (IllegalArgumentException ignored) {
                        //attribute is not supported by the local XML engine, skip it
                }
        }

        /**
         * Converts an XML node to a string using the transformer's default output
         * properties.
         * @param node the XML node
         * @return the string
         */
        public static String toString(Node node) {
                return toString(node, new HashMap<>());
        }

        /**
         * Converts an XML node to a string.
         * @param node the XML node
         * @param outputProperties the output properties (invalid ones are
         * ignored)
         * @return the string
         */
        public static String toString(Node node, Map<String, String> outputProperties) {
                try {
                        StringWriter writer = new StringWriter();
                        toWriter(node, writer, outputProperties);
                        return writer.toString();
                } catch (TransformerException e) {
                        //should never be thrown because we're writing to a string
                        throw new RuntimeException(e);
                }
        }

        /**
         * Writes an XML node to a writer using the transformer's default output
         * properties.
         * @param node the XML node
         * @param writer the writer
         * @throws TransformerException if there's a problem writing to the writer
         */
        public static void toWriter(Node node, Writer writer) throws TransformerException {
                toWriter(node, writer, new HashMap<>());
        }

        /**
         * Writes an XML node to a writer.
         * @param node the XML node
         * @param writer the writer
         * @param outputProperties the output properties (invalid ones are
         * ignored)
         * @throws TransformerException if there's a problem writing to the writer
         */
        public static void toWriter(Node node, Writer writer, Map<String, String> outputProperties) throws TransformerException {
                Transformer transformer;
                try {
                        TransformerFactory factory = TransformerFactory.newInstance();
                        applyXXEProtection(factory);
                        transformer = factory.newTransformer();
                } catch (TransformerConfigurationException | TransformerFactoryConfigurationError e) {
                        //should never be thrown because we're not doing anything fancy with the configuration
                        throw new RuntimeException(e);
                }

                assignOutputProperties(transformer, outputProperties);

                DOMSource source = new DOMSource(node);
                StreamResult result = new StreamResult(writer);
                transformer.transform(source, result);
        }

        /**
         * Assigns the given output properties to the given transformer, ignoring
         * invalid output properties.
         * @param transformer the transformer
         * @param outputProperties the output properties
         */
        public static void assignOutputProperties(Transformer transformer, Map<String, String> outputProperties) {
                outputProperties.forEach((name, value) -> {
                        try {
                                transformer.setOutputProperty(name, value);
                        } catch (IllegalArgumentException ignored) {
                                //ignore invalid output properties
                        }
                });
        }

        /**
         * Gets all the elements out of a {@link NodeList}. Nodes that are not
         * elements (such as text nodes) are left out.
         * @param nodeList the node list
         * @return the elements
         */
        public static List<Element> toElementList(NodeList nodeList) {
                return toElementStream(nodeList).collect(Collectors.toList());
        }

        /**
         * Generates a stream of just the elements in a {@link NodeList}.
         * @param nodeList the node list
         * @return the stream
         */
        public static Stream<Element> toElementStream(NodeList nodeList) {
                //@formatter:off
                return IntStream.range(0, nodeList.getLength())
                        .mapToObj(nodeList::item)
                        .filter(Element.class::isInstance)
                        .map(Element.class::cast);
                //@formatter:on
        }

        /**
         * Allows a {@link NodeList} to be used in a for-each loop.
         * @param nodeList the node list
         * @return the iterable object
         */
        public static Iterable<Node> iterable(NodeList nodeList) {
                return () -> IntStream.range(0, nodeList.getLength()).mapToObj(nodeList::item).iterator();
        }

        /**
         * Creates a stream from a {@link NamedNodeMap}.
         * @param namedNodeMap the named node map
         * @return the stream
         */
        public static Stream<Node> stream(NamedNodeMap namedNodeMap) {
                return IntStream.range(0, namedNodeMap.getLength()).mapToObj(namedNodeMap::item);
        }

        /**
         * Gets the first child element of an element.
         * @param parent the parent element
         * @return the first child element or null if there are no child elements
         */
        public static Element getFirstChildElement(Element parent) {
                return getFirstChildElement((Node) parent);
        }

        /**
         * Gets the first child element of a node.
         * @param parent the node
         * @return the first child element or null if there are no child elements
         */
        private static Element getFirstChildElement(Node parent) {
                NodeList nodeList = parent.getChildNodes();
                return toElementStream(nodeList).findFirst().orElse(null);
        }

        /**
         * Determines if a node has a particular qualified name. A node that has
         * no namespace matches a {@link QName} that has no namespace.
         * @param node the node
         * @param qname the qualified name
         * @return true if the node has the given qualified name, false if not
         */
        public static boolean hasQName(Node node, QName qname) {
                /*
                 * QName represents "no namespace" with an empty string, while DOM
                 * nodes represent it with null. Normalize the node's value so that
                 * the two can be compared.
                 */
                String nodeNamespace = node.getNamespaceURI();
                if (nodeNamespace == null) {
                        nodeNamespace = "";
                }

                return qname.getNamespaceURI().equals(nodeNamespace) && qname.getLocalPart().equals(node.getLocalName());
        }

        private XmlUtils() {
                //hide
        }
}