001package ezvcard.util; 002 003import java.io.BufferedInputStream; 004import java.io.IOException; 005import java.io.InputStream; 006import java.io.Reader; 007import java.io.StringReader; 008import java.io.StringWriter; 009import java.io.UncheckedIOException; 010import java.io.Writer; 011import java.nio.file.Files; 012import java.nio.file.Path; 013import java.util.HashMap; 014import java.util.List; 015import java.util.Map; 016import java.util.stream.Collectors; 017import java.util.stream.IntStream; 018import java.util.stream.Stream; 019 020import javax.xml.namespace.QName; 021import javax.xml.parsers.DocumentBuilder; 022import javax.xml.parsers.DocumentBuilderFactory; 023import javax.xml.parsers.ParserConfigurationException; 024import javax.xml.transform.Transformer; 025import javax.xml.transform.TransformerConfigurationException; 026import javax.xml.transform.TransformerException; 027import javax.xml.transform.TransformerFactory; 028import javax.xml.transform.TransformerFactoryConfigurationError; 029import javax.xml.transform.dom.DOMSource; 030import javax.xml.transform.stream.StreamResult; 031 032import org.w3c.dom.Document; 033import org.w3c.dom.Element; 034import org.w3c.dom.NamedNodeMap; 035import org.w3c.dom.Node; 036import org.w3c.dom.NodeList; 037import org.xml.sax.InputSource; 038import org.xml.sax.SAXException; 039 040/* 041 Copyright (c) 2012-2026, Michael Angstadt 042 All rights reserved. 043 044 Redistribution and use in source and binary forms, with or without 045 modification, are permitted provided that the following conditions are met: 046 047 1. Redistributions of source code must retain the above copyright notice, this 048 list of conditions and the following disclaimer. 049 2. Redistributions in binary form must reproduce the above copyright notice, 050 this list of conditions and the following disclaimer in the documentation 051 and/or other materials provided with the distribution. 
/*
 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

 The views and conclusions contained in the software and documentation are those
 of the authors and should not be interpreted as representing official policies,
 either expressed or implied, of the FreeBSD Project.
 */

/**
 * Generic XML utility methods.
 * @author Michael Angstadt
 */
public final class XmlUtils {
    /**
     * Creates a new, empty XML document.
     * @return the XML document
     */
    public static Document createDocument() {
        try {
            DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
            DocumentBuilder builder = factory.newDocumentBuilder();
            return builder.newDocument();
        } catch (ParserConfigurationException e) {
            //should never be thrown because we're not doing anything fancy with the configuration
            throw new RuntimeException(e);
        }
    }

    /**
     * Parses an XML string into a DOM.
     * @param xml the XML string
     * @return the parsed DOM
     * @throws SAXException if the string is not valid XML
     */
    public static Document toDocument(String xml) throws SAXException {
        try {
            return toDocument(new StringReader(xml));
        } catch (IOException e) {
            //should never be thrown because we're reading from a string
            throw new UncheckedIOException(e);
        }
    }

    /**
     * Parses an XML document from a file.
     * @param file the file
     * @return the parsed DOM
     * @throws SAXException if the XML is not valid
     * @throws IOException if there is a problem reading from the file
     */
    public static Document toDocument(Path file) throws SAXException, IOException {
        try (InputStream in = new BufferedInputStream(Files.newInputStream(file))) {
            return XmlUtils.toDocument(in);
        }
    }

    /**
     * Parses an XML document from an input stream.
     * @param in the input stream
     * @return the parsed DOM
     * @throws SAXException if the XML is not valid
     * @throws IOException if there is a problem reading from the input stream
     */
    public static Document toDocument(InputStream in) throws SAXException, IOException {
        return toDocument(new InputSource(in));
    }

    /**
     * <p>
     * Parses an XML document from a reader.
     * </p>
     * <p>
     * Note that use of this method is discouraged. It ignores the character
     * encoding that is defined within the XML document itself, and should only
     * be used if the encoding is undefined or if the encoding needs to be
     * ignored for whatever reason. The {@link #toDocument(InputStream)} method
     * should be used instead, since it takes the XML document's character
     * encoding into account when parsing.
     * </p>
     * @param reader the reader
     * @return the parsed DOM
     * @throws SAXException if the XML is not valid
     * @throws IOException if there is a problem reading from the reader
     * @see <a href=
     * "http://stackoverflow.com/q/3482494/13379">http://stackoverflow.com/q/3482494/13379</a>
     */
    public static Document toDocument(Reader reader) throws SAXException, IOException {
        return toDocument(new InputSource(reader));
    }

    /**
     * Parses an XML document from an input source using a namespace-aware,
     * comment-ignoring parser that has XXE protection applied.
     * @param in the input source
     * @return the parsed DOM
     * @throws SAXException if the XML is not valid
     * @throws IOException if there is a problem reading from the input source
     */
    private static Document toDocument(InputSource in) throws SAXException, IOException {
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        factory.setNamespaceAware(true);
        factory.setIgnoringComments(true);
        applyXXEProtection(factory);

        DocumentBuilder builder;
        try {
            builder = factory.newDocumentBuilder();
        } catch (ParserConfigurationException e) {
            //should never be thrown because we're not doing anything fancy with the configuration
            throw new RuntimeException(e);
        }

        return builder.parse(in);
    }

    /**
     * Configures a {@link DocumentBuilderFactory} to protect it against XML
     * External Entity attacks. Features that the local XML engine does not
     * support are skipped.
     * @param factory the factory
     * @see <a href=
     * "https://www.owasp.org/index.php/XML_External_Entity_%28XXE%29_Prevention_Cheat_Sheet#Java">
     * XXE Cheat Sheet</a>
     */
    public static void applyXXEProtection(DocumentBuilderFactory factory) {
        setFeature(factory, "http://apache.org/xml/features/disallow-doctype-decl", true);
        setFeature(factory, "http://xml.org/sax/features/external-general-entities", false);
        setFeature(factory, "http://xml.org/sax/features/external-parameter-entities", false);
        setFeature(factory, "http://apache.org/xml/features/nonvalidating/load-external-dtd", false);

        factory.setXIncludeAware(false);
        factory.setExpandEntityReferences(false);
    }

    /**
     * Sets a feature on a {@link DocumentBuilderFactory}, silently skipping
     * the feature if the factory rejects it.
     * @param factory the factory
     * @param feature the feature name
     * @param value the feature value
     */
    private static void setFeature(DocumentBuilderFactory factory, String feature, boolean value) {
        try {
            factory.setFeature(feature, value);
        } catch (ParserConfigurationException ignored) {
            //feature is not supported by the local XML engine, skip it
        }
    }

    /**
     * Configures a {@link TransformerFactory} to protect it against XML
     * External Entity attacks. Attributes that the local XML engine does not
     * support are skipped.
     * @param factory the factory
     * @see <a href=
     * "https://www.owasp.org/index.php/XML_External_Entity_%28XXE%29_Prevention_Cheat_Sheet#Java">
     * XXE Cheat Sheet</a>
     */
    public static void applyXXEProtection(TransformerFactory factory) {
        //XMLConstants.ACCESS_EXTERNAL_DTD (Java 7+)
        setAttribute(factory, "http://javax.xml.XMLConstants/property/accessExternalDTD");

        //XMLConstants.ACCESS_EXTERNAL_SCHEMA (Java 7+)
        setAttribute(factory, "http://javax.xml.XMLConstants/property/accessExternalSchema");

        //XMLConstants.ACCESS_EXTERNAL_STYLESHEET (Java 7+)
        setAttribute(factory, "http://javax.xml.XMLConstants/property/accessExternalStylesheet");
    }

    /**
     * Sets an attribute on a {@link TransformerFactory} to the empty string
     * (for the "accessExternal*" properties, an empty value denies access to
     * all external protocols), silently skipping the attribute if the factory
     * rejects it.
     * @param factory the factory
     * @param name the attribute name
     */
    private static void setAttribute(TransformerFactory factory, String name) {
        try {
            factory.setAttribute(name, "");
        } catch (IllegalArgumentException ignored) {
            //attribute is not supported by the local XML engine, skip it
        }
    }

    /**
     * Converts an XML node to a string, using the transformer's default
     * output properties.
     * @param node the XML node
     * @return the string
     */
    public static String toString(Node node) {
        return toString(node, new HashMap<>());
    }

    /**
     * Converts an XML node to a string.
     * @param node the XML node
     * @param outputProperties the output properties (invalid properties are
     * ignored)
     * @return the string
     */
    public static String toString(Node node, Map<String, String> outputProperties) {
        try {
            StringWriter writer = new StringWriter();
            toWriter(node, writer, outputProperties);
            return writer.toString();
        } catch (TransformerException e) {
            //should never be thrown because we're writing to a string
            throw new RuntimeException(e);
        }
    }

    /**
     * Writes an XML node to a writer, using the transformer's default output
     * properties.
     * @param node the XML node
     * @param writer the writer
     * @throws TransformerException if there's a problem writing to the writer
     */
    public static void toWriter(Node node, Writer writer) throws TransformerException {
        toWriter(node, writer, new HashMap<>());
    }

    /**
     * Writes an XML node to a writer.
     * @param node the XML node
     * @param writer the writer
     * @param outputProperties the output properties (invalid properties are
     * ignored)
     * @throws TransformerException if there's a problem writing to the writer
     */
    public static void toWriter(Node node, Writer writer, Map<String, String> outputProperties) throws TransformerException {
        Transformer transformer;
        try {
            TransformerFactory factory = TransformerFactory.newInstance();
            applyXXEProtection(factory);
            transformer = factory.newTransformer();
        } catch (TransformerConfigurationException | TransformerFactoryConfigurationError e) {
            //should never be thrown because we're not doing anything fancy with the configuration
            throw new RuntimeException(e);
        }

        assignOutputProperties(transformer, outputProperties);

        DOMSource source = new DOMSource(node);
        StreamResult result = new StreamResult(writer);
        transformer.transform(source, result);
    }

    /**
     * Assigns the given output properties to the given transformer, ignoring
     * invalid output properties.
     * @param transformer the transformer
     * @param outputProperties the output properties
     */
    public static void assignOutputProperties(Transformer transformer, Map<String, String> outputProperties) {
        outputProperties.forEach((name, value) -> {
            try {
                transformer.setOutputProperty(name, value);
            } catch (IllegalArgumentException ignored) {
                //ignore invalid output properties
            }
        });
    }

    /**
     * Gets all the elements out of a {@link NodeList}.
     * @param nodeList the node list
     * @return the elements
     */
    public static List<Element> toElementList(NodeList nodeList) {
        return toElementStream(nodeList).collect(Collectors.toList());
    }

    /**
     * Generates a stream of just the elements in a {@link NodeList}. Nodes
     * that are not {@link Element} instances are filtered out.
     * @param nodeList the node list
     * @return the stream
     */
    public static Stream<Element> toElementStream(NodeList nodeList) {
        //@formatter:off
        return IntStream.range(0, nodeList.getLength())
            .mapToObj(nodeList::item)
            .filter(Element.class::isInstance)
            .map(Element.class::cast);
        //@formatter:on
    }

    /**
     * Allows a {@link NodeList} to be used in a for-each loop.
     * @param nodeList the node list
     * @return the iterable object
     */
    public static Iterable<Node> iterable(NodeList nodeList) {
        return () -> IntStream.range(0, nodeList.getLength()).mapToObj(nodeList::item).iterator();
    }

    /**
     * Creates a stream from a {@link NamedNodeMap}.
     * @param namedNodeMap the named node map
     * @return the stream
     */
    public static Stream<Node> stream(NamedNodeMap namedNodeMap) {
        return IntStream.range(0, namedNodeMap.getLength()).mapToObj(namedNodeMap::item);
    }

    /**
     * Gets the first child element of an element.
     * @param parent the parent element
     * @return the first child element or null if there are no child elements
     */
    public static Element getFirstChildElement(Element parent) {
        return getFirstChildElement((Node) parent);
    }

    /**
     * Gets the first child element of a node.
     * @param parent the node
     * @return the first child element or null if there are no child elements
     */
    private static Element getFirstChildElement(Node parent) {
        NodeList nodeList = parent.getChildNodes();
        return toElementStream(nodeList).findFirst().orElse(null);
    }

    /**
     * Determines if a node has a particular qualified name. A node that has
     * no namespace matches a qualified name that has no namespace.
     * @param node the node
     * @param qname the qualified name
     * @return true if the node has the given qualified name, false if not
     */
    public static boolean hasQName(Node node, QName qname) {
        /*
         * DOM reports "no namespace" as null, whereas QName reports it as an
         * empty string (XMLConstants.NULL_NS_URI). Normalize the node's value
         * so that the two representations compare as equal.
         */
        String nodeNamespace = node.getNamespaceURI();
        if (nodeNamespace == null) {
            nodeNamespace = "";
        }

        return qname.getNamespaceURI().equals(nodeNamespace) && qname.getLocalPart().equals(node.getLocalName());
    }

    private XmlUtils() {
        //hide
    }
}