001package ezvcard.util; 002 003import java.io.BufferedInputStream; 004import java.io.IOException; 005import java.io.InputStream; 006import java.io.Reader; 007import java.io.StringReader; 008import java.io.StringWriter; 009import java.io.UncheckedIOException; 010import java.io.Writer; 011import java.nio.file.Files; 012import java.nio.file.Path; 013import java.util.ArrayList; 014import java.util.HashMap; 015import java.util.List; 016import java.util.Map; 017 018import javax.xml.namespace.QName; 019import javax.xml.parsers.DocumentBuilder; 020import javax.xml.parsers.DocumentBuilderFactory; 021import javax.xml.parsers.ParserConfigurationException; 022import javax.xml.transform.Transformer; 023import javax.xml.transform.TransformerConfigurationException; 024import javax.xml.transform.TransformerException; 025import javax.xml.transform.TransformerFactory; 026import javax.xml.transform.TransformerFactoryConfigurationError; 027import javax.xml.transform.dom.DOMSource; 028import javax.xml.transform.stream.StreamResult; 029 030import org.w3c.dom.Document; 031import org.w3c.dom.Element; 032import org.w3c.dom.Node; 033import org.w3c.dom.NodeList; 034import org.xml.sax.InputSource; 035import org.xml.sax.SAXException; 036 037/* 038 Copyright (c) 2012-2023, Michael Angstadt 039 All rights reserved. 040 041 Redistribution and use in source and binary forms, with or without 042 modification, are permitted provided that the following conditions are met: 043 044 1. Redistributions of source code must retain the above copyright notice, this 045 list of conditions and the following disclaimer. 046 2. Redistributions in binary form must reproduce the above copyright notice, 047 this list of conditions and the following disclaimer in the documentation 048 and/or other materials provided with the distribution. 049 050 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 051 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 052 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 053 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 054 ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 055 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 056 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 057 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 058 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 059 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 060 061 The views and conclusions contained in the software and documentation are those 062 of the authors and should not be interpreted as representing official policies, 063 either expressed or implied, of the FreeBSD Project. 064 */ 065 066/** 067 * Generic XML utility methods. 068 * @author Michael Angstadt 069 */ 070public final class XmlUtils { 071 /** 072 * Creates a new XML document. 073 * @return the XML document 074 */ 075 public static Document createDocument() { 076 try { 077 DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); 078 DocumentBuilder builder = factory.newDocumentBuilder(); 079 return builder.newDocument(); 080 } catch (ParserConfigurationException e) { 081 //should never be thrown because we're not doing anything fancy with the configuration 082 throw new RuntimeException(e); 083 } 084 } 085 086 /** 087 * Parses an XML string into a DOM. 088 * @param xml the XML string 089 * @return the parsed DOM 090 * @throws SAXException if the string is not valid XML 091 */ 092 public static Document toDocument(String xml) throws SAXException { 093 try { 094 return toDocument(new StringReader(xml)); 095 } catch (IOException e) { 096 //should never be thrown because we're reading from a string 097 throw new UncheckedIOException(e); 098 } 099 } 100 101 /** 102 * Parses an XML document from a file. 103 * @param file the file 104 * @return the parsed DOM 105 * @throws SAXException if the XML is not valid 106 * @throws IOException if there is a problem reading from the file 107 */ 108 public static Document toDocument(Path file) throws SAXException, IOException { 109 try (InputStream in = new BufferedInputStream(Files.newInputStream(file))) { 110 return XmlUtils.toDocument(in); 111 } 112 } 113 114 /** 115 * Parses an XML document from an input stream. 116 * @param in the input stream 117 * @return the parsed DOM 118 * @throws SAXException if the XML is not valid 119 * @throws IOException if there is a problem reading from the input stream 120 */ 121 public static Document toDocument(InputStream in) throws SAXException, IOException { 122 return toDocument(new InputSource(in)); 123 } 124 125 /** 126 * <p> 127 * Parses an XML document from a reader. 128 * </p> 129 * <p> 130 * Note that use of this method is discouraged. It ignores the character 131 * encoding that is defined within the XML document itself, and should only 132 * be used if the encoding is undefined or if the encoding needs to be 133 * ignored for whatever reason. The {@link #toDocument(InputStream)} method 134 * should be used instead, since it takes the XML document's character 135 * encoding into account when parsing. 136 * </p> 137 * @param reader the reader 138 * @return the parsed DOM 139 * @throws SAXException if the XML is not valid 140 * @throws IOException if there is a problem reading from the reader 141 * @see <a 142 * href="http://stackoverflow.com/q/3482494/13379">http://stackoverflow.com/q/3482494/13379</a> 143 */ 144 public static Document toDocument(Reader reader) throws SAXException, IOException { 145 return toDocument(new InputSource(reader)); 146 } 147 148 private static Document toDocument(InputSource in) throws SAXException, IOException { 149 DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); 150 factory.setNamespaceAware(true); 151 factory.setIgnoringComments(true); 152 applyXXEProtection(factory); 153 154 DocumentBuilder builder; 155 try { 156 builder = factory.newDocumentBuilder(); 157 } catch (ParserConfigurationException e) { 158 //should never be thrown because we're not doing anything fancy with the configuration 159 throw new RuntimeException(e); 160 } 161 162 return builder.parse(in); 163 } 164 165 /** 166 * Configures a {@link DocumentBuilderFactory} to protect it against XML 167 * External Entity attacks. 168 * @param factory the factory 169 * @see <a href= 170 * "https://www.owasp.org/index.php/XML_External_Entity_%28XXE%29_Prevention_Cheat_Sheet#Java"> 171 * XXE Cheat Sheet</a> 172 */ 173 public static void applyXXEProtection(DocumentBuilderFactory factory) { 174 Map<String, Boolean> features = new HashMap<>(); 175 features.put("http://apache.org/xml/features/disallow-doctype-decl", true); 176 features.put("http://xml.org/sax/features/external-general-entities", false); 177 features.put("http://xml.org/sax/features/external-parameter-entities", false); 178 features.put("http://apache.org/xml/features/nonvalidating/load-external-dtd", false); 179 180 for (Map.Entry<String, Boolean> entry : features.entrySet()) { 181 String feature = entry.getKey(); 182 Boolean value = entry.getValue(); 183 try { 184 factory.setFeature(feature, value); 185 } catch (ParserConfigurationException e) { 186 //feature is not supported by the local XML engine, skip it 187 } 188 } 189 190 factory.setXIncludeAware(false); 191 factory.setExpandEntityReferences(false); 192 } 193 194 /** 195 * Configures a {@link TransformerFactory} to protect it against XML 196 * External Entity attacks. 197 * @param factory the factory 198 * @see <a href= 199 * "https://www.owasp.org/index.php/XML_External_Entity_%28XXE%29_Prevention_Cheat_Sheet#Java"> 200 * XXE Cheat Sheet</a> 201 */ 202 public static void applyXXEProtection(TransformerFactory factory) { 203 //@formatter:off 204 String[] attributes = { 205 //XMLConstants.ACCESS_EXTERNAL_DTD (Java 7 only) 206 "http://javax.xml.XMLConstants/property/accessExternalDTD", 207 208 //XMLConstants.ACCESS_EXTERNAL_STYLESHEET (Java 7 only) 209 "http://javax.xml.XMLConstants/property/accessExternalStylesheet" 210 }; 211 //@formatter:on 212 213 for (String attribute : attributes) { 214 try { 215 factory.setAttribute(attribute, ""); 216 } catch (IllegalArgumentException e) { 217 //attribute is not supported by the local XML engine, skip it 218 } 219 } 220 } 221 222 /** 223 * Converts an XML node to a string. 224 * @param node the XML node 225 * @return the string 226 */ 227 public static String toString(Node node) { 228 return toString(node, new HashMap<>()); 229 } 230 231 /** 232 * Converts an XML node to a string. 233 * @param node the XML node 234 * @param outputProperties the output properties 235 * @return the string 236 */ 237 public static String toString(Node node, Map<String, String> outputProperties) { 238 try { 239 StringWriter writer = new StringWriter(); 240 toWriter(node, writer, outputProperties); 241 return writer.toString(); 242 } catch (TransformerException e) { 243 //should never be thrown because we're writing to a string 244 throw new RuntimeException(e); 245 } 246 } 247 248 /** 249 * Writes an XML node to a writer. 250 * @param node the XML node 251 * @param writer the writer 252 * @throws TransformerException if there's a problem writing to the writer 253 */ 254 public static void toWriter(Node node, Writer writer) throws TransformerException { 255 toWriter(node, writer, new HashMap<>()); 256 } 257 258 /** 259 * Writes an XML node to a writer. 260 * @param node the XML node 261 * @param writer the writer 262 * @param outputProperties the output properties 263 * @throws TransformerException if there's a problem writing to the writer 264 */ 265 public static void toWriter(Node node, Writer writer, Map<String, String> outputProperties) throws TransformerException { 266 Transformer transformer; 267 try { 268 transformer = TransformerFactory.newInstance().newTransformer(); 269 } catch (TransformerConfigurationException e) { 270 //should never be thrown because we're not doing anything fancy with the configuration 271 throw new RuntimeException(e); 272 } catch (TransformerFactoryConfigurationError e) { 273 //should never be thrown because we're not doing anything fancy with the configuration 274 throw new RuntimeException(e); 275 } 276 277 assignOutputProperties(transformer, outputProperties); 278 279 DOMSource source = new DOMSource(node); 280 StreamResult result = new StreamResult(writer); 281 transformer.transform(source, result); 282 } 283 284 /** 285 * Assigns the given output properties to the given transformer, ignoring 286 * invalid output properties. 287 * @param transformer the transformer 288 * @param outputProperties the output properties 289 */ 290 public static void assignOutputProperties(Transformer transformer, Map<String, String> outputProperties) { 291 for (Map.Entry<String, String> property : outputProperties.entrySet()) { 292 try { 293 transformer.setOutputProperty(property.getKey(), property.getValue()); 294 } catch (IllegalArgumentException e) { 295 //ignore invalid output properties 296 } 297 } 298 } 299 300 /** 301 * Gets all the elements out of a {@link NodeList}. 302 * @param nodeList the node list 303 * @return the elements 304 */ 305 public static List<Element> toElementList(NodeList nodeList) { 306 List<Element> elements = new ArrayList<>(); 307 for (int i = 0; i < nodeList.getLength(); i++) { 308 Node node = nodeList.item(i); 309 if (node instanceof Element) { 310 elements.add((Element) node); 311 } 312 } 313 return elements; 314 } 315 316 /** 317 * Gets the first child element of an element. 318 * @param parent the parent element 319 * @return the first child element or null if there are no child elements 320 */ 321 public static Element getFirstChildElement(Element parent) { 322 return getFirstChildElement((Node) parent); 323 } 324 325 /** 326 * Gets the first child element of a node. 327 * @param parent the node 328 * @return the first child element or null if there are no child elements 329 */ 330 private static Element getFirstChildElement(Node parent) { 331 NodeList nodeList = parent.getChildNodes(); 332 for (int i = 0; i < nodeList.getLength(); i++) { 333 Node node = nodeList.item(i); 334 if (node instanceof Element) { 335 return (Element) node; 336 } 337 } 338 return null; 339 } 340 341 /** 342 * Determines if a node has a particular qualified name. 343 * @param node the node 344 * @param qname the qualified name 345 * @return true if the node has the given qualified name, false if not 346 */ 347 public static boolean hasQName(Node node, QName qname) { 348 return qname.getNamespaceURI().equals(node.getNamespaceURI()) && qname.getLocalPart().equals(node.getLocalName()); 349 } 350 351 private XmlUtils() { 352 //hide 353 } 354}