package biweekly.util;

import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.io.StringReader;
import java.io.StringWriter;
import java.io.Writer;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import javax.xml.XMLConstants;
import javax.xml.namespace.QName;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.TransformerFactoryConfigurationError;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;

import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;

/*
 Copyright (c) 2013-2016, Michael Angstadt
 All rights reserved.

 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice, this
 list of conditions and the following disclaimer.
 2. Redistributions in binary form must reproduce the above copyright notice,
 this list of conditions and the following disclaimer in the documentation
 and/or other materials provided with the distribution.

 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

 The views and conclusions contained in the software and documentation are those
 of the authors and should not be interpreted as representing official policies,
 either expressed or implied, of the FreeBSD Project.
 */

/**
 * Generic XML utility methods.
 * @author Michael Angstadt
 */
public final class XmlUtils {
    /**
     * Creates a new, empty, namespace-aware XML document.
     * @return the XML document
     */
    public static Document createDocument() {
        try {
            DocumentBuilderFactory fact = DocumentBuilderFactory.newInstance();
            fact.setNamespaceAware(true);
            DocumentBuilder db = fact.newDocumentBuilder();
            return db.newDocument();
        } catch (ParserConfigurationException e) {
            //will probably never be thrown because we're not doing anything fancy with the configuration
            throw new RuntimeException(e);
        }
    }

    /**
     * Parses an XML string into a DOM.
     * @param xml the XML string
     * @return the parsed DOM
     * @throws SAXException if the string is not valid XML
     */
    public static Document toDocument(String xml) throws SAXException {
        try {
            return toDocument(new StringReader(xml));
        } catch (IOException e) {
            //reading from string
            throw new RuntimeException(e);
        }
    }

    /**
     * Parses an XML document from a file. The file is opened and closed by
     * this method.
     * @param file the file
     * @return the parsed DOM
     * @throws SAXException if the XML is not valid
     * @throws IOException if there is a problem reading from the file
     */
    public static Document toDocument(File file) throws SAXException, IOException {
        InputStream in = new BufferedInputStream(new FileInputStream(file));
        try {
            return XmlUtils.toDocument(in);
        } finally {
            in.close();
        }
    }

    /**
     * Parses an XML document from an input stream. The stream is not closed
     * by this method.
     * @param in the input stream
     * @return the parsed DOM
     * @throws SAXException if the XML is not valid
     * @throws IOException if there is a problem reading from the input stream
     */
    public static Document toDocument(InputStream in) throws SAXException, IOException {
        return toDocument(new InputSource(in));
    }

    /**
     * <p>
     * Parses an XML document from a reader.
     * </p>
     * <p>
     * Note that use of this method is discouraged. It ignores the character
     * encoding that is defined within the XML document itself, and should only
     * be used if the encoding is undefined or if the encoding needs to be
     * ignored for whatever reason. The {@link #toDocument(InputStream)} method
     * should be used instead, since it takes the XML document's character
     * encoding into account when parsing.
     * </p>
     * @param reader the reader
     * @return the parsed DOM
     * @throws SAXException if the XML is not valid
     * @throws IOException if there is a problem reading from the reader
     * @see <a
     * href="http://stackoverflow.com/q/3482494/13379">http://stackoverflow.com/q/3482494/13379</a>
     */
    public static Document toDocument(Reader reader) throws SAXException, IOException {
        return toDocument(new InputSource(reader));
    }

    /**
     * Parses an XML document using a namespace-aware, comment-ignoring parser
     * that has been hardened with
     * {@link #applyXXEProtection(DocumentBuilderFactory)}.
     * @param in the input source
     * @return the parsed DOM
     * @throws SAXException if the XML is not valid
     * @throws IOException if there is a problem reading from the source
     */
    private static Document toDocument(InputSource in) throws SAXException, IOException {
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        factory.setNamespaceAware(true);
        factory.setIgnoringComments(true);
        applyXXEProtection(factory);

        DocumentBuilder builder;
        try {
            builder = factory.newDocumentBuilder();
        } catch (ParserConfigurationException e) {
            //should never be thrown because we're not doing anything fancy with the configuration
            throw new RuntimeException(e);
        }

        return builder.parse(in);
    }

    /**
     * Configures a {@link DocumentBuilderFactory} to protect it against XML
     * External Entity attacks. Features that the local XML engine does not
     * support are skipped.
     * @param factory the factory
     * @see <a href=
     * "https://www.owasp.org/index.php/XML_External_Entity_%28XXE%29_Prevention_Cheat_Sheet#Java">
     * XXE Cheat Sheet</a>
     */
    public static void applyXXEProtection(DocumentBuilderFactory factory) {
        Map<String, Boolean> features = new HashMap<String, Boolean>();
        features.put("http://apache.org/xml/features/disallow-doctype-decl", true);
        features.put("http://xml.org/sax/features/external-general-entities", false);
        features.put("http://xml.org/sax/features/external-parameter-entities", false);
        features.put("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);

        for (Map.Entry<String, Boolean> entry : features.entrySet()) {
            String feature = entry.getKey();
            Boolean value = entry.getValue();
            try {
                factory.setFeature(feature, value);
            } catch (ParserConfigurationException e) {
                //feature is not supported by the local XML engine, skip it
            }
        }

        factory.setXIncludeAware(false);
        factory.setExpandEntityReferences(false);
    }

    /**
     * Configures a {@link TransformerFactory} to protect it against XML
     * External Entity attacks. Attributes that the local XML engine does not
     * support are skipped.
     * @param factory the factory
     * @see <a href=
     * "https://www.owasp.org/index.php/XML_External_Entity_%28XXE%29_Prevention_Cheat_Sheet#Java">
     * XXE Cheat Sheet</a>
     */
    public static void applyXXEProtection(TransformerFactory factory) {
        //@formatter:off
        String[] attributes = {
            //XMLConstants.ACCESS_EXTERNAL_DTD (Java 7 only)
            "http://javax.xml.XMLConstants/property/accessExternalDTD",

            //XMLConstants.ACCESS_EXTERNAL_STYLESHEET (Java 7 only)
            "http://javax.xml.XMLConstants/property/accessExternalStylesheet"
        };
        //@formatter:on

        for (String attribute : attributes) {
            try {
                //an empty value is assigned to each attribute
                factory.setAttribute(attribute, "");
            } catch (IllegalArgumentException e) {
                //attribute is not supported by the local XML engine, skip it
            }
        }
    }

    /**
     * Converts an XML node to a string, using the transformer's default
     * output properties.
     * @param node the XML node
     * @return the string
     */
    public static String toString(Node node) {
        return toString(node, new HashMap<String, String>());
    }

    /**
     * Converts an XML node to a string.
     * @param node the XML node
     * @param prettyPrint true to pretty print, false not to
     * @return the string
     */
    public static String toString(Node node, boolean prettyPrint) {
        Map<String, String> properties = new HashMap<String, String>();
        if (prettyPrint) {
            properties.put(OutputKeys.INDENT, "yes");
            properties.put("{http://xml.apache.org/xslt}indent-amount", "2");
        }
        return toString(node, properties);
    }

    /**
     * Converts an XML node to a string.
     * @param node the XML node
     * @param outputProperties the output properties
     * @return the string
     * @throws RuntimeException if the node could not be serialized
     */
    public static String toString(Node node, Map<String, String> outputProperties) {
        try {
            StringWriter writer = new StringWriter();
            toWriter(node, writer, outputProperties);
            return writer.toString();
        } catch (TransformerException e) {
            //should never be thrown because we're writing to string
            throw new RuntimeException(e);
        }
    }

    /**
     * Writes an XML node to a writer, using the transformer's default output
     * properties.
     * @param node the XML node
     * @param writer the writer
     * @throws TransformerException if there's a problem writing to the writer
     */
    public static void toWriter(Node node, Writer writer) throws TransformerException {
        toWriter(node, writer, new HashMap<String, String>());
    }

    /**
     * Writes an XML node to a writer.
     * @param node the XML node
     * @param writer the writer
     * @param outputProperties the output properties (properties that the
     * transformer rejects are ignored)
     * @throws TransformerException if a transformer cannot be created or if
     * there's a problem writing to the writer
     */
    public static void toWriter(Node node, Writer writer, Map<String, String> outputProperties) throws TransformerException {
        Transformer transformer;
        try {
            //a TransformerConfigurationException is itself a TransformerException,
            //so it is simply allowed to propagate to the caller
            transformer = TransformerFactory.newInstance().newTransformer();
        } catch (TransformerFactoryConfigurationError e) {
            //report the failure instead of silently writing nothing
            throw new TransformerException("Unable to create an XML transformer.", e);
        }

        for (Map.Entry<String, String> property : outputProperties.entrySet()) {
            try {
                transformer.setOutputProperty(property.getKey(), property.getValue());
            } catch (IllegalArgumentException e) {
                //ignore invalid output properties
            }
        }

        DOMSource source = new DOMSource(node);
        StreamResult result = new StreamResult(writer);
        transformer.transform(source, result);
    }

    /**
     * Gets all the elements out of a {@link NodeList}. Nodes that are not
     * elements are left out.
     * @param nodeList the node list
     * @return the elements, in the order they appear in the node list
     */
    public static List<Element> toElementList(NodeList nodeList) {
        List<Element> elements = new ArrayList<Element>();
        for (int i = 0; i < nodeList.getLength(); i++) {
            Node node = nodeList.item(i);
            if (node instanceof Element) {
                elements.add((Element) node);
            }
        }
        return elements;
    }

    /**
     * Gets the first child element of an element.
     * @param parent the parent element
     * @return the first child element or null if there are no child elements
     */
    public static Element getFirstChildElement(Element parent) {
        return getFirstChildElement((Node) parent);
    }

    /**
     * Gets the first child element of a node.
     * @param parent the node
     * @return the first child element or null if there are no child elements
     */
    private static Element getFirstChildElement(Node parent) {
        NodeList nodeList = parent.getChildNodes();
        for (int i = 0; i < nodeList.getLength(); i++) {
            Node node = nodeList.item(i);
            if (node instanceof Element) {
                return (Element) node;
            }
        }
        return null;
    }

    /**
     * Determines if a node has a particular qualified name. A node without a
     * namespace matches a qualified name without a namespace.
     * @param node the node
     * @param qname the qualified name
     * @return true if the node has the given qualified name, false if not
     */
    public static boolean hasQName(Node node, QName qname) {
        //the DOM reports "no namespace" as null, while QName reports it as an empty string
        String nodeNamespace = node.getNamespaceURI();
        if (nodeNamespace == null) {
            nodeNamespace = XMLConstants.NULL_NS_URI;
        }

        return qname.getNamespaceURI().equals(nodeNamespace) && qname.getLocalPart().equals(node.getLocalName());
    }

    private XmlUtils() {
        //hide
    }
}