001/** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018 019package org.apache.oozie.util; 020 021import java.io.ByteArrayInputStream; 022import java.io.IOException; 023import java.io.InputStream; 024import java.io.StringReader; 025import java.io.StringWriter; 026import java.text.CharacterIterator; 027import java.text.StringCharacterIterator; 028import java.util.Enumeration; 029import java.util.Iterator; 030import java.util.List; 031import java.util.Map; 032import java.util.Properties; 033 034import javax.xml.XMLConstants; 035import javax.xml.parsers.DocumentBuilder; 036import javax.xml.parsers.DocumentBuilderFactory; 037import javax.xml.parsers.ParserConfigurationException; 038import javax.xml.transform.Result; 039import javax.xml.transform.Source; 040import javax.xml.transform.Transformer; 041import javax.xml.transform.TransformerFactory; 042import javax.xml.transform.dom.DOMSource; 043import javax.xml.transform.stream.StreamResult; 044import javax.xml.transform.stream.StreamSource; 045import javax.xml.validation.Schema; 046import javax.xml.validation.SchemaFactory; 047import javax.xml.validation.Validator; 048 049import org.apache.hadoop.conf.Configuration; 050import org.apache.oozie.service.SchemaService; 051import org.apache.oozie.service.SchemaService.SchemaName; 052import org.apache.oozie.service.Services; 053import org.jdom.Comment; 054import org.jdom.Document; 055import org.jdom.Element; 056import org.jdom.JDOMException; 057import org.jdom.Namespace; 058import org.jdom.input.SAXBuilder; 059import org.jdom.output.Format; 060import org.jdom.output.XMLOutputter; 061import org.xml.sax.SAXException; 062 063/** 064 * XML utility methods. 065 */ 066public class XmlUtils { 067 068 private static SAXBuilder createSAXBuilder() { 069 SAXBuilder saxBuilder = new SAXBuilder(); 070 saxBuilder.setFeature("http://apache.org/xml/features/disallow-doctype-decl",true); 071 saxBuilder.setFeature("http://xml.org/sax/features/external-general-entities", false); 072 saxBuilder.setFeature("http://xml.org/sax/features/external-parameter-entities", false); 073 saxBuilder.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false); 074 return saxBuilder; 075 } 076 077 /** 078 * Remove comments from any Xml String. 079 * 080 * @param xmlStr XML string to remove comments. 081 * @return String after removing comments. 082 * @throws JDOMException thrown if an error happend while XML parsing. 083 */ 084 public static String removeComments(String xmlStr) throws JDOMException { 085 if (xmlStr == null) { 086 return null; 087 } 088 try { 089 SAXBuilder saxBuilder = createSAXBuilder(); 090 Document document = saxBuilder.build(new StringReader(xmlStr)); 091 removeComments(document); 092 return prettyPrint(document.getRootElement()).toString(); 093 } 094 catch (IOException ex) { 095 throw new RuntimeException("It should not happen, " + ex.getMessage(), ex); 096 } 097 } 098 099 private static void removeComments(List l) { 100 for (Iterator i = l.iterator(); i.hasNext();) { 101 Object node = i.next(); 102 if (node instanceof Comment) { 103 i.remove(); 104 } 105 else { 106 if (node instanceof Element) { 107 removeComments(((Element) node).getContent()); 108 } 109 } 110 } 111 } 112 113 private static void removeComments(Document doc) { 114 removeComments(doc.getContent()); 115 } 116 117 /** 118 * Parse a string assuming it is a valid XML document and return an JDOM Element for it. 119 * 120 * @param xmlStr XML string to parse. 121 * @return JDOM element for the parsed XML string. 122 * @throws JDOMException thrown if an error happend while XML parsing. 123 */ 124 public static Element parseXml(String xmlStr) throws JDOMException { 125 ParamChecker.notNull(xmlStr, "xmlStr"); 126 try { 127 SAXBuilder saxBuilder = createSAXBuilder(); 128 Document document = saxBuilder.build(new StringReader(xmlStr)); 129 return document.getRootElement(); 130 } 131 catch (IOException ex) { 132 throw new RuntimeException("It should not happen, " + ex.getMessage(), ex); 133 } 134 } 135 136 /** 137 * Parse a inputstream assuming it is a valid XML document and return an JDOM Element for it. 138 * 139 * @param is inputstream to parse. 140 * @return JDOM element for the parsed XML string. 141 * @throws JDOMException thrown if an error happend while XML parsing. 142 * @throws IOException thrown if an IO error occurred. 143 */ 144 public static Element parseXml(InputStream is) throws JDOMException, IOException { 145 ParamChecker.notNull(is, "is"); 146 SAXBuilder saxBuilder = createSAXBuilder(); 147 Document document = saxBuilder.build(is); 148 return document.getRootElement(); 149 } 150 151 /** 152 * //TODO move this to action registry method Return the value of an attribute from the root element of an XML 153 * document. 154 * 155 * @param filePath path of the XML document. 156 * @param attributeName attribute to retrieve value for. 157 * @return value of the specified attribute. 158 */ 159 public static String getRootAttribute(String filePath, String attributeName) { 160 ParamChecker.notNull(filePath, "filePath"); 161 ParamChecker.notNull(attributeName, "attributeName"); 162 SAXBuilder saxBuilder = createSAXBuilder(); 163 try { 164 Document doc = saxBuilder.build(Thread.currentThread().getContextClassLoader().getResourceAsStream(filePath)); 165 return doc.getRootElement().getAttributeValue(attributeName); 166 } 167 catch (JDOMException e) { 168 throw new RuntimeException(); 169 } 170 catch (IOException e) { 171 throw new RuntimeException(); 172 } 173 } 174 175 /** 176 * Pretty print string representation of an XML document that generates the pretty print on lazy mode when the 177 * {@link #toString} method is invoked. 178 */ 179 public static class PrettyPrint { 180 private String str; 181 private Element element; 182 183 private PrettyPrint(String str) { 184 this.str = str; 185 } 186 187 private PrettyPrint(Element element) { 188 this.element = ParamChecker.notNull(element, "element"); 189 } 190 191 /** 192 * Return the pretty print representation of an XML document. 193 * 194 * @return the pretty print representation of an XML document. 195 */ 196 @Override 197 public String toString() { 198 if (str != null) { 199 return str; 200 } 201 else { 202 XMLOutputter outputter = new XMLOutputter(); 203 StringWriter stringWriter = new StringWriter(); 204 outputter.setFormat(Format.getPrettyFormat()); 205 try { 206 outputter.output(element, stringWriter); 207 } 208 catch (Exception ex) { 209 throw new RuntimeException(ex); 210 } 211 return stringWriter.toString(); 212 } 213 } 214 } 215 216 /** 217 * Return a pretty print string for a JDOM Element. 218 * 219 * @param element JDOM element. 220 * @return pretty print of the given JDOM Element. 221 */ 222 public static PrettyPrint prettyPrint(Element element) { 223 return new PrettyPrint(element); 224 225 } 226 227 /** 228 * Return a pretty print string for a XML string. If the given string is not valid XML it returns the original 229 * string. 230 * 231 * @param xmlStr XML string. 232 * @return prettyprint of the given XML string or the original string if the given string is not valid XML. 233 */ 234 public static PrettyPrint prettyPrint(String xmlStr) { 235 try { 236 return new PrettyPrint(parseXml(xmlStr)); 237 } 238 catch (Exception e) { 239 return new PrettyPrint(xmlStr); 240 } 241 } 242 243 /** 244 * Return a pretty print string for a Configuration object. 245 * 246 * @param conf Configuration object. 247 * @return prettyprint of the given Configuration object. 248 */ 249 public static PrettyPrint prettyPrint(Configuration conf) { 250 Element root = new Element("configuration"); 251 for (Map.Entry<String, String> entry : conf) { 252 Element property = new Element("property"); 253 Element name = new Element("name"); 254 name.setText(entry.getKey()); 255 Element value = new Element("value"); 256 value.setText(entry.getValue()); 257 property.addContent(name); 258 property.addContent(value); 259 root.addContent(property); 260 } 261 return new PrettyPrint(root); 262 } 263 264 /** 265 * Schema validation for a given xml. <p> 266 * 267 * @param schema for validation 268 * @param xml to be validated 269 * @throws SAXException in case of validation error 270 * @throws IOException in case of IO error 271 */ 272 public static void validateXml(Schema schema, String xml) throws SAXException, IOException { 273 Validator validator = SchemaService.getValidator(schema); 274 validator.validate(new StreamSource(new ByteArrayInputStream(xml.getBytes()))); 275 } 276 277 public static void validateData(String xmlData, SchemaName xsdFile) throws SAXException, IOException { 278 if (xmlData == null || xmlData.length() == 0) { 279 return; 280 } 281 javax.xml.validation.Schema schema = Services.get().get(SchemaService.class).getSchema(xsdFile); 282 validateXml(schema, xmlData); 283 } 284 285 /** 286 * Convert Properties to string 287 * 288 * @param props the properties to convert 289 * @return xml string 290 * @throws IOException if there is an error during conversion 291 */ 292 public static String writePropToString(Properties props) throws IOException { 293 try { 294 org.w3c.dom.Document doc = getDocumentBuilder().newDocument(); 295 org.w3c.dom.Element conf = doc.createElement("configuration"); 296 doc.appendChild(conf); 297 conf.appendChild(doc.createTextNode("\n")); 298 for (Enumeration e = props.keys(); e.hasMoreElements();) { 299 String name = (String) e.nextElement(); 300 Object object = props.get(name); 301 String value; 302 if (object instanceof String) { 303 value = (String) object; 304 } 305 else { 306 continue; 307 } 308 org.w3c.dom.Element propNode = doc.createElement("property"); 309 conf.appendChild(propNode); 310 311 org.w3c.dom.Element nameNode = doc.createElement("name"); 312 nameNode.appendChild(doc.createTextNode(name.trim())); 313 propNode.appendChild(nameNode); 314 315 org.w3c.dom.Element valueNode = doc.createElement("value"); 316 valueNode.appendChild(doc.createTextNode(value.trim())); 317 propNode.appendChild(valueNode); 318 319 conf.appendChild(doc.createTextNode("\n")); 320 } 321 322 Source source = new DOMSource(doc); 323 StringWriter stringWriter = new StringWriter(); 324 Result result = new StreamResult(stringWriter); 325 TransformerFactory factory = TransformerFactory.newInstance(); 326 factory.setFeature("http://javax.xml.XMLConstants/feature/secure-processing", true); 327 Transformer transformer = factory.newTransformer(); 328 transformer.transform(source, result); 329 330 return stringWriter.getBuffer().toString(); 331 } 332 catch (Exception e) { 333 throw new IOException(e); 334 } 335 } 336 337 /** 338 * Returns a DocumentBuilder 339 * @return DocumentBuilder 340 * @throws ParserConfigurationException 341 */ 342 private static DocumentBuilder getDocumentBuilder() throws ParserConfigurationException { 343 DocumentBuilderFactory docBuilderFactory = DocumentBuilderFactory.newInstance(); 344 docBuilderFactory.setNamespaceAware(true); 345 docBuilderFactory.setXIncludeAware(false); 346 docBuilderFactory.setExpandEntityReferences(false); 347 docBuilderFactory.setFeature("http://apache.org/xml/features/disallow-doctype-decl",true); 348 //Redundant with disallow-doctype, but just in case 349 docBuilderFactory.setFeature("http://xml.org/sax/features/external-general-entities", false); 350 docBuilderFactory.setFeature("http://xml.org/sax/features/external-parameter-entities", false); 351 docBuilderFactory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false); 352 // ignore all comments inside the xml file 353 docBuilderFactory.setIgnoringComments(true); 354 return docBuilderFactory.newDocumentBuilder(); 355 } 356 357 /** 358 * Escape characters for text appearing as XML data, between tags. 359 * <p> 360 * The following characters are replaced with corresponding character entities : 361 * '<' to '<' 362 * '>' to '>' 363 * '&' to '&' 364 * '"' to '"' 365 * "'" to "'" 366 * <p> 367 * Note that JSTL's {@code <c:out>} escapes the exact same set of characters as this method. 368 * 369 * @param aText the text to escape 370 * @return the escaped text 371 */ 372 public static String escapeCharsForXML(String aText) { 373 final StringBuilder result = new StringBuilder(); 374 final StringCharacterIterator iterator = new StringCharacterIterator(aText); 375 char character = iterator.current(); 376 while (character != CharacterIterator.DONE) { 377 if (character == '<') { 378 result.append("<"); 379 } 380 else if (character == '>') { 381 result.append(">"); 382 } 383 else if (character == '\"') { 384 result.append("""); 385 } 386 else if (character == '\'') { 387 result.append("'"); 388 } 389 else if (character == '&') { 390 result.append("&"); 391 } 392 else { 393 // the char is not a special one 394 // add it to the result as is 395 result.append(character); 396 } 397 character = iterator.next(); 398 } 399 return result.toString(); 400 } 401 402 public static Element getSLAElement(Element elem) { 403 Element eSla_1 = elem.getChild("info", Namespace.getNamespace(SchemaService.SLA_NAME_SPACE_URI)); 404 Element eSla_2 = elem.getChild("info", Namespace.getNamespace(SchemaService.SLA_NAMESPACE_URI_2)); 405 Element eSla = (eSla_2 != null) ? eSla_2 : eSla_1; 406 407 return eSla; 408 } 409 410}