001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *      http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018
019package org.apache.oozie.util;
020
021import java.io.ByteArrayInputStream;
022import java.io.IOException;
023import java.io.InputStream;
024import java.io.StringReader;
025import java.io.StringWriter;
026import java.text.CharacterIterator;
027import java.text.StringCharacterIterator;
028import java.util.Enumeration;
029import java.util.Iterator;
030import java.util.List;
031import java.util.Map;
032import java.util.Properties;
033
034import javax.xml.XMLConstants;
035import javax.xml.parsers.DocumentBuilder;
036import javax.xml.parsers.DocumentBuilderFactory;
037import javax.xml.parsers.ParserConfigurationException;
038import javax.xml.transform.Result;
039import javax.xml.transform.Source;
040import javax.xml.transform.Transformer;
041import javax.xml.transform.TransformerFactory;
042import javax.xml.transform.dom.DOMSource;
043import javax.xml.transform.stream.StreamResult;
044import javax.xml.transform.stream.StreamSource;
045import javax.xml.validation.Schema;
046import javax.xml.validation.SchemaFactory;
047import javax.xml.validation.Validator;
048
049import org.apache.hadoop.conf.Configuration;
050import org.apache.oozie.service.SchemaService;
051import org.apache.oozie.service.SchemaService.SchemaName;
052import org.apache.oozie.service.Services;
053import org.jdom.Comment;
054import org.jdom.Document;
055import org.jdom.Element;
056import org.jdom.JDOMException;
057import org.jdom.Namespace;
058import org.jdom.input.SAXBuilder;
059import org.jdom.output.Format;
060import org.jdom.output.XMLOutputter;
061import org.xml.sax.SAXException;
062
063/**
064 * XML utility methods.
065 */
066public class XmlUtils {
067
068    private static SAXBuilder createSAXBuilder() {
069        SAXBuilder saxBuilder = new SAXBuilder();
070        saxBuilder.setFeature("http://apache.org/xml/features/disallow-doctype-decl",true);
071        saxBuilder.setFeature("http://xml.org/sax/features/external-general-entities", false);
072        saxBuilder.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
073        saxBuilder.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
074        return saxBuilder;
075    }
076
077    /**
078     * Remove comments from any Xml String.
079     *
080     * @param xmlStr XML string to remove comments.
081     * @return String after removing comments.
082     * @throws JDOMException thrown if an error happend while XML parsing.
083     */
084    public static String removeComments(String xmlStr) throws JDOMException {
085        if (xmlStr == null) {
086            return null;
087        }
088        try {
089            SAXBuilder saxBuilder = createSAXBuilder();
090            Document document = saxBuilder.build(new StringReader(xmlStr));
091            removeComments(document);
092            return prettyPrint(document.getRootElement()).toString();
093        }
094        catch (IOException ex) {
095            throw new RuntimeException("It should not happen, " + ex.getMessage(), ex);
096        }
097    }
098
099    private static void removeComments(List l) {
100        for (Iterator i = l.iterator(); i.hasNext();) {
101            Object node = i.next();
102            if (node instanceof Comment) {
103                i.remove();
104            }
105            else {
106                if (node instanceof Element) {
107                    removeComments(((Element) node).getContent());
108                }
109            }
110        }
111    }
112
113    private static void removeComments(Document doc) {
114        removeComments(doc.getContent());
115    }
116
117    /**
118     * Parse a string assuming it is a valid XML document and return an JDOM Element for it.
119     *
120     * @param xmlStr XML string to parse.
121     * @return JDOM element for the parsed XML string.
122     * @throws JDOMException thrown if an error happend while XML parsing.
123     */
124    public static Element parseXml(String xmlStr) throws JDOMException {
125        ParamChecker.notNull(xmlStr, "xmlStr");
126        try {
127            SAXBuilder saxBuilder = createSAXBuilder();
128            Document document = saxBuilder.build(new StringReader(xmlStr));
129            return document.getRootElement();
130        }
131        catch (IOException ex) {
132            throw new RuntimeException("It should not happen, " + ex.getMessage(), ex);
133        }
134    }
135
136    /**
137     * Parse a inputstream assuming it is a valid XML document and return an JDOM Element for it.
138     *
139     * @param is inputstream to parse.
140     * @return JDOM element for the parsed XML string.
141     * @throws JDOMException thrown if an error happend while XML parsing.
142     * @throws IOException thrown if an IO error occurred.
143     */
144    public static Element parseXml(InputStream is) throws JDOMException, IOException {
145        ParamChecker.notNull(is, "is");
146        SAXBuilder saxBuilder = createSAXBuilder();
147        Document document = saxBuilder.build(is);
148        return document.getRootElement();
149    }
150
151    /**
152     * //TODO move this to action registry method Return the value of an attribute from the root element of an XML
153     * document.
154     *
155     * @param filePath path of the XML document.
156     * @param attributeName attribute to retrieve value for.
157     * @return value of the specified attribute.
158     */
159    public static String getRootAttribute(String filePath, String attributeName) {
160        ParamChecker.notNull(filePath, "filePath");
161        ParamChecker.notNull(attributeName, "attributeName");
162        SAXBuilder saxBuilder = createSAXBuilder();
163        try {
164            Document doc = saxBuilder.build(Thread.currentThread().getContextClassLoader().getResourceAsStream(filePath));
165            return doc.getRootElement().getAttributeValue(attributeName);
166        }
167        catch (JDOMException e) {
168            throw new RuntimeException();
169        }
170        catch (IOException e) {
171            throw new RuntimeException();
172        }
173    }
174
175    /**
176     * Pretty print string representation of an XML document that generates the pretty print on lazy mode when the
177     * {@link #toString} method is invoked.
178     */
179    public static class PrettyPrint {
180        private String str;
181        private Element element;
182
183        private PrettyPrint(String str) {
184            this.str = str;
185        }
186
187        private PrettyPrint(Element element) {
188            this.element = ParamChecker.notNull(element, "element");
189        }
190
191        /**
192         * Return the pretty print representation of an XML document.
193         *
194         * @return the pretty print representation of an XML document.
195         */
196        @Override
197        public String toString() {
198            if (str != null) {
199                return str;
200            }
201            else {
202                XMLOutputter outputter = new XMLOutputter();
203                StringWriter stringWriter = new StringWriter();
204                outputter.setFormat(Format.getPrettyFormat());
205                try {
206                    outputter.output(element, stringWriter);
207                }
208                catch (Exception ex) {
209                    throw new RuntimeException(ex);
210                }
211                return stringWriter.toString();
212            }
213        }
214    }
215
216    /**
217     * Return a pretty print string for a JDOM Element.
218     *
219     * @param element JDOM element.
220     * @return pretty print of the given JDOM Element.
221     */
222    public static PrettyPrint prettyPrint(Element element) {
223        return new PrettyPrint(element);
224
225    }
226
227    /**
228     * Return a pretty print string for a XML string. If the given string is not valid XML it returns the original
229     * string.
230     *
231     * @param xmlStr XML string.
232     * @return prettyprint of the given XML string or the original string if the given string is not valid XML.
233     */
234    public static PrettyPrint prettyPrint(String xmlStr) {
235        try {
236            return new PrettyPrint(parseXml(xmlStr));
237        }
238        catch (Exception e) {
239            return new PrettyPrint(xmlStr);
240        }
241    }
242
243    /**
244     * Return a pretty print string for a Configuration object.
245     *
246     * @param conf Configuration object.
247     * @return prettyprint of the given Configuration object.
248     */
249    public static PrettyPrint prettyPrint(Configuration conf) {
250        Element root = new Element("configuration");
251        for (Map.Entry<String, String> entry : conf) {
252            Element property = new Element("property");
253            Element name = new Element("name");
254            name.setText(entry.getKey());
255            Element value = new Element("value");
256            value.setText(entry.getValue());
257            property.addContent(name);
258            property.addContent(value);
259            root.addContent(property);
260        }
261        return new PrettyPrint(root);
262    }
263
264    /**
265     * Schema validation for a given xml. <p>
266     *
267     * @param schema for validation
268     * @param xml to be validated
269     * @throws SAXException in case of validation error
270     * @throws IOException in case of IO error
271     */
272    public static void validateXml(Schema schema, String xml) throws SAXException, IOException {
273        Validator validator = SchemaService.getValidator(schema);
274        validator.validate(new StreamSource(new ByteArrayInputStream(xml.getBytes())));
275    }
276
277    public static void validateData(String xmlData, SchemaName xsdFile) throws SAXException, IOException {
278        if (xmlData == null || xmlData.length() == 0) {
279            return;
280        }
281        javax.xml.validation.Schema schema = Services.get().get(SchemaService.class).getSchema(xsdFile);
282        validateXml(schema, xmlData);
283    }
284
285    /**
286     * Convert Properties to string
287     *
288     * @param props the properties to convert
289     * @return xml string
290     * @throws IOException if there is an error during conversion
291     */
292    public static String writePropToString(Properties props) throws IOException {
293        try {
294            org.w3c.dom.Document doc = getDocumentBuilder().newDocument();
295            org.w3c.dom.Element conf = doc.createElement("configuration");
296            doc.appendChild(conf);
297            conf.appendChild(doc.createTextNode("\n"));
298            for (Enumeration e = props.keys(); e.hasMoreElements();) {
299                String name = (String) e.nextElement();
300                Object object = props.get(name);
301                String value;
302                if (object instanceof String) {
303                    value = (String) object;
304                }
305                else {
306                    continue;
307                }
308                org.w3c.dom.Element propNode = doc.createElement("property");
309                conf.appendChild(propNode);
310
311                org.w3c.dom.Element nameNode = doc.createElement("name");
312                nameNode.appendChild(doc.createTextNode(name.trim()));
313                propNode.appendChild(nameNode);
314
315                org.w3c.dom.Element valueNode = doc.createElement("value");
316                valueNode.appendChild(doc.createTextNode(value.trim()));
317                propNode.appendChild(valueNode);
318
319                conf.appendChild(doc.createTextNode("\n"));
320            }
321
322            Source source = new DOMSource(doc);
323            StringWriter stringWriter = new StringWriter();
324            Result result = new StreamResult(stringWriter);
325            TransformerFactory factory = TransformerFactory.newInstance();
326            factory.setFeature("http://javax.xml.XMLConstants/feature/secure-processing", true);
327            Transformer transformer = factory.newTransformer();
328            transformer.transform(source, result);
329
330            return stringWriter.getBuffer().toString();
331        }
332        catch (Exception e) {
333            throw new IOException(e);
334        }
335    }
336
337    /**
338     * Returns a DocumentBuilder
339     * @return DocumentBuilder
340     * @throws ParserConfigurationException
341     */
342    private static DocumentBuilder getDocumentBuilder() throws ParserConfigurationException {
343        DocumentBuilderFactory docBuilderFactory = DocumentBuilderFactory.newInstance();
344        docBuilderFactory.setNamespaceAware(true);
345        docBuilderFactory.setXIncludeAware(false);
346        docBuilderFactory.setExpandEntityReferences(false);
347        docBuilderFactory.setFeature("http://apache.org/xml/features/disallow-doctype-decl",true);
348        //Redundant with disallow-doctype, but just in case
349        docBuilderFactory.setFeature("http://xml.org/sax/features/external-general-entities", false);
350        docBuilderFactory.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
351        docBuilderFactory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
352        // ignore all comments inside the xml file
353        docBuilderFactory.setIgnoringComments(true);
354        return docBuilderFactory.newDocumentBuilder();
355    }
356
357    /**
358     * Escape characters for text appearing as XML data, between tags.
359     * <p>
360     * The following characters are replaced with corresponding character entities :
361     * '&lt;' to '&lt;'
362     * '&gt;' to '&gt;'
363     * '&amp;' to '&amp;'
364     * '"' to '&quot;'
365     * "'" to "&#039;"
366     * <p>
367     * Note that JSTL's {@code <c:out>} escapes the exact same set of characters as this method.
368     *
369     * @param aText the text to escape
370     * @return the escaped text
371     */
372    public static String escapeCharsForXML(String aText) {
373        final StringBuilder result = new StringBuilder();
374        final StringCharacterIterator iterator = new StringCharacterIterator(aText);
375        char character = iterator.current();
376        while (character != CharacterIterator.DONE) {
377            if (character == '<') {
378                result.append("&lt;");
379            }
380            else if (character == '>') {
381                result.append("&gt;");
382            }
383            else if (character == '\"') {
384                result.append("&quot;");
385            }
386            else if (character == '\'') {
387                result.append("&#039;");
388            }
389            else if (character == '&') {
390                result.append("&amp;");
391            }
392            else {
393                // the char is not a special one
394                // add it to the result as is
395                result.append(character);
396            }
397            character = iterator.next();
398        }
399        return result.toString();
400    }
401
402    public static Element getSLAElement(Element elem) {
403        Element eSla_1 = elem.getChild("info", Namespace.getNamespace(SchemaService.SLA_NAME_SPACE_URI));
404        Element eSla_2 = elem.getChild("info", Namespace.getNamespace(SchemaService.SLA_NAMESPACE_URI_2));
405        Element eSla = (eSla_2 != null) ? eSla_2 : eSla_1;
406
407        return eSla;
408    }
409
410}