001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *      http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018
019package org.apache.oozie.command.coord;
020
021import java.io.IOException;
022import java.io.InputStreamReader;
023import java.io.Reader;
024import java.io.StringReader;
025import java.io.StringWriter;
026import java.net.URI;
027import java.net.URISyntaxException;
028import java.util.ArrayList;
029import java.util.Calendar;
030import java.util.Date;
031import java.util.HashMap;
032import java.util.HashSet;
033import java.util.Iterator;
034import java.util.List;
035import java.util.Map;
036import java.util.Set;
037import java.util.TreeSet;
038
039import javax.xml.transform.stream.StreamSource;
040import javax.xml.validation.Validator;
041
042import org.apache.hadoop.conf.Configuration;
043import org.apache.hadoop.fs.FileSystem;
044import org.apache.hadoop.fs.Path;
045import org.apache.oozie.CoordinatorJobBean;
046import org.apache.oozie.ErrorCode;
047import org.apache.oozie.client.CoordinatorJob;
048import org.apache.oozie.client.Job;
049import org.apache.oozie.client.OozieClient;
050import org.apache.oozie.client.CoordinatorJob.Execution;
051import org.apache.oozie.command.CommandException;
052import org.apache.oozie.command.SubmitTransitionXCommand;
053import org.apache.oozie.command.bundle.BundleStatusUpdateXCommand;
054import org.apache.oozie.coord.CoordELEvaluator;
055import org.apache.oozie.coord.CoordELFunctions;
056import org.apache.oozie.coord.CoordUtils;
057import org.apache.oozie.coord.CoordinatorJobException;
058import org.apache.oozie.coord.TimeUnit;
059import org.apache.oozie.coord.input.logic.CoordInputLogicEvaluator;
060import org.apache.oozie.executor.jpa.CoordJobQueryExecutor;
061import org.apache.oozie.executor.jpa.JPAExecutorException;
062import org.apache.oozie.service.CoordMaterializeTriggerService;
063import org.apache.oozie.service.ConfigurationService;
064import org.apache.oozie.service.HadoopAccessorException;
065import org.apache.oozie.service.HadoopAccessorService;
066import org.apache.oozie.service.JPAService;
067import org.apache.oozie.service.SchemaService;
068import org.apache.oozie.service.Service;
069import org.apache.oozie.service.Services;
070import org.apache.oozie.service.UUIDService;
071import org.apache.oozie.service.SchemaService.SchemaName;
072import org.apache.oozie.service.UUIDService.ApplicationType;
073import org.apache.oozie.util.ConfigUtils;
074import org.apache.oozie.util.DateUtils;
075import org.apache.oozie.util.ELEvaluator;
076import org.apache.oozie.util.ELUtils;
077import org.apache.oozie.util.IOUtils;
078import org.apache.oozie.util.InstrumentUtils;
079import org.apache.oozie.util.LogUtils;
080import org.apache.oozie.util.ParamChecker;
081import org.apache.oozie.util.ParameterVerifier;
082import org.apache.oozie.util.ParameterVerifierException;
083import org.apache.oozie.util.PropertiesUtils;
084import org.apache.oozie.util.XConfiguration;
085import org.apache.oozie.util.XmlUtils;
086import org.jdom.Attribute;
087import org.jdom.Element;
088import org.jdom.JDOMException;
089import org.jdom.Namespace;
090import org.xml.sax.SAXException;
091
092/**
093 * This class provides the functionalities to resolve a coordinator job XML and write the job information into a DB
094 * table.
095 * <p>
096 * Specifically it performs the following functions: 1. Resolve all the variables or properties using job
097 * configurations. 2. Insert all datasets definition as part of the &lt;data-in&gt; and &lt;data-out&gt; tags. 3. Validate the XML
098 * at runtime.
099 */
100public class CoordSubmitXCommand extends SubmitTransitionXCommand {
101
102    protected Configuration conf;
103    protected final String bundleId;
104    protected final String coordName;
105    protected boolean dryrun;
106    protected JPAService jpaService = null;
107    private CoordinatorJob.Status prevStatus = CoordinatorJob.Status.PREP;
108
109    public static final String CONFIG_DEFAULT = "coord-config-default.xml";
110    public static final String COORDINATOR_XML_FILE = "coordinator.xml";
111    public final String COORD_INPUT_EVENTS ="input-events";
112    public final String COORD_OUTPUT_EVENTS = "output-events";
113    public final String COORD_INPUT_EVENTS_DATA_IN ="data-in";
114    public final String COORD_OUTPUT_EVENTS_DATA_OUT = "data-out";
115
116    private static final Set<String> DISALLOWED_USER_PROPERTIES = new HashSet<String>();
117    private static final Set<String> DISALLOWED_DEFAULT_PROPERTIES = new HashSet<String>();
118
119    protected CoordinatorJobBean coordJob = null;
120    /**
121     * Default timeout for normal jobs, in minutes, after which coordinator input check will timeout
122     */
123    public static final String CONF_DEFAULT_TIMEOUT_NORMAL = Service.CONF_PREFIX + "coord.normal.default.timeout";
124
125    public static final String CONF_DEFAULT_CONCURRENCY = Service.CONF_PREFIX + "coord.default.concurrency";
126
127    public static final String CONF_DEFAULT_THROTTLE = Service.CONF_PREFIX + "coord.default.throttle";
128
129    public static final String CONF_MAT_THROTTLING_FACTOR = Service.CONF_PREFIX
130            + "coord.materialization.throttling.factor";
131
132    /**
133     * Default MAX timeout in minutes, after which coordinator input check will timeout
134     */
135    public static final String CONF_DEFAULT_MAX_TIMEOUT = Service.CONF_PREFIX + "coord.default.max.timeout";
136
137    public static final String CONF_QUEUE_SIZE = Service.CONF_PREFIX + "CallableQueueService.queue.size";
138
139    public static final String CONF_CHECK_MAX_FREQUENCY = Service.CONF_PREFIX + "coord.check.maximum.frequency";
140
141    private ELEvaluator evalFreq = null;
142    private ELEvaluator evalNofuncs = null;
143    private ELEvaluator evalData = null;
144    private ELEvaluator evalInst = null;
145    private ELEvaluator evalAction = null;
146    private ELEvaluator evalSla = null;
147    private ELEvaluator evalTimeout = null;
148    private ELEvaluator evalInitialInstance = null;
149
150    static {
151        String[] badUserProps = { PropertiesUtils.YEAR, PropertiesUtils.MONTH, PropertiesUtils.DAY,
152                PropertiesUtils.HOUR, PropertiesUtils.MINUTE, PropertiesUtils.DAYS, PropertiesUtils.HOURS,
153                PropertiesUtils.MINUTES, PropertiesUtils.KB, PropertiesUtils.MB, PropertiesUtils.GB,
154                PropertiesUtils.TB, PropertiesUtils.PB, PropertiesUtils.RECORDS, PropertiesUtils.MAP_IN,
155                PropertiesUtils.MAP_OUT, PropertiesUtils.REDUCE_IN, PropertiesUtils.REDUCE_OUT, PropertiesUtils.GROUPS };
156        PropertiesUtils.createPropertySet(badUserProps, DISALLOWED_USER_PROPERTIES);
157
158        String[] badDefaultProps = { PropertiesUtils.HADOOP_USER};
159        PropertiesUtils.createPropertySet(badUserProps, DISALLOWED_DEFAULT_PROPERTIES);
160        PropertiesUtils.createPropertySet(badDefaultProps, DISALLOWED_DEFAULT_PROPERTIES);
161    }
162
163    /**
164     * Constructor to create the Coordinator Submit Command.
165     *
166     * @param conf : Configuration for Coordinator job
167     */
168    public CoordSubmitXCommand(Configuration conf) {
169        super("coord_submit", "coord_submit", 1);
170        this.conf = ParamChecker.notNull(conf, "conf");
171        this.bundleId = null;
172        this.coordName = null;
173    }
174
175    /**
176     * Constructor to create the Coordinator Submit Command by bundle job.
177     *
178     * @param conf : Configuration for Coordinator job
179     * @param bundleId : bundle id
180     * @param coordName : coord name
181     */
182    protected CoordSubmitXCommand(Configuration conf, String bundleId, String coordName) {
183        super("coord_submit", "coord_submit", 1);
184        this.conf = ParamChecker.notNull(conf, "conf");
185        this.bundleId = ParamChecker.notEmpty(bundleId, "bundleId");
186        this.coordName = ParamChecker.notEmpty(coordName, "coordName");
187    }
188
189    /**
190     * Constructor to create the Coordinator Submit Command.
191     *
192     * @param dryrun : if dryrun
193     * @param conf : Configuration for Coordinator job
194     */
195    public CoordSubmitXCommand(boolean dryrun, Configuration conf) {
196        this(conf);
197        this.dryrun = dryrun;
198    }
199
200    /* (non-Javadoc)
201     * @see org.apache.oozie.command.XCommand#execute()
202     */
203    @Override
204    protected String submit() throws CommandException {
205        LOG.info("STARTED Coordinator Submit");
206        String jobId = submitJob();
207        LOG.info("ENDED Coordinator Submit jobId=" + jobId);
208        return jobId;
209    }
210
211    protected String submitJob() throws CommandException {
212        String jobId = null;
213        InstrumentUtils.incrJobCounter(getName(), 1, getInstrumentation());
214
215        boolean exceptionOccured = false;
216        try {
217            mergeDefaultConfig();
218
219            String appXml = readAndValidateXml();
220            coordJob.setOrigJobXml(appXml);
221            LOG.debug("jobXml after initial validation " + XmlUtils.prettyPrint(appXml).toString());
222
223            Element eXml = XmlUtils.parseXml(appXml);
224
225            String appNamespace = readAppNamespace(eXml);
226            coordJob.setAppNamespace(appNamespace);
227
228            ParameterVerifier.verifyParameters(conf, eXml);
229
230            appXml = XmlUtils.removeComments(appXml);
231            initEvaluators();
232            Element eJob = basicResolveAndIncludeDS(appXml, conf, coordJob);
233
234            validateCoordinatorJob();
235
236            // checking if the coordinator application data input/output events
237            // specify multiple data instance values in erroneous manner
238            checkMultipleTimeInstances(eJob, COORD_INPUT_EVENTS, COORD_INPUT_EVENTS_DATA_IN);
239            checkMultipleTimeInstances(eJob, COORD_OUTPUT_EVENTS, COORD_OUTPUT_EVENTS_DATA_OUT);
240
241            LOG.debug("jobXml after all validation " + XmlUtils.prettyPrint(eJob).toString());
242
243            jobId = storeToDB(appXml, eJob, coordJob);
244            // log job info for coordinator job
245            LogUtils.setLogInfo(coordJob);
246
247            if (!dryrun) {
248                queueMaterializeTransitionXCommand(jobId);
249            }
250            else {
251                return getDryRun(coordJob);
252            }
253        }
254        catch (JDOMException jex) {
255            exceptionOccured = true;
256            LOG.warn("ERROR: ", jex);
257            throw new CommandException(ErrorCode.E0700, jex.getMessage(), jex);
258        }
259        catch (CoordinatorJobException cex) {
260            exceptionOccured = true;
261            LOG.warn("ERROR:  ", cex);
262            throw new CommandException(cex);
263        }
264        catch (ParameterVerifierException pex) {
265            exceptionOccured = true;
266            LOG.warn("ERROR: ", pex);
267            throw new CommandException(pex);
268        }
269        catch (IllegalArgumentException iex) {
270            exceptionOccured = true;
271            LOG.warn("ERROR:  ", iex);
272            throw new CommandException(ErrorCode.E1003, iex.getMessage(), iex);
273        }
274        catch (Exception ex) {
275            exceptionOccured = true;
276            LOG.warn("ERROR:  ", ex);
277            throw new CommandException(ErrorCode.E0803, ex.getMessage(), ex);
278        }
279        finally {
280            if (exceptionOccured) {
281                if (coordJob.getId() == null || coordJob.getId().equalsIgnoreCase("")) {
282                    coordJob.setStatus(CoordinatorJob.Status.FAILED);
283                    coordJob.resetPending();
284                }
285            }
286        }
287        return jobId;
288    }
289
290    /**
291     * Gets the dryrun output.
292     *
293     * @param coordJob the coordinatorJobBean
294     * @return the dry run
295     * @throws Exception the exception
296     */
297    protected String getDryRun(CoordinatorJobBean coordJob) throws Exception{
298        int materializationWindow = ConfigurationService
299                .getInt(CoordMaterializeTriggerService.CONF_MATERIALIZATION_WINDOW);
300        Date startTime = coordJob.getStartTime();
301        long startTimeMilli = startTime.getTime();
302        long endTimeMilli = startTimeMilli + (materializationWindow * 1000);
303        Date jobEndTime = coordJob.getEndTime();
304        Date endTime = new Date(endTimeMilli);
305        if (endTime.compareTo(jobEndTime) > 0) {
306            endTime = jobEndTime;
307        }
308        String jobId = coordJob.getId();
309        LOG.info("[" + jobId + "]: Update status to RUNNING");
310        coordJob.setStatus(Job.Status.RUNNING);
311        coordJob.setPending();
312        Configuration jobConf = null;
313        try {
314            jobConf = new XConfiguration(new StringReader(coordJob.getConf()));
315        }
316        catch (IOException e1) {
317            LOG.warn("Configuration parse error. read from DB :" + coordJob.getConf(), e1);
318        }
319        String action = new CoordMaterializeTransitionXCommand(coordJob, materializationWindow, startTime,
320                endTime).materializeActions(true);
321        String output = coordJob.getJobXml() + System.getProperty("line.separator")
322        + "***actions for instance***" + action;
323        return output;
324    }
325
326    /**
327     * Queue MaterializeTransitionXCommand
328     */
329    protected void queueMaterializeTransitionXCommand(String jobId) {
330        int materializationWindow = ConfigurationService
331                .getInt(CoordMaterializeTriggerService.CONF_MATERIALIZATION_WINDOW);
332        queue(new CoordMaterializeTransitionXCommand(jobId, materializationWindow), 100);
333    }
334
335    /**
336     * Method that validates values in the definition for correctness. Placeholder to add more.
337     */
338    private void validateCoordinatorJob() throws Exception {
339        // check if startTime < endTime
340        if (!coordJob.getStartTime().before(coordJob.getEndTime())) {
341            throw new IllegalArgumentException("Coordinator Start Time must be earlier than End Time.");
342        }
343
344        try {
345            // Check if a coord job with cron frequency will materialize actions
346            int freq = Integer.parseInt(coordJob.getFrequency());
347
348            // Check if the frequency is faster than 5 min if enabled
349            if (ConfigurationService.getBoolean(CONF_CHECK_MAX_FREQUENCY)) {
350                CoordinatorJob.Timeunit unit = coordJob.getTimeUnit();
351                if (freq == 0 || (freq < 5 && unit == CoordinatorJob.Timeunit.MINUTE)) {
352                    throw new IllegalArgumentException("Coordinator job with frequency [" + freq +
353                            "] minutes is faster than allowed maximum of 5 minutes ("
354                            + CONF_CHECK_MAX_FREQUENCY + " is set to true)");
355                }
356            }
357        } catch (NumberFormatException e) {
358            Date start = coordJob.getStartTime();
359            Calendar cal = Calendar.getInstance();
360            cal.setTime(start);
361            cal.add(Calendar.MINUTE, -1);
362            start = cal.getTime();
363
364            Date nextTime = CoordCommandUtils.getNextValidActionTimeForCronFrequency(start, coordJob);
365            if (nextTime == null) {
366                throw new IllegalArgumentException("Invalid coordinator cron frequency: " + coordJob.getFrequency());
367            }
368            if (!nextTime.before(coordJob.getEndTime())) {
369                throw new IllegalArgumentException("Coordinator job with frequency '" +
370                        coordJob.getFrequency() + "' materializes no actions between start and end time.");
371            }
372        }
373    }
374
375  /*
376  * Check against multiple data instance values inside a single <instance> <start-instance> or <end-instance> tag
377  * If found, the job is not submitted and user is informed to correct the error,
378  *  instead of defaulting to the first instance value in the list
379  */
380    private void checkMultipleTimeInstances(Element eCoordJob, String eventType, String dataType) throws CoordinatorJobException {
381        Element eventsSpec, dataSpec, instance;
382        List<Element> instanceSpecList;
383        Namespace ns = eCoordJob.getNamespace();
384        String instanceValue;
385        eventsSpec = eCoordJob.getChild(eventType, ns);
386        if (eventsSpec != null) {
387            dataSpec = eventsSpec.getChild(dataType, ns);
388            if (dataSpec != null) {
389                // In case of input-events, there can be multiple child <instance> datasets.
390                // Iterating to ensure none of them have errors
391                instanceSpecList = dataSpec.getChildren("instance", ns);
392                Iterator instanceIter = instanceSpecList.iterator();
393                while(instanceIter.hasNext()) {
394                    instance = ((Element) instanceIter.next());
395                    if(instance.getContentSize() == 0) { //empty string or whitespace
396                        throw new CoordinatorJobException(ErrorCode.E1021, "<instance> tag within " + eventType + " is empty!");
397                    }
398                    instanceValue = instance.getContent(0).toString();
399                    boolean isInvalid = false;
400                    try {
401                        isInvalid = evalAction.checkForExistence(instanceValue, ",");
402                    } catch (Exception e) {
403                        handleELParseException(eventType, dataType, instanceValue);
404                    }
405                    if (isInvalid) { // reaching this block implies instance is not empty i.e. length > 0
406                        handleExpresionWithMultipleInstances(eventType, dataType, instanceValue);
407                    }
408                }
409
410                // In case of input-events, there can be multiple child <start-instance> datasets.
411                // Iterating to ensure none of them have errors
412                instanceSpecList = dataSpec.getChildren("start-instance", ns);
413                instanceIter = instanceSpecList.iterator();
414                while(instanceIter.hasNext()) {
415                    instance = ((Element) instanceIter.next());
416                    if(instance.getContentSize() == 0) { //empty string or whitespace
417                        throw new CoordinatorJobException(ErrorCode.E1021, "<start-instance> tag within " + eventType
418                                + " is empty!");
419                    }
420                    instanceValue = instance.getContent(0).toString();
421                    boolean isInvalid = false;
422                    try {
423                        isInvalid = evalAction.checkForExistence(instanceValue, ",");
424                    } catch (Exception e) {
425                        handleELParseException(eventType, dataType, instanceValue);
426                    }
427                    if (isInvalid) { // reaching this block implies start instance is not empty i.e. length > 0
428                        handleExpresionWithStartMultipleInstances(eventType, dataType, instanceValue);
429                    }
430                }
431
432                // In case of input-events, there can be multiple child <end-instance> datasets.
433                // Iterating to ensure none of them have errors
434                instanceSpecList = dataSpec.getChildren("end-instance", ns);
435                instanceIter = instanceSpecList.iterator();
436                while(instanceIter.hasNext()) {
437                    instance = ((Element) instanceIter.next());
438                    if(instance.getContentSize() == 0) { //empty string or whitespace
439                        throw new CoordinatorJobException(ErrorCode.E1021, "<end-instance> tag within " + eventType + " is empty!");
440                    }
441                    instanceValue = instance.getContent(0).toString();
442                    boolean isInvalid = false;
443                    try {
444                        isInvalid = evalAction.checkForExistence(instanceValue, ",");
445                    } catch (Exception e) {
446                        handleELParseException(eventType, dataType, instanceValue);
447                    }
448                    if (isInvalid) { // reaching this block implies instance is not empty i.e. length > 0
449                        handleExpresionWithMultipleEndInstances(eventType, dataType, instanceValue);
450                    }
451                }
452
453            }
454        }
455    }
456
457    private void handleELParseException(String eventType, String dataType, String instanceValue)
458            throws CoordinatorJobException {
459        String correctAction = null;
460        if(dataType.equals(COORD_INPUT_EVENTS_DATA_IN)) {
461            correctAction = "Coordinator app definition should have valid <instance> tag for data-in";
462        } else if(dataType.equals(COORD_OUTPUT_EVENTS_DATA_OUT)) {
463            correctAction = "Coordinator app definition should have valid <instance> tag for data-out";
464        }
465        throw new CoordinatorJobException(ErrorCode.E1021, eventType + " instance '" + instanceValue
466                + "' is not valid. Coordinator job NOT SUBMITTED. " + correctAction);
467    }
468
469    private void handleExpresionWithMultipleInstances(String eventType, String dataType, String instanceValue)
470            throws CoordinatorJobException {
471        String correctAction = null;
472        if(dataType.equals(COORD_INPUT_EVENTS_DATA_IN)) {
473            correctAction = "Coordinator app definition should have separate <instance> tag per data-in instance";
474        } else if(dataType.equals(COORD_OUTPUT_EVENTS_DATA_OUT)) {
475            correctAction = "Coordinator app definition can have only one <instance> tag per data-out instance";
476        }
477        throw new CoordinatorJobException(ErrorCode.E1021, eventType + " instance '" + instanceValue
478                + "' contains more than one date instance. Coordinator job NOT SUBMITTED. " + correctAction);
479    }
480
481    private void handleExpresionWithStartMultipleInstances(String eventType, String dataType, String instanceValue)
482            throws CoordinatorJobException {
483        String correctAction = "Coordinator app definition should not have multiple start-instances";
484        throw new CoordinatorJobException(ErrorCode.E1021, eventType + " start-instance '" + instanceValue
485                + "' contains more than one date start-instance. Coordinator job NOT SUBMITTED. " + correctAction);
486    }
487
488    private void handleExpresionWithMultipleEndInstances(String eventType, String dataType, String instanceValue)
489            throws CoordinatorJobException {
490        String correctAction = "Coordinator app definition should not have multiple end-instances";
491        throw new CoordinatorJobException(ErrorCode.E1021, eventType + " end-instance '" + instanceValue
492                + "' contains more than one date end-instance. Coordinator job NOT SUBMITTED. " + correctAction);
493    }
494    /**
495     * Read the application XML and validate against coordinator Schema
496     *
497     * @return validated coordinator XML
498     * @throws CoordinatorJobException thrown if unable to read or validate coordinator xml
499     */
500    protected String readAndValidateXml() throws CoordinatorJobException {
501        String appPath = ParamChecker.notEmpty(conf.get(OozieClient.COORDINATOR_APP_PATH),
502                OozieClient.COORDINATOR_APP_PATH);
503        String coordXml = readDefinition(appPath);
504        validateXml(coordXml);
505        return coordXml;
506    }
507
508    /**
509     * Validate against Coordinator XSD file
510     *
511     * @param xmlContent : Input coordinator xml
512     * @throws CoordinatorJobException thrown if unable to validate coordinator xml
513     */
514    private void validateXml(String xmlContent) throws CoordinatorJobException {
515        try {
516            Validator validator = Services.get().get(SchemaService.class).getValidator(SchemaName.COORDINATOR);
517            validator.validate(new StreamSource(new StringReader(xmlContent)));
518        }
519        catch (SAXException ex) {
520            LOG.warn("SAXException :", ex);
521            throw new CoordinatorJobException(ErrorCode.E0701, ex.getMessage(), ex);
522        }
523        catch (IOException ex) {
524            LOG.warn("IOException :", ex);
525            throw new CoordinatorJobException(ErrorCode.E0702, ex.getMessage(), ex);
526        }
527    }
528
529    /**
530     * Read the application XML schema namespace
531     *
532     * @param coordXmlElement input coordinator xml Element
533     * @return app xml namespace
534     * @throws CoordinatorJobException
535     */
536    private String readAppNamespace(Element coordXmlElement) throws CoordinatorJobException {
537        Namespace ns = coordXmlElement.getNamespace();
538        if (ns != null && bundleId != null && ns.getURI().equals(SchemaService.COORDINATOR_NAMESPACE_URI_1)) {
539            throw new CoordinatorJobException(ErrorCode.E1319, "bundle app can not submit coordinator namespace "
540                    + SchemaService.COORDINATOR_NAMESPACE_URI_1 + ", please use 0.2 or later");
541        }
542        if (ns != null) {
543            return ns.getURI();
544        }
545        else {
546            throw new CoordinatorJobException(ErrorCode.E0700, "the application xml namespace is not given");
547        }
548    }
549
550    /**
551     * Merge default configuration with user-defined configuration.
552     *
553     * @throws CommandException thrown if failed to read or merge configurations
554     */
555    protected void mergeDefaultConfig() throws CommandException {
556        Path configDefault = null;
557        try {
558            String coordAppPathStr = conf.get(OozieClient.COORDINATOR_APP_PATH);
559            Path coordAppPath = new Path(coordAppPathStr);
560            String user = ParamChecker.notEmpty(conf.get(OozieClient.USER_NAME), OozieClient.USER_NAME);
561            HadoopAccessorService has = Services.get().get(HadoopAccessorService.class);
562            Configuration fsConf = has.createConfiguration(coordAppPath.toUri().getAuthority());
563            FileSystem fs = has.createFileSystem(user, coordAppPath.toUri(), fsConf);
564
565            // app path could be a directory
566            if (!fs.isFile(coordAppPath)) {
567                configDefault = new Path(coordAppPath, CONFIG_DEFAULT);
568            } else {
569                configDefault = new Path(coordAppPath.getParent(), CONFIG_DEFAULT);
570            }
571
572            if (fs.exists(configDefault)) {
573                Configuration defaultConf = new XConfiguration(fs.open(configDefault));
574                PropertiesUtils.checkDisallowedProperties(defaultConf, DISALLOWED_DEFAULT_PROPERTIES);
575                XConfiguration.injectDefaults(defaultConf, conf);
576            }
577            else {
578                LOG.info("configDefault Doesn't exist " + configDefault);
579            }
580            PropertiesUtils.checkDisallowedProperties(conf, DISALLOWED_USER_PROPERTIES);
581
582            // Resolving all variables in the job properties.
583            // This ensures the Hadoop Configuration semantics is preserved.
584            XConfiguration resolvedVarsConf = new XConfiguration();
585            for (Map.Entry<String, String> entry : conf) {
586                resolvedVarsConf.set(entry.getKey(), conf.get(entry.getKey()));
587            }
588            conf = resolvedVarsConf;
589        }
590        catch (IOException e) {
591            throw new CommandException(ErrorCode.E0702, e.getMessage() + " : Problem reading default config "
592                    + configDefault, e);
593        }
594        catch (HadoopAccessorException e) {
595            throw new CommandException(e);
596        }
597        LOG.debug("Merged CONF :" + XmlUtils.prettyPrint(conf).toString());
598    }
599
600    /**
601     * The method resolve all the variables that are defined in configuration. It also include the data set definition
602     * from dataset file into XML.
603     *
604     * @param appXml : Original job XML
605     * @param conf : Configuration of the job
606     * @param coordJob : Coordinator job bean to be populated.
607     * @return Resolved and modified job XML element.
608     * @throws CoordinatorJobException thrown if failed to resolve basic entities or include referred datasets
609     * @throws Exception thrown if failed to resolve basic entities or include referred datasets
610     */
611    public Element basicResolveAndIncludeDS(String appXml, Configuration conf, CoordinatorJobBean coordJob)
612    throws CoordinatorJobException, Exception {
613        Element basicResolvedApp = resolveInitial(conf, appXml, coordJob);
614        includeDataSets(basicResolvedApp, conf);
615        return basicResolvedApp;
616    }
617
618    /**
619     * Insert data set into data-in and data-out tags.
620     *
621     * @param eAppXml : coordinator application XML
622     * @param eDatasets : DataSet XML
623     */
624    @SuppressWarnings("unchecked")
625    private void insertDataSet(Element eAppXml, Element eDatasets) {
626        // Adding DS definition in the coordinator XML
627        Element inputList = eAppXml.getChild("input-events", eAppXml.getNamespace());
628        if (inputList != null) {
629            for (Element dataIn : (List<Element>) inputList.getChildren("data-in", eAppXml.getNamespace())) {
630                Element eDataset = findDataSet(eDatasets, dataIn.getAttributeValue("dataset"));
631                dataIn.getContent().add(0, eDataset);
632            }
633        }
634        Element outputList = eAppXml.getChild("output-events", eAppXml.getNamespace());
635        if (outputList != null) {
636            for (Element dataOut : (List<Element>) outputList.getChildren("data-out", eAppXml.getNamespace())) {
637                Element eDataset = findDataSet(eDatasets, dataOut.getAttributeValue("dataset"));
638                dataOut.getContent().add(0, eDataset);
639            }
640        }
641    }
642
643    /**
644     * Find a specific dataset from a list of Datasets.
645     *
646     * @param eDatasets : List of data sets
647     * @param name : queried data set name
648     * @return one Dataset element. otherwise throw Exception
649     */
650    @SuppressWarnings("unchecked")
651    private static Element findDataSet(Element eDatasets, String name) {
652        for (Element eDataset : (List<Element>) eDatasets.getChildren("dataset", eDatasets.getNamespace())) {
653            if (eDataset.getAttributeValue("name").equals(name)) {
654                eDataset = (Element) eDataset.clone();
655                eDataset.detach();
656                return eDataset;
657            }
658        }
659        throw new RuntimeException("undefined dataset: " + name);
660    }
661
662    /**
663     * Initialize all the required EL Evaluators.
664     */
665    protected void initEvaluators() {
666        evalFreq = CoordELEvaluator.createELEvaluatorForGroup(conf, "coord-job-submit-freq");
667        evalNofuncs = CoordELEvaluator.createELEvaluatorForGroup(conf, "coord-job-submit-nofuncs");
668        evalInst = CoordELEvaluator.createELEvaluatorForGroup(conf, "coord-job-submit-instances");
669        evalAction = CoordELEvaluator.createELEvaluatorForGroup(conf, "coord-action-start");
670        evalTimeout = CoordELEvaluator.createELEvaluatorForGroup(conf, "coord-job-wait-timeout");
671        evalInitialInstance = CoordELEvaluator.createELEvaluatorForGroup(conf, "coord-job-submit-initial-instance");
672
673    }
674
675    /**
676     * Resolve basic entities using job Configuration.
677     *
678     * @param conf :Job configuration
679     * @param appXml : Original job XML
680     * @param coordJob : Coordinator job bean to be populated.
681     * @return Resolved job XML element.
682     * @throws CoordinatorJobException thrown if failed to resolve basic entities
683     * @throws Exception thrown if failed to resolve basic entities
684     */
685    @SuppressWarnings("unchecked")
686    protected Element resolveInitial(Configuration conf, String appXml, CoordinatorJobBean coordJob)
687    throws CoordinatorJobException, Exception {
688        Element eAppXml = XmlUtils.parseXml(appXml);
689        // job's main attributes
690        // frequency
691        String val = resolveAttribute("frequency", eAppXml, evalFreq);
692        int ival = 0;
693
694        val = ParamChecker.checkFrequency(val);
695        coordJob.setFrequency(val);
696        TimeUnit tmp = (evalFreq.getVariable("timeunit") == null) ? TimeUnit.MINUTE : ((TimeUnit) evalFreq
697                .getVariable("timeunit"));
698        try {
699            Integer.parseInt(val);
700        }
701        catch (NumberFormatException ex) {
702            tmp=TimeUnit.CRON;
703        }
704
705        addAnAttribute("freq_timeunit", eAppXml, tmp.toString());
706        // TimeUnit
707        coordJob.setTimeUnit(CoordinatorJob.Timeunit.valueOf(tmp.toString()));
708        // End Of Duration
709        tmp = evalFreq.getVariable("endOfDuration") == null ? TimeUnit.NONE : ((TimeUnit) evalFreq
710                .getVariable("endOfDuration"));
711        addAnAttribute("end_of_duration", eAppXml, tmp.toString());
712        // coordJob.setEndOfDuration(tmp) // TODO: Add new attribute in Job bean
713
714        // Application name
715        if (this.coordName == null) {
716            String name = ELUtils.resolveAppName(eAppXml.getAttribute("name").getValue(), conf);
717            coordJob.setAppName(name);
718        }
719        else {
720            // this coord job is created from bundle
721            coordJob.setAppName(this.coordName);
722        }
723
724        // start time
725        val = resolveAttribute("start", eAppXml, evalNofuncs);
726        ParamChecker.checkDateOozieTZ(val, "start");
727        coordJob.setStartTime(DateUtils.parseDateOozieTZ(val));
728        // end time
729        val = resolveAttribute("end", eAppXml, evalNofuncs);
730        ParamChecker.checkDateOozieTZ(val, "end");
731        coordJob.setEndTime(DateUtils.parseDateOozieTZ(val));
732        // Time zone
733        val = resolveAttribute("timezone", eAppXml, evalNofuncs);
734        ParamChecker.checkTimeZone(val, "timezone");
735        coordJob.setTimeZone(val);
736
737        // controls
738        val = resolveTagContents("timeout", eAppXml.getChild("controls", eAppXml.getNamespace()), evalTimeout);
739        if (val != null && val != "") {
740            int t = Integer.parseInt(val);
741            tmp = (evalTimeout.getVariable("timeunit") == null) ? TimeUnit.MINUTE : ((TimeUnit) evalTimeout
742                    .getVariable("timeunit"));
743            switch (tmp) {
744                case HOUR:
745                    val = String.valueOf(t * 60);
746                    break;
747                case DAY:
748                    val = String.valueOf(t * 60 * 24);
749                    break;
750                case MONTH:
751                    val = String.valueOf(t * 60 * 24 * 30);
752                    break;
753                default:
754                    break;
755            }
756        }
757        else {
758            val = ConfigurationService.get(CONF_DEFAULT_TIMEOUT_NORMAL);
759        }
760
761        ival = ParamChecker.checkInteger(val, "timeout");
762        if (ival < 0 || ival > ConfigurationService.getInt(CONF_DEFAULT_MAX_TIMEOUT)) {
763            ival = ConfigurationService.getInt(CONF_DEFAULT_MAX_TIMEOUT);
764        }
765        coordJob.setTimeout(ival);
766
767        val = resolveTagContents("concurrency", eAppXml.getChild("controls", eAppXml.getNamespace()), evalNofuncs);
768        if (val == null || val.isEmpty()) {
769            val = ConfigurationService.get(CONF_DEFAULT_CONCURRENCY);
770        }
771        ival = ParamChecker.checkInteger(val, "concurrency");
772        coordJob.setConcurrency(ival);
773
774        val = resolveTagContents("throttle", eAppXml.getChild("controls", eAppXml.getNamespace()), evalNofuncs);
775        if (val == null || val.isEmpty()) {
776            int defaultThrottle = ConfigurationService.getInt(CONF_DEFAULT_THROTTLE);
777            ival = defaultThrottle;
778        }
779        else {
780            ival = ParamChecker.checkInteger(val, "throttle");
781        }
782        int maxQueue = ConfigurationService.getInt(CONF_QUEUE_SIZE);
783        float factor = ConfigurationService.getFloat(CONF_MAT_THROTTLING_FACTOR);
784        int maxThrottle = (int) (maxQueue * factor);
785        if (ival > maxThrottle || ival < 1) {
786            ival = maxThrottle;
787        }
788        LOG.debug("max throttle " + ival);
789        coordJob.setMatThrottling(ival);
790
791        val = resolveTagContents("execution", eAppXml.getChild("controls", eAppXml.getNamespace()), evalNofuncs);
792        if (val == "") {
793            val = Execution.FIFO.toString();
794        }
795        coordJob.setExecutionOrder(Execution.valueOf(val));
796        String[] acceptedVals = { Execution.LIFO.toString(), Execution.FIFO.toString(), Execution.LAST_ONLY.toString(),
797            Execution.NONE.toString()};
798        ParamChecker.isMember(val, acceptedVals, "execution");
799
800        // datasets
801        resolveTagContents("include", eAppXml.getChild("datasets", eAppXml.getNamespace()), evalNofuncs);
802        // for each data set
803        resolveDataSets(eAppXml);
804        HashMap<String, String> dataNameList = new HashMap<String, String>();
805        resolveIODataset(eAppXml);
806        resolveIOEvents(eAppXml, dataNameList);
807
808        if (CoordUtils.isInputLogicSpecified(eAppXml)) {
809            resolveInputLogic(eAppXml.getChild(CoordInputLogicEvaluator.INPUT_LOGIC, eAppXml.getNamespace()), evalInst,
810                    dataNameList);
811        }
812
813        resolveTagContents("app-path", eAppXml.getChild("action", eAppXml.getNamespace()).getChild("workflow",
814                eAppXml.getNamespace()), evalNofuncs);
815        // TODO: If action or workflow tag is missing, NullPointerException will
816        // occur
817        Element configElem = eAppXml.getChild("action", eAppXml.getNamespace()).getChild("workflow",
818                eAppXml.getNamespace()).getChild("configuration", eAppXml.getNamespace());
819        evalData = CoordELEvaluator.createELEvaluatorForDataEcho(conf, "coord-job-submit-data", dataNameList);
820        if (configElem != null) {
821            for (Element propElem : (List<Element>) configElem.getChildren("property", configElem.getNamespace())) {
822                resolveTagContents("name", propElem, evalData);
823                // Want to check the data-integrity but don't want to modify the
824                // XML
825                // for properties only
826                Element tmpProp = (Element) propElem.clone();
827                resolveTagContents("value", tmpProp, evalData);
828            }
829        }
830        evalSla = CoordELEvaluator.createELEvaluatorForDataAndConf(conf, "coord-sla-submit", dataNameList);
831        resolveSLA(eAppXml, coordJob);
832        return eAppXml;
833    }
834
835    /**
836     * Resolve SLA events
837     *
838     * @param eAppXml job XML
839     * @param coordJob coordinator job bean
840     * @throws CommandException thrown if failed to resolve sla events
841     */
842    private void resolveSLA(Element eAppXml, CoordinatorJobBean coordJob) throws CommandException {
843        Element eSla = XmlUtils.getSLAElement(eAppXml.getChild("action", eAppXml.getNamespace()));
844
845        if (eSla != null) {
846            resolveSLAContent(eSla);
847            String slaXml = XmlUtils.prettyPrint(eSla).toString();
848            try {
849                // EL evaluation
850                slaXml = evalSla.evaluate(slaXml, String.class);
851                // Validate against semantic SXD
852                XmlUtils.validateData(slaXml, SchemaName.SLA_ORIGINAL);
853            }
854            catch (Exception e) {
855                throw new CommandException(ErrorCode.E1004, "Validation ERROR :" + e.getMessage(), e);
856            }
857        }
858    }
859
860    /**
861     * Resolve an SLA value.
862     *
863     * @param elem : XML Element where attribute is defiend
864     */
865    private void resolveSLAContent(Element elem) {
866        for (Element tagElem : (List<Element>) elem.getChildren()) {
867            if (tagElem != null) {
868                try {
869                    String val = CoordELFunctions.evalAndWrap(evalNofuncs, tagElem.getText().trim());
870                    tagElem.setText(val);
871                }
872                catch (Exception e) {
873                    LOG.warn("Variable is not defined in job.properties. Here is the message: {0}", e.getMessage());
874                    continue;
875                }
876            }
877        }
878    }
879
880    /**
881     * Resolve input-events/data-in and output-events/data-out tags.
882     *
883     * @param eJobOrg : Job element
884     * @throws CoordinatorJobException thrown if failed to resolve input and output events
885     */
886    @SuppressWarnings("unchecked")
887    private void resolveIOEvents(Element eJobOrg, HashMap<String, String> dataNameList) throws CoordinatorJobException {
888        // Resolving input-events/data-in
889        // Clone the job and don't update anything in the original
890        Element eJob = (Element) eJobOrg.clone();
891        Element inputList = eJob.getChild("input-events", eJob.getNamespace());
892        if (inputList != null) {
893            TreeSet<String> eventNameSet = new TreeSet<String>();
894            for (Element dataIn : (List<Element>) inputList.getChildren("data-in", eJob.getNamespace())) {
895                String dataInName = dataIn.getAttributeValue("name");
896                dataNameList.put(dataInName, "data-in");
897                // check whether there is any duplicate data-in name
898                if (eventNameSet.contains(dataInName)) {
899                    throw new RuntimeException("Duplicate dataIn name " + dataInName);
900                }
901                else {
902                    eventNameSet.add(dataInName);
903                }
904                resolveTagContents("instance", dataIn, evalInst);
905                resolveTagContents("start-instance", dataIn, evalInst);
906                resolveTagContents("end-instance", dataIn, evalInst);
907
908            }
909        }
910        // Resolving output-events/data-out
911        Element outputList = eJob.getChild("output-events", eJob.getNamespace());
912        if (outputList != null) {
913            TreeSet<String> eventNameSet = new TreeSet<String>();
914            for (Element dataOut : (List<Element>) outputList.getChildren("data-out", eJob.getNamespace())) {
915                String dataOutName = dataOut.getAttributeValue("name");
916                dataNameList.put(dataOutName, "data-out");
917                // check whether there is any duplicate data-out name
918                if (eventNameSet.contains(dataOutName)) {
919                    throw new RuntimeException("Duplicate dataIn name " + dataOutName);
920                }
921                else {
922                    eventNameSet.add(dataOutName);
923                }
924                resolveTagContents("instance", dataOut, evalInst);
925
926            }
927        }
928
929    }
930
931    private void resolveInputLogic(Element root, ELEvaluator evalInputLogic, HashMap<String, String> dataNameList)
932            throws Exception {
933        for (Object event : root.getChildren()) {
934            Element inputElement = (Element) event;
935            resolveAttribute("dataset", inputElement, evalInputLogic);
936            String name=resolveAttribute("name", inputElement, evalInputLogic);
937            resolveAttribute("or", inputElement, evalInputLogic);
938            resolveAttribute("and", inputElement, evalInputLogic);
939            resolveAttribute("combine", inputElement, evalInputLogic);
940
941            if (name != null) {
942                dataNameList.put(name, "data-in");
943            }
944
945            if (!inputElement.getChildren().isEmpty()) {
946                resolveInputLogic(inputElement, evalInputLogic, dataNameList);
947            }
948        }
949    }
950
951    /**
952     * Resolve input-events/dataset and output-events/dataset tags.
953     *
954     * @param eJob : Job element
955     * @throws CoordinatorJobException thrown if failed to resolve input and output events
956     */
957    @SuppressWarnings("unchecked")
958    private void resolveIODataset(Element eAppXml) throws CoordinatorJobException {
959        // Resolving input-events/data-in
960        Element inputList = eAppXml.getChild("input-events", eAppXml.getNamespace());
961        if (inputList != null) {
962            for (Element dataIn : (List<Element>) inputList.getChildren("data-in", eAppXml.getNamespace())) {
963                resolveAttribute("dataset", dataIn, evalInst);
964
965            }
966        }
967        // Resolving output-events/data-out
968        Element outputList = eAppXml.getChild("output-events", eAppXml.getNamespace());
969        if (outputList != null) {
970            for (Element dataOut : (List<Element>) outputList.getChildren("data-out", eAppXml.getNamespace())) {
971                resolveAttribute("dataset", dataOut, evalInst);
972
973            }
974        }
975
976    }
977
978
979    /**
980     * Add an attribute into XML element.
981     *
982     * @param attrName :attribute name
983     * @param elem : Element to add attribute
984     * @param value :Value of attribute
985     */
986    private void addAnAttribute(String attrName, Element elem, String value) {
987        elem.setAttribute(attrName, value);
988    }
989
990    /**
991     * Resolve datasets using job configuration.
992     *
993     * @param eAppXml : Job Element XML
994     * @throws Exception thrown if failed to resolve datasets
995     */
996    @SuppressWarnings("unchecked")
997    private void resolveDataSets(Element eAppXml) throws Exception {
998        Element datasetList = eAppXml.getChild("datasets", eAppXml.getNamespace());
999        if (datasetList != null) {
1000
1001            List<Element> dsElems = datasetList.getChildren("dataset", eAppXml.getNamespace());
1002            resolveDataSets(dsElems);
1003            resolveTagContents("app-path", eAppXml.getChild("action", eAppXml.getNamespace()).getChild("workflow",
1004                    eAppXml.getNamespace()), evalNofuncs);
1005        }
1006    }
1007
1008    /**
1009     * Resolve datasets using job configuration.
1010     *
1011     * @param dsElems : Data set XML element.
1012     * @throws CoordinatorJobException thrown if failed to resolve datasets
1013     */
1014    private void resolveDataSets(List<Element> dsElems) throws CoordinatorJobException {
1015        for (Element dsElem : dsElems) {
1016            // Setting up default TimeUnit and EndOFDuraion
1017            evalFreq.setVariable("timeunit", TimeUnit.MINUTE);
1018            evalFreq.setVariable("endOfDuration", TimeUnit.NONE);
1019
1020            String val = resolveAttribute("frequency", dsElem, evalFreq);
1021            int ival = ParamChecker.checkInteger(val, "frequency");
1022            ParamChecker.checkGTZero(ival, "frequency");
1023            addAnAttribute("freq_timeunit", dsElem, evalFreq.getVariable("timeunit") == null ? TimeUnit.MINUTE
1024                    .toString() : ((TimeUnit) evalFreq.getVariable("timeunit")).toString());
1025            addAnAttribute("end_of_duration", dsElem, evalFreq.getVariable("endOfDuration") == null ? TimeUnit.NONE
1026                    .toString() : ((TimeUnit) evalFreq.getVariable("endOfDuration")).toString());
1027            val = resolveAttribute("initial-instance", dsElem, evalInitialInstance);
1028            ParamChecker.checkDateOozieTZ(val, "initial-instance");
1029            checkInitialInstance(val);
1030            val = resolveAttribute("timezone", dsElem, evalNofuncs);
1031            ParamChecker.checkTimeZone(val, "timezone");
1032            resolveTagContents("uri-template", dsElem, evalNofuncs);
1033            resolveTagContents("done-flag", dsElem, evalNofuncs);
1034        }
1035    }
1036
1037    /**
1038     * Resolve the content of a tag.
1039     *
1040     * @param tagName : Tag name of job XML i.e. <timeout> 10 </timeout>
1041     * @param elem : Element where the tag exists.
1042     * @param eval : EL evealuator
1043     * @return Resolved tag content.
1044     * @throws CoordinatorJobException thrown if failed to resolve tag content
1045     */
1046    @SuppressWarnings("unchecked")
1047    private String resolveTagContents(String tagName, Element elem, ELEvaluator eval) throws CoordinatorJobException {
1048        String ret = "";
1049        if (elem != null) {
1050            for (Element tagElem : (List<Element>) elem.getChildren(tagName, elem.getNamespace())) {
1051                if (tagElem != null) {
1052                    String updated;
1053                    try {
1054                        updated = CoordELFunctions.evalAndWrap(eval, tagElem.getText().trim());
1055
1056                    }
1057                    catch (Exception e) {
1058                        throw new CoordinatorJobException(ErrorCode.E1004, e.getMessage(), e);
1059                    }
1060                    tagElem.removeContent();
1061                    tagElem.addContent(updated);
1062                    ret += updated;
1063                }
1064            }
1065        }
1066        return ret;
1067    }
1068
1069    /**
1070     * Resolve an attribute value.
1071     *
1072     * @param attrName : Attribute name.
1073     * @param elem : XML Element where attribute is defiend
1074     * @param eval : ELEvaluator used to resolve
1075     * @return Resolved attribute value
1076     * @throws CoordinatorJobException thrown if failed to resolve an attribute value
1077     */
1078    private String resolveAttribute(String attrName, Element elem, ELEvaluator eval) throws CoordinatorJobException {
1079        Attribute attr = elem.getAttribute(attrName);
1080        String val = null;
1081        if (attr != null) {
1082            try {
1083                val = CoordELFunctions.evalAndWrap(eval, attr.getValue().trim());
1084            }
1085            catch (Exception e) {
1086                throw new CoordinatorJobException(ErrorCode.E1004, e.getMessage(), e);
1087            }
1088            attr.setValue(val);
1089        }
1090        return val;
1091    }
1092
1093    /**
1094     * Include referred datasets into XML.
1095     *
1096     * @param resolvedXml : Job XML element.
1097     * @param conf : Job configuration
1098     * @throws CoordinatorJobException thrown if failed to include referred datasets into XML
1099     */
1100    @SuppressWarnings("unchecked")
1101    protected void includeDataSets(Element resolvedXml, Configuration conf) throws CoordinatorJobException {
1102        Element datasets = resolvedXml.getChild("datasets", resolvedXml.getNamespace());
1103        Element allDataSets = new Element("all_datasets", resolvedXml.getNamespace());
1104        List<String> dsList = new ArrayList<String>();
1105        if (datasets != null) {
1106            for (Element includeElem : (List<Element>) datasets.getChildren("include", datasets.getNamespace())) {
1107                String incDSFile = includeElem.getTextTrim();
1108                includeOneDSFile(incDSFile, dsList, allDataSets, datasets.getNamespace());
1109            }
1110            for (Element e : (List<Element>) datasets.getChildren("dataset", datasets.getNamespace())) {
1111                String dsName = e.getAttributeValue("name");
1112                if (dsList.contains(dsName)) {// Override with this DS
1113                    // Remove duplicate
1114                    removeDataSet(allDataSets, dsName);
1115                }
1116                else {
1117                    dsList.add(dsName);
1118                }
1119                allDataSets.addContent((Element) e.clone());
1120            }
1121        }
1122        insertDataSet(resolvedXml, allDataSets);
1123        resolvedXml.removeChild("datasets", resolvedXml.getNamespace());
1124    }
1125
1126    /**
1127     * Include one dataset file.
1128     *
1129     * @param incDSFile : Include data set filename.
1130     * @param dsList :List of dataset names to verify the duplicate.
1131     * @param allDataSets : Element that includes all dataset definitions.
1132     * @param dsNameSpace : Data set name space
1133     * @throws CoordinatorJobException thrown if failed to include one dataset file
1134     */
1135    @SuppressWarnings("unchecked")
1136    private void includeOneDSFile(String incDSFile, List<String> dsList, Element allDataSets, Namespace dsNameSpace)
1137    throws CoordinatorJobException {
1138        Element tmpDataSets = null;
1139        try {
1140            String dsXml = readDefinition(incDSFile);
1141            LOG.debug("DSFILE :" + incDSFile + "\n" + dsXml);
1142            tmpDataSets = XmlUtils.parseXml(dsXml);
1143        }
1144        catch (JDOMException e) {
1145            LOG.warn("Error parsing included dataset [{0}].  Message [{1}]", incDSFile, e.getMessage());
1146            throw new CoordinatorJobException(ErrorCode.E0700, e.getMessage());
1147        }
1148        resolveDataSets(tmpDataSets.getChildren("dataset"));
1149        for (Element e : (List<Element>) tmpDataSets.getChildren("dataset")) {
1150            String dsName = e.getAttributeValue("name");
1151            if (dsList.contains(dsName)) {
1152                throw new RuntimeException("Duplicate Dataset " + dsName);
1153            }
1154            dsList.add(dsName);
1155            Element tmp = (Element) e.clone();
1156            // TODO: Don't like to over-write the external/include DS's namespace
1157            tmp.setNamespace(dsNameSpace);
1158            tmp.getChild("uri-template").setNamespace(dsNameSpace);
1159            if (e.getChild("done-flag") != null) {
1160                tmp.getChild("done-flag").setNamespace(dsNameSpace);
1161            }
1162            allDataSets.addContent(tmp);
1163        }
1164        // nested include
1165        for (Element includeElem : (List<Element>) tmpDataSets.getChildren("include", tmpDataSets.getNamespace())) {
1166            String incFile = includeElem.getTextTrim();
1167            includeOneDSFile(incFile, dsList, allDataSets, dsNameSpace);
1168        }
1169    }
1170
1171    /**
1172     * Remove a dataset from a list of dataset.
1173     *
1174     * @param eDatasets : List of dataset
1175     * @param name : Dataset name to be removed.
1176     */
1177    @SuppressWarnings("unchecked")
1178    private static void removeDataSet(Element eDatasets, String name) {
1179        for (Element eDataset : (List<Element>) eDatasets.getChildren("dataset", eDatasets.getNamespace())) {
1180            if (eDataset.getAttributeValue("name").equals(name)) {
1181                eDataset.detach();
1182                return;
1183            }
1184        }
1185        throw new RuntimeException("undefined dataset: " + name);
1186    }
1187
1188    /**
1189     * Read coordinator definition.
1190     *
1191     * @param appPath application path.
1192     * @return coordinator definition.
1193     * @throws CoordinatorJobException thrown if the definition could not be read.
1194     */
1195    protected String readDefinition(String appPath) throws CoordinatorJobException {
1196        String user = ParamChecker.notEmpty(conf.get(OozieClient.USER_NAME), OozieClient.USER_NAME);
1197        // Configuration confHadoop = CoordUtils.getHadoopConf(conf);
1198        try {
1199            URI uri = new URI(appPath);
1200            LOG.debug("user =" + user);
1201            HadoopAccessorService has = Services.get().get(HadoopAccessorService.class);
1202            Configuration fsConf = has.createConfiguration(uri.getAuthority());
1203            FileSystem fs = has.createFileSystem(user, uri, fsConf);
1204            Path appDefPath = null;
1205
1206            // app path could be a directory
1207            Path path = new Path(uri.getPath());
1208            // check file exists for dataset include file, app xml already checked
1209            if (!fs.exists(path)) {
1210                throw new URISyntaxException(path.toString(), "path not existed : " + path.toString());
1211            }
1212            if (!fs.isFile(path)) {
1213                appDefPath = new Path(path, COORDINATOR_XML_FILE);
1214            } else {
1215                appDefPath = path;
1216            }
1217
1218            Reader reader = new InputStreamReader(fs.open(appDefPath));
1219            StringWriter writer = new StringWriter();
1220            IOUtils.copyCharStream(reader, writer);
1221            return writer.toString();
1222        }
1223        catch (IOException ex) {
1224            LOG.warn("IOException :" + XmlUtils.prettyPrint(conf), ex);
1225            throw new CoordinatorJobException(ErrorCode.E1001, ex.getMessage(), ex);
1226        }
1227        catch (URISyntaxException ex) {
1228            LOG.warn("URISyException :" + ex.getMessage());
1229            throw new CoordinatorJobException(ErrorCode.E1002, appPath, ex.getMessage(), ex);
1230        }
1231        catch (HadoopAccessorException ex) {
1232            throw new CoordinatorJobException(ex);
1233        }
1234        catch (Exception ex) {
1235            LOG.warn("Exception :", ex);
1236            throw new CoordinatorJobException(ErrorCode.E1001, ex.getMessage(), ex);
1237        }
1238    }
1239
1240    /**
1241     * Write a coordinator job into database
1242     *
1243     *@param appXML : Coordinator definition xml
1244     * @param eJob : XML element of job
1245     * @param coordJob : Coordinator job bean
1246     * @return Job id
1247     * @throws CommandException thrown if unable to save coordinator job to db
1248     */
1249    protected String storeToDB(String appXML, Element eJob, CoordinatorJobBean coordJob) throws CommandException {
1250        String jobId = Services.get().get(UUIDService.class).generateId(ApplicationType.COORDINATOR);
1251        coordJob.setId(jobId);
1252
1253        coordJob.setAppPath(conf.get(OozieClient.COORDINATOR_APP_PATH));
1254        coordJob.setCreatedTime(new Date());
1255        coordJob.setUser(conf.get(OozieClient.USER_NAME));
1256        String group = ConfigUtils.getWithDeprecatedCheck(conf, OozieClient.JOB_ACL, OozieClient.GROUP_NAME, null);
1257        coordJob.setGroup(group);
1258        coordJob.setConf(XmlUtils.prettyPrint(conf).toString());
1259        coordJob.setJobXml(XmlUtils.prettyPrint(eJob).toString());
1260        coordJob.setLastActionNumber(0);
1261        coordJob.setLastModifiedTime(new Date());
1262
1263        if (!dryrun) {
1264            coordJob.setLastModifiedTime(new Date());
1265            try {
1266                CoordJobQueryExecutor.getInstance().insert(coordJob);
1267            }
1268            catch (JPAExecutorException jpaee) {
1269                coordJob.setId(null);
1270                coordJob.setStatus(CoordinatorJob.Status.FAILED);
1271                throw new CommandException(jpaee);
1272            }
1273        }
1274        return jobId;
1275    }
1276
1277    /*
1278     * this method checks if the initial-instance specified for a particular
1279       is not a date earlier than the oozie server default Jan 01, 1970 00:00Z UTC
1280     */
1281    private void checkInitialInstance(String val) throws CoordinatorJobException, IllegalArgumentException {
1282        Date initialInstance, givenInstance;
1283        try {
1284            initialInstance = DateUtils.parseDateUTC("1970-01-01T00:00Z");
1285            givenInstance = DateUtils.parseDateOozieTZ(val);
1286        }
1287        catch (Exception e) {
1288            throw new IllegalArgumentException("Unable to parse dataset initial-instance string '" + val +
1289                                               "' to Date object. ",e);
1290        }
1291        if(givenInstance.compareTo(initialInstance) < 0) {
1292            throw new CoordinatorJobException(ErrorCode.E1021, "Dataset initial-instance " + val +
1293                    " is earlier than the default initial instance " + DateUtils.formatDateOozieTZ(initialInstance));
1294        }
1295    }
1296
1297    /* (non-Javadoc)
1298     * @see org.apache.oozie.command.XCommand#getEntityKey()
1299     */
1300    @Override
1301    public String getEntityKey() {
1302        return null;
1303    }
1304
1305    /* (non-Javadoc)
1306     * @see org.apache.oozie.command.XCommand#isLockRequired()
1307     */
1308    @Override
1309    protected boolean isLockRequired() {
1310        return false;
1311    }
1312
1313    /* (non-Javadoc)
1314     * @see org.apache.oozie.command.XCommand#loadState()
1315     */
1316    @Override
1317    protected void loadState() throws CommandException {
1318        jpaService = Services.get().get(JPAService.class);
1319        if (jpaService == null) {
1320            throw new CommandException(ErrorCode.E0610);
1321        }
1322        coordJob = new CoordinatorJobBean();
1323        if (this.bundleId != null) {
1324            // this coord job is created from bundle
1325            coordJob.setBundleId(this.bundleId);
1326            // first use bundle id if submit thru bundle
1327            LogUtils.setLogInfo(this.bundleId);
1328        }
1329        if (this.coordName != null) {
1330            // this coord job is created from bundle
1331            coordJob.setAppName(this.coordName);
1332        }
1333        setJob(coordJob);
1334
1335    }
1336
1337    /* (non-Javadoc)
1338     * @see org.apache.oozie.command.XCommand#verifyPrecondition()
1339     */
1340    @Override
1341    protected void verifyPrecondition() throws CommandException {
1342
1343    }
1344
1345    /* (non-Javadoc)
1346     * @see org.apache.oozie.command.TransitionXCommand#notifyParent()
1347     */
1348    @Override
1349    public void notifyParent() throws CommandException {
1350        // update bundle action
1351        if (coordJob.getBundleId() != null) {
1352            LOG.debug("Updating bundle record: " + coordJob.getBundleId() + " for coord id: " + coordJob.getId());
1353            BundleStatusUpdateXCommand bundleStatusUpdate = new BundleStatusUpdateXCommand(coordJob, prevStatus);
1354            bundleStatusUpdate.call();
1355        }
1356    }
1357
1358    /* (non-Javadoc)
1359     * @see org.apache.oozie.command.TransitionXCommand#updateJob()
1360     */
1361    @Override
1362    public void updateJob() throws CommandException {
1363    }
1364
1365    /* (non-Javadoc)
1366     * @see org.apache.oozie.command.TransitionXCommand#getJob()
1367     */
1368    @Override
1369    public Job getJob() {
1370        return coordJob;
1371    }
1372
1373    @Override
1374    public void performWrites() throws CommandException {
1375    }
1376}