001/** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018 019package org.apache.oozie.command.coord; 020 021import java.io.IOException; 022import java.io.InputStreamReader; 023import java.io.Reader; 024import java.io.StringReader; 025import java.io.StringWriter; 026import java.net.URI; 027import java.net.URISyntaxException; 028import java.util.ArrayList; 029import java.util.Calendar; 030import java.util.Date; 031import java.util.HashMap; 032import java.util.HashSet; 033import java.util.Iterator; 034import java.util.List; 035import java.util.Map; 036import java.util.Set; 037import java.util.TreeSet; 038 039import javax.xml.transform.stream.StreamSource; 040import javax.xml.validation.Validator; 041 042import org.apache.hadoop.conf.Configuration; 043import org.apache.hadoop.fs.FileSystem; 044import org.apache.hadoop.fs.Path; 045import org.apache.oozie.CoordinatorJobBean; 046import org.apache.oozie.ErrorCode; 047import org.apache.oozie.client.CoordinatorJob; 048import org.apache.oozie.client.Job; 049import org.apache.oozie.client.OozieClient; 050import org.apache.oozie.client.CoordinatorJob.Execution; 051import org.apache.oozie.command.CommandException; 052import org.apache.oozie.command.SubmitTransitionXCommand; 053import org.apache.oozie.command.bundle.BundleStatusUpdateXCommand; 054import org.apache.oozie.coord.CoordELEvaluator; 055import org.apache.oozie.coord.CoordELFunctions; 056import org.apache.oozie.coord.CoordUtils; 057import org.apache.oozie.coord.CoordinatorJobException; 058import org.apache.oozie.coord.TimeUnit; 059import org.apache.oozie.coord.input.logic.CoordInputLogicEvaluator; 060import org.apache.oozie.executor.jpa.CoordJobQueryExecutor; 061import org.apache.oozie.executor.jpa.JPAExecutorException; 062import org.apache.oozie.service.CoordMaterializeTriggerService; 063import org.apache.oozie.service.ConfigurationService; 064import org.apache.oozie.service.HadoopAccessorException; 065import org.apache.oozie.service.HadoopAccessorService; 066import org.apache.oozie.service.JPAService; 067import org.apache.oozie.service.SchemaService; 068import org.apache.oozie.service.Service; 069import org.apache.oozie.service.Services; 070import org.apache.oozie.service.UUIDService; 071import org.apache.oozie.service.SchemaService.SchemaName; 072import org.apache.oozie.service.UUIDService.ApplicationType; 073import org.apache.oozie.util.ConfigUtils; 074import org.apache.oozie.util.DateUtils; 075import org.apache.oozie.util.ELEvaluator; 076import org.apache.oozie.util.ELUtils; 077import org.apache.oozie.util.IOUtils; 078import org.apache.oozie.util.InstrumentUtils; 079import org.apache.oozie.util.LogUtils; 080import org.apache.oozie.util.ParamChecker; 081import org.apache.oozie.util.ParameterVerifier; 082import org.apache.oozie.util.ParameterVerifierException; 083import org.apache.oozie.util.PropertiesUtils; 084import org.apache.oozie.util.XConfiguration; 085import org.apache.oozie.util.XmlUtils; 086import org.jdom.Attribute; 087import org.jdom.Element; 088import org.jdom.JDOMException; 089import org.jdom.Namespace; 090import org.xml.sax.SAXException; 091 092/** 093 * This class provides the functionalities to resolve a coordinator job XML and write the job information into a DB 094 * table. 095 * <p> 096 * Specifically it performs the following functions: 1. Resolve all the variables or properties using job 097 * configurations. 2. Insert all datasets definition as part of the <data-in> and <data-out> tags. 3. Validate the XML 098 * at runtime. 099 */ 100public class CoordSubmitXCommand extends SubmitTransitionXCommand { 101 102 protected Configuration conf; 103 protected final String bundleId; 104 protected final String coordName; 105 protected boolean dryrun; 106 protected JPAService jpaService = null; 107 private CoordinatorJob.Status prevStatus = CoordinatorJob.Status.PREP; 108 109 public static final String CONFIG_DEFAULT = "coord-config-default.xml"; 110 public static final String COORDINATOR_XML_FILE = "coordinator.xml"; 111 public final String COORD_INPUT_EVENTS ="input-events"; 112 public final String COORD_OUTPUT_EVENTS = "output-events"; 113 public final String COORD_INPUT_EVENTS_DATA_IN ="data-in"; 114 public final String COORD_OUTPUT_EVENTS_DATA_OUT = "data-out"; 115 116 private static final Set<String> DISALLOWED_USER_PROPERTIES = new HashSet<String>(); 117 private static final Set<String> DISALLOWED_DEFAULT_PROPERTIES = new HashSet<String>(); 118 119 protected CoordinatorJobBean coordJob = null; 120 /** 121 * Default timeout for normal jobs, in minutes, after which coordinator input check will timeout 122 */ 123 public static final String CONF_DEFAULT_TIMEOUT_NORMAL = Service.CONF_PREFIX + "coord.normal.default.timeout"; 124 125 public static final String CONF_DEFAULT_CONCURRENCY = Service.CONF_PREFIX + "coord.default.concurrency"; 126 127 public static final String CONF_DEFAULT_THROTTLE = Service.CONF_PREFIX + "coord.default.throttle"; 128 129 public static final String CONF_MAT_THROTTLING_FACTOR = Service.CONF_PREFIX 130 + "coord.materialization.throttling.factor"; 131 132 /** 133 * Default MAX timeout in minutes, after which coordinator input check will timeout 134 */ 135 public static final String CONF_DEFAULT_MAX_TIMEOUT = Service.CONF_PREFIX + "coord.default.max.timeout"; 136 137 public static final String CONF_QUEUE_SIZE = Service.CONF_PREFIX + "CallableQueueService.queue.size"; 138 139 public static final String CONF_CHECK_MAX_FREQUENCY = Service.CONF_PREFIX + "coord.check.maximum.frequency"; 140 141 private ELEvaluator evalFreq = null; 142 private ELEvaluator evalNofuncs = null; 143 private ELEvaluator evalData = null; 144 private ELEvaluator evalInst = null; 145 private ELEvaluator evalAction = null; 146 private ELEvaluator evalSla = null; 147 private ELEvaluator evalTimeout = null; 148 private ELEvaluator evalInitialInstance = null; 149 150 static { 151 String[] badUserProps = { PropertiesUtils.YEAR, PropertiesUtils.MONTH, PropertiesUtils.DAY, 152 PropertiesUtils.HOUR, PropertiesUtils.MINUTE, PropertiesUtils.DAYS, PropertiesUtils.HOURS, 153 PropertiesUtils.MINUTES, PropertiesUtils.KB, PropertiesUtils.MB, PropertiesUtils.GB, 154 PropertiesUtils.TB, PropertiesUtils.PB, PropertiesUtils.RECORDS, PropertiesUtils.MAP_IN, 155 PropertiesUtils.MAP_OUT, PropertiesUtils.REDUCE_IN, PropertiesUtils.REDUCE_OUT, PropertiesUtils.GROUPS }; 156 PropertiesUtils.createPropertySet(badUserProps, DISALLOWED_USER_PROPERTIES); 157 158 String[] badDefaultProps = { PropertiesUtils.HADOOP_USER}; 159 PropertiesUtils.createPropertySet(badUserProps, DISALLOWED_DEFAULT_PROPERTIES); 160 PropertiesUtils.createPropertySet(badDefaultProps, DISALLOWED_DEFAULT_PROPERTIES); 161 } 162 163 /** 164 * Constructor to create the Coordinator Submit Command. 165 * 166 * @param conf : Configuration for Coordinator job 167 */ 168 public CoordSubmitXCommand(Configuration conf) { 169 super("coord_submit", "coord_submit", 1); 170 this.conf = ParamChecker.notNull(conf, "conf"); 171 this.bundleId = null; 172 this.coordName = null; 173 } 174 175 /** 176 * Constructor to create the Coordinator Submit Command by bundle job. 177 * 178 * @param conf : Configuration for Coordinator job 179 * @param bundleId : bundle id 180 * @param coordName : coord name 181 */ 182 protected CoordSubmitXCommand(Configuration conf, String bundleId, String coordName) { 183 super("coord_submit", "coord_submit", 1); 184 this.conf = ParamChecker.notNull(conf, "conf"); 185 this.bundleId = ParamChecker.notEmpty(bundleId, "bundleId"); 186 this.coordName = ParamChecker.notEmpty(coordName, "coordName"); 187 } 188 189 /** 190 * Constructor to create the Coordinator Submit Command. 191 * 192 * @param dryrun : if dryrun 193 * @param conf : Configuration for Coordinator job 194 */ 195 public CoordSubmitXCommand(boolean dryrun, Configuration conf) { 196 this(conf); 197 this.dryrun = dryrun; 198 } 199 200 /* (non-Javadoc) 201 * @see org.apache.oozie.command.XCommand#execute() 202 */ 203 @Override 204 protected String submit() throws CommandException { 205 LOG.info("STARTED Coordinator Submit"); 206 String jobId = submitJob(); 207 LOG.info("ENDED Coordinator Submit jobId=" + jobId); 208 return jobId; 209 } 210 211 protected String submitJob() throws CommandException { 212 String jobId = null; 213 InstrumentUtils.incrJobCounter(getName(), 1, getInstrumentation()); 214 215 boolean exceptionOccured = false; 216 try { 217 mergeDefaultConfig(); 218 219 String appXml = readAndValidateXml(); 220 coordJob.setOrigJobXml(appXml); 221 LOG.debug("jobXml after initial validation " + XmlUtils.prettyPrint(appXml).toString()); 222 223 Element eXml = XmlUtils.parseXml(appXml); 224 225 String appNamespace = readAppNamespace(eXml); 226 coordJob.setAppNamespace(appNamespace); 227 228 ParameterVerifier.verifyParameters(conf, eXml); 229 230 appXml = XmlUtils.removeComments(appXml); 231 initEvaluators(); 232 Element eJob = basicResolveAndIncludeDS(appXml, conf, coordJob); 233 234 validateCoordinatorJob(); 235 236 // checking if the coordinator application data input/output events 237 // specify multiple data instance values in erroneous manner 238 checkMultipleTimeInstances(eJob, COORD_INPUT_EVENTS, COORD_INPUT_EVENTS_DATA_IN); 239 checkMultipleTimeInstances(eJob, COORD_OUTPUT_EVENTS, COORD_OUTPUT_EVENTS_DATA_OUT); 240 241 LOG.debug("jobXml after all validation " + XmlUtils.prettyPrint(eJob).toString()); 242 243 jobId = storeToDB(appXml, eJob, coordJob); 244 // log job info for coordinator job 245 LogUtils.setLogInfo(coordJob); 246 247 if (!dryrun) { 248 queueMaterializeTransitionXCommand(jobId); 249 } 250 else { 251 return getDryRun(coordJob); 252 } 253 } 254 catch (JDOMException jex) { 255 exceptionOccured = true; 256 LOG.warn("ERROR: ", jex); 257 throw new CommandException(ErrorCode.E0700, jex.getMessage(), jex); 258 } 259 catch (CoordinatorJobException cex) { 260 exceptionOccured = true; 261 LOG.warn("ERROR: ", cex); 262 throw new CommandException(cex); 263 } 264 catch (ParameterVerifierException pex) { 265 exceptionOccured = true; 266 LOG.warn("ERROR: ", pex); 267 throw new CommandException(pex); 268 } 269 catch (IllegalArgumentException iex) { 270 exceptionOccured = true; 271 LOG.warn("ERROR: ", iex); 272 throw new CommandException(ErrorCode.E1003, iex.getMessage(), iex); 273 } 274 catch (Exception ex) { 275 exceptionOccured = true; 276 LOG.warn("ERROR: ", ex); 277 throw new CommandException(ErrorCode.E0803, ex.getMessage(), ex); 278 } 279 finally { 280 if (exceptionOccured) { 281 if (coordJob.getId() == null || coordJob.getId().equalsIgnoreCase("")) { 282 coordJob.setStatus(CoordinatorJob.Status.FAILED); 283 coordJob.resetPending(); 284 } 285 } 286 } 287 return jobId; 288 } 289 290 /** 291 * Gets the dryrun output. 292 * 293 * @param coordJob the coordinatorJobBean 294 * @return the dry run 295 * @throws Exception the exception 296 */ 297 protected String getDryRun(CoordinatorJobBean coordJob) throws Exception{ 298 int materializationWindow = ConfigurationService 299 .getInt(CoordMaterializeTriggerService.CONF_MATERIALIZATION_WINDOW); 300 Date startTime = coordJob.getStartTime(); 301 long startTimeMilli = startTime.getTime(); 302 long endTimeMilli = startTimeMilli + (materializationWindow * 1000); 303 Date jobEndTime = coordJob.getEndTime(); 304 Date endTime = new Date(endTimeMilli); 305 if (endTime.compareTo(jobEndTime) > 0) { 306 endTime = jobEndTime; 307 } 308 String jobId = coordJob.getId(); 309 LOG.info("[" + jobId + "]: Update status to RUNNING"); 310 coordJob.setStatus(Job.Status.RUNNING); 311 coordJob.setPending(); 312 Configuration jobConf = null; 313 try { 314 jobConf = new XConfiguration(new StringReader(coordJob.getConf())); 315 } 316 catch (IOException e1) { 317 LOG.warn("Configuration parse error. read from DB :" + coordJob.getConf(), e1); 318 } 319 String action = new CoordMaterializeTransitionXCommand(coordJob, materializationWindow, startTime, 320 endTime).materializeActions(true); 321 String output = coordJob.getJobXml() + System.getProperty("line.separator") 322 + "***actions for instance***" + action; 323 return output; 324 } 325 326 /** 327 * Queue MaterializeTransitionXCommand 328 */ 329 protected void queueMaterializeTransitionXCommand(String jobId) { 330 int materializationWindow = ConfigurationService 331 .getInt(CoordMaterializeTriggerService.CONF_MATERIALIZATION_WINDOW); 332 queue(new CoordMaterializeTransitionXCommand(jobId, materializationWindow), 100); 333 } 334 335 /** 336 * Method that validates values in the definition for correctness. Placeholder to add more. 337 */ 338 private void validateCoordinatorJob() throws Exception { 339 // check if startTime < endTime 340 if (!coordJob.getStartTime().before(coordJob.getEndTime())) { 341 throw new IllegalArgumentException("Coordinator Start Time must be earlier than End Time."); 342 } 343 344 try { 345 // Check if a coord job with cron frequency will materialize actions 346 int freq = Integer.parseInt(coordJob.getFrequency()); 347 348 // Check if the frequency is faster than 5 min if enabled 349 if (ConfigurationService.getBoolean(CONF_CHECK_MAX_FREQUENCY)) { 350 CoordinatorJob.Timeunit unit = coordJob.getTimeUnit(); 351 if (freq == 0 || (freq < 5 && unit == CoordinatorJob.Timeunit.MINUTE)) { 352 throw new IllegalArgumentException("Coordinator job with frequency [" + freq + 353 "] minutes is faster than allowed maximum of 5 minutes (" 354 + CONF_CHECK_MAX_FREQUENCY + " is set to true)"); 355 } 356 } 357 } catch (NumberFormatException e) { 358 Date start = coordJob.getStartTime(); 359 Calendar cal = Calendar.getInstance(); 360 cal.setTime(start); 361 cal.add(Calendar.MINUTE, -1); 362 start = cal.getTime(); 363 364 Date nextTime = CoordCommandUtils.getNextValidActionTimeForCronFrequency(start, coordJob); 365 if (nextTime == null) { 366 throw new IllegalArgumentException("Invalid coordinator cron frequency: " + coordJob.getFrequency()); 367 } 368 if (!nextTime.before(coordJob.getEndTime())) { 369 throw new IllegalArgumentException("Coordinator job with frequency '" + 370 coordJob.getFrequency() + "' materializes no actions between start and end time."); 371 } 372 } 373 } 374 375 /* 376 * Check against multiple data instance values inside a single <instance> <start-instance> or <end-instance> tag 377 * If found, the job is not submitted and user is informed to correct the error, 378 * instead of defaulting to the first instance value in the list 379 */ 380 private void checkMultipleTimeInstances(Element eCoordJob, String eventType, String dataType) throws CoordinatorJobException { 381 Element eventsSpec, dataSpec, instance; 382 List<Element> instanceSpecList; 383 Namespace ns = eCoordJob.getNamespace(); 384 String instanceValue; 385 eventsSpec = eCoordJob.getChild(eventType, ns); 386 if (eventsSpec != null) { 387 dataSpec = eventsSpec.getChild(dataType, ns); 388 if (dataSpec != null) { 389 // In case of input-events, there can be multiple child <instance> datasets. 390 // Iterating to ensure none of them have errors 391 instanceSpecList = dataSpec.getChildren("instance", ns); 392 Iterator instanceIter = instanceSpecList.iterator(); 393 while(instanceIter.hasNext()) { 394 instance = ((Element) instanceIter.next()); 395 if(instance.getContentSize() == 0) { //empty string or whitespace 396 throw new CoordinatorJobException(ErrorCode.E1021, "<instance> tag within " + eventType + " is empty!"); 397 } 398 instanceValue = instance.getContent(0).toString(); 399 boolean isInvalid = false; 400 try { 401 isInvalid = evalAction.checkForExistence(instanceValue, ","); 402 } catch (Exception e) { 403 handleELParseException(eventType, dataType, instanceValue); 404 } 405 if (isInvalid) { // reaching this block implies instance is not empty i.e. length > 0 406 handleExpresionWithMultipleInstances(eventType, dataType, instanceValue); 407 } 408 } 409 410 // In case of input-events, there can be multiple child <start-instance> datasets. 411 // Iterating to ensure none of them have errors 412 instanceSpecList = dataSpec.getChildren("start-instance", ns); 413 instanceIter = instanceSpecList.iterator(); 414 while(instanceIter.hasNext()) { 415 instance = ((Element) instanceIter.next()); 416 if(instance.getContentSize() == 0) { //empty string or whitespace 417 throw new CoordinatorJobException(ErrorCode.E1021, "<start-instance> tag within " + eventType 418 + " is empty!"); 419 } 420 instanceValue = instance.getContent(0).toString(); 421 boolean isInvalid = false; 422 try { 423 isInvalid = evalAction.checkForExistence(instanceValue, ","); 424 } catch (Exception e) { 425 handleELParseException(eventType, dataType, instanceValue); 426 } 427 if (isInvalid) { // reaching this block implies start instance is not empty i.e. length > 0 428 handleExpresionWithStartMultipleInstances(eventType, dataType, instanceValue); 429 } 430 } 431 432 // In case of input-events, there can be multiple child <end-instance> datasets. 433 // Iterating to ensure none of them have errors 434 instanceSpecList = dataSpec.getChildren("end-instance", ns); 435 instanceIter = instanceSpecList.iterator(); 436 while(instanceIter.hasNext()) { 437 instance = ((Element) instanceIter.next()); 438 if(instance.getContentSize() == 0) { //empty string or whitespace 439 throw new CoordinatorJobException(ErrorCode.E1021, "<end-instance> tag within " + eventType + " is empty!"); 440 } 441 instanceValue = instance.getContent(0).toString(); 442 boolean isInvalid = false; 443 try { 444 isInvalid = evalAction.checkForExistence(instanceValue, ","); 445 } catch (Exception e) { 446 handleELParseException(eventType, dataType, instanceValue); 447 } 448 if (isInvalid) { // reaching this block implies instance is not empty i.e. length > 0 449 handleExpresionWithMultipleEndInstances(eventType, dataType, instanceValue); 450 } 451 } 452 453 } 454 } 455 } 456 457 private void handleELParseException(String eventType, String dataType, String instanceValue) 458 throws CoordinatorJobException { 459 String correctAction = null; 460 if(dataType.equals(COORD_INPUT_EVENTS_DATA_IN)) { 461 correctAction = "Coordinator app definition should have valid <instance> tag for data-in"; 462 } else if(dataType.equals(COORD_OUTPUT_EVENTS_DATA_OUT)) { 463 correctAction = "Coordinator app definition should have valid <instance> tag for data-out"; 464 } 465 throw new CoordinatorJobException(ErrorCode.E1021, eventType + " instance '" + instanceValue 466 + "' is not valid. Coordinator job NOT SUBMITTED. " + correctAction); 467 } 468 469 private void handleExpresionWithMultipleInstances(String eventType, String dataType, String instanceValue) 470 throws CoordinatorJobException { 471 String correctAction = null; 472 if(dataType.equals(COORD_INPUT_EVENTS_DATA_IN)) { 473 correctAction = "Coordinator app definition should have separate <instance> tag per data-in instance"; 474 } else if(dataType.equals(COORD_OUTPUT_EVENTS_DATA_OUT)) { 475 correctAction = "Coordinator app definition can have only one <instance> tag per data-out instance"; 476 } 477 throw new CoordinatorJobException(ErrorCode.E1021, eventType + " instance '" + instanceValue 478 + "' contains more than one date instance. Coordinator job NOT SUBMITTED. " + correctAction); 479 } 480 481 private void handleExpresionWithStartMultipleInstances(String eventType, String dataType, String instanceValue) 482 throws CoordinatorJobException { 483 String correctAction = "Coordinator app definition should not have multiple start-instances"; 484 throw new CoordinatorJobException(ErrorCode.E1021, eventType + " start-instance '" + instanceValue 485 + "' contains more than one date start-instance. Coordinator job NOT SUBMITTED. " + correctAction); 486 } 487 488 private void handleExpresionWithMultipleEndInstances(String eventType, String dataType, String instanceValue) 489 throws CoordinatorJobException { 490 String correctAction = "Coordinator app definition should not have multiple end-instances"; 491 throw new CoordinatorJobException(ErrorCode.E1021, eventType + " end-instance '" + instanceValue 492 + "' contains more than one date end-instance. Coordinator job NOT SUBMITTED. " + correctAction); 493 } 494 /** 495 * Read the application XML and validate against coordinator Schema 496 * 497 * @return validated coordinator XML 498 * @throws CoordinatorJobException thrown if unable to read or validate coordinator xml 499 */ 500 protected String readAndValidateXml() throws CoordinatorJobException { 501 String appPath = ParamChecker.notEmpty(conf.get(OozieClient.COORDINATOR_APP_PATH), 502 OozieClient.COORDINATOR_APP_PATH); 503 String coordXml = readDefinition(appPath); 504 validateXml(coordXml); 505 return coordXml; 506 } 507 508 /** 509 * Validate against Coordinator XSD file 510 * 511 * @param xmlContent : Input coordinator xml 512 * @throws CoordinatorJobException thrown if unable to validate coordinator xml 513 */ 514 private void validateXml(String xmlContent) throws CoordinatorJobException { 515 try { 516 Validator validator = Services.get().get(SchemaService.class).getValidator(SchemaName.COORDINATOR); 517 validator.validate(new StreamSource(new StringReader(xmlContent))); 518 } 519 catch (SAXException ex) { 520 LOG.warn("SAXException :", ex); 521 throw new CoordinatorJobException(ErrorCode.E0701, ex.getMessage(), ex); 522 } 523 catch (IOException ex) { 524 LOG.warn("IOException :", ex); 525 throw new CoordinatorJobException(ErrorCode.E0702, ex.getMessage(), ex); 526 } 527 } 528 529 /** 530 * Read the application XML schema namespace 531 * 532 * @param coordXmlElement input coordinator xml Element 533 * @return app xml namespace 534 * @throws CoordinatorJobException 535 */ 536 private String readAppNamespace(Element coordXmlElement) throws CoordinatorJobException { 537 Namespace ns = coordXmlElement.getNamespace(); 538 if (ns != null && bundleId != null && ns.getURI().equals(SchemaService.COORDINATOR_NAMESPACE_URI_1)) { 539 throw new CoordinatorJobException(ErrorCode.E1319, "bundle app can not submit coordinator namespace " 540 + SchemaService.COORDINATOR_NAMESPACE_URI_1 + ", please use 0.2 or later"); 541 } 542 if (ns != null) { 543 return ns.getURI(); 544 } 545 else { 546 throw new CoordinatorJobException(ErrorCode.E0700, "the application xml namespace is not given"); 547 } 548 } 549 550 /** 551 * Merge default configuration with user-defined configuration. 552 * 553 * @throws CommandException thrown if failed to read or merge configurations 554 */ 555 protected void mergeDefaultConfig() throws CommandException { 556 Path configDefault = null; 557 try { 558 String coordAppPathStr = conf.get(OozieClient.COORDINATOR_APP_PATH); 559 Path coordAppPath = new Path(coordAppPathStr); 560 String user = ParamChecker.notEmpty(conf.get(OozieClient.USER_NAME), OozieClient.USER_NAME); 561 HadoopAccessorService has = Services.get().get(HadoopAccessorService.class); 562 Configuration fsConf = has.createConfiguration(coordAppPath.toUri().getAuthority()); 563 FileSystem fs = has.createFileSystem(user, coordAppPath.toUri(), fsConf); 564 565 // app path could be a directory 566 if (!fs.isFile(coordAppPath)) { 567 configDefault = new Path(coordAppPath, CONFIG_DEFAULT); 568 } else { 569 configDefault = new Path(coordAppPath.getParent(), CONFIG_DEFAULT); 570 } 571 572 if (fs.exists(configDefault)) { 573 Configuration defaultConf = new XConfiguration(fs.open(configDefault)); 574 PropertiesUtils.checkDisallowedProperties(defaultConf, DISALLOWED_DEFAULT_PROPERTIES); 575 XConfiguration.injectDefaults(defaultConf, conf); 576 } 577 else { 578 LOG.info("configDefault Doesn't exist " + configDefault); 579 } 580 PropertiesUtils.checkDisallowedProperties(conf, DISALLOWED_USER_PROPERTIES); 581 582 // Resolving all variables in the job properties. 583 // This ensures the Hadoop Configuration semantics is preserved. 584 XConfiguration resolvedVarsConf = new XConfiguration(); 585 for (Map.Entry<String, String> entry : conf) { 586 resolvedVarsConf.set(entry.getKey(), conf.get(entry.getKey())); 587 } 588 conf = resolvedVarsConf; 589 } 590 catch (IOException e) { 591 throw new CommandException(ErrorCode.E0702, e.getMessage() + " : Problem reading default config " 592 + configDefault, e); 593 } 594 catch (HadoopAccessorException e) { 595 throw new CommandException(e); 596 } 597 LOG.debug("Merged CONF :" + XmlUtils.prettyPrint(conf).toString()); 598 } 599 600 /** 601 * The method resolve all the variables that are defined in configuration. It also include the data set definition 602 * from dataset file into XML. 603 * 604 * @param appXml : Original job XML 605 * @param conf : Configuration of the job 606 * @param coordJob : Coordinator job bean to be populated. 607 * @return Resolved and modified job XML element. 608 * @throws CoordinatorJobException thrown if failed to resolve basic entities or include referred datasets 609 * @throws Exception thrown if failed to resolve basic entities or include referred datasets 610 */ 611 public Element basicResolveAndIncludeDS(String appXml, Configuration conf, CoordinatorJobBean coordJob) 612 throws CoordinatorJobException, Exception { 613 Element basicResolvedApp = resolveInitial(conf, appXml, coordJob); 614 includeDataSets(basicResolvedApp, conf); 615 return basicResolvedApp; 616 } 617 618 /** 619 * Insert data set into data-in and data-out tags. 620 * 621 * @param eAppXml : coordinator application XML 622 * @param eDatasets : DataSet XML 623 */ 624 @SuppressWarnings("unchecked") 625 private void insertDataSet(Element eAppXml, Element eDatasets) { 626 // Adding DS definition in the coordinator XML 627 Element inputList = eAppXml.getChild("input-events", eAppXml.getNamespace()); 628 if (inputList != null) { 629 for (Element dataIn : (List<Element>) inputList.getChildren("data-in", eAppXml.getNamespace())) { 630 Element eDataset = findDataSet(eDatasets, dataIn.getAttributeValue("dataset")); 631 dataIn.getContent().add(0, eDataset); 632 } 633 } 634 Element outputList = eAppXml.getChild("output-events", eAppXml.getNamespace()); 635 if (outputList != null) { 636 for (Element dataOut : (List<Element>) outputList.getChildren("data-out", eAppXml.getNamespace())) { 637 Element eDataset = findDataSet(eDatasets, dataOut.getAttributeValue("dataset")); 638 dataOut.getContent().add(0, eDataset); 639 } 640 } 641 } 642 643 /** 644 * Find a specific dataset from a list of Datasets. 645 * 646 * @param eDatasets : List of data sets 647 * @param name : queried data set name 648 * @return one Dataset element. otherwise throw Exception 649 */ 650 @SuppressWarnings("unchecked") 651 private static Element findDataSet(Element eDatasets, String name) { 652 for (Element eDataset : (List<Element>) eDatasets.getChildren("dataset", eDatasets.getNamespace())) { 653 if (eDataset.getAttributeValue("name").equals(name)) { 654 eDataset = (Element) eDataset.clone(); 655 eDataset.detach(); 656 return eDataset; 657 } 658 } 659 throw new RuntimeException("undefined dataset: " + name); 660 } 661 662 /** 663 * Initialize all the required EL Evaluators. 664 */ 665 protected void initEvaluators() { 666 evalFreq = CoordELEvaluator.createELEvaluatorForGroup(conf, "coord-job-submit-freq"); 667 evalNofuncs = CoordELEvaluator.createELEvaluatorForGroup(conf, "coord-job-submit-nofuncs"); 668 evalInst = CoordELEvaluator.createELEvaluatorForGroup(conf, "coord-job-submit-instances"); 669 evalAction = CoordELEvaluator.createELEvaluatorForGroup(conf, "coord-action-start"); 670 evalTimeout = CoordELEvaluator.createELEvaluatorForGroup(conf, "coord-job-wait-timeout"); 671 evalInitialInstance = CoordELEvaluator.createELEvaluatorForGroup(conf, "coord-job-submit-initial-instance"); 672 673 } 674 675 /** 676 * Resolve basic entities using job Configuration. 677 * 678 * @param conf :Job configuration 679 * @param appXml : Original job XML 680 * @param coordJob : Coordinator job bean to be populated. 681 * @return Resolved job XML element. 682 * @throws CoordinatorJobException thrown if failed to resolve basic entities 683 * @throws Exception thrown if failed to resolve basic entities 684 */ 685 @SuppressWarnings("unchecked") 686 protected Element resolveInitial(Configuration conf, String appXml, CoordinatorJobBean coordJob) 687 throws CoordinatorJobException, Exception { 688 Element eAppXml = XmlUtils.parseXml(appXml); 689 // job's main attributes 690 // frequency 691 String val = resolveAttribute("frequency", eAppXml, evalFreq); 692 int ival = 0; 693 694 val = ParamChecker.checkFrequency(val); 695 coordJob.setFrequency(val); 696 TimeUnit tmp = (evalFreq.getVariable("timeunit") == null) ? TimeUnit.MINUTE : ((TimeUnit) evalFreq 697 .getVariable("timeunit")); 698 try { 699 Integer.parseInt(val); 700 } 701 catch (NumberFormatException ex) { 702 tmp=TimeUnit.CRON; 703 } 704 705 addAnAttribute("freq_timeunit", eAppXml, tmp.toString()); 706 // TimeUnit 707 coordJob.setTimeUnit(CoordinatorJob.Timeunit.valueOf(tmp.toString())); 708 // End Of Duration 709 tmp = evalFreq.getVariable("endOfDuration") == null ? TimeUnit.NONE : ((TimeUnit) evalFreq 710 .getVariable("endOfDuration")); 711 addAnAttribute("end_of_duration", eAppXml, tmp.toString()); 712 // coordJob.setEndOfDuration(tmp) // TODO: Add new attribute in Job bean 713 714 // Application name 715 if (this.coordName == null) { 716 String name = ELUtils.resolveAppName(eAppXml.getAttribute("name").getValue(), conf); 717 coordJob.setAppName(name); 718 } 719 else { 720 // this coord job is created from bundle 721 coordJob.setAppName(this.coordName); 722 } 723 724 // start time 725 val = resolveAttribute("start", eAppXml, evalNofuncs); 726 ParamChecker.checkDateOozieTZ(val, "start"); 727 coordJob.setStartTime(DateUtils.parseDateOozieTZ(val)); 728 // end time 729 val = resolveAttribute("end", eAppXml, evalNofuncs); 730 ParamChecker.checkDateOozieTZ(val, "end"); 731 coordJob.setEndTime(DateUtils.parseDateOozieTZ(val)); 732 // Time zone 733 val = resolveAttribute("timezone", eAppXml, evalNofuncs); 734 ParamChecker.checkTimeZone(val, "timezone"); 735 coordJob.setTimeZone(val); 736 737 // controls 738 val = resolveTagContents("timeout", eAppXml.getChild("controls", eAppXml.getNamespace()), evalTimeout); 739 if (val != null && val != "") { 740 int t = Integer.parseInt(val); 741 tmp = (evalTimeout.getVariable("timeunit") == null) ? TimeUnit.MINUTE : ((TimeUnit) evalTimeout 742 .getVariable("timeunit")); 743 switch (tmp) { 744 case HOUR: 745 val = String.valueOf(t * 60); 746 break; 747 case DAY: 748 val = String.valueOf(t * 60 * 24); 749 break; 750 case MONTH: 751 val = String.valueOf(t * 60 * 24 * 30); 752 break; 753 default: 754 break; 755 } 756 } 757 else { 758 val = ConfigurationService.get(CONF_DEFAULT_TIMEOUT_NORMAL); 759 } 760 761 ival = ParamChecker.checkInteger(val, "timeout"); 762 if (ival < 0 || ival > ConfigurationService.getInt(CONF_DEFAULT_MAX_TIMEOUT)) { 763 ival = ConfigurationService.getInt(CONF_DEFAULT_MAX_TIMEOUT); 764 } 765 coordJob.setTimeout(ival); 766 767 val = resolveTagContents("concurrency", eAppXml.getChild("controls", eAppXml.getNamespace()), evalNofuncs); 768 if (val == null || val.isEmpty()) { 769 val = ConfigurationService.get(CONF_DEFAULT_CONCURRENCY); 770 } 771 ival = ParamChecker.checkInteger(val, "concurrency"); 772 coordJob.setConcurrency(ival); 773 774 val = resolveTagContents("throttle", eAppXml.getChild("controls", eAppXml.getNamespace()), evalNofuncs); 775 if (val == null || val.isEmpty()) { 776 int defaultThrottle = ConfigurationService.getInt(CONF_DEFAULT_THROTTLE); 777 ival = defaultThrottle; 778 } 779 else { 780 ival = ParamChecker.checkInteger(val, "throttle"); 781 } 782 int maxQueue = ConfigurationService.getInt(CONF_QUEUE_SIZE); 783 float factor = ConfigurationService.getFloat(CONF_MAT_THROTTLING_FACTOR); 784 int maxThrottle = (int) (maxQueue * factor); 785 if (ival > maxThrottle || ival < 1) { 786 ival = maxThrottle; 787 } 788 LOG.debug("max throttle " + ival); 789 coordJob.setMatThrottling(ival); 790 791 val = resolveTagContents("execution", eAppXml.getChild("controls", eAppXml.getNamespace()), evalNofuncs); 792 if (val == "") { 793 val = Execution.FIFO.toString(); 794 } 795 coordJob.setExecutionOrder(Execution.valueOf(val)); 796 String[] acceptedVals = { Execution.LIFO.toString(), Execution.FIFO.toString(), Execution.LAST_ONLY.toString(), 797 Execution.NONE.toString()}; 798 ParamChecker.isMember(val, acceptedVals, "execution"); 799 800 // datasets 801 resolveTagContents("include", eAppXml.getChild("datasets", eAppXml.getNamespace()), evalNofuncs); 802 // for each data set 803 resolveDataSets(eAppXml); 804 HashMap<String, String> dataNameList = new HashMap<String, String>(); 805 resolveIODataset(eAppXml); 806 resolveIOEvents(eAppXml, dataNameList); 807 808 if (CoordUtils.isInputLogicSpecified(eAppXml)) { 809 resolveInputLogic(eAppXml.getChild(CoordInputLogicEvaluator.INPUT_LOGIC, eAppXml.getNamespace()), evalInst, 810 dataNameList); 811 } 812 813 resolveTagContents("app-path", eAppXml.getChild("action", eAppXml.getNamespace()).getChild("workflow", 814 eAppXml.getNamespace()), evalNofuncs); 815 // TODO: If action or workflow tag is missing, NullPointerException will 816 // occur 817 Element configElem = eAppXml.getChild("action", eAppXml.getNamespace()).getChild("workflow", 818 eAppXml.getNamespace()).getChild("configuration", eAppXml.getNamespace()); 819 evalData = CoordELEvaluator.createELEvaluatorForDataEcho(conf, "coord-job-submit-data", dataNameList); 820 if (configElem != null) { 821 for (Element propElem : (List<Element>) configElem.getChildren("property", configElem.getNamespace())) { 822 resolveTagContents("name", propElem, evalData); 823 // Want to check the data-integrity but don't want to modify the 824 // XML 825 // for properties only 826 Element tmpProp = (Element) propElem.clone(); 827 resolveTagContents("value", tmpProp, evalData); 828 } 829 } 830 evalSla = CoordELEvaluator.createELEvaluatorForDataAndConf(conf, "coord-sla-submit", dataNameList); 831 resolveSLA(eAppXml, coordJob); 832 return eAppXml; 833 } 834 835 /** 836 * Resolve SLA events 837 * 838 * @param eAppXml job XML 839 * @param coordJob coordinator job bean 840 * @throws CommandException thrown if failed to resolve sla events 841 */ 842 private void resolveSLA(Element eAppXml, CoordinatorJobBean coordJob) throws CommandException { 843 Element eSla = XmlUtils.getSLAElement(eAppXml.getChild("action", eAppXml.getNamespace())); 844 845 if (eSla != null) { 846 resolveSLAContent(eSla); 847 String slaXml = XmlUtils.prettyPrint(eSla).toString(); 848 try { 849 // EL evaluation 850 slaXml = evalSla.evaluate(slaXml, String.class); 851 // Validate against semantic SXD 852 XmlUtils.validateData(slaXml, SchemaName.SLA_ORIGINAL); 853 } 854 catch (Exception e) { 855 throw new CommandException(ErrorCode.E1004, "Validation ERROR :" + e.getMessage(), e); 856 } 857 } 858 } 859 860 /** 861 * Resolve an SLA value. 862 * 863 * @param elem : XML Element where attribute is defiend 864 */ 865 private void resolveSLAContent(Element elem) { 866 for (Element tagElem : (List<Element>) elem.getChildren()) { 867 if (tagElem != null) { 868 try { 869 String val = CoordELFunctions.evalAndWrap(evalNofuncs, tagElem.getText().trim()); 870 tagElem.setText(val); 871 } 872 catch (Exception e) { 873 LOG.warn("Variable is not defined in job.properties. Here is the message: {0}", e.getMessage()); 874 continue; 875 } 876 } 877 } 878 } 879 880 /** 881 * Resolve input-events/data-in and output-events/data-out tags. 882 * 883 * @param eJobOrg : Job element 884 * @throws CoordinatorJobException thrown if failed to resolve input and output events 885 */ 886 @SuppressWarnings("unchecked") 887 private void resolveIOEvents(Element eJobOrg, HashMap<String, String> dataNameList) throws CoordinatorJobException { 888 // Resolving input-events/data-in 889 // Clone the job and don't update anything in the original 890 Element eJob = (Element) eJobOrg.clone(); 891 Element inputList = eJob.getChild("input-events", eJob.getNamespace()); 892 if (inputList != null) { 893 TreeSet<String> eventNameSet = new TreeSet<String>(); 894 for (Element dataIn : (List<Element>) inputList.getChildren("data-in", eJob.getNamespace())) { 895 String dataInName = dataIn.getAttributeValue("name"); 896 dataNameList.put(dataInName, "data-in"); 897 // check whether there is any duplicate data-in name 898 if (eventNameSet.contains(dataInName)) { 899 throw new RuntimeException("Duplicate dataIn name " + dataInName); 900 } 901 else { 902 eventNameSet.add(dataInName); 903 } 904 resolveTagContents("instance", dataIn, evalInst); 905 resolveTagContents("start-instance", dataIn, evalInst); 906 resolveTagContents("end-instance", dataIn, evalInst); 907 908 } 909 } 910 // Resolving output-events/data-out 911 Element outputList = eJob.getChild("output-events", eJob.getNamespace()); 912 if (outputList != null) { 913 TreeSet<String> eventNameSet = new TreeSet<String>(); 914 for (Element dataOut : (List<Element>) outputList.getChildren("data-out", eJob.getNamespace())) { 915 String dataOutName = dataOut.getAttributeValue("name"); 916 dataNameList.put(dataOutName, "data-out"); 917 // check whether there is any duplicate data-out name 918 if (eventNameSet.contains(dataOutName)) { 919 throw new RuntimeException("Duplicate dataIn name " + dataOutName); 920 } 921 else { 922 eventNameSet.add(dataOutName); 923 } 924 resolveTagContents("instance", dataOut, evalInst); 925 926 } 927 } 928 929 } 930 931 private void resolveInputLogic(Element root, ELEvaluator evalInputLogic, HashMap<String, String> dataNameList) 932 throws Exception { 933 for (Object event : root.getChildren()) { 934 Element inputElement = (Element) event; 935 resolveAttribute("dataset", inputElement, evalInputLogic); 936 String name=resolveAttribute("name", inputElement, evalInputLogic); 937 resolveAttribute("or", inputElement, evalInputLogic); 938 resolveAttribute("and", inputElement, evalInputLogic); 939 resolveAttribute("combine", inputElement, evalInputLogic); 940 941 if (name != null) { 942 dataNameList.put(name, "data-in"); 943 } 944 945 if (!inputElement.getChildren().isEmpty()) { 946 resolveInputLogic(inputElement, evalInputLogic, dataNameList); 947 } 948 } 949 } 950 951 /** 952 * Resolve input-events/dataset and output-events/dataset tags. 953 * 954 * @param eJob : Job element 955 * @throws CoordinatorJobException thrown if failed to resolve input and output events 956 */ 957 @SuppressWarnings("unchecked") 958 private void resolveIODataset(Element eAppXml) throws CoordinatorJobException { 959 // Resolving input-events/data-in 960 Element inputList = eAppXml.getChild("input-events", eAppXml.getNamespace()); 961 if (inputList != null) { 962 for (Element dataIn : (List<Element>) inputList.getChildren("data-in", eAppXml.getNamespace())) { 963 resolveAttribute("dataset", dataIn, evalInst); 964 965 } 966 } 967 // Resolving output-events/data-out 968 Element outputList = eAppXml.getChild("output-events", eAppXml.getNamespace()); 969 if (outputList != null) { 970 for (Element dataOut : (List<Element>) outputList.getChildren("data-out", eAppXml.getNamespace())) { 971 resolveAttribute("dataset", dataOut, evalInst); 972 973 } 974 } 975 976 } 977 978 979 /** 980 * Add an attribute into XML element. 981 * 982 * @param attrName :attribute name 983 * @param elem : Element to add attribute 984 * @param value :Value of attribute 985 */ 986 private void addAnAttribute(String attrName, Element elem, String value) { 987 elem.setAttribute(attrName, value); 988 } 989 990 /** 991 * Resolve datasets using job configuration. 992 * 993 * @param eAppXml : Job Element XML 994 * @throws Exception thrown if failed to resolve datasets 995 */ 996 @SuppressWarnings("unchecked") 997 private void resolveDataSets(Element eAppXml) throws Exception { 998 Element datasetList = eAppXml.getChild("datasets", eAppXml.getNamespace()); 999 if (datasetList != null) { 1000 1001 List<Element> dsElems = datasetList.getChildren("dataset", eAppXml.getNamespace()); 1002 resolveDataSets(dsElems); 1003 resolveTagContents("app-path", eAppXml.getChild("action", eAppXml.getNamespace()).getChild("workflow", 1004 eAppXml.getNamespace()), evalNofuncs); 1005 } 1006 } 1007 1008 /** 1009 * Resolve datasets using job configuration. 1010 * 1011 * @param dsElems : Data set XML element. 1012 * @throws CoordinatorJobException thrown if failed to resolve datasets 1013 */ 1014 private void resolveDataSets(List<Element> dsElems) throws CoordinatorJobException { 1015 for (Element dsElem : dsElems) { 1016 // Setting up default TimeUnit and EndOFDuraion 1017 evalFreq.setVariable("timeunit", TimeUnit.MINUTE); 1018 evalFreq.setVariable("endOfDuration", TimeUnit.NONE); 1019 1020 String val = resolveAttribute("frequency", dsElem, evalFreq); 1021 int ival = ParamChecker.checkInteger(val, "frequency"); 1022 ParamChecker.checkGTZero(ival, "frequency"); 1023 addAnAttribute("freq_timeunit", dsElem, evalFreq.getVariable("timeunit") == null ? TimeUnit.MINUTE 1024 .toString() : ((TimeUnit) evalFreq.getVariable("timeunit")).toString()); 1025 addAnAttribute("end_of_duration", dsElem, evalFreq.getVariable("endOfDuration") == null ? TimeUnit.NONE 1026 .toString() : ((TimeUnit) evalFreq.getVariable("endOfDuration")).toString()); 1027 val = resolveAttribute("initial-instance", dsElem, evalInitialInstance); 1028 ParamChecker.checkDateOozieTZ(val, "initial-instance"); 1029 checkInitialInstance(val); 1030 val = resolveAttribute("timezone", dsElem, evalNofuncs); 1031 ParamChecker.checkTimeZone(val, "timezone"); 1032 resolveTagContents("uri-template", dsElem, evalNofuncs); 1033 resolveTagContents("done-flag", dsElem, evalNofuncs); 1034 } 1035 } 1036 1037 /** 1038 * Resolve the content of a tag. 1039 * 1040 * @param tagName : Tag name of job XML i.e. <timeout> 10 </timeout> 1041 * @param elem : Element where the tag exists. 1042 * @param eval : EL evealuator 1043 * @return Resolved tag content. 1044 * @throws CoordinatorJobException thrown if failed to resolve tag content 1045 */ 1046 @SuppressWarnings("unchecked") 1047 private String resolveTagContents(String tagName, Element elem, ELEvaluator eval) throws CoordinatorJobException { 1048 String ret = ""; 1049 if (elem != null) { 1050 for (Element tagElem : (List<Element>) elem.getChildren(tagName, elem.getNamespace())) { 1051 if (tagElem != null) { 1052 String updated; 1053 try { 1054 updated = CoordELFunctions.evalAndWrap(eval, tagElem.getText().trim()); 1055 1056 } 1057 catch (Exception e) { 1058 throw new CoordinatorJobException(ErrorCode.E1004, e.getMessage(), e); 1059 } 1060 tagElem.removeContent(); 1061 tagElem.addContent(updated); 1062 ret += updated; 1063 } 1064 } 1065 } 1066 return ret; 1067 } 1068 1069 /** 1070 * Resolve an attribute value. 1071 * 1072 * @param attrName : Attribute name. 1073 * @param elem : XML Element where attribute is defiend 1074 * @param eval : ELEvaluator used to resolve 1075 * @return Resolved attribute value 1076 * @throws CoordinatorJobException thrown if failed to resolve an attribute value 1077 */ 1078 private String resolveAttribute(String attrName, Element elem, ELEvaluator eval) throws CoordinatorJobException { 1079 Attribute attr = elem.getAttribute(attrName); 1080 String val = null; 1081 if (attr != null) { 1082 try { 1083 val = CoordELFunctions.evalAndWrap(eval, attr.getValue().trim()); 1084 } 1085 catch (Exception e) { 1086 throw new CoordinatorJobException(ErrorCode.E1004, e.getMessage(), e); 1087 } 1088 attr.setValue(val); 1089 } 1090 return val; 1091 } 1092 1093 /** 1094 * Include referred datasets into XML. 1095 * 1096 * @param resolvedXml : Job XML element. 1097 * @param conf : Job configuration 1098 * @throws CoordinatorJobException thrown if failed to include referred datasets into XML 1099 */ 1100 @SuppressWarnings("unchecked") 1101 protected void includeDataSets(Element resolvedXml, Configuration conf) throws CoordinatorJobException { 1102 Element datasets = resolvedXml.getChild("datasets", resolvedXml.getNamespace()); 1103 Element allDataSets = new Element("all_datasets", resolvedXml.getNamespace()); 1104 List<String> dsList = new ArrayList<String>(); 1105 if (datasets != null) { 1106 for (Element includeElem : (List<Element>) datasets.getChildren("include", datasets.getNamespace())) { 1107 String incDSFile = includeElem.getTextTrim(); 1108 includeOneDSFile(incDSFile, dsList, allDataSets, datasets.getNamespace()); 1109 } 1110 for (Element e : (List<Element>) datasets.getChildren("dataset", datasets.getNamespace())) { 1111 String dsName = e.getAttributeValue("name"); 1112 if (dsList.contains(dsName)) {// Override with this DS 1113 // Remove duplicate 1114 removeDataSet(allDataSets, dsName); 1115 } 1116 else { 1117 dsList.add(dsName); 1118 } 1119 allDataSets.addContent((Element) e.clone()); 1120 } 1121 } 1122 insertDataSet(resolvedXml, allDataSets); 1123 resolvedXml.removeChild("datasets", resolvedXml.getNamespace()); 1124 } 1125 1126 /** 1127 * Include one dataset file. 1128 * 1129 * @param incDSFile : Include data set filename. 1130 * @param dsList :List of dataset names to verify the duplicate. 1131 * @param allDataSets : Element that includes all dataset definitions. 1132 * @param dsNameSpace : Data set name space 1133 * @throws CoordinatorJobException thrown if failed to include one dataset file 1134 */ 1135 @SuppressWarnings("unchecked") 1136 private void includeOneDSFile(String incDSFile, List<String> dsList, Element allDataSets, Namespace dsNameSpace) 1137 throws CoordinatorJobException { 1138 Element tmpDataSets = null; 1139 try { 1140 String dsXml = readDefinition(incDSFile); 1141 LOG.debug("DSFILE :" + incDSFile + "\n" + dsXml); 1142 tmpDataSets = XmlUtils.parseXml(dsXml); 1143 } 1144 catch (JDOMException e) { 1145 LOG.warn("Error parsing included dataset [{0}]. Message [{1}]", incDSFile, e.getMessage()); 1146 throw new CoordinatorJobException(ErrorCode.E0700, e.getMessage()); 1147 } 1148 resolveDataSets(tmpDataSets.getChildren("dataset")); 1149 for (Element e : (List<Element>) tmpDataSets.getChildren("dataset")) { 1150 String dsName = e.getAttributeValue("name"); 1151 if (dsList.contains(dsName)) { 1152 throw new RuntimeException("Duplicate Dataset " + dsName); 1153 } 1154 dsList.add(dsName); 1155 Element tmp = (Element) e.clone(); 1156 // TODO: Don't like to over-write the external/include DS's namespace 1157 tmp.setNamespace(dsNameSpace); 1158 tmp.getChild("uri-template").setNamespace(dsNameSpace); 1159 if (e.getChild("done-flag") != null) { 1160 tmp.getChild("done-flag").setNamespace(dsNameSpace); 1161 } 1162 allDataSets.addContent(tmp); 1163 } 1164 // nested include 1165 for (Element includeElem : (List<Element>) tmpDataSets.getChildren("include", tmpDataSets.getNamespace())) { 1166 String incFile = includeElem.getTextTrim(); 1167 includeOneDSFile(incFile, dsList, allDataSets, dsNameSpace); 1168 } 1169 } 1170 1171 /** 1172 * Remove a dataset from a list of dataset. 1173 * 1174 * @param eDatasets : List of dataset 1175 * @param name : Dataset name to be removed. 1176 */ 1177 @SuppressWarnings("unchecked") 1178 private static void removeDataSet(Element eDatasets, String name) { 1179 for (Element eDataset : (List<Element>) eDatasets.getChildren("dataset", eDatasets.getNamespace())) { 1180 if (eDataset.getAttributeValue("name").equals(name)) { 1181 eDataset.detach(); 1182 return; 1183 } 1184 } 1185 throw new RuntimeException("undefined dataset: " + name); 1186 } 1187 1188 /** 1189 * Read coordinator definition. 1190 * 1191 * @param appPath application path. 1192 * @return coordinator definition. 1193 * @throws CoordinatorJobException thrown if the definition could not be read. 1194 */ 1195 protected String readDefinition(String appPath) throws CoordinatorJobException { 1196 String user = ParamChecker.notEmpty(conf.get(OozieClient.USER_NAME), OozieClient.USER_NAME); 1197 // Configuration confHadoop = CoordUtils.getHadoopConf(conf); 1198 try { 1199 URI uri = new URI(appPath); 1200 LOG.debug("user =" + user); 1201 HadoopAccessorService has = Services.get().get(HadoopAccessorService.class); 1202 Configuration fsConf = has.createConfiguration(uri.getAuthority()); 1203 FileSystem fs = has.createFileSystem(user, uri, fsConf); 1204 Path appDefPath = null; 1205 1206 // app path could be a directory 1207 Path path = new Path(uri.getPath()); 1208 // check file exists for dataset include file, app xml already checked 1209 if (!fs.exists(path)) { 1210 throw new URISyntaxException(path.toString(), "path not existed : " + path.toString()); 1211 } 1212 if (!fs.isFile(path)) { 1213 appDefPath = new Path(path, COORDINATOR_XML_FILE); 1214 } else { 1215 appDefPath = path; 1216 } 1217 1218 Reader reader = new InputStreamReader(fs.open(appDefPath)); 1219 StringWriter writer = new StringWriter(); 1220 IOUtils.copyCharStream(reader, writer); 1221 return writer.toString(); 1222 } 1223 catch (IOException ex) { 1224 LOG.warn("IOException :" + XmlUtils.prettyPrint(conf), ex); 1225 throw new CoordinatorJobException(ErrorCode.E1001, ex.getMessage(), ex); 1226 } 1227 catch (URISyntaxException ex) { 1228 LOG.warn("URISyException :" + ex.getMessage()); 1229 throw new CoordinatorJobException(ErrorCode.E1002, appPath, ex.getMessage(), ex); 1230 } 1231 catch (HadoopAccessorException ex) { 1232 throw new CoordinatorJobException(ex); 1233 } 1234 catch (Exception ex) { 1235 LOG.warn("Exception :", ex); 1236 throw new CoordinatorJobException(ErrorCode.E1001, ex.getMessage(), ex); 1237 } 1238 } 1239 1240 /** 1241 * Write a coordinator job into database 1242 * 1243 *@param appXML : Coordinator definition xml 1244 * @param eJob : XML element of job 1245 * @param coordJob : Coordinator job bean 1246 * @return Job id 1247 * @throws CommandException thrown if unable to save coordinator job to db 1248 */ 1249 protected String storeToDB(String appXML, Element eJob, CoordinatorJobBean coordJob) throws CommandException { 1250 String jobId = Services.get().get(UUIDService.class).generateId(ApplicationType.COORDINATOR); 1251 coordJob.setId(jobId); 1252 1253 coordJob.setAppPath(conf.get(OozieClient.COORDINATOR_APP_PATH)); 1254 coordJob.setCreatedTime(new Date()); 1255 coordJob.setUser(conf.get(OozieClient.USER_NAME)); 1256 String group = ConfigUtils.getWithDeprecatedCheck(conf, OozieClient.JOB_ACL, OozieClient.GROUP_NAME, null); 1257 coordJob.setGroup(group); 1258 coordJob.setConf(XmlUtils.prettyPrint(conf).toString()); 1259 coordJob.setJobXml(XmlUtils.prettyPrint(eJob).toString()); 1260 coordJob.setLastActionNumber(0); 1261 coordJob.setLastModifiedTime(new Date()); 1262 1263 if (!dryrun) { 1264 coordJob.setLastModifiedTime(new Date()); 1265 try { 1266 CoordJobQueryExecutor.getInstance().insert(coordJob); 1267 } 1268 catch (JPAExecutorException jpaee) { 1269 coordJob.setId(null); 1270 coordJob.setStatus(CoordinatorJob.Status.FAILED); 1271 throw new CommandException(jpaee); 1272 } 1273 } 1274 return jobId; 1275 } 1276 1277 /* 1278 * this method checks if the initial-instance specified for a particular 1279 is not a date earlier than the oozie server default Jan 01, 1970 00:00Z UTC 1280 */ 1281 private void checkInitialInstance(String val) throws CoordinatorJobException, IllegalArgumentException { 1282 Date initialInstance, givenInstance; 1283 try { 1284 initialInstance = DateUtils.parseDateUTC("1970-01-01T00:00Z"); 1285 givenInstance = DateUtils.parseDateOozieTZ(val); 1286 } 1287 catch (Exception e) { 1288 throw new IllegalArgumentException("Unable to parse dataset initial-instance string '" + val + 1289 "' to Date object. ",e); 1290 } 1291 if(givenInstance.compareTo(initialInstance) < 0) { 1292 throw new CoordinatorJobException(ErrorCode.E1021, "Dataset initial-instance " + val + 1293 " is earlier than the default initial instance " + DateUtils.formatDateOozieTZ(initialInstance)); 1294 } 1295 } 1296 1297 /* (non-Javadoc) 1298 * @see org.apache.oozie.command.XCommand#getEntityKey() 1299 */ 1300 @Override 1301 public String getEntityKey() { 1302 return null; 1303 } 1304 1305 /* (non-Javadoc) 1306 * @see org.apache.oozie.command.XCommand#isLockRequired() 1307 */ 1308 @Override 1309 protected boolean isLockRequired() { 1310 return false; 1311 } 1312 1313 /* (non-Javadoc) 1314 * @see org.apache.oozie.command.XCommand#loadState() 1315 */ 1316 @Override 1317 protected void loadState() throws CommandException { 1318 jpaService = Services.get().get(JPAService.class); 1319 if (jpaService == null) { 1320 throw new CommandException(ErrorCode.E0610); 1321 } 1322 coordJob = new CoordinatorJobBean(); 1323 if (this.bundleId != null) { 1324 // this coord job is created from bundle 1325 coordJob.setBundleId(this.bundleId); 1326 // first use bundle id if submit thru bundle 1327 LogUtils.setLogInfo(this.bundleId); 1328 } 1329 if (this.coordName != null) { 1330 // this coord job is created from bundle 1331 coordJob.setAppName(this.coordName); 1332 } 1333 setJob(coordJob); 1334 1335 } 1336 1337 /* (non-Javadoc) 1338 * @see org.apache.oozie.command.XCommand#verifyPrecondition() 1339 */ 1340 @Override 1341 protected void verifyPrecondition() throws CommandException { 1342 1343 } 1344 1345 /* (non-Javadoc) 1346 * @see org.apache.oozie.command.TransitionXCommand#notifyParent() 1347 */ 1348 @Override 1349 public void notifyParent() throws CommandException { 1350 // update bundle action 1351 if (coordJob.getBundleId() != null) { 1352 LOG.debug("Updating bundle record: " + coordJob.getBundleId() + " for coord id: " + coordJob.getId()); 1353 BundleStatusUpdateXCommand bundleStatusUpdate = new BundleStatusUpdateXCommand(coordJob, prevStatus); 1354 bundleStatusUpdate.call(); 1355 } 1356 } 1357 1358 /* (non-Javadoc) 1359 * @see org.apache.oozie.command.TransitionXCommand#updateJob() 1360 */ 1361 @Override 1362 public void updateJob() throws CommandException { 1363 } 1364 1365 /* (non-Javadoc) 1366 * @see org.apache.oozie.command.TransitionXCommand#getJob() 1367 */ 1368 @Override 1369 public Job getJob() { 1370 return coordJob; 1371 } 1372 1373 @Override 1374 public void performWrites() throws CommandException { 1375 } 1376}