001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *      http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018
019package org.apache.oozie.action.hadoop;
020
021import com.google.common.base.Strings;
022import org.apache.hadoop.conf.Configuration;
023import org.apache.hadoop.fs.Path;
024import org.apache.oozie.action.ActionExecutorException;
025import org.apache.oozie.service.ConfigurationService;
026import org.apache.oozie.service.Services;
027import org.apache.oozie.service.SparkConfigurationService;
028import org.jdom.Element;
029import org.jdom.Namespace;
030
031import java.io.IOException;
032import java.io.StringWriter;
033import java.util.ArrayList;
034import java.util.List;
035import java.util.Map;
036import java.util.Properties;
037
038public class SparkActionExecutor extends JavaActionExecutor {
039    public static final String SPARK_MAIN_CLASS_NAME = "org.apache.oozie.action.hadoop.SparkMain";
040    public static final String SPARK_MASTER = "oozie.spark.master";
041    public static final String SPARK_MODE = "oozie.spark.mode";
042    public static final String SPARK_OPTS = "oozie.spark.spark-opts";
043    public static final String SPARK_DEFAULT_OPTS = "oozie.spark.spark-default-opts";
044    public static final String SPARK_JOB_NAME = "oozie.spark.name";
045    public static final String SPARK_CLASS = "oozie.spark.class";
046    public static final String SPARK_JAR = "oozie.spark.jar";
047    public static final String MAPRED_CHILD_ENV = "mapred.child.env";
048    private static final String CONF_OOZIE_SPARK_SETUP_HADOOP_CONF_DIR = "oozie.action.spark.setup.hadoop.conf.dir";
049
050    public SparkActionExecutor() {
051        super("spark");
052    }
053
054    @Override
055    Configuration setupActionConf(Configuration actionConf, Context context, Element actionXml, Path appPath)
056            throws ActionExecutorException {
057        actionConf = super.setupActionConf(actionConf, context, actionXml, appPath);
058        Namespace ns = actionXml.getNamespace();
059
060        String master = actionXml.getChildTextTrim("master", ns);
061        actionConf.set(SPARK_MASTER, master);
062
063        String mode = actionXml.getChildTextTrim("mode", ns);
064        if (mode != null) {
065            actionConf.set(SPARK_MODE, mode);
066        }
067
068        String jobName = actionXml.getChildTextTrim("name", ns);
069        actionConf.set(SPARK_JOB_NAME, jobName);
070
071        String sparkClass = actionXml.getChildTextTrim("class", ns);
072        if (sparkClass != null) {
073            actionConf.set(SPARK_CLASS, sparkClass);
074        }
075
076        String jarLocation = actionXml.getChildTextTrim("jar", ns);
077        actionConf.set(SPARK_JAR, jarLocation);
078
079        if (master.startsWith("yarn")) {
080            String resourceManager = actionConf.get(HADOOP_YARN_RM);
081            Properties sparkConfig =
082                    Services.get().get(SparkConfigurationService.class).getSparkConfig(resourceManager);
083            if (!sparkConfig.isEmpty()) {
084                try (final StringWriter sw = new StringWriter()) {
085                    sparkConfig.store(sw, "Generated by Oozie server SparkActionExecutor");
086                    actionConf.set(SPARK_DEFAULT_OPTS, sw.toString());
087                } catch (IOException e) {
088                    LOG.warn("Could not propagate Spark default configuration!", e);
089                }
090            }
091        }
092        String sparkOpts = actionXml.getChildTextTrim("spark-opts", ns);
093        if (!Strings.isNullOrEmpty(sparkOpts)) {
094            actionConf.set(SPARK_OPTS, sparkOpts.toString().trim());
095        }
096
097        // Setting if SparkMain should setup hadoop config *-site.xml
098        boolean setupHadoopConf = actionConf.getBoolean(CONF_OOZIE_SPARK_SETUP_HADOOP_CONF_DIR,
099                ConfigurationService.getBoolean(CONF_OOZIE_SPARK_SETUP_HADOOP_CONF_DIR));
100        actionConf.setBoolean(CONF_OOZIE_SPARK_SETUP_HADOOP_CONF_DIR, setupHadoopConf);
101        return actionConf;
102    }
103
104    @Override
105    Configuration setupLauncherConf(Configuration conf, Element actionXml, Path appPath, Context context)
106            throws ActionExecutorException {
107        super.setupLauncherConf(conf, actionXml, appPath, context);
108
109        // Set SPARK_HOME environment variable on launcher job
110        // It is needed since pyspark client checks for it.
111        String sparkHome = "SPARK_HOME=.";
112        String mapredChildEnv = conf.get("oozie.launcher." + MAPRED_CHILD_ENV);
113
114        if (mapredChildEnv == null) {
115            conf.set(MAPRED_CHILD_ENV, sparkHome);
116            conf.set("oozie.launcher." + MAPRED_CHILD_ENV, sparkHome);
117        } else if (!mapredChildEnv.contains("SPARK_HOME")) {
118            conf.set(MAPRED_CHILD_ENV, mapredChildEnv + "," + sparkHome);
119            conf.set("oozie.launcher." + MAPRED_CHILD_ENV, mapredChildEnv + "," + sparkHome);
120        }
121        return conf;
122    }
123
124    @Override
125    public List<Class<?>> getLauncherClasses() {
126        List<Class<?>> classes = new ArrayList<Class<?>>();
127        try {
128            classes.add(Class.forName(SPARK_MAIN_CLASS_NAME));
129        } catch (ClassNotFoundException e) {
130            throw new RuntimeException("Class not found", e);
131        }
132        return classes;
133    }
134
135
136    /**
137     * Return the sharelib name for the action.
138     *
139     * @param actionXml
140     * @return returns <code>spark</code>.
141     */
142    @Override
143    protected String getDefaultShareLibName(Element actionXml) {
144        return "spark";
145    }
146
147    @Override
148    protected void addActionSpecificEnvVars(Map<String, String> env) {
149        env.put("SPARK_HOME", ".");
150    }
151
152    @Override
153    protected String getLauncherMain(Configuration launcherConf, Element actionXml) {
154        return launcherConf.get(LauncherAMUtils.CONF_OOZIE_ACTION_MAIN_CLASS, SPARK_MAIN_CLASS_NAME);
155    }
156
157    @Override
158    public String[] getShareLibFilesForActionConf() {
159        return new String[] { "hive-site.xml" };
160    }
161
162}