001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *      http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018
019package org.apache.oozie.coord;
020
021import java.net.URI;
022import java.net.URISyntaxException;
023import java.util.regex.Pattern;
024
025import org.apache.hadoop.conf.Configuration;
026import org.apache.oozie.DagELFunctions;
027import org.apache.oozie.client.WorkflowJob;
028import org.apache.oozie.dependency.URIHandler;
029import org.apache.oozie.service.ConfigurationService;
030import org.apache.oozie.service.Services;
031import org.apache.oozie.service.URIHandlerService;
032import org.apache.oozie.util.ELEvaluator;
033import org.apache.oozie.util.HCatURI;
034import org.apache.oozie.util.HCatURIParser;
035import org.apache.oozie.util.XLog;
036
037/**
038 * This class implements the EL function for HCat datasets in coordinator
039 */
040
041public class HCatELFunctions {
042    private static final Configuration EMPTY_CONF = new Configuration(true);
043    private static final String HCAT_URI_REGEX_CONFIG = ConfigurationService.get("oozie.hcat.uri.regex.pattern");
044    private static final Pattern HCAT_URI_PATTERN = Pattern.compile(HCAT_URI_REGEX_CONFIG);
045
046    enum EventType {
047        input, output
048    }
049
050    /* Workflow Parameterization EL functions */
051
052    /**
053     * Return true if partitions exists or false if not.
054     *
055     * @param uri hcatalog partition uri.
056     * @return <code>true</code> if the uri exists, <code>false</code> if it does not.
057     * @throws Exception
058     */
059    public static boolean hcat_exists(String uri) throws Exception {
060        URI hcatURI = new URI(uri);
061        URIHandlerService uriService = Services.get().get(URIHandlerService.class);
062        URIHandler handler = uriService.getURIHandler(hcatURI);
063        WorkflowJob workflow = DagELFunctions.getWorkflow();
064        String user = workflow.getUser();
065        return handler.exists(hcatURI, EMPTY_CONF, user);
066    }
067
068    /* Coord EL functions */
069
070    /**
071     * Echo the same EL function without evaluating anything
072     *
073     * @param dataInName
074     * @return the same EL function
075     */
076    public static String ph1_coord_databaseIn_echo(String dataInName) {
077        // Checking if the dataIn is correct?
078        isValidDataEvent(dataInName);
079        return echoUnResolved("databaseIn", "'" + dataInName + "'");
080    }
081
082    public static String ph1_coord_databaseOut_echo(String dataName) {
083        // Checking if the dataOut is correct?
084        isValidDataEvent(dataName);
085        return echoUnResolved("databaseOut", "'" + dataName + "'");
086    }
087
088    public static String ph1_coord_tableIn_echo(String dataName) {
089        // Checking if the dataIn is correct?
090        isValidDataEvent(dataName);
091        return echoUnResolved("tableIn", "'" + dataName + "'");
092    }
093
094    public static String ph1_coord_tableOut_echo(String dataName) {
095        // Checking if the dataOut is correct?
096        isValidDataEvent(dataName);
097        return echoUnResolved("tableOut", "'" + dataName + "'");
098    }
099
100    public static String ph1_coord_dataInPartitionFilter_echo(String dataInName, String type) {
101        // Checking if the dataIn/dataOut is correct?
102        isValidDataEvent(dataInName);
103        return echoUnResolved("dataInPartitionFilter", "'" + dataInName + "', '" + type + "'");
104    }
105
106    public static String ph1_coord_dataInPartitionMin_echo(String dataInName, String partition) {
107        // Checking if the dataIn/dataOut is correct?
108        isValidDataEvent(dataInName);
109        return echoUnResolved("dataInPartitionMin", "'" + dataInName + "', '" + partition + "'");
110    }
111
112    public static String ph1_coord_dataInPartitionMax_echo(String dataInName, String partition) {
113        // Checking if the dataIn/dataOut is correct?
114        isValidDataEvent(dataInName);
115        return echoUnResolved("dataInPartitionMax", "'" + dataInName + "', '" + partition + "'");
116    }
117
118    public static String ph1_coord_dataOutPartitions_echo(String dataOutName) {
119        // Checking if the dataIn/dataOut is correct?
120        isValidDataEvent(dataOutName);
121        return echoUnResolved("dataOutPartitions", "'" + dataOutName + "'");
122    }
123
124    public static String ph1_coord_dataInPartitions_echo(String dataInName, String type) {
125        // Checking if the dataIn/dataOut is correct?
126        isValidDataEvent(dataInName);
127        return echoUnResolved("dataInPartitions", "'" + dataInName + "', '" + type + "'");
128    }
129
130    public static String ph1_coord_dataOutPartitionValue_echo(String dataOutName, String partition) {
131        // Checking if the dataIn/dataOut is correct?
132        isValidDataEvent(dataOutName);
133        return echoUnResolved("dataOutPartitionValue", "'" + dataOutName + "', '" + partition + "'");
134    }
135
136    /**
137     * Extract the hcat DB name from the URI-template associate with
138     * 'dataInName'. Caller needs to specify the EL-evaluator level variable
139     * 'oozie.coord.el.dataset.bean' with synchronous dataset object
140     * (SyncCoordDataset)
141     *
142     * @param dataInName
143     * @return DB name
144     */
145    public static String ph3_coord_databaseIn(String dataInName) {
146        HCatURI hcatURI = getURIFromResolved(dataInName, EventType.input);
147        if (hcatURI != null) {
148            return hcatURI.getDb();
149        }
150        else {
151            return "";
152        }
153    }
154
155    /**
156     * Extract the hcat DB name from the URI-template associate with
157     * 'dataOutName'. Caller needs to specify the EL-evaluator level variable
158     * 'oozie.coord.el.dataset.bean' with synchronous dataset object
159     * (SyncCoordDataset)
160     *
161     * @param dataOutName
162     * @return DB name
163     */
164    public static String ph3_coord_databaseOut(String dataOutName) {
165        HCatURI hcatURI = getURIFromResolved(dataOutName, EventType.output);
166        if (hcatURI != null) {
167            return hcatURI.getDb();
168        }
169        else {
170            return "";
171        }
172    }
173
174    /**
175     * Extract the hcat Table name from the URI-template associate with
176     * 'dataInName'. Caller needs to specify the EL-evaluator level variable
177     * 'oozie.coord.el.dataset.bean' with synchronous dataset object
178     * (SyncCoordDataset)
179     *
180     * @param dataInName
181     * @return Table name
182     */
183    public static String ph3_coord_tableIn(String dataInName) {
184        HCatURI hcatURI = getURIFromResolved(dataInName, EventType.input);
185        if (hcatURI != null) {
186            return hcatURI.getTable();
187        }
188        else {
189            return "";
190        }
191    }
192
193    /**
194     * Extract the hcat Table name from the URI-template associate with
195     * 'dataOutName'. Caller needs to specify the EL-evaluator level variable
196     * 'oozie.coord.el.dataset.bean' with synchronous dataset object
197     * (SyncCoordDataset)
198     *
199     * @param dataOutName
200     * @return Table name
201     */
202    public static String ph3_coord_tableOut(String dataOutName) {
203        HCatURI hcatURI = getURIFromResolved(dataOutName, EventType.output);
204        if (hcatURI != null) {
205            return hcatURI.getTable();
206        }
207        else {
208            return "";
209        }
210    }
211
212    /**
213     * Used to specify the HCat partition filter which is input dependency for workflow job.<p> Look for two evaluator-level
214     * variables <p> A) .datain.&lt;DATAIN_NAME&gt; B) .datain.&lt;DATAIN_NAME&gt;.unresolved <p> A defines the current list of
215     * HCat URIs. <p> B defines whether there are any unresolved EL-function (i.e latest) <p> If there are something
216     * unresolved, this function will echo back the original function <p> otherwise it sends the partition filter.
217     *
218     * @param dataInName : Datain name
219     * @param type : for action type - pig, MR or hive
220     */
221    public static String ph3_coord_dataInPartitionFilter(String dataInName, String type) {
222        ELEvaluator eval = ELEvaluator.getCurrent();
223        String uris = (String) eval.getVariable(".datain." + dataInName);
224        Boolean unresolved = (Boolean) eval.getVariable(".datain." + dataInName + ".unresolved");
225        if (unresolved != null && unresolved.booleanValue() == true) {
226            return "${coord:dataInPartitionFilter('" + dataInName + "', '" + type + "')}";
227        }
228        return createPartitionFilter(uris, type);
229    }
230
231    /**
232     * Used to specify the HCat partition's value defining output for workflow job.<p> Look for two evaluator-level
233     * variables <p> A) .dataout.&lt;DATAOUT_NAME&gt; B) .dataout.&lt;DATAOUT_NAME&gt;.unresolved <p> A defines the current list of
234     * HCat URIs. <p> B defines whether there are any unresolved EL-function (i.e latest) <p> If there are something
235     * unresolved, this function will echo back the original function <p> otherwise it sends the partition value.
236     *
237     * @param dataOutName : Dataout name
238     * @param partitionName : Specific partition name whose value is wanted
239     */
240    public static String ph3_coord_dataOutPartitionValue(String dataOutName, String partitionName) {
241        ELEvaluator eval = ELEvaluator.getCurrent();
242        String uri = (String) eval.getVariable(".dataout." + dataOutName);
243        Boolean unresolved = (Boolean) eval.getVariable(".dataout." + dataOutName + ".unresolved");
244        if (unresolved != null && unresolved.booleanValue() == true) {
245            return "${coord:dataOutPartitionValue('" + dataOutName + "', '" + partitionName + "')}";
246        }
247        try {
248            HCatURI hcatUri = new HCatURI(uri);
249            return hcatUri.getPartitionValue(partitionName);
250        }
251        catch(URISyntaxException urie) {
252            XLog.getLog(HCatELFunctions.class).warn("Exception with uriTemplate [{0}]. Reason [{1}]: ", uri, urie);
253            throw new RuntimeException("HCat URI can't be parsed " + urie);
254        }
255    }
256
257    /**
258     * Used to specify the entire HCat partition defining output for workflow job.<p> Look for two evaluator-level
259     * variables <p> A) .dataout.&lt;DATAOUT_NAME&gt; B) .dataout.&lt;DATAOUT_NAME&gt;.unresolved <p> A defines the data-out
260     * HCat URI. <p> B defines whether there are any unresolved EL-function (i.e latest) <p> If there are something
261     * unresolved, this function will echo back the original function <p> otherwise it sends the partition.
262     *
263     * @param dataOutName : DataOut name
264     */
265    public static String ph3_coord_dataOutPartitions(String dataOutName) {
266        ELEvaluator eval = ELEvaluator.getCurrent();
267        String uri = (String) eval.getVariable(".dataout." + dataOutName);
268        Boolean unresolved = (Boolean) eval.getVariable(".dataout." + dataOutName + ".unresolved");
269        if (unresolved != null && unresolved.booleanValue() == true) {
270            return "${coord:dataOutPartitions('" + dataOutName + "')}";
271        }
272        try {
273            return new HCatURI(uri).toPartitionString();
274        }
275        catch (URISyntaxException e) {
276            throw new RuntimeException("Parsing exception for HCatURI " + uri + ". details: " + e);
277        }
278    }
279
280    /**
281     * Used to specify the entire HCat partition defining input for workflow job. <p> Look for two evaluator-level
282     * variables <p> A) .datain.&lt;DATAIN_NAME&gt; B) .datain.&lt;DATAIN_NAME&gt;.unresolved <p> A defines the data-in HCat URI.
283     * <p> B defines whether there are any unresolved EL-function (i.e latest) <p> If there are something unresolved,
284     * this function will echo back the original function <p> otherwise it sends the partition.
285     *
286     * @param dataInName : DataIn name
287     * @param type : for action type: hive-export
288     */
289    public static String ph3_coord_dataInPartitions(String dataInName, String type) {
290        ELEvaluator eval = ELEvaluator.getCurrent();
291        String uri = (String) eval.getVariable(".datain." + dataInName);
292        Boolean unresolved = (Boolean) eval.getVariable(".datain." + dataInName + ".unresolved");
293        if (unresolved != null && unresolved.booleanValue() == true) {
294            return "${coord:dataInPartitions('" + dataInName + "', '" + type + "')}";
295        }
296        String partitionValue = null;
297        if (uri != null) {
298            if (type.equals("hive-export")) {
299                String[] uriList = HCatURIParser.splitHCatUris(uri, HCAT_URI_PATTERN);
300                if (uriList.length > 1) {
301                    throw new RuntimeException("Multiple partitions not supported for hive-export type. Dataset name: "
302                        + dataInName + " URI: " + uri);
303                }
304                try {
305                    partitionValue = new HCatURI(uri).toPartitionValueString(type);
306                }
307                catch (URISyntaxException e) {
308                    throw new RuntimeException("Parsing exception for HCatURI " + uri, e);
309                }
310            } else {
311                  throw new RuntimeException("Unsupported type: " + type + " dataset name: " + dataInName);
312            }
313        }
314        else {
315            XLog.getLog(HCatELFunctions.class).warn("URI is null");
316            return null;
317        }
318        return partitionValue;
319    }
320
321    /**
322     * Used to specify the MAXIMUM value of an HCat partition which is input dependency for workflow job.
323     * <p> Look for two evaluator-level
324     * variables <p> A) .datain.&lt;DATAIN_NAME&gt; B) .datain.&lt;DATAIN_NAME&gt;.unresolved <p> A defines the current list of
325     * HCat URIs. <p> B defines whether there are any unresolved EL-function (i.e latest) <p> If there are something
326     * unresolved, this function will echo back the original function <p> otherwise it sends the max partition value.
327     *
328     * @param dataInName : Datain name
329     * @param partitionName : Specific partition name whose MAX value is wanted
330     */
331    public static String ph3_coord_dataInPartitionMin(String dataInName, String partitionName) {
332        ELEvaluator eval = ELEvaluator.getCurrent();
333        String uris = (String) eval.getVariable(".datain." + dataInName);
334        Boolean unresolved = (Boolean) eval.getVariable(".datain." + dataInName + ".unresolved");
335        if (unresolved != null && unresolved.booleanValue() == true) {
336            return "${coord:dataInPartitionMin('" + dataInName + "', '" + partitionName + "')}";
337        }
338        String minPartition = null;
339        if (uris != null) {
340            String[] uriList = HCatURIParser.splitHCatUris(uris, HCAT_URI_PATTERN);
341            // get the partition values list and find minimum
342            try {
343                // initialize minValue with first partition value
344                minPartition = new HCatURI(uriList[0]).getPartitionValue(partitionName);
345                if (minPartition == null || minPartition.isEmpty()) {
346                    throw new RuntimeException("No value in data-in uri for partition key: " + partitionName);
347                }
348                for (int i = 1; i < uriList.length; i++) {
349                        String value = new HCatURI(uriList[i]).getPartitionValue(partitionName);
350                        if(value.compareTo(minPartition) < 0) { //sticking to string comparison since some numerical date
351                                                                //values can also contain letters e.g. 20120101T0300Z (UTC)
352                            minPartition = value;
353                        }
354                }
355            }
356            catch(URISyntaxException urie) {
357                throw new RuntimeException("HCat URI can't be parsed " + urie);
358            }
359        }
360        else {
361            XLog.getLog(HCatELFunctions.class).warn("URI is null");
362            return null;
363        }
364        return minPartition;
365    }
366
367    /**
368     * Used to specify the MINIMUM value of an HCat partition which is input dependency for workflow job.
369     * <p> Look for two evaluator-level
370     * variables <p> A) .datain.&lt;DATAIN_NAME&gt; B) .datain.&lt;DATAIN_NAME&gt;.unresolved <p> A defines the current list of
371     * HCat URIs. <p> B defines whether there are any unresolved EL-function (i.e latest) <p> If there are something
372     * unresolved, this function will echo back the original function <p> otherwise it sends the min partition value.
373     *
374     * @param dataInName : Datain name
375     * @param partitionName : Specific partition name whose MIN value is wanted
376     */
377    public static String ph3_coord_dataInPartitionMax(String dataInName, String partitionName) {
378        ELEvaluator eval = ELEvaluator.getCurrent();
379        String uris = (String) eval.getVariable(".datain." + dataInName);
380        Boolean unresolved = (Boolean) eval.getVariable(".datain." + dataInName + ".unresolved");
381        if (unresolved != null && unresolved.booleanValue() == true) {
382            return "${coord:dataInPartitionMin('" + dataInName + "', '" + partitionName + "')}";
383        }
384        String maxPartition = null;
385        if (uris != null) {
386            String[] uriList = HCatURIParser.splitHCatUris(uris, HCAT_URI_PATTERN);
387            // get the partition values list and find minimum
388            try {
389                // initialize minValue with first partition value
390                maxPartition = new HCatURI(uriList[0]).getPartitionValue(partitionName);
391                if (maxPartition == null || maxPartition.isEmpty()) {
392                    throw new RuntimeException("No value in data-in uri for partition key: " + partitionName);
393                }
394                for(int i = 1; i < uriList.length; i++) {
395                        String value = new HCatURI(uriList[i]).getPartitionValue(partitionName);
396                        if(value.compareTo(maxPartition) > 0) {
397                            maxPartition = value;
398                        }
399                }
400            }
401            catch(URISyntaxException urie) {
402                throw new RuntimeException("HCat URI can't be parsed " + urie);
403            }
404        }
405        else {
406            XLog.getLog(HCatELFunctions.class).warn("URI is null");
407            return null;
408        }
409        return maxPartition;
410    }
411
412    private static String createPartitionFilter(String uris, String type) {
413        String[] uriList = HCatURIParser.splitHCatUris(uris, HCAT_URI_PATTERN);
414        StringBuilder filter = new StringBuilder("");
415        if (uriList.length > 0) {
416            for (String uri : uriList) {
417                if (filter.length() > 0) {
418                    filter.append(" OR ");
419                }
420                try {
421                    filter.append(new HCatURI(uri).toPartitionFilter(type));
422                }
423                catch (URISyntaxException e) {
424                    throw new RuntimeException("Parsing exception for HCatURI " + uri + ". details: " + e);
425                }
426            }
427        }
428        return filter.toString();
429    }
430
431    private static HCatURI getURIFromResolved(String dataInName, EventType type) {
432        final XLog LOG = XLog.getLog(HCatELFunctions.class);
433        StringBuilder uriTemplate = new StringBuilder();
434        ELEvaluator eval = ELEvaluator.getCurrent();
435        String uris;
436        if(type == EventType.input) {
437            uris = (String) eval.getVariable(".datain." + dataInName);
438        }
439        else { //type=output
440            uris = (String) eval.getVariable(".dataout." + dataInName);
441        }
442        if (uris != null) {
443            String[] uri = HCatURIParser.splitHCatUris(uris, HCAT_URI_PATTERN);
444            uriTemplate.append(uri[0]);
445        }
446        else {
447            LOG.warn("URI is NULL");
448            return null;
449        }
450        LOG.info("uriTemplate [{0}] ", uriTemplate);
451        HCatURI hcatURI;
452        try {
453            hcatURI = new HCatURI(uriTemplate.toString());
454        }
455        catch (URISyntaxException e) {
456            LOG.info("uriTemplate [{0}]. Reason [{1}]: ", uriTemplate, e);
457            throw new RuntimeException("HCat URI can't be parsed " + e);
458        }
459        return hcatURI;
460    }
461
462    private static boolean isValidDataEvent(String dataInName) {
463        ELEvaluator eval = ELEvaluator.getCurrent();
464        String val = (String) eval.getVariable("oozie.dataname." + dataInName);
465        if (val == null || (val.equals("data-in") == false && val.equals("data-out") == false)) {
466            XLog.getLog(HCatELFunctions.class).error("dataset name " + dataInName + " is not valid. val :" + val);
467            throw new RuntimeException("data set name " + dataInName + " is not valid");
468        }
469        return true;
470    }
471
472    private static String echoUnResolved(String functionName, String n) {
473        return echoUnResolvedPre(functionName, n, "coord:");
474    }
475
476    private static String echoUnResolvedPre(String functionName, String n, String prefix) {
477        ELEvaluator eval = ELEvaluator.getCurrent();
478        eval.setVariable(".wrap", "true");
479        return prefix + functionName + "(" + n + ")"; // Unresolved
480    }
481
482}