001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *      http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018
019
020package org.apache.oozie.util;
021
022import com.codahale.metrics.Counter;
023import com.codahale.metrics.ExponentiallyDecayingReservoir;
024import com.codahale.metrics.Gauge;
025import com.codahale.metrics.Histogram;
026import com.codahale.metrics.JmxReporter;
027import com.codahale.metrics.MetricFilter;
028import com.codahale.metrics.MetricRegistry;
029import com.codahale.metrics.ganglia.GangliaReporter;
030import com.codahale.metrics.graphite.Graphite;
031import com.codahale.metrics.graphite.GraphiteReporter;
032import com.codahale.metrics.json.MetricsModule;
033import com.codahale.metrics.jvm.MemoryUsageGaugeSet;
034import com.fasterxml.jackson.core.JsonProcessingException;
035import com.fasterxml.jackson.databind.ObjectMapper;
036import com.google.common.annotations.VisibleForTesting;
037import com.google.common.cache.CacheBuilder;
038import com.google.common.cache.CacheLoader;
039import com.google.common.cache.LoadingCache;
040import info.ganglia.gmetric4j.gmetric.GMetric;
041import org.apache.oozie.service.ConfigurationService;
042
043import java.io.IOException;
044import java.io.OutputStream;
045import java.net.InetSocketAddress;
046import java.net.MalformedURLException;
047import java.net.URL;
048import java.util.HashMap;
049import java.util.Map;
050import java.util.concurrent.ConcurrentHashMap;
051import java.util.concurrent.ConcurrentMap;
052import java.util.concurrent.ExecutionException;
053import java.util.concurrent.ScheduledExecutorService;
054import java.util.concurrent.TimeUnit;
055import java.util.concurrent.locks.Lock;
056import java.util.concurrent.locks.ReentrantLock;
057
058/**
059 * Instrumentation framework that is mostly compatible with {@link Instrumentation} but is backed by Codahale Metrics.  This class
060 * was designed to minimize the changes required to switch from {@link Instrumentation} to {@link MetricsInstrumentation} by keeping
061 * the same API.  However, certain operations are obviously implemented differently or are no longer needed; and the output format
062 * is a little different.  Internally, this class maps Cron to {@link com.codahale.metrics.Timer}, Variable to {@link Gauge},
063 * counter to {@link Counter}, and Sampler to {@link Histogram}.
064 */
065@SuppressWarnings("unchecked")
066public class MetricsInstrumentation extends Instrumentation {
067
068    private final MetricRegistry metricRegistry;
069    private transient ObjectMapper jsonMapper;
070    private ScheduledExecutorService scheduler;
071    private final LoadingCache<String, com.codahale.metrics.Counter> counters;
072    private final Map<String, Gauge> gauges;
073    private final LoadingCache<String, com.codahale.metrics.Timer> timers;
074    private final Map<String, Histogram> histograms;
075    private Lock timersLock;
076    private Lock gaugesLock;
077    private Lock countersLock;
078    private Lock histogramsLock;
079
080    public static final String EXTERNAL_MONITORING_ENABLE = "oozie.external_monitoring.enable";
081    public static final String EXTERNAL_MONITORING_TYPE = "oozie.external_monitoring.type";
082    public static final String EXTERNAL_MONITORING_ADDRESS = "oozie.external_monitoring.address";
083    public static final String EXTERNAL_MONITORING_PREFIX = "oozie.external_monitoring.metricPrefix";
084    public static final String EXTERNAL_MONITORING_INTERVAL = "oozie.external_monitoring.reporterIntervalSecs";
085    public static final String JMX_MONITORING_ENABLE = "oozie.jmx_monitoring.enable";
086    public static final String GRAPHITE="graphite";
087    public static final String GANGLIA="ganglia";
088    private String metricsAddress;
089    private String metricsHost;
090    private String metricsPrefix;
091    private String metricsServerName;
092    private int metricsPort;
093    private GraphiteReporter graphiteReporter = null;
094    private GangliaReporter gangliaReporter = null;
095    private JmxReporter jmxReporter = null;
096    private long metricsReportIntervalSec;
097    private boolean isExternalMonitoringEnabled;
098    private boolean isJMXMonitoringEnabled;
099
100    private static final TimeUnit RATE_UNIT = TimeUnit.MILLISECONDS;
101    private static final TimeUnit DURATION_UNIT = TimeUnit.MILLISECONDS;
102
103    protected XLog LOG = XLog.getLog(getClass());
104
105    /**
106     * Creates the MetricsInstrumentation and starts taking some metrics.
107     */
108    public MetricsInstrumentation() {
109        metricRegistry = new MetricRegistry();
110
111        isExternalMonitoringEnabled = ConfigurationService.getBoolean(EXTERNAL_MONITORING_ENABLE);
112        if(isExternalMonitoringEnabled) {
113            metricsServerName = ConfigurationService.get(EXTERNAL_MONITORING_TYPE);
114            if (metricsServerName != null) {
115                String modifiedServerName = metricsServerName.trim().toLowerCase();
116                if (modifiedServerName.equals(GRAPHITE) || modifiedServerName.equals(GANGLIA)) {
117                    metricsAddress = ConfigurationService.get(EXTERNAL_MONITORING_ADDRESS);
118                    metricsPrefix = ConfigurationService.get(EXTERNAL_MONITORING_PREFIX);
119                    metricsReportIntervalSec = ConfigurationService.getLong(EXTERNAL_MONITORING_INTERVAL);
120                    LOG.debug("Publishing external monitoring to [{0}]  at host [{1}] every [{2}] seconds with prefix " +
121                            "[{3}]", metricsServerName, metricsAddress, metricsReportIntervalSec, metricsPrefix);
122
123                    try {
124                        URL url = new URL(metricsAddress);
125                        metricsHost = url.getHost();
126                        metricsPort = url.getPort();
127                    } catch (MalformedURLException e) {
128                        LOG.error("Exception, ", e);
129                    }
130
131                    if (modifiedServerName.equals(GRAPHITE)) {
132                        Graphite graphite = new Graphite(new InetSocketAddress(metricsHost, metricsPort));
133                        graphiteReporter = GraphiteReporter.forRegistry(metricRegistry).prefixedWith(metricsPrefix)
134                                .convertDurationsTo(TimeUnit.SECONDS).filter(MetricFilter.ALL).build(graphite);
135                        graphiteReporter.start(metricsReportIntervalSec, TimeUnit.SECONDS);
136                    }
137
138                    if (modifiedServerName.equals(GANGLIA)) {
139                        GMetric ganglia;
140                        try {
141                            ganglia = new GMetric(metricsHost, metricsPort, GMetric.UDPAddressingMode.MULTICAST, 1);
142                        } catch (IOException e) {
143                            LOG.error("Exception, ", e);
144                            throw new RuntimeException(e);
145                        }
146                        gangliaReporter = GangliaReporter.forRegistry(metricRegistry).prefixedWith(metricsPrefix)
147                                .convertRatesTo(TimeUnit.SECONDS)
148                                .convertDurationsTo(TimeUnit.MILLISECONDS)
149                                .build(ganglia);
150                        gangliaReporter.start(metricsReportIntervalSec, TimeUnit.SECONDS);
151                    }
152                } else {
153                    throw new RuntimeException("Metrics Server Name should be either graphite or ganglia");
154                }
155            }
156            else {
157                throw new RuntimeException("Metrics Server Name is not specified");
158            }
159        }
160
161        timersLock = new ReentrantLock();
162        gaugesLock = new ReentrantLock();
163        countersLock = new ReentrantLock();
164        histogramsLock = new ReentrantLock();
165
166        // Used for writing the json for the metrics (see com.codahale.metrics.servlets.MetricsServlet)
167        // The "false" is to prevent it from printing out all of the values used in the histograms and timers
168        this.jsonMapper = new ObjectMapper().registerModule(new MetricsModule(RATE_UNIT, DURATION_UNIT, false));
169
170        // Register the JVM memory gauges and prefix the keys
171        MemoryUsageGaugeSet memorySet = new MemoryUsageGaugeSet();
172        for (String key : memorySet.getMetrics().keySet()) {
173            metricRegistry.register(MetricRegistry.name("jvm", "memory", key), memorySet.getMetrics().get(key));
174        }
175
176        // By setting this up as a cache, if a counter doesn't exist when we try to retrieve it, it will automatically be created
177        counters = CacheBuilder.newBuilder().build(
178                new CacheLoader<String, com.codahale.metrics.Counter>() {
179                    @Override
180                    public com.codahale.metrics.Counter load(String key) throws Exception {
181                        com.codahale.metrics.Counter counter = new com.codahale.metrics.Counter();
182                        metricRegistry.register(key, counter);
183                        return counter;
184                    }
185                }
186        );
187        timers = CacheBuilder.newBuilder().build(
188                new CacheLoader<String, com.codahale.metrics.Timer>() {
189                    @Override
190                    public com.codahale.metrics.Timer load(String key) throws Exception {
191                        com.codahale.metrics.Timer timer
192                                = new com.codahale.metrics.Timer(new ExponentiallyDecayingReservoir());
193                        metricRegistry.register(key, timer);
194                        return timer;
195                    }
196                }
197        );
198        gauges = new ConcurrentHashMap<String, Gauge>();
199        histograms = new ConcurrentHashMap<String, Histogram>();
200        isJMXMonitoringEnabled = ConfigurationService.getBoolean(JMX_MONITORING_ENABLE);
201        if (isJMXMonitoringEnabled) {
202            jmxReporter  = JmxReporter.forRegistry(metricRegistry).build();
203            jmxReporter.start();
204        }
205    }
206
207    /**
208     * Reporting final metrics into the server before stopping
209     */
210    @Override
211    public void stop() {
212        if (graphiteReporter != null) {
213            try {
214                // reporting final metrics into graphite before stopping
215                graphiteReporter.report();
216            } finally {
217                graphiteReporter.stop();
218            }
219        }
220        if (gangliaReporter != null) {
221            try {
222                // reporting final metrics into ganglia before stopping
223                gangliaReporter.report();
224            } finally {
225                gangliaReporter.stop();
226            }
227        }
228
229        if (jmxReporter != null) {
230            jmxReporter.stop();
231        }
232    }
233
234    /**
235     * Add a cron to an instrumentation timer. The timer is created if it does not exists. <p>
236     * Internally, this is backed by a {@link com.codahale.metrics.Timer}.
237     *
238     * @param group timer group.
239     * @param name timer name.
240     * @param cron cron to add to the timer.
241     */
242    @Override
243    public void addCron(String group, String name, Cron cron) {
244        String key = MetricRegistry.name(group, name, "timer");
245        try {
246            timersLock.lock();
247            com.codahale.metrics.Timer timer = timers.get(key);
248            timer.update(cron.getOwn(), TimeUnit.MILLISECONDS);
249        } catch(ExecutionException ee) {
250            throw new RuntimeException(ee);
251        } finally {
252            timersLock.unlock();
253        }
254    }
255
256    /**
257     * Add an instrumentation variable. <p>
258     * Internally, this is backed by a {@link Gauge}.
259     *
260     * @param group counter group.
261     * @param name counter name.
262     * @param variable variable to add.
263     */
264    @Override
265    public void addVariable(String group, String name, final Variable variable) {
266        Gauge gauge = new Gauge() {
267            @Override
268            public Object getValue() {
269                return variable.getValue();
270            }
271        };
272        String key = MetricRegistry.name(group, name);
273
274        try {
275            gaugesLock.lock();
276            gauges.put(key, gauge);
277            // Metrics throws an Exception if we don't do this when the key already exists
278            if (metricRegistry.getGauges().containsKey(key)) {
279                XLog.getLog(MetricsInstrumentation.class).debug("A Variable with name [" + key + "] already exists. "
280                        + " The old Variable will be overwritten, but this is not recommended");
281                metricRegistry.remove(key);
282            }
283            metricRegistry.register(key, gauge);
284        } finally {
285            gaugesLock.unlock();
286        }
287    }
288
289   /**
290     * Increment an instrumentation counter. The counter is created if it does not exists. <p>
291     * Internally, this is backed by a {@link Counter}.
292     *
293     * @param group counter group.
294     * @param name counter name.
295     * @param count increment to add to the counter.
296     */
297    @Override
298    public void incr(String group, String name, long count) {
299        String key = MetricRegistry.name(group, name);
300        try {
301            countersLock.lock();
302            counters.get(key).inc(count);
303        } catch(ExecutionException ee) {
304            throw new RuntimeException(ee);
305        } finally {
306            countersLock.unlock();
307        }
308    }
309
310    /**
311     * Add a sampling variable. <p>
312     * Internally, this is backed by a biased (decaying) {@link Histogram}.
313     *
314     * @param group timer group.
315     * @param name timer name.
316     * @param period (ignored)
317     * @param interval sampling frequency, how often the variable is probed.
318     * @param variable variable to sample.
319     */
320    @Override
321    public void addSampler(String group, String name, int period, int interval, Variable<Long> variable) {
322        if (scheduler == null) {
323            throw new IllegalStateException("scheduler not set, cannot sample");
324        }
325        Histogram histogram = new Histogram(new ExponentiallyDecayingReservoir());
326        Sampler sampler = new Sampler(variable, histogram);
327        scheduler.scheduleAtFixedRate(sampler, 0, interval, TimeUnit.SECONDS);
328        String key = MetricRegistry.name(group, name, "histogram");
329        try {
330            histogramsLock.lock();
331            histograms.put(key, histogram);
332            // Metrics throws an Exception if we don't do this when the key already exists
333            if (metricRegistry.getHistograms().containsKey(key)) {
334                XLog.getLog(MetricsInstrumentation.class).debug("A Sampler with name [" + key + "] already exists. "
335                        + " The old Sampler will be overwritten, but this is not recommended");
336                metricRegistry.remove(key);
337            }
338            metricRegistry.register(key, histogram);
339        } finally {
340            histogramsLock.unlock();
341        }
342    }
343
344    public static class Sampler implements Runnable {
345        private final Variable<Long> variable;
346        private final Histogram histogram;
347        public Sampler(Variable<Long> variable, Histogram histogram) {
348            this.variable = variable;
349            this.histogram = histogram;
350        }
351
352        @Override
353        public void run() {
354            histogram.update(variable.getValue());
355        }
356    }
357
358    /**
359     * Set the scheduler instance to handle the samplers.
360     *
361     * @param scheduler scheduler instance.
362     */
363    @Override
364    public void setScheduler(ScheduledExecutorService scheduler) {
365        this.scheduler = scheduler;
366    }
367
368    /**
369     * Return the string representation of the instrumentation.  It does a JSON pretty-print.
370     *
371     * @return the string representation of the instrumentation.
372     */
373    @Override
374    public String toString() {
375        try {
376            return jsonMapper.writerWithDefaultPrettyPrinter().writeValueAsString(metricRegistry);
377        } catch (JsonProcessingException jpe) {
378            throw new RuntimeException(jpe);
379        }
380    }
381
382    /**
383     * Converts the current state of the metrics and writes them to the OutputStream.
384     *
385     * @param os The OutputStream to write the metrics to
386     * @throws IOException in case of error during writing to the stream
387     */
388    public void writeJSONResponse(OutputStream os) throws IOException {
389        jsonMapper.writer().writeValue(os, metricRegistry);
390    }
391
392    /**
393     * Returns the MetricRegistry: public for unit tests -- do not use.
394     *
395     * @return the MetricRegistry
396     */
397    @VisibleForTesting
398    MetricRegistry getMetricRegistry() {
399        return metricRegistry;
400    }
401
402    /**
403     * Not Supported: throws {@link UnsupportedOperationException}
404     *
405     * @return nothing
406     */
407    @Override
408    public Map<String, Map<String, Map<String, Object>>> getAll() {
409        throw new UnsupportedOperationException();
410    }
411
412    /**
413     * For backwards compatibility reasons with {@link Instrumentation}, create a deep copy of {@link #counters}:
414     * <ul>
415     *     <li>counter groups and names are separated by {@code "."}. Here we use Codahale Metrics internals to concatenate group
416     *     and name pairs with {@code "."}</li>
417     *     <li>no synchronization is done on {@link #counters} between calls to {@link LoadingCache#asMap()},
418     *     {@link ConcurrentMap#keySet()}, and {@link ConcurrentMap#get(Object)}. Hence it's possible to get values that are
419     *     not present anymore. It's also possible to get values that have been updated in the meanwhile, and not to get values
420     *     that have been inserted in the meanwhile</li>
421     * </ul>
422     *
423     * @return a deep copy of counter groups, names, and values
424     */
425    @Override
426    public Map<String, Map<String, Element<Long>>> getCounters() {
427        final ConcurrentMap<String, com.codahale.metrics.Counter> countersAsMap = counters.asMap();
428        final Map<String, Map<String, Element<Long>>> countersAsDeepMap = new HashMap<>();
429
430        for (final Map.Entry<String, com.codahale.metrics.Counter> counterEntry : countersAsMap.entrySet()) {
431            final String groupAndName = counterEntry.getKey();
432            final com.codahale.metrics.Counter value = counterEntry.getValue();
433            final String group = groupAndName.substring(0, groupAndName.indexOf("."));
434            final String name = groupAndName.substring(groupAndName.indexOf(".") + 1);
435
436            if (!countersAsDeepMap.containsKey(group)) {
437                countersAsDeepMap.put(group, new HashMap<>());
438            }
439
440            final Instrumentation.Counter counter = new Counter();
441            counter.set(value.getCount());
442            countersAsDeepMap.get(group).put(name, counter);
443        }
444
445        return countersAsDeepMap;
446    }
447
448    /**
449     * Not Supported: throws {@link UnsupportedOperationException}
450     *
451     * @return nothing
452     */
453    @Override
454    public Map<String, Map<String, Element<Double>>> getSamplers() {
455        throw new UnsupportedOperationException();
456    }
457
458    /**
459     * Not Supported: throws {@link UnsupportedOperationException}
460     *
461     * @return nothing
462     */
463    @Override
464    public Map<String, Map<String, Element<Timer>>> getTimers() {
465        throw new UnsupportedOperationException();
466    }
467
468    /**
469     * Not Supported: throws {@link UnsupportedOperationException}
470     *
471     * @return nothing
472     */
473    @Override
474    public Map<String, Map<String, Element<Variable>>> getVariables() {
475        throw new UnsupportedOperationException();
476    }
477}