001/** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018 019 020package org.apache.oozie.util; 021 022import com.codahale.metrics.Counter; 023import com.codahale.metrics.ExponentiallyDecayingReservoir; 024import com.codahale.metrics.Gauge; 025import com.codahale.metrics.Histogram; 026import com.codahale.metrics.JmxReporter; 027import com.codahale.metrics.MetricFilter; 028import com.codahale.metrics.MetricRegistry; 029import com.codahale.metrics.ganglia.GangliaReporter; 030import com.codahale.metrics.graphite.Graphite; 031import com.codahale.metrics.graphite.GraphiteReporter; 032import com.codahale.metrics.json.MetricsModule; 033import com.codahale.metrics.jvm.MemoryUsageGaugeSet; 034import com.fasterxml.jackson.core.JsonProcessingException; 035import com.fasterxml.jackson.databind.ObjectMapper; 036import com.google.common.annotations.VisibleForTesting; 037import com.google.common.cache.CacheBuilder; 038import com.google.common.cache.CacheLoader; 039import com.google.common.cache.LoadingCache; 040import info.ganglia.gmetric4j.gmetric.GMetric; 041import org.apache.oozie.service.ConfigurationService; 042 043import java.io.IOException; 044import java.io.OutputStream; 045import java.net.InetSocketAddress; 046import java.net.MalformedURLException; 047import java.net.URL; 048import java.util.HashMap; 049import java.util.Map; 050import java.util.concurrent.ConcurrentHashMap; 051import java.util.concurrent.ConcurrentMap; 052import java.util.concurrent.ExecutionException; 053import java.util.concurrent.ScheduledExecutorService; 054import java.util.concurrent.TimeUnit; 055import java.util.concurrent.locks.Lock; 056import java.util.concurrent.locks.ReentrantLock; 057 058/** 059 * Instrumentation framework that is mostly compatible with {@link Instrumentation} but is backed by Codahale Metrics. This class 060 * was designed to minimize the changes required to switch from {@link Instrumentation} to {@link MetricsInstrumentation} by keeping 061 * the same API. However, certain operations are obviously implemented differently or are no longer needed; and the output format 062 * is a little different. Internally, this class maps Cron to {@link com.codahale.metrics.Timer}, Variable to {@link Gauge}, 063 * counter to {@link Counter}, and Sampler to {@link Histogram}. 064 */ 065@SuppressWarnings("unchecked") 066public class MetricsInstrumentation extends Instrumentation { 067 068 private final MetricRegistry metricRegistry; 069 private transient ObjectMapper jsonMapper; 070 private ScheduledExecutorService scheduler; 071 private final LoadingCache<String, com.codahale.metrics.Counter> counters; 072 private final Map<String, Gauge> gauges; 073 private final LoadingCache<String, com.codahale.metrics.Timer> timers; 074 private final Map<String, Histogram> histograms; 075 private Lock timersLock; 076 private Lock gaugesLock; 077 private Lock countersLock; 078 private Lock histogramsLock; 079 080 public static final String EXTERNAL_MONITORING_ENABLE = "oozie.external_monitoring.enable"; 081 public static final String EXTERNAL_MONITORING_TYPE = "oozie.external_monitoring.type"; 082 public static final String EXTERNAL_MONITORING_ADDRESS = "oozie.external_monitoring.address"; 083 public static final String EXTERNAL_MONITORING_PREFIX = "oozie.external_monitoring.metricPrefix"; 084 public static final String EXTERNAL_MONITORING_INTERVAL = "oozie.external_monitoring.reporterIntervalSecs"; 085 public static final String JMX_MONITORING_ENABLE = "oozie.jmx_monitoring.enable"; 086 public static final String GRAPHITE="graphite"; 087 public static final String GANGLIA="ganglia"; 088 private String metricsAddress; 089 private String metricsHost; 090 private String metricsPrefix; 091 private String metricsServerName; 092 private int metricsPort; 093 private GraphiteReporter graphiteReporter = null; 094 private GangliaReporter gangliaReporter = null; 095 private JmxReporter jmxReporter = null; 096 private long metricsReportIntervalSec; 097 private boolean isExternalMonitoringEnabled; 098 private boolean isJMXMonitoringEnabled; 099 100 private static final TimeUnit RATE_UNIT = TimeUnit.MILLISECONDS; 101 private static final TimeUnit DURATION_UNIT = TimeUnit.MILLISECONDS; 102 103 protected XLog LOG = XLog.getLog(getClass()); 104 105 /** 106 * Creates the MetricsInstrumentation and starts taking some metrics. 107 */ 108 public MetricsInstrumentation() { 109 metricRegistry = new MetricRegistry(); 110 111 isExternalMonitoringEnabled = ConfigurationService.getBoolean(EXTERNAL_MONITORING_ENABLE); 112 if(isExternalMonitoringEnabled) { 113 metricsServerName = ConfigurationService.get(EXTERNAL_MONITORING_TYPE); 114 if (metricsServerName != null) { 115 String modifiedServerName = metricsServerName.trim().toLowerCase(); 116 if (modifiedServerName.equals(GRAPHITE) || modifiedServerName.equals(GANGLIA)) { 117 metricsAddress = ConfigurationService.get(EXTERNAL_MONITORING_ADDRESS); 118 metricsPrefix = ConfigurationService.get(EXTERNAL_MONITORING_PREFIX); 119 metricsReportIntervalSec = ConfigurationService.getLong(EXTERNAL_MONITORING_INTERVAL); 120 LOG.debug("Publishing external monitoring to [{0}] at host [{1}] every [{2}] seconds with prefix " + 121 "[{3}]", metricsServerName, metricsAddress, metricsReportIntervalSec, metricsPrefix); 122 123 try { 124 URL url = new URL(metricsAddress); 125 metricsHost = url.getHost(); 126 metricsPort = url.getPort(); 127 } catch (MalformedURLException e) { 128 LOG.error("Exception, ", e); 129 } 130 131 if (modifiedServerName.equals(GRAPHITE)) { 132 Graphite graphite = new Graphite(new InetSocketAddress(metricsHost, metricsPort)); 133 graphiteReporter = GraphiteReporter.forRegistry(metricRegistry).prefixedWith(metricsPrefix) 134 .convertDurationsTo(TimeUnit.SECONDS).filter(MetricFilter.ALL).build(graphite); 135 graphiteReporter.start(metricsReportIntervalSec, TimeUnit.SECONDS); 136 } 137 138 if (modifiedServerName.equals(GANGLIA)) { 139 GMetric ganglia; 140 try { 141 ganglia = new GMetric(metricsHost, metricsPort, GMetric.UDPAddressingMode.MULTICAST, 1); 142 } catch (IOException e) { 143 LOG.error("Exception, ", e); 144 throw new RuntimeException(e); 145 } 146 gangliaReporter = GangliaReporter.forRegistry(metricRegistry).prefixedWith(metricsPrefix) 147 .convertRatesTo(TimeUnit.SECONDS) 148 .convertDurationsTo(TimeUnit.MILLISECONDS) 149 .build(ganglia); 150 gangliaReporter.start(metricsReportIntervalSec, TimeUnit.SECONDS); 151 } 152 } else { 153 throw new RuntimeException("Metrics Server Name should be either graphite or ganglia"); 154 } 155 } 156 else { 157 throw new RuntimeException("Metrics Server Name is not specified"); 158 } 159 } 160 161 timersLock = new ReentrantLock(); 162 gaugesLock = new ReentrantLock(); 163 countersLock = new ReentrantLock(); 164 histogramsLock = new ReentrantLock(); 165 166 // Used for writing the json for the metrics (see com.codahale.metrics.servlets.MetricsServlet) 167 // The "false" is to prevent it from printing out all of the values used in the histograms and timers 168 this.jsonMapper = new ObjectMapper().registerModule(new MetricsModule(RATE_UNIT, DURATION_UNIT, false)); 169 170 // Register the JVM memory gauges and prefix the keys 171 MemoryUsageGaugeSet memorySet = new MemoryUsageGaugeSet(); 172 for (String key : memorySet.getMetrics().keySet()) { 173 metricRegistry.register(MetricRegistry.name("jvm", "memory", key), memorySet.getMetrics().get(key)); 174 } 175 176 // By setting this up as a cache, if a counter doesn't exist when we try to retrieve it, it will automatically be created 177 counters = CacheBuilder.newBuilder().build( 178 new CacheLoader<String, com.codahale.metrics.Counter>() { 179 @Override 180 public com.codahale.metrics.Counter load(String key) throws Exception { 181 com.codahale.metrics.Counter counter = new com.codahale.metrics.Counter(); 182 metricRegistry.register(key, counter); 183 return counter; 184 } 185 } 186 ); 187 timers = CacheBuilder.newBuilder().build( 188 new CacheLoader<String, com.codahale.metrics.Timer>() { 189 @Override 190 public com.codahale.metrics.Timer load(String key) throws Exception { 191 com.codahale.metrics.Timer timer 192 = new com.codahale.metrics.Timer(new ExponentiallyDecayingReservoir()); 193 metricRegistry.register(key, timer); 194 return timer; 195 } 196 } 197 ); 198 gauges = new ConcurrentHashMap<String, Gauge>(); 199 histograms = new ConcurrentHashMap<String, Histogram>(); 200 isJMXMonitoringEnabled = ConfigurationService.getBoolean(JMX_MONITORING_ENABLE); 201 if (isJMXMonitoringEnabled) { 202 jmxReporter = JmxReporter.forRegistry(metricRegistry).build(); 203 jmxReporter.start(); 204 } 205 } 206 207 /** 208 * Reporting final metrics into the server before stopping 209 */ 210 @Override 211 public void stop() { 212 if (graphiteReporter != null) { 213 try { 214 // reporting final metrics into graphite before stopping 215 graphiteReporter.report(); 216 } finally { 217 graphiteReporter.stop(); 218 } 219 } 220 if (gangliaReporter != null) { 221 try { 222 // reporting final metrics into ganglia before stopping 223 gangliaReporter.report(); 224 } finally { 225 gangliaReporter.stop(); 226 } 227 } 228 229 if (jmxReporter != null) { 230 jmxReporter.stop(); 231 } 232 } 233 234 /** 235 * Add a cron to an instrumentation timer. The timer is created if it does not exists. <p> 236 * Internally, this is backed by a {@link com.codahale.metrics.Timer}. 237 * 238 * @param group timer group. 239 * @param name timer name. 240 * @param cron cron to add to the timer. 241 */ 242 @Override 243 public void addCron(String group, String name, Cron cron) { 244 String key = MetricRegistry.name(group, name, "timer"); 245 try { 246 timersLock.lock(); 247 com.codahale.metrics.Timer timer = timers.get(key); 248 timer.update(cron.getOwn(), TimeUnit.MILLISECONDS); 249 } catch(ExecutionException ee) { 250 throw new RuntimeException(ee); 251 } finally { 252 timersLock.unlock(); 253 } 254 } 255 256 /** 257 * Add an instrumentation variable. <p> 258 * Internally, this is backed by a {@link Gauge}. 259 * 260 * @param group counter group. 261 * @param name counter name. 262 * @param variable variable to add. 263 */ 264 @Override 265 public void addVariable(String group, String name, final Variable variable) { 266 Gauge gauge = new Gauge() { 267 @Override 268 public Object getValue() { 269 return variable.getValue(); 270 } 271 }; 272 String key = MetricRegistry.name(group, name); 273 274 try { 275 gaugesLock.lock(); 276 gauges.put(key, gauge); 277 // Metrics throws an Exception if we don't do this when the key already exists 278 if (metricRegistry.getGauges().containsKey(key)) { 279 XLog.getLog(MetricsInstrumentation.class).debug("A Variable with name [" + key + "] already exists. " 280 + " The old Variable will be overwritten, but this is not recommended"); 281 metricRegistry.remove(key); 282 } 283 metricRegistry.register(key, gauge); 284 } finally { 285 gaugesLock.unlock(); 286 } 287 } 288 289 /** 290 * Increment an instrumentation counter. The counter is created if it does not exists. <p> 291 * Internally, this is backed by a {@link Counter}. 292 * 293 * @param group counter group. 294 * @param name counter name. 295 * @param count increment to add to the counter. 296 */ 297 @Override 298 public void incr(String group, String name, long count) { 299 String key = MetricRegistry.name(group, name); 300 try { 301 countersLock.lock(); 302 counters.get(key).inc(count); 303 } catch(ExecutionException ee) { 304 throw new RuntimeException(ee); 305 } finally { 306 countersLock.unlock(); 307 } 308 } 309 310 /** 311 * Add a sampling variable. <p> 312 * Internally, this is backed by a biased (decaying) {@link Histogram}. 313 * 314 * @param group timer group. 315 * @param name timer name. 316 * @param period (ignored) 317 * @param interval sampling frequency, how often the variable is probed. 318 * @param variable variable to sample. 319 */ 320 @Override 321 public void addSampler(String group, String name, int period, int interval, Variable<Long> variable) { 322 if (scheduler == null) { 323 throw new IllegalStateException("scheduler not set, cannot sample"); 324 } 325 Histogram histogram = new Histogram(new ExponentiallyDecayingReservoir()); 326 Sampler sampler = new Sampler(variable, histogram); 327 scheduler.scheduleAtFixedRate(sampler, 0, interval, TimeUnit.SECONDS); 328 String key = MetricRegistry.name(group, name, "histogram"); 329 try { 330 histogramsLock.lock(); 331 histograms.put(key, histogram); 332 // Metrics throws an Exception if we don't do this when the key already exists 333 if (metricRegistry.getHistograms().containsKey(key)) { 334 XLog.getLog(MetricsInstrumentation.class).debug("A Sampler with name [" + key + "] already exists. " 335 + " The old Sampler will be overwritten, but this is not recommended"); 336 metricRegistry.remove(key); 337 } 338 metricRegistry.register(key, histogram); 339 } finally { 340 histogramsLock.unlock(); 341 } 342 } 343 344 public static class Sampler implements Runnable { 345 private final Variable<Long> variable; 346 private final Histogram histogram; 347 public Sampler(Variable<Long> variable, Histogram histogram) { 348 this.variable = variable; 349 this.histogram = histogram; 350 } 351 352 @Override 353 public void run() { 354 histogram.update(variable.getValue()); 355 } 356 } 357 358 /** 359 * Set the scheduler instance to handle the samplers. 360 * 361 * @param scheduler scheduler instance. 362 */ 363 @Override 364 public void setScheduler(ScheduledExecutorService scheduler) { 365 this.scheduler = scheduler; 366 } 367 368 /** 369 * Return the string representation of the instrumentation. It does a JSON pretty-print. 370 * 371 * @return the string representation of the instrumentation. 372 */ 373 @Override 374 public String toString() { 375 try { 376 return jsonMapper.writerWithDefaultPrettyPrinter().writeValueAsString(metricRegistry); 377 } catch (JsonProcessingException jpe) { 378 throw new RuntimeException(jpe); 379 } 380 } 381 382 /** 383 * Converts the current state of the metrics and writes them to the OutputStream. 384 * 385 * @param os The OutputStream to write the metrics to 386 * @throws IOException in case of error during writing to the stream 387 */ 388 public void writeJSONResponse(OutputStream os) throws IOException { 389 jsonMapper.writer().writeValue(os, metricRegistry); 390 } 391 392 /** 393 * Returns the MetricRegistry: public for unit tests -- do not use. 394 * 395 * @return the MetricRegistry 396 */ 397 @VisibleForTesting 398 MetricRegistry getMetricRegistry() { 399 return metricRegistry; 400 } 401 402 /** 403 * Not Supported: throws {@link UnsupportedOperationException} 404 * 405 * @return nothing 406 */ 407 @Override 408 public Map<String, Map<String, Map<String, Object>>> getAll() { 409 throw new UnsupportedOperationException(); 410 } 411 412 /** 413 * For backwards compatibility reasons with {@link Instrumentation}, create a deep copy of {@link #counters}: 414 * <ul> 415 * <li>counter groups and names are separated by {@code "."}. Here we use Codahale Metrics internals to concatenate group 416 * and name pairs with {@code "."}</li> 417 * <li>no synchronization is done on {@link #counters} between calls to {@link LoadingCache#asMap()}, 418 * {@link ConcurrentMap#keySet()}, and {@link ConcurrentMap#get(Object)}. Hence it's possible to get values that are 419 * not present anymore. It's also possible to get values that have been updated in the meanwhile, and not to get values 420 * that have been inserted in the meanwhile</li> 421 * </ul> 422 * 423 * @return a deep copy of counter groups, names, and values 424 */ 425 @Override 426 public Map<String, Map<String, Element<Long>>> getCounters() { 427 final ConcurrentMap<String, com.codahale.metrics.Counter> countersAsMap = counters.asMap(); 428 final Map<String, Map<String, Element<Long>>> countersAsDeepMap = new HashMap<>(); 429 430 for (final Map.Entry<String, com.codahale.metrics.Counter> counterEntry : countersAsMap.entrySet()) { 431 final String groupAndName = counterEntry.getKey(); 432 final com.codahale.metrics.Counter value = counterEntry.getValue(); 433 final String group = groupAndName.substring(0, groupAndName.indexOf(".")); 434 final String name = groupAndName.substring(groupAndName.indexOf(".") + 1); 435 436 if (!countersAsDeepMap.containsKey(group)) { 437 countersAsDeepMap.put(group, new HashMap<>()); 438 } 439 440 final Instrumentation.Counter counter = new Counter(); 441 counter.set(value.getCount()); 442 countersAsDeepMap.get(group).put(name, counter); 443 } 444 445 return countersAsDeepMap; 446 } 447 448 /** 449 * Not Supported: throws {@link UnsupportedOperationException} 450 * 451 * @return nothing 452 */ 453 @Override 454 public Map<String, Map<String, Element<Double>>> getSamplers() { 455 throw new UnsupportedOperationException(); 456 } 457 458 /** 459 * Not Supported: throws {@link UnsupportedOperationException} 460 * 461 * @return nothing 462 */ 463 @Override 464 public Map<String, Map<String, Element<Timer>>> getTimers() { 465 throw new UnsupportedOperationException(); 466 } 467 468 /** 469 * Not Supported: throws {@link UnsupportedOperationException} 470 * 471 * @return nothing 472 */ 473 @Override 474 public Map<String, Map<String, Element<Variable>>> getVariables() { 475 throw new UnsupportedOperationException(); 476 } 477}