/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.oozie.service;

import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.TimeUnit;

import org.apache.curator.framework.recipes.locks.InterProcessMutex;
import org.apache.curator.framework.recipes.locks.InterProcessReadWriteLock;
import org.apache.oozie.ErrorCode;
import org.apache.oozie.util.Instrumentable;
import org.apache.oozie.util.Instrumentation;
import org.apache.oozie.lock.LockToken;
import org.apache.oozie.util.XLog;
import org.apache.oozie.util.ZKUtils;

import java.io.IOException;
import java.util.concurrent.ScheduledExecutorService;

import org.apache.curator.framework.recipes.locks.ChildReaper;
import org.apache.curator.framework.recipes.locks.Reaper;
import org.apache.curator.utils.ThreadUtils;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.MapMaker;
import org.apache.zookeeper.KeeperException;

/**
 * Service that provides distributed locks via ZooKeeper.  Requires that a ZooKeeper ensemble is available.  The locks will be
 * located under a ZNode named "locks" under the namespace (see {@link ZKUtils}).  For example, with default settings, if the
 * resource we're locking is called "foo", then the ZNode backing the lock will be at /oozie/locks/foo.
 */
public class ZKLocksService extends MemoryLocksService implements Service, Instrumentable {

    private ZKUtils zk;
    public static final String LOCKS_NODE = "/locks";

    private static final XLog LOG = XLog.getLog(ZKLocksService.class);
    // Values are weakly referenced: an entry disappears once no ZKLockToken (or other holder) references the lock anymore
    private final ConcurrentMap<String, InterProcessReadWriteLock> zkLocks = new MapMaker().weakValues().makeMap();
    private ChildReaper reaper = null;

    private static final String REAPING_LEADER_PATH = ZKUtils.ZK_BASE_SERVICES_PATH + "/locksChildReaperLeaderPath";
    static final String REAPING_THRESHOLD = CONF_PREFIX + "ZKLocksService.locks.reaper.threshold";
    static final String REAPING_THREADS = CONF_PREFIX + "ZKLocksService.locks.reaper.threads";
    private static final String RELEASE_RETRY_TIME_LIMIT_MINUTES = CONF_PREFIX + "ZKLocksService.lock.release.retry.time.limit"
            + ".minutes";

    /**
     * Initialize the zookeeper locks service: registers with {@link ZKUtils} and starts a {@link ChildReaper} on
     * {@link #LOCKS_NODE}.
     *
     * @param services services instance.
     * @throws ServiceException (E1700) if registering with ZooKeeper or starting the reaper fails.
     */
    @Override
    public void init(Services services) throws ServiceException {
        super.init(services);
        try {
            zk = ZKUtils.register(this);
            // The configured threshold is multiplied by 1000 before being handed to the reaper
            reaper = new ChildReaper(zk.getClient(), LOCKS_NODE, Reaper.Mode.REAP_UNTIL_GONE, getExecutorService(),
                    ConfigurationService.getInt(services.getConf(), REAPING_THRESHOLD) * 1000, REAPING_LEADER_PATH);
            reaper.start();
        }
        catch (Exception ex) {
            throw new ServiceException(ErrorCode.E1700, ex.getMessage(), ex);
        }
    }

    /**
     * Destroy the zookeeper locks service: closes the child reaper (if any) and unregisters from {@link ZKUtils}.
     */
    @Override
    public void destroy() {
        if (reaper != null) {
            try {
                reaper.close();
            }
            catch (IOException e) {
                LOG.error("Error closing childReaper", e);
            }
        }
        if (zk != null) {
            zk.unregister(this);
        }
        zk = null;
        super.destroy();
    }

    /**
     * Instruments the zookeeper locks service.
     *
     * @param instr instance to instrument the zookeeper locks service to.
     */
    @Override
    public void instrument(Instrumentation instr) {
        // Similar to MemoryLocksService's instrumentation, though this is only the number of locks this Oozie server currently has
        instr.addVariable(INSTRUMENTATION_GROUP, "locks", new Instrumentation.Variable<Integer>() {
            @Override
            public Integer getValue() {
                return zkLocks.size();
            }
        });
    }

    /**
     * Obtain a READ lock for a source.
     *
     * @param resource resource name.
     * @param wait time out in milliseconds to wait for the lock, -1 means no timeout and 0 no wait.
     * @return the lock token for the resource, or <code>null</code> if the lock could not be obtained.
     * @throws InterruptedException thrown if the thread was interrupted while waiting.
     */
    @Override
    public LockToken getReadLock(String resource, long wait) throws InterruptedException {
        return acquireLock(resource, Type.READ, wait);
    }

    /**
     * Obtain a WRITE lock for a source.
     *
     * @param resource resource name.
     * @param wait time out in milliseconds to wait for the lock, -1 means no timeout and 0 no wait.
     * @return the lock token for the resource, or <code>null</code> if the lock could not be obtained.
     * @throws InterruptedException thrown if the thread was interrupted while waiting.
     */
    @Override
    public LockToken getWriteLock(String resource, long wait) throws InterruptedException {
        return acquireLock(resource, Type.WRITE, wait);
    }

    /**
     * Looks up (or registers) the Curator read/write lock for the resource and tries to acquire its read or write side.
     *
     * @param resource resource name; the backing ZNode path is {@link #LOCKS_NODE} + "/" + resource.
     * @param type whether the read or the write side of the lock is requested.
     * @param wait time out in milliseconds to wait for the lock, -1 means no timeout.
     * @return the lock token, or <code>null</code> if the lock timed out or a non-interrupt error occurred.
     * @throws InterruptedException thrown if the thread was interrupted while waiting.
     */
    private LockToken acquireLock(final String resource, final Type type, final long wait) throws InterruptedException {
        LOG.debug("Acquiring ZooKeeper lock. [resource={};type={};wait={}]", resource, type, wait);

        InterProcessReadWriteLock lockEntry;
        final String zkPath = LOCKS_NODE + "/" + resource;
        LOG.debug("Checking existing Curator lock or creating new one. [zkPath={}]", zkPath);

        // Creating a Curator InterProcessReadWriteLock is lightweight - only calling acquire() costs real ZooKeeper calls
        final InterProcessReadWriteLock newLockEntry = new InterProcessReadWriteLock(zk.getClient(), zkPath);
        final InterProcessReadWriteLock existingLockEntry = zkLocks.putIfAbsent(resource, newLockEntry);
        if (existingLockEntry == null) {
            lockEntry = newLockEntry;
            LOG.debug("No existing Curator lock present, new one created successfully. [zkPath={}]", zkPath);
        }
        else {
            // We can't destroy newLockEntry and we don't have to - it's taken care of by Curator and JVM GC
            lockEntry = existingLockEntry;
            LOG.debug("Reusing existing Curator lock. [zkPath={}]", zkPath);
        }

        ZKLockToken token = null;
        try {
            LOG.debug("Calling Curator to acquire ZooKeeper lock. [resource={};type={};wait={}]", resource, type, wait);
            final InterProcessMutex lock = (type.equals(Type.READ)) ? lockEntry.readLock() : lockEntry.writeLock();
            if (wait == -1) {
                lock.acquire();
                token = new ZKLockToken(lockEntry, type);
                LOG.debug("ZooKeeper lock acquired successfully. [resource={};type={}]", resource, type);
            }
            else if (lock.acquire(wait, TimeUnit.MILLISECONDS)) {
                token = new ZKLockToken(lockEntry, type);
                LOG.debug("ZooKeeper lock acquired successfully waiting. [resource={};type={};wait={}]", resource, type, wait);
            }
            else {
                LOG.warn("Could not acquire ZooKeeper lock, timed out. [resource={};type={};wait={}]", resource, type, wait);
            }
        }
        catch (final InterruptedException ex) {
            // Honour the declared contract: an interrupt must reach the caller instead of being reported as "lock not obtained"
            LOG.warn("Interrupted while acquiring ZooKeeper lock. [resource={};type={};wait={}]", resource, type, wait);
            throw ex;
        }
        catch (final Exception ex) {
            //Not throwing exception. Should return null, so that command can be requeued
            LOG.warn("Could not acquire lock due to a ZooKeeper error. " +
                    "[ex={};resource={};type={};wait={}]", ex, resource, type, wait);
            LOG.error("Error while acquiring lock", ex);
        }

        return token;
    }

    /**
     * Implementation of {@link LockToken} for zookeeper locks.
     */
    class ZKLockToken implements LockToken {
        private final InterProcessReadWriteLock lockEntry;
        private final Type type;

        private ZKLockToken(InterProcessReadWriteLock lockEntry, Type type) {
            this.lockEntry = lockEntry;
            this.type = type;
        }

        /**
         * Release the lock. Failures are logged and never propagated; if the thread is interrupted while waiting between
         * retries, its interrupt status is restored before returning.
         */
        @Override
        public void release() {
            try {
                retriableRelease();
            }
            catch (InterruptedException ex) {
                // Do not lose the interrupt: the caller cannot see the exception, so restore the flag instead
                Thread.currentThread().interrupt();
                LOG.warn("Could not release lock: " + ex.getMessage(), ex);
            }
            catch (Exception ex) {
                LOG.warn("Could not release lock: " + ex.getMessage(), ex);
            }
        }

        /**
         * Retries on failure to release lock. Only a ZooKeeper connection loss is retried, every 10 seconds, until the
         * configured time limit (in minutes, default 30) is used up; any other failure is propagated immediately.
         *
         * @throws InterruptedException if the thread is interrupted while sleeping between retries.
         * @throws Exception any non-retriable error raised while releasing the lock.
         */
        private void retriableRelease() throws Exception {
            long retryTimeLimit = TimeUnit.MINUTES.toSeconds(ConfigurationService.getLong(RELEASE_RETRY_TIME_LIMIT_MINUTES, 30));
            final int sleepSeconds = 10;
            for (int retryCount = 1; retryTimeLimit >= 0; retryTimeLimit -= sleepSeconds, retryCount++) {
                try {
                    switch (type) {
                        case WRITE:
                            lockEntry.writeLock().release();
                            break;
                        case READ:
                            lockEntry.readLock().release();
                            break;
                    }
                    // Released (or nothing to do for this type): no further retries needed
                    return;
                }
                catch (KeeperException.ConnectionLossException ex) {
                    LOG.warn("Could not release lock: " + ex.getMessage() + ". Retry will be after " + sleepSeconds + " seconds",
                            ex);
                    Thread.sleep(TimeUnit.SECONDS.toMillis(sleepSeconds));
                    LOG.info("Retrying to release lock. Retry number=" + retryCount);
                }
            }
            // Every attempt hit a connection loss and the time budget is spent: make the unreleased lock visible in the log
            LOG.error("Giving up releasing lock, retry time limit exhausted. Lock type=" + type);
        }
    }

    /**
     * Exposes the internal resource-to-lock map; intended for tests only.
     *
     * @return the live map of Curator locks known to this server.
     */
    @VisibleForTesting
    public ConcurrentMap<String, InterProcessReadWriteLock> getLocks(){
        return zkLocks;
    }

    /**
     * Creates the scheduled thread pool used by the child reaper, sized from {@link #REAPING_THREADS}.
     */
    private static ScheduledExecutorService getExecutorService() {
        return ThreadUtils.newFixedThreadScheduledPool(ConfigurationService.getInt(REAPING_THREADS),
                "ZKLocksChildReaper");
    }

}