001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *      http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.oozie.service;
019
020import java.util.concurrent.ConcurrentMap;
021import java.util.concurrent.TimeUnit;
022
023import org.apache.curator.framework.recipes.locks.InterProcessMutex;
024import org.apache.curator.framework.recipes.locks.InterProcessReadWriteLock;
025import org.apache.oozie.ErrorCode;
026import org.apache.oozie.util.Instrumentable;
027import org.apache.oozie.util.Instrumentation;
028import org.apache.oozie.lock.LockToken;
029import org.apache.oozie.util.XLog;
030import org.apache.oozie.util.ZKUtils;
031
032import java.io.IOException;
033import java.util.concurrent.ScheduledExecutorService;
034
035import org.apache.curator.framework.recipes.locks.ChildReaper;
036import org.apache.curator.framework.recipes.locks.Reaper;
037import org.apache.curator.utils.ThreadUtils;
038
039import com.google.common.annotations.VisibleForTesting;
040import com.google.common.collect.MapMaker;
041import org.apache.zookeeper.KeeperException;
042
043/**
044 * Service that provides distributed locks via ZooKeeper.  Requires that a ZooKeeper ensemble is available.  The locks will be
045 * located under a ZNode named "locks" under the namespace (see {@link ZKUtils}).  For example, with default settings, if the
046 * resource we're locking is called "foo", then the ZNode backing the lock will be at /oozie/locks/foo.
047 */
048public class ZKLocksService extends MemoryLocksService implements Service, Instrumentable {
049
050    private ZKUtils zk;
051    public static final String LOCKS_NODE = "/locks";
052
053    private static final XLog LOG = XLog.getLog(ZKLocksService.class);
054    private final ConcurrentMap<String, InterProcessReadWriteLock> zkLocks = new MapMaker().weakValues().makeMap();
055    private ChildReaper reaper = null;
056
057    private static final String REAPING_LEADER_PATH = ZKUtils.ZK_BASE_SERVICES_PATH + "/locksChildReaperLeaderPath";
058    static final String REAPING_THRESHOLD = CONF_PREFIX + "ZKLocksService.locks.reaper.threshold";
059    static final String REAPING_THREADS = CONF_PREFIX + "ZKLocksService.locks.reaper.threads";
060    private static final String RELEASE_RETRY_TIME_LIMIT_MINUTES = CONF_PREFIX + "ZKLocksService.lock.release.retry.time.limit"
061            + ".minutes";
062
063    /**
064     * Initialize the zookeeper locks service
065     *
066     * @param services services instance.
067     */
068    @Override
069    public void init(Services services) throws ServiceException {
070        super.init(services);
071        try {
072            zk = ZKUtils.register(this);
073            reaper = new ChildReaper(zk.getClient(), LOCKS_NODE, Reaper.Mode.REAP_UNTIL_GONE, getExecutorService(),
074                    ConfigurationService.getInt(services.getConf(), REAPING_THRESHOLD) * 1000, REAPING_LEADER_PATH);
075            reaper.start();
076        }
077        catch (Exception ex) {
078            throw new ServiceException(ErrorCode.E1700, ex.getMessage(), ex);
079        }
080    }
081
082    /**
083     * Destroy the zookeeper locks service.
084     */
085    @Override
086    public void destroy() {
087        if (reaper != null) {
088            try {
089                reaper.close();
090            }
091            catch (IOException e) {
092                LOG.error("Error closing childReaper", e);
093            }
094        }
095        if (zk != null) {
096            zk.unregister(this);
097        }
098        zk = null;
099        super.destroy();
100    }
101
102    /**
103     * Instruments the zookeeper locks service.
104     *
105     * @param instr instance to instrument the memory locks service to.
106     */
107    @Override
108    public void instrument(Instrumentation instr) {
109        // Similar to MemoryLocksService's instrumentation, though this is only the number of locks this Oozie server currently has
110        instr.addVariable(INSTRUMENTATION_GROUP, "locks", new Instrumentation.Variable<Integer>() {
111            @Override
112            public Integer getValue() {
113                return zkLocks.size();
114            }
115        });
116    }
117
118    /**
119     * Obtain a READ lock for a source.
120     *
121     * @param resource resource name.
122     * @param wait time out in milliseconds to wait for the lock, -1 means no timeout and 0 no wait.
123     * @return the lock token for the resource, or <code>null</code> if the lock could not be obtained.
124     * @throws InterruptedException thrown if the thread was interrupted while waiting.
125     */
126    @Override
127    public LockToken getReadLock(String resource, long wait) throws InterruptedException {
128        return acquireLock(resource, Type.READ, wait);
129    }
130
131    /**
132     * Obtain a WRITE lock for a source.
133     *
134     * @param resource resource name.
135     * @param wait time out in milliseconds to wait for the lock, -1 means no timeout and 0 no wait.
136     * @return the lock token for the resource, or <code>null</code> if the lock could not be obtained.
137     * @throws InterruptedException thrown if the thread was interrupted while waiting.
138     */
139    @Override
140    public LockToken getWriteLock(String resource, long wait) throws InterruptedException {
141        return acquireLock(resource, Type.WRITE, wait);
142    }
143
144    private LockToken acquireLock(final String resource, final Type type, final long wait) throws InterruptedException {
145        LOG.debug("Acquiring ZooKeeper lock. [resource={};type={};wait={}]", resource, type, wait);
146
147        InterProcessReadWriteLock lockEntry;
148        final String zkPath = LOCKS_NODE + "/" + resource;
149        LOG.debug("Checking existing Curator lock or creating new one. [zkPath={}]", zkPath);
150
151        // Creating a Curator InterProcessReadWriteLock is lightweight - only calling acquire() costs real ZooKeeper calls
152        final InterProcessReadWriteLock newLockEntry = new InterProcessReadWriteLock(zk.getClient(), zkPath);
153        final InterProcessReadWriteLock existingLockEntry = zkLocks.putIfAbsent(resource, newLockEntry);
154        if (existingLockEntry == null) {
155            lockEntry = newLockEntry;
156            LOG.debug("No existing Curator lock present, new one created successfully. [zkPath={}]", zkPath);
157        }
158        else {
159            // We can't destoy newLockEntry and we don't have to - it's taken care of by Curator and JVM GC
160            lockEntry = existingLockEntry;
161            LOG.debug("Reusing existing Curator lock. [zkPath={}]", zkPath);
162        }
163
164        ZKLockToken token = null;
165        try {
166            LOG.debug("Calling Curator to acquire ZooKeeper lock. [resource={};type={};wait={}]", resource, type, wait);
167            final InterProcessMutex lock = (type.equals(Type.READ)) ? lockEntry.readLock() : lockEntry.writeLock();
168            if (wait == -1) {
169                lock.acquire();
170                token = new ZKLockToken(lockEntry, type);
171                LOG.debug("ZooKeeper lock acquired successfully. [resource={};type={}]", resource, type);
172            }
173            else if (lock.acquire(wait, TimeUnit.MILLISECONDS)) {
174                token = new ZKLockToken(lockEntry, type);
175                LOG.debug("ZooKeeper lock acquired successfully waiting. [resource={};type={};wait={}]", resource, type, wait);
176            }
177            else {
178                LOG.warn("Could not acquire ZooKeeper lock, timed out. [resource={};type={};wait={}]", resource, type, wait);
179            }
180        }
181        catch (final Exception ex) {
182            //Not throwing exception. Should return null, so that command can be requeued
183            LOG.warn("Could not acquire lock due to a ZooKeeper error. " +
184                    "[ex={};resource={};type={};wait={}]", ex, resource, type, wait);
185            LOG.error("Error while acquiring lock", ex);
186        }
187
188        return token;
189    }
190
191    /**
192     * Implementation of {@link LockToken} for zookeeper locks.
193     */
194    class ZKLockToken implements LockToken {
195        private final InterProcessReadWriteLock lockEntry;
196        private final Type type;
197
198        private ZKLockToken(InterProcessReadWriteLock lockEntry, Type type) {
199            this.lockEntry = lockEntry;
200            this.type = type;
201        }
202
203        /**
204         * Release the lock.
205         */
206        @Override
207        public void release() {
208            try {
209                retriableRelease();
210            }
211            catch (Exception ex) {
212                LOG.warn("Could not release lock: " + ex.getMessage(), ex);
213            }
214        }
215
216        /**
217         * Retires on failure to release lock
218         *
219         * @throws InterruptedException
220         */
221        private void retriableRelease() throws Exception {
222            long retryTimeLimit = TimeUnit.MINUTES.toSeconds(ConfigurationService.getLong(RELEASE_RETRY_TIME_LIMIT_MINUTES, 30));
223            int sleepSeconds = 10;
224            for(int retryCount = 1; retryTimeLimit>=0; retryTimeLimit -= sleepSeconds, retryCount++) {
225                try {
226                    switch (type) {
227                        case WRITE:
228                            lockEntry.writeLock().release();
229                            break;
230                        case READ:
231                            lockEntry.readLock().release();
232                            break;
233                    }
234                    break;
235                }
236                catch (KeeperException.ConnectionLossException ex) {
237                    LOG.warn("Could not release lock: " + ex.getMessage() + ". Retry will be after " + sleepSeconds + " seconds",
238                            ex);
239                    Thread.sleep(TimeUnit.SECONDS.toMillis(sleepSeconds));
240                    LOG.info("Retrying to release lock. Retry number=" + retryCount);
241                }
242            }
243        }
244    }
245
246    @VisibleForTesting
247    public ConcurrentMap<String, InterProcessReadWriteLock> getLocks(){
248        return zkLocks;
249    }
250
251    private static ScheduledExecutorService getExecutorService() {
252        return ThreadUtils.newFixedThreadScheduledPool(ConfigurationService.getInt(REAPING_THREADS),
253                "ZKLocksChildReaper");
254    }
255
256}