Chris Lu
5 years ago
12 changed files with 58 additions and 1576 deletions
-
137other/java/hdfs2/src/main/java/seaweed/hdfs/ReadBuffer.java
-
394other/java/hdfs2/src/main/java/seaweed/hdfs/ReadBufferManager.java
-
29other/java/hdfs2/src/main/java/seaweed/hdfs/ReadBufferStatus.java
-
70other/java/hdfs2/src/main/java/seaweed/hdfs/ReadBufferWorker.java
-
4other/java/hdfs2/src/main/java/seaweed/hdfs/SeaweedFileSystemStore.java
-
183other/java/hdfs2/src/main/java/seaweed/hdfs/SeaweedInputStream.java
-
137other/java/hdfs3/src/main/java/seaweed/hdfs/ReadBuffer.java
-
394other/java/hdfs3/src/main/java/seaweed/hdfs/ReadBufferManager.java
-
29other/java/hdfs3/src/main/java/seaweed/hdfs/ReadBufferStatus.java
-
70other/java/hdfs3/src/main/java/seaweed/hdfs/ReadBufferWorker.java
-
4other/java/hdfs3/src/main/java/seaweed/hdfs/SeaweedFileSystemStore.java
-
183other/java/hdfs3/src/main/java/seaweed/hdfs/SeaweedInputStream.java
@ -1,137 +0,0 @@ |
|||||
/** |
|
||||
* Licensed to the Apache Software Foundation (ASF) under one |
|
||||
* or more contributor license agreements. See the NOTICE file |
|
||||
* distributed with this work for additional information |
|
||||
* regarding copyright ownership. The ASF licenses this file |
|
||||
* to you under the Apache License, Version 2.0 (the |
|
||||
* "License"); you may not use this file except in compliance |
|
||||
* with the License. You may obtain a copy of the License at |
|
||||
* |
|
||||
* http://www.apache.org/licenses/LICENSE-2.0 |
|
||||
* |
|
||||
* Unless required by applicable law or agreed to in writing, software |
|
||||
* distributed under the License is distributed on an "AS IS" BASIS, |
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|
||||
* See the License for the specific language governing permissions and |
|
||||
* limitations under the License. |
|
||||
*/ |
|
||||
|
|
||||
package seaweed.hdfs; |
|
||||
|
|
||||
import java.util.concurrent.CountDownLatch; |
|
||||
|
|
||||
class ReadBuffer { |
|
||||
|
|
||||
private SeaweedInputStream stream; |
|
||||
private long offset; // offset within the file for the buffer |
|
||||
private int length; // actual length, set after the buffer is filles |
|
||||
private int requestedLength; // requested length of the read |
|
||||
private byte[] buffer; // the buffer itself |
|
||||
private int bufferindex = -1; // index in the buffers array in Buffer manager |
|
||||
private ReadBufferStatus status; // status of the buffer |
|
||||
private CountDownLatch latch = null; // signaled when the buffer is done reading, so any client |
|
||||
// waiting on this buffer gets unblocked |
|
||||
|
|
||||
// fields to help with eviction logic |
|
||||
private long timeStamp = 0; // tick at which buffer became available to read |
|
||||
private boolean isFirstByteConsumed = false; |
|
||||
private boolean isLastByteConsumed = false; |
|
||||
private boolean isAnyByteConsumed = false; |
|
||||
|
|
||||
public SeaweedInputStream getStream() { |
|
||||
return stream; |
|
||||
} |
|
||||
|
|
||||
public void setStream(SeaweedInputStream stream) { |
|
||||
this.stream = stream; |
|
||||
} |
|
||||
|
|
||||
public long getOffset() { |
|
||||
return offset; |
|
||||
} |
|
||||
|
|
||||
public void setOffset(long offset) { |
|
||||
this.offset = offset; |
|
||||
} |
|
||||
|
|
||||
public int getLength() { |
|
||||
return length; |
|
||||
} |
|
||||
|
|
||||
public void setLength(int length) { |
|
||||
this.length = length; |
|
||||
} |
|
||||
|
|
||||
public int getRequestedLength() { |
|
||||
return requestedLength; |
|
||||
} |
|
||||
|
|
||||
public void setRequestedLength(int requestedLength) { |
|
||||
this.requestedLength = requestedLength; |
|
||||
} |
|
||||
|
|
||||
public byte[] getBuffer() { |
|
||||
return buffer; |
|
||||
} |
|
||||
|
|
||||
public void setBuffer(byte[] buffer) { |
|
||||
this.buffer = buffer; |
|
||||
} |
|
||||
|
|
||||
public int getBufferindex() { |
|
||||
return bufferindex; |
|
||||
} |
|
||||
|
|
||||
public void setBufferindex(int bufferindex) { |
|
||||
this.bufferindex = bufferindex; |
|
||||
} |
|
||||
|
|
||||
public ReadBufferStatus getStatus() { |
|
||||
return status; |
|
||||
} |
|
||||
|
|
||||
public void setStatus(ReadBufferStatus status) { |
|
||||
this.status = status; |
|
||||
} |
|
||||
|
|
||||
public CountDownLatch getLatch() { |
|
||||
return latch; |
|
||||
} |
|
||||
|
|
||||
public void setLatch(CountDownLatch latch) { |
|
||||
this.latch = latch; |
|
||||
} |
|
||||
|
|
||||
public long getTimeStamp() { |
|
||||
return timeStamp; |
|
||||
} |
|
||||
|
|
||||
public void setTimeStamp(long timeStamp) { |
|
||||
this.timeStamp = timeStamp; |
|
||||
} |
|
||||
|
|
||||
public boolean isFirstByteConsumed() { |
|
||||
return isFirstByteConsumed; |
|
||||
} |
|
||||
|
|
||||
public void setFirstByteConsumed(boolean isFirstByteConsumed) { |
|
||||
this.isFirstByteConsumed = isFirstByteConsumed; |
|
||||
} |
|
||||
|
|
||||
public boolean isLastByteConsumed() { |
|
||||
return isLastByteConsumed; |
|
||||
} |
|
||||
|
|
||||
public void setLastByteConsumed(boolean isLastByteConsumed) { |
|
||||
this.isLastByteConsumed = isLastByteConsumed; |
|
||||
} |
|
||||
|
|
||||
public boolean isAnyByteConsumed() { |
|
||||
return isAnyByteConsumed; |
|
||||
} |
|
||||
|
|
||||
public void setAnyByteConsumed(boolean isAnyByteConsumed) { |
|
||||
this.isAnyByteConsumed = isAnyByteConsumed; |
|
||||
} |
|
||||
|
|
||||
} |
|
@ -1,394 +0,0 @@ |
|||||
/** |
|
||||
* Licensed to the Apache Software Foundation (ASF) under one |
|
||||
* or more contributor license agreements. See the NOTICE file |
|
||||
* distributed with this work for additional information |
|
||||
* regarding copyright ownership. The ASF licenses this file |
|
||||
* to you under the Apache License, Version 2.0 (the |
|
||||
* "License"); you may not use this file except in compliance |
|
||||
* with the License. You may obtain a copy of the License at |
|
||||
* <p> |
|
||||
* http://www.apache.org/licenses/LICENSE-2.0 |
|
||||
* <p> |
|
||||
* Unless required by applicable law or agreed to in writing, software |
|
||||
* distributed under the License is distributed on an "AS IS" BASIS, |
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|
||||
* See the License for the specific language governing permissions and |
|
||||
* limitations under the License. |
|
||||
*/ |
|
||||
package seaweed.hdfs; |
|
||||
|
|
||||
import org.slf4j.Logger; |
|
||||
import org.slf4j.LoggerFactory; |
|
||||
|
|
||||
import java.util.Collection; |
|
||||
import java.util.LinkedList; |
|
||||
import java.util.Queue; |
|
||||
import java.util.Stack; |
|
||||
import java.util.concurrent.CountDownLatch; |
|
||||
|
|
||||
/** |
|
||||
* The Read Buffer Manager for Rest AbfsClient. |
|
||||
*/ |
|
||||
final class ReadBufferManager { |
|
||||
private static final Logger LOGGER = LoggerFactory.getLogger(ReadBufferManager.class); |
|
||||
|
|
||||
private static final int NUM_BUFFERS = 16; |
|
||||
private static final int BLOCK_SIZE = 4 * 1024 * 1024; |
|
||||
private static final int NUM_THREADS = 8; |
|
||||
private static final int THRESHOLD_AGE_MILLISECONDS = 3000; // have to see if 3 seconds is a good threshold |
|
||||
|
|
||||
private Thread[] threads = new Thread[NUM_THREADS]; |
|
||||
private byte[][] buffers; // array of byte[] buffers, to hold the data that is read |
|
||||
private Stack<Integer> freeList = new Stack<>(); // indices in buffers[] array that are available |
|
||||
|
|
||||
private Queue<ReadBuffer> readAheadQueue = new LinkedList<>(); // queue of requests that are not picked up by any worker thread yet |
|
||||
private LinkedList<ReadBuffer> inProgressList = new LinkedList<>(); // requests being processed by worker threads |
|
||||
private LinkedList<ReadBuffer> completedReadList = new LinkedList<>(); // buffers available for reading |
|
||||
private static final ReadBufferManager BUFFER_MANAGER; // singleton, initialized in static initialization block |
|
||||
|
|
||||
static { |
|
||||
BUFFER_MANAGER = new ReadBufferManager(); |
|
||||
BUFFER_MANAGER.init(); |
|
||||
} |
|
||||
|
|
||||
static ReadBufferManager getBufferManager() { |
|
||||
return BUFFER_MANAGER; |
|
||||
} |
|
||||
|
|
||||
private void init() { |
|
||||
buffers = new byte[NUM_BUFFERS][]; |
|
||||
for (int i = 0; i < NUM_BUFFERS; i++) { |
|
||||
buffers[i] = new byte[BLOCK_SIZE]; // same buffers are reused. The byte array never goes back to GC |
|
||||
freeList.add(i); |
|
||||
} |
|
||||
for (int i = 0; i < NUM_THREADS; i++) { |
|
||||
Thread t = new Thread(new ReadBufferWorker(i)); |
|
||||
t.setDaemon(true); |
|
||||
threads[i] = t; |
|
||||
t.setName("SeaweedFS-prefetch-" + i); |
|
||||
t.start(); |
|
||||
} |
|
||||
ReadBufferWorker.UNLEASH_WORKERS.countDown(); |
|
||||
} |
|
||||
|
|
||||
// hide instance constructor |
|
||||
private ReadBufferManager() { |
|
||||
} |
|
||||
|
|
||||
|
|
||||
/* |
|
||||
* |
|
||||
* SeaweedInputStream-facing methods |
|
||||
* |
|
||||
*/ |
|
||||
|
|
||||
|
|
||||
/** |
|
||||
* {@link SeaweedInputStream} calls this method to queue read-aheads. |
|
||||
* |
|
||||
* @param stream The {@link SeaweedInputStream} for which to do the read-ahead |
|
||||
* @param requestedOffset The offset in the file which shoukd be read |
|
||||
* @param requestedLength The length to read |
|
||||
*/ |
|
||||
void queueReadAhead(final SeaweedInputStream stream, final long requestedOffset, final int requestedLength) { |
|
||||
if (LOGGER.isTraceEnabled()) { |
|
||||
LOGGER.trace("Start Queueing readAhead for {} offset {} length {}", |
|
||||
stream.getPath(), requestedOffset, requestedLength); |
|
||||
} |
|
||||
ReadBuffer buffer; |
|
||||
synchronized (this) { |
|
||||
if (isAlreadyQueued(stream, requestedOffset)) { |
|
||||
return; // already queued, do not queue again |
|
||||
} |
|
||||
if (freeList.isEmpty() && !tryEvict()) { |
|
||||
return; // no buffers available, cannot queue anything |
|
||||
} |
|
||||
|
|
||||
buffer = new ReadBuffer(); |
|
||||
buffer.setStream(stream); |
|
||||
buffer.setOffset(requestedOffset); |
|
||||
buffer.setLength(0); |
|
||||
buffer.setRequestedLength(requestedLength); |
|
||||
buffer.setStatus(ReadBufferStatus.NOT_AVAILABLE); |
|
||||
buffer.setLatch(new CountDownLatch(1)); |
|
||||
|
|
||||
Integer bufferIndex = freeList.pop(); // will return a value, since we have checked size > 0 already |
|
||||
|
|
||||
buffer.setBuffer(buffers[bufferIndex]); |
|
||||
buffer.setBufferindex(bufferIndex); |
|
||||
readAheadQueue.add(buffer); |
|
||||
notifyAll(); |
|
||||
} |
|
||||
if (LOGGER.isTraceEnabled()) { |
|
||||
LOGGER.trace("Done q-ing readAhead for file {} offset {} buffer idx {}", |
|
||||
stream.getPath(), requestedOffset, buffer.getBufferindex()); |
|
||||
} |
|
||||
} |
|
||||
|
|
||||
|
|
||||
/** |
|
||||
* {@link SeaweedInputStream} calls this method read any bytes already available in a buffer (thereby saving a |
|
||||
* remote read). This returns the bytes if the data already exists in buffer. If there is a buffer that is reading |
|
||||
* the requested offset, then this method blocks until that read completes. If the data is queued in a read-ahead |
|
||||
* but not picked up by a worker thread yet, then it cancels that read-ahead and reports cache miss. This is because |
|
||||
* depending on worker thread availability, the read-ahead may take a while - the calling thread can do it's own |
|
||||
* read to get the data faster (copmared to the read waiting in queue for an indeterminate amount of time). |
|
||||
* |
|
||||
* @param stream the file to read bytes for |
|
||||
* @param position the offset in the file to do a read for |
|
||||
* @param length the length to read |
|
||||
* @param buffer the buffer to read data into. Note that the buffer will be written into from offset 0. |
|
||||
* @return the number of bytes read |
|
||||
*/ |
|
||||
int getBlock(final SeaweedInputStream stream, final long position, final int length, final byte[] buffer) { |
|
||||
// not synchronized, so have to be careful with locking |
|
||||
if (LOGGER.isTraceEnabled()) { |
|
||||
LOGGER.trace("getBlock for file {} position {} thread {}", |
|
||||
stream.getPath(), position, Thread.currentThread().getName()); |
|
||||
} |
|
||||
|
|
||||
waitForProcess(stream, position); |
|
||||
|
|
||||
int bytesRead = 0; |
|
||||
synchronized (this) { |
|
||||
bytesRead = getBlockFromCompletedQueue(stream, position, length, buffer); |
|
||||
} |
|
||||
if (bytesRead > 0) { |
|
||||
if (LOGGER.isTraceEnabled()) { |
|
||||
LOGGER.trace("Done read from Cache for {} position {} length {}", |
|
||||
stream.getPath(), position, bytesRead); |
|
||||
} |
|
||||
return bytesRead; |
|
||||
} |
|
||||
|
|
||||
// otherwise, just say we got nothing - calling thread can do its own read |
|
||||
return 0; |
|
||||
} |
|
||||
|
|
||||
/* |
|
||||
* |
|
||||
* Internal methods |
|
||||
* |
|
||||
*/ |
|
||||
|
|
||||
private void waitForProcess(final SeaweedInputStream stream, final long position) { |
|
||||
ReadBuffer readBuf; |
|
||||
synchronized (this) { |
|
||||
clearFromReadAheadQueue(stream, position); |
|
||||
readBuf = getFromList(inProgressList, stream, position); |
|
||||
} |
|
||||
if (readBuf != null) { // if in in-progress queue, then block for it |
|
||||
try { |
|
||||
if (LOGGER.isTraceEnabled()) { |
|
||||
LOGGER.trace("got a relevant read buffer for file {} offset {} buffer idx {}", |
|
||||
stream.getPath(), readBuf.getOffset(), readBuf.getBufferindex()); |
|
||||
} |
|
||||
readBuf.getLatch().await(); // blocking wait on the caller stream's thread |
|
||||
// Note on correctness: readBuf gets out of inProgressList only in 1 place: after worker thread |
|
||||
// is done processing it (in doneReading). There, the latch is set after removing the buffer from |
|
||||
// inProgressList. So this latch is safe to be outside the synchronized block. |
|
||||
// Putting it in synchronized would result in a deadlock, since this thread would be holding the lock |
|
||||
// while waiting, so no one will be able to change any state. If this becomes more complex in the future, |
|
||||
// then the latch cane be removed and replaced with wait/notify whenever inProgressList is touched. |
|
||||
} catch (InterruptedException ex) { |
|
||||
Thread.currentThread().interrupt(); |
|
||||
} |
|
||||
if (LOGGER.isTraceEnabled()) { |
|
||||
LOGGER.trace("latch done for file {} buffer idx {} length {}", |
|
||||
stream.getPath(), readBuf.getBufferindex(), readBuf.getLength()); |
|
||||
} |
|
||||
} |
|
||||
} |
|
||||
|
|
||||
/** |
|
||||
* If any buffer in the completedlist can be reclaimed then reclaim it and return the buffer to free list. |
|
||||
* The objective is to find just one buffer - there is no advantage to evicting more than one. |
|
||||
* |
|
||||
* @return whether the eviction succeeeded - i.e., were we able to free up one buffer |
|
||||
*/ |
|
||||
private synchronized boolean tryEvict() { |
|
||||
ReadBuffer nodeToEvict = null; |
|
||||
if (completedReadList.size() <= 0) { |
|
||||
return false; // there are no evict-able buffers |
|
||||
} |
|
||||
|
|
||||
// first, try buffers where all bytes have been consumed (approximated as first and last bytes consumed) |
|
||||
for (ReadBuffer buf : completedReadList) { |
|
||||
if (buf.isFirstByteConsumed() && buf.isLastByteConsumed()) { |
|
||||
nodeToEvict = buf; |
|
||||
break; |
|
||||
} |
|
||||
} |
|
||||
if (nodeToEvict != null) { |
|
||||
return evict(nodeToEvict); |
|
||||
} |
|
||||
|
|
||||
// next, try buffers where any bytes have been consumed (may be a bad idea? have to experiment and see) |
|
||||
for (ReadBuffer buf : completedReadList) { |
|
||||
if (buf.isAnyByteConsumed()) { |
|
||||
nodeToEvict = buf; |
|
||||
break; |
|
||||
} |
|
||||
} |
|
||||
|
|
||||
if (nodeToEvict != null) { |
|
||||
return evict(nodeToEvict); |
|
||||
} |
|
||||
|
|
||||
// next, try any old nodes that have not been consumed |
|
||||
long earliestBirthday = Long.MAX_VALUE; |
|
||||
for (ReadBuffer buf : completedReadList) { |
|
||||
if (buf.getTimeStamp() < earliestBirthday) { |
|
||||
nodeToEvict = buf; |
|
||||
earliestBirthday = buf.getTimeStamp(); |
|
||||
} |
|
||||
} |
|
||||
if ((currentTimeMillis() - earliestBirthday > THRESHOLD_AGE_MILLISECONDS) && (nodeToEvict != null)) { |
|
||||
return evict(nodeToEvict); |
|
||||
} |
|
||||
|
|
||||
// nothing can be evicted |
|
||||
return false; |
|
||||
} |
|
||||
|
|
||||
private boolean evict(final ReadBuffer buf) { |
|
||||
freeList.push(buf.getBufferindex()); |
|
||||
completedReadList.remove(buf); |
|
||||
if (LOGGER.isTraceEnabled()) { |
|
||||
LOGGER.trace("Evicting buffer idx {}; was used for file {} offset {} length {}", |
|
||||
buf.getBufferindex(), buf.getStream().getPath(), buf.getOffset(), buf.getLength()); |
|
||||
} |
|
||||
return true; |
|
||||
} |
|
||||
|
|
||||
private boolean isAlreadyQueued(final SeaweedInputStream stream, final long requestedOffset) { |
|
||||
// returns true if any part of the buffer is already queued |
|
||||
return (isInList(readAheadQueue, stream, requestedOffset) |
|
||||
|| isInList(inProgressList, stream, requestedOffset) |
|
||||
|| isInList(completedReadList, stream, requestedOffset)); |
|
||||
} |
|
||||
|
|
||||
private boolean isInList(final Collection<ReadBuffer> list, final SeaweedInputStream stream, final long requestedOffset) { |
|
||||
return (getFromList(list, stream, requestedOffset) != null); |
|
||||
} |
|
||||
|
|
||||
private ReadBuffer getFromList(final Collection<ReadBuffer> list, final SeaweedInputStream stream, final long requestedOffset) { |
|
||||
for (ReadBuffer buffer : list) { |
|
||||
if (buffer.getStream() == stream) { |
|
||||
if (buffer.getStatus() == ReadBufferStatus.AVAILABLE |
|
||||
&& requestedOffset >= buffer.getOffset() |
|
||||
&& requestedOffset < buffer.getOffset() + buffer.getLength()) { |
|
||||
return buffer; |
|
||||
} else if (requestedOffset >= buffer.getOffset() |
|
||||
&& requestedOffset < buffer.getOffset() + buffer.getRequestedLength()) { |
|
||||
return buffer; |
|
||||
} |
|
||||
} |
|
||||
} |
|
||||
return null; |
|
||||
} |
|
||||
|
|
||||
private void clearFromReadAheadQueue(final SeaweedInputStream stream, final long requestedOffset) { |
|
||||
ReadBuffer buffer = getFromList(readAheadQueue, stream, requestedOffset); |
|
||||
if (buffer != null) { |
|
||||
readAheadQueue.remove(buffer); |
|
||||
notifyAll(); // lock is held in calling method |
|
||||
freeList.push(buffer.getBufferindex()); |
|
||||
} |
|
||||
} |
|
||||
|
|
||||
private int getBlockFromCompletedQueue(final SeaweedInputStream stream, final long position, final int length, |
|
||||
final byte[] buffer) { |
|
||||
ReadBuffer buf = getFromList(completedReadList, stream, position); |
|
||||
if (buf == null || position >= buf.getOffset() + buf.getLength()) { |
|
||||
return 0; |
|
||||
} |
|
||||
int cursor = (int) (position - buf.getOffset()); |
|
||||
int availableLengthInBuffer = buf.getLength() - cursor; |
|
||||
int lengthToCopy = Math.min(length, availableLengthInBuffer); |
|
||||
System.arraycopy(buf.getBuffer(), cursor, buffer, 0, lengthToCopy); |
|
||||
if (cursor == 0) { |
|
||||
buf.setFirstByteConsumed(true); |
|
||||
} |
|
||||
if (cursor + lengthToCopy == buf.getLength()) { |
|
||||
buf.setLastByteConsumed(true); |
|
||||
} |
|
||||
buf.setAnyByteConsumed(true); |
|
||||
return lengthToCopy; |
|
||||
} |
|
||||
|
|
||||
/* |
|
||||
* |
|
||||
* ReadBufferWorker-thread-facing methods |
|
||||
* |
|
||||
*/ |
|
||||
|
|
||||
/** |
|
||||
* ReadBufferWorker thread calls this to get the next buffer that it should work on. |
|
||||
* |
|
||||
* @return {@link ReadBuffer} |
|
||||
* @throws InterruptedException if thread is interrupted |
|
||||
*/ |
|
||||
ReadBuffer getNextBlockToRead() throws InterruptedException { |
|
||||
ReadBuffer buffer = null; |
|
||||
synchronized (this) { |
|
||||
//buffer = readAheadQueue.take(); // blocking method |
|
||||
while (readAheadQueue.size() == 0) { |
|
||||
wait(); |
|
||||
} |
|
||||
buffer = readAheadQueue.remove(); |
|
||||
notifyAll(); |
|
||||
if (buffer == null) { |
|
||||
return null; // should never happen |
|
||||
} |
|
||||
buffer.setStatus(ReadBufferStatus.READING_IN_PROGRESS); |
|
||||
inProgressList.add(buffer); |
|
||||
} |
|
||||
if (LOGGER.isTraceEnabled()) { |
|
||||
LOGGER.trace("ReadBufferWorker picked file {} for offset {}", |
|
||||
buffer.getStream().getPath(), buffer.getOffset()); |
|
||||
} |
|
||||
return buffer; |
|
||||
} |
|
||||
|
|
||||
/** |
|
||||
* ReadBufferWorker thread calls this method to post completion. |
|
||||
* |
|
||||
* @param buffer the buffer whose read was completed |
|
||||
* @param result the {@link ReadBufferStatus} after the read operation in the worker thread |
|
||||
* @param bytesActuallyRead the number of bytes that the worker thread was actually able to read |
|
||||
*/ |
|
||||
void doneReading(final ReadBuffer buffer, final ReadBufferStatus result, final int bytesActuallyRead) { |
|
||||
if (LOGGER.isTraceEnabled()) { |
|
||||
LOGGER.trace("ReadBufferWorker completed file {} for offset {} bytes {}", |
|
||||
buffer.getStream().getPath(), buffer.getOffset(), bytesActuallyRead); |
|
||||
} |
|
||||
synchronized (this) { |
|
||||
inProgressList.remove(buffer); |
|
||||
if (result == ReadBufferStatus.AVAILABLE && bytesActuallyRead > 0) { |
|
||||
buffer.setStatus(ReadBufferStatus.AVAILABLE); |
|
||||
buffer.setTimeStamp(currentTimeMillis()); |
|
||||
buffer.setLength(bytesActuallyRead); |
|
||||
completedReadList.add(buffer); |
|
||||
} else { |
|
||||
freeList.push(buffer.getBufferindex()); |
|
||||
// buffer should go out of scope after the end of the calling method in ReadBufferWorker, and eligible for GC |
|
||||
} |
|
||||
} |
|
||||
//outside the synchronized, since anyone receiving a wake-up from the latch must see safe-published results |
|
||||
buffer.getLatch().countDown(); // wake up waiting threads (if any) |
|
||||
} |
|
||||
|
|
||||
/** |
|
||||
* Similar to System.currentTimeMillis, except implemented with System.nanoTime(). |
|
||||
* System.currentTimeMillis can go backwards when system clock is changed (e.g., with NTP time synchronization), |
|
||||
* making it unsuitable for measuring time intervals. nanotime is strictly monotonically increasing per CPU core. |
|
||||
* Note: it is not monotonic across Sockets, and even within a CPU, its only the |
|
||||
* more recent parts which share a clock across all cores. |
|
||||
* |
|
||||
* @return current time in milliseconds |
|
||||
*/ |
|
||||
private long currentTimeMillis() { |
|
||||
return System.nanoTime() / 1000 / 1000; |
|
||||
} |
|
||||
} |
|
@ -1,29 +0,0 @@ |
|||||
/** |
|
||||
* Licensed to the Apache Software Foundation (ASF) under one |
|
||||
* or more contributor license agreements. See the NOTICE file |
|
||||
* distributed with this work for additional information |
|
||||
* regarding copyright ownership. The ASF licenses this file |
|
||||
* to you under the Apache License, Version 2.0 (the |
|
||||
* "License"); you may not use this file except in compliance |
|
||||
* with the License. You may obtain a copy of the License at |
|
||||
* |
|
||||
* http://www.apache.org/licenses/LICENSE-2.0 |
|
||||
* |
|
||||
* Unless required by applicable law or agreed to in writing, software |
|
||||
* distributed under the License is distributed on an "AS IS" BASIS, |
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|
||||
* See the License for the specific language governing permissions and |
|
||||
* limitations under the License. |
|
||||
*/ |
|
||||
|
|
||||
package seaweed.hdfs; |
|
||||
|
|
||||
/**
 * Lifecycle states of a read-ahead buffer.
 */
public enum ReadBufferStatus {

  /** Buffers sitting in the read-ahead queue have this status. */
  NOT_AVAILABLE,

  /** Reading is in progress on this buffer; the buffer should be in the in-progress list. */
  READING_IN_PROGRESS,

  /** Data is available in the buffer; it should be in the completed list. */
  AVAILABLE,

  /** The read completed, but failed. */
  READ_FAILED
}
|
@ -1,70 +0,0 @@ |
|||||
/** |
|
||||
* Licensed to the Apache Software Foundation (ASF) under one |
|
||||
* or more contributor license agreements. See the NOTICE file |
|
||||
* distributed with this work for additional information |
|
||||
* regarding copyright ownership. The ASF licenses this file |
|
||||
* to you under the Apache License, Version 2.0 (the |
|
||||
* "License"); you may not use this file except in compliance |
|
||||
* with the License. You may obtain a copy of the License at |
|
||||
* |
|
||||
* http://www.apache.org/licenses/LICENSE-2.0 |
|
||||
* |
|
||||
* Unless required by applicable law or agreed to in writing, software |
|
||||
* distributed under the License is distributed on an "AS IS" BASIS, |
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|
||||
* See the License for the specific language governing permissions and |
|
||||
* limitations under the License. |
|
||||
*/ |
|
||||
|
|
||||
package seaweed.hdfs; |
|
||||
|
|
||||
import java.util.concurrent.CountDownLatch; |
|
||||
|
|
||||
class ReadBufferWorker implements Runnable { |
|
||||
|
|
||||
protected static final CountDownLatch UNLEASH_WORKERS = new CountDownLatch(1); |
|
||||
private int id; |
|
||||
|
|
||||
ReadBufferWorker(final int id) { |
|
||||
this.id = id; |
|
||||
} |
|
||||
|
|
||||
/** |
|
||||
* return the ID of ReadBufferWorker. |
|
||||
*/ |
|
||||
public int getId() { |
|
||||
return this.id; |
|
||||
} |
|
||||
|
|
||||
/** |
|
||||
* Waits until a buffer becomes available in ReadAheadQueue. |
|
||||
* Once a buffer becomes available, reads the file specified in it and then posts results back to buffer manager. |
|
||||
* Rinse and repeat. Forever. |
|
||||
*/ |
|
||||
public void run() { |
|
||||
try { |
|
||||
UNLEASH_WORKERS.await(); |
|
||||
} catch (InterruptedException ex) { |
|
||||
Thread.currentThread().interrupt(); |
|
||||
} |
|
||||
ReadBufferManager bufferManager = ReadBufferManager.getBufferManager(); |
|
||||
ReadBuffer buffer; |
|
||||
while (true) { |
|
||||
try { |
|
||||
buffer = bufferManager.getNextBlockToRead(); // blocks, until a buffer is available for this thread |
|
||||
} catch (InterruptedException ex) { |
|
||||
Thread.currentThread().interrupt(); |
|
||||
return; |
|
||||
} |
|
||||
if (buffer != null) { |
|
||||
try { |
|
||||
// do the actual read, from the file. |
|
||||
int bytesRead = buffer.getStream().readRemote(buffer.getOffset(), buffer.getBuffer(), 0, buffer.getRequestedLength()); |
|
||||
bufferManager.doneReading(buffer, ReadBufferStatus.AVAILABLE, bytesRead); // post result back to ReadBufferManager |
|
||||
} catch (Exception ex) { |
|
||||
bufferManager.doneReading(buffer, ReadBufferStatus.READ_FAILED, 0); |
|
||||
} |
|
||||
} |
|
||||
} |
|
||||
} |
|
||||
} |
|
@ -1,137 +0,0 @@ |
|||||
/** |
|
||||
* Licensed to the Apache Software Foundation (ASF) under one |
|
||||
* or more contributor license agreements. See the NOTICE file |
|
||||
* distributed with this work for additional information |
|
||||
* regarding copyright ownership. The ASF licenses this file |
|
||||
* to you under the Apache License, Version 2.0 (the |
|
||||
* "License"); you may not use this file except in compliance |
|
||||
* with the License. You may obtain a copy of the License at |
|
||||
* |
|
||||
* http://www.apache.org/licenses/LICENSE-2.0 |
|
||||
* |
|
||||
* Unless required by applicable law or agreed to in writing, software |
|
||||
* distributed under the License is distributed on an "AS IS" BASIS, |
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|
||||
* See the License for the specific language governing permissions and |
|
||||
* limitations under the License. |
|
||||
*/ |
|
||||
|
|
||||
package seaweed.hdfs; |
|
||||
|
|
||||
import java.util.concurrent.CountDownLatch; |
|
||||
|
|
||||
class ReadBuffer { |
|
||||
|
|
||||
private SeaweedInputStream stream; |
|
||||
private long offset; // offset within the file for the buffer |
|
||||
private int length; // actual length, set after the buffer is filles |
|
||||
private int requestedLength; // requested length of the read |
|
||||
private byte[] buffer; // the buffer itself |
|
||||
private int bufferindex = -1; // index in the buffers array in Buffer manager |
|
||||
private ReadBufferStatus status; // status of the buffer |
|
||||
private CountDownLatch latch = null; // signaled when the buffer is done reading, so any client |
|
||||
// waiting on this buffer gets unblocked |
|
||||
|
|
||||
// fields to help with eviction logic |
|
||||
private long timeStamp = 0; // tick at which buffer became available to read |
|
||||
private boolean isFirstByteConsumed = false; |
|
||||
private boolean isLastByteConsumed = false; |
|
||||
private boolean isAnyByteConsumed = false; |
|
||||
|
|
||||
public SeaweedInputStream getStream() { |
|
||||
return stream; |
|
||||
} |
|
||||
|
|
||||
public void setStream(SeaweedInputStream stream) { |
|
||||
this.stream = stream; |
|
||||
} |
|
||||
|
|
||||
public long getOffset() { |
|
||||
return offset; |
|
||||
} |
|
||||
|
|
||||
public void setOffset(long offset) { |
|
||||
this.offset = offset; |
|
||||
} |
|
||||
|
|
||||
public int getLength() { |
|
||||
return length; |
|
||||
} |
|
||||
|
|
||||
public void setLength(int length) { |
|
||||
this.length = length; |
|
||||
} |
|
||||
|
|
||||
public int getRequestedLength() { |
|
||||
return requestedLength; |
|
||||
} |
|
||||
|
|
||||
public void setRequestedLength(int requestedLength) { |
|
||||
this.requestedLength = requestedLength; |
|
||||
} |
|
||||
|
|
||||
public byte[] getBuffer() { |
|
||||
return buffer; |
|
||||
} |
|
||||
|
|
||||
public void setBuffer(byte[] buffer) { |
|
||||
this.buffer = buffer; |
|
||||
} |
|
||||
|
|
||||
public int getBufferindex() { |
|
||||
return bufferindex; |
|
||||
} |
|
||||
|
|
||||
public void setBufferindex(int bufferindex) { |
|
||||
this.bufferindex = bufferindex; |
|
||||
} |
|
||||
|
|
||||
public ReadBufferStatus getStatus() { |
|
||||
return status; |
|
||||
} |
|
||||
|
|
||||
public void setStatus(ReadBufferStatus status) { |
|
||||
this.status = status; |
|
||||
} |
|
||||
|
|
||||
public CountDownLatch getLatch() { |
|
||||
return latch; |
|
||||
} |
|
||||
|
|
||||
public void setLatch(CountDownLatch latch) { |
|
||||
this.latch = latch; |
|
||||
} |
|
||||
|
|
||||
public long getTimeStamp() { |
|
||||
return timeStamp; |
|
||||
} |
|
||||
|
|
||||
public void setTimeStamp(long timeStamp) { |
|
||||
this.timeStamp = timeStamp; |
|
||||
} |
|
||||
|
|
||||
public boolean isFirstByteConsumed() { |
|
||||
return isFirstByteConsumed; |
|
||||
} |
|
||||
|
|
||||
public void setFirstByteConsumed(boolean isFirstByteConsumed) { |
|
||||
this.isFirstByteConsumed = isFirstByteConsumed; |
|
||||
} |
|
||||
|
|
||||
public boolean isLastByteConsumed() { |
|
||||
return isLastByteConsumed; |
|
||||
} |
|
||||
|
|
||||
public void setLastByteConsumed(boolean isLastByteConsumed) { |
|
||||
this.isLastByteConsumed = isLastByteConsumed; |
|
||||
} |
|
||||
|
|
||||
public boolean isAnyByteConsumed() { |
|
||||
return isAnyByteConsumed; |
|
||||
} |
|
||||
|
|
||||
public void setAnyByteConsumed(boolean isAnyByteConsumed) { |
|
||||
this.isAnyByteConsumed = isAnyByteConsumed; |
|
||||
} |
|
||||
|
|
||||
} |
|
@ -1,394 +0,0 @@ |
|||||
/** |
|
||||
* Licensed to the Apache Software Foundation (ASF) under one |
|
||||
* or more contributor license agreements. See the NOTICE file |
|
||||
* distributed with this work for additional information |
|
||||
* regarding copyright ownership. The ASF licenses this file |
|
||||
* to you under the Apache License, Version 2.0 (the |
|
||||
* "License"); you may not use this file except in compliance |
|
||||
* with the License. You may obtain a copy of the License at |
|
||||
* <p> |
|
||||
* http://www.apache.org/licenses/LICENSE-2.0 |
|
||||
* <p> |
|
||||
* Unless required by applicable law or agreed to in writing, software |
|
||||
* distributed under the License is distributed on an "AS IS" BASIS, |
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|
||||
* See the License for the specific language governing permissions and |
|
||||
* limitations under the License. |
|
||||
*/ |
|
||||
package seaweed.hdfs; |
|
||||
|
|
||||
import org.slf4j.Logger; |
|
||||
import org.slf4j.LoggerFactory; |
|
||||
|
|
||||
import java.util.Collection; |
|
||||
import java.util.LinkedList; |
|
||||
import java.util.Queue; |
|
||||
import java.util.Stack; |
|
||||
import java.util.concurrent.CountDownLatch; |
|
||||
|
|
||||
/** |
|
||||
* The Read Buffer Manager for Rest AbfsClient. |
|
||||
*/ |
|
||||
final class ReadBufferManager { |
|
||||
private static final Logger LOGGER = LoggerFactory.getLogger(ReadBufferManager.class); |
|
||||
|
|
||||
private static final int NUM_BUFFERS = 16; |
|
||||
private static final int BLOCK_SIZE = 4 * 1024 * 1024; |
|
||||
private static final int NUM_THREADS = 8; |
|
||||
private static final int THRESHOLD_AGE_MILLISECONDS = 3000; // have to see if 3 seconds is a good threshold |
|
||||
|
|
||||
private Thread[] threads = new Thread[NUM_THREADS]; |
|
||||
private byte[][] buffers; // array of byte[] buffers, to hold the data that is read |
|
||||
private Stack<Integer> freeList = new Stack<>(); // indices in buffers[] array that are available |
|
||||
|
|
||||
private Queue<ReadBuffer> readAheadQueue = new LinkedList<>(); // queue of requests that are not picked up by any worker thread yet |
|
||||
private LinkedList<ReadBuffer> inProgressList = new LinkedList<>(); // requests being processed by worker threads |
|
||||
private LinkedList<ReadBuffer> completedReadList = new LinkedList<>(); // buffers available for reading |
|
||||
private static final ReadBufferManager BUFFER_MANAGER; // singleton, initialized in static initialization block |
|
||||
|
|
||||
static { |
|
||||
BUFFER_MANAGER = new ReadBufferManager(); |
|
||||
BUFFER_MANAGER.init(); |
|
||||
} |
|
||||
|
|
||||
static ReadBufferManager getBufferManager() { |
|
||||
return BUFFER_MANAGER; |
|
||||
} |
|
||||
|
|
||||
private void init() { |
|
||||
buffers = new byte[NUM_BUFFERS][]; |
|
||||
for (int i = 0; i < NUM_BUFFERS; i++) { |
|
||||
buffers[i] = new byte[BLOCK_SIZE]; // same buffers are reused. The byte array never goes back to GC |
|
||||
freeList.add(i); |
|
||||
} |
|
||||
for (int i = 0; i < NUM_THREADS; i++) { |
|
||||
Thread t = new Thread(new ReadBufferWorker(i)); |
|
||||
t.setDaemon(true); |
|
||||
threads[i] = t; |
|
||||
t.setName("SeaweedFS-prefetch-" + i); |
|
||||
t.start(); |
|
||||
} |
|
||||
ReadBufferWorker.UNLEASH_WORKERS.countDown(); |
|
||||
} |
|
||||
|
|
||||
// hide instance constructor |
|
||||
private ReadBufferManager() { |
|
||||
} |
|
||||
|
|
||||
|
|
||||
/* |
|
||||
* |
|
||||
* SeaweedInputStream-facing methods |
|
||||
* |
|
||||
*/ |
|
||||
|
|
||||
|
|
||||
/** |
|
||||
* {@link SeaweedInputStream} calls this method to queue read-aheads. |
|
||||
* |
|
||||
* @param stream The {@link SeaweedInputStream} for which to do the read-ahead |
|
||||
* @param requestedOffset The offset in the file which shoukd be read |
|
||||
* @param requestedLength The length to read |
|
||||
*/ |
|
||||
void queueReadAhead(final SeaweedInputStream stream, final long requestedOffset, final int requestedLength) { |
|
||||
if (LOGGER.isTraceEnabled()) { |
|
||||
LOGGER.trace("Start Queueing readAhead for {} offset {} length {}", |
|
||||
stream.getPath(), requestedOffset, requestedLength); |
|
||||
} |
|
||||
ReadBuffer buffer; |
|
||||
synchronized (this) { |
|
||||
if (isAlreadyQueued(stream, requestedOffset)) { |
|
||||
return; // already queued, do not queue again |
|
||||
} |
|
||||
if (freeList.isEmpty() && !tryEvict()) { |
|
||||
return; // no buffers available, cannot queue anything |
|
||||
} |
|
||||
|
|
||||
buffer = new ReadBuffer(); |
|
||||
buffer.setStream(stream); |
|
||||
buffer.setOffset(requestedOffset); |
|
||||
buffer.setLength(0); |
|
||||
buffer.setRequestedLength(requestedLength); |
|
||||
buffer.setStatus(ReadBufferStatus.NOT_AVAILABLE); |
|
||||
buffer.setLatch(new CountDownLatch(1)); |
|
||||
|
|
||||
Integer bufferIndex = freeList.pop(); // will return a value, since we have checked size > 0 already |
|
||||
|
|
||||
buffer.setBuffer(buffers[bufferIndex]); |
|
||||
buffer.setBufferindex(bufferIndex); |
|
||||
readAheadQueue.add(buffer); |
|
||||
notifyAll(); |
|
||||
} |
|
||||
if (LOGGER.isTraceEnabled()) { |
|
||||
LOGGER.trace("Done q-ing readAhead for file {} offset {} buffer idx {}", |
|
||||
stream.getPath(), requestedOffset, buffer.getBufferindex()); |
|
||||
} |
|
||||
} |
|
||||
|
|
||||
|
|
||||
/** |
|
||||
* {@link SeaweedInputStream} calls this method read any bytes already available in a buffer (thereby saving a |
|
||||
* remote read). This returns the bytes if the data already exists in buffer. If there is a buffer that is reading |
|
||||
* the requested offset, then this method blocks until that read completes. If the data is queued in a read-ahead |
|
||||
* but not picked up by a worker thread yet, then it cancels that read-ahead and reports cache miss. This is because |
|
||||
* depending on worker thread availability, the read-ahead may take a while - the calling thread can do it's own |
|
||||
* read to get the data faster (copmared to the read waiting in queue for an indeterminate amount of time). |
|
||||
* |
|
||||
* @param stream the file to read bytes for |
|
||||
* @param position the offset in the file to do a read for |
|
||||
* @param length the length to read |
|
||||
* @param buffer the buffer to read data into. Note that the buffer will be written into from offset 0. |
|
||||
* @return the number of bytes read |
|
||||
*/ |
|
||||
int getBlock(final SeaweedInputStream stream, final long position, final int length, final byte[] buffer) { |
|
||||
// not synchronized, so have to be careful with locking |
|
||||
if (LOGGER.isTraceEnabled()) { |
|
||||
LOGGER.trace("getBlock for file {} position {} thread {}", |
|
||||
stream.getPath(), position, Thread.currentThread().getName()); |
|
||||
} |
|
||||
|
|
||||
waitForProcess(stream, position); |
|
||||
|
|
||||
int bytesRead = 0; |
|
||||
synchronized (this) { |
|
||||
bytesRead = getBlockFromCompletedQueue(stream, position, length, buffer); |
|
||||
} |
|
||||
if (bytesRead > 0) { |
|
||||
if (LOGGER.isTraceEnabled()) { |
|
||||
LOGGER.trace("Done read from Cache for {} position {} length {}", |
|
||||
stream.getPath(), position, bytesRead); |
|
||||
} |
|
||||
return bytesRead; |
|
||||
} |
|
||||
|
|
||||
// otherwise, just say we got nothing - calling thread can do its own read |
|
||||
return 0; |
|
||||
} |
|
||||
|
|
||||
/* |
|
||||
* |
|
||||
* Internal methods |
|
||||
* |
|
||||
*/ |
|
||||
|
|
||||
private void waitForProcess(final SeaweedInputStream stream, final long position) { |
|
||||
ReadBuffer readBuf; |
|
||||
synchronized (this) { |
|
||||
clearFromReadAheadQueue(stream, position); |
|
||||
readBuf = getFromList(inProgressList, stream, position); |
|
||||
} |
|
||||
if (readBuf != null) { // if in in-progress queue, then block for it |
|
||||
try { |
|
||||
if (LOGGER.isTraceEnabled()) { |
|
||||
LOGGER.trace("got a relevant read buffer for file {} offset {} buffer idx {}", |
|
||||
stream.getPath(), readBuf.getOffset(), readBuf.getBufferindex()); |
|
||||
} |
|
||||
readBuf.getLatch().await(); // blocking wait on the caller stream's thread |
|
||||
// Note on correctness: readBuf gets out of inProgressList only in 1 place: after worker thread |
|
||||
// is done processing it (in doneReading). There, the latch is set after removing the buffer from |
|
||||
// inProgressList. So this latch is safe to be outside the synchronized block. |
|
||||
// Putting it in synchronized would result in a deadlock, since this thread would be holding the lock |
|
||||
// while waiting, so no one will be able to change any state. If this becomes more complex in the future, |
|
||||
// then the latch cane be removed and replaced with wait/notify whenever inProgressList is touched. |
|
||||
} catch (InterruptedException ex) { |
|
||||
Thread.currentThread().interrupt(); |
|
||||
} |
|
||||
if (LOGGER.isTraceEnabled()) { |
|
||||
LOGGER.trace("latch done for file {} buffer idx {} length {}", |
|
||||
stream.getPath(), readBuf.getBufferindex(), readBuf.getLength()); |
|
||||
} |
|
||||
} |
|
||||
} |
|
||||
|
|
||||
/** |
|
||||
* If any buffer in the completedlist can be reclaimed then reclaim it and return the buffer to free list. |
|
||||
* The objective is to find just one buffer - there is no advantage to evicting more than one. |
|
||||
* |
|
||||
* @return whether the eviction succeeeded - i.e., were we able to free up one buffer |
|
||||
*/ |
|
||||
private synchronized boolean tryEvict() { |
|
||||
ReadBuffer nodeToEvict = null; |
|
||||
if (completedReadList.size() <= 0) { |
|
||||
return false; // there are no evict-able buffers |
|
||||
} |
|
||||
|
|
||||
// first, try buffers where all bytes have been consumed (approximated as first and last bytes consumed) |
|
||||
for (ReadBuffer buf : completedReadList) { |
|
||||
if (buf.isFirstByteConsumed() && buf.isLastByteConsumed()) { |
|
||||
nodeToEvict = buf; |
|
||||
break; |
|
||||
} |
|
||||
} |
|
||||
if (nodeToEvict != null) { |
|
||||
return evict(nodeToEvict); |
|
||||
} |
|
||||
|
|
||||
// next, try buffers where any bytes have been consumed (may be a bad idea? have to experiment and see) |
|
||||
for (ReadBuffer buf : completedReadList) { |
|
||||
if (buf.isAnyByteConsumed()) { |
|
||||
nodeToEvict = buf; |
|
||||
break; |
|
||||
} |
|
||||
} |
|
||||
|
|
||||
if (nodeToEvict != null) { |
|
||||
return evict(nodeToEvict); |
|
||||
} |
|
||||
|
|
||||
// next, try any old nodes that have not been consumed |
|
||||
long earliestBirthday = Long.MAX_VALUE; |
|
||||
for (ReadBuffer buf : completedReadList) { |
|
||||
if (buf.getTimeStamp() < earliestBirthday) { |
|
||||
nodeToEvict = buf; |
|
||||
earliestBirthday = buf.getTimeStamp(); |
|
||||
} |
|
||||
} |
|
||||
if ((currentTimeMillis() - earliestBirthday > THRESHOLD_AGE_MILLISECONDS) && (nodeToEvict != null)) { |
|
||||
return evict(nodeToEvict); |
|
||||
} |
|
||||
|
|
||||
// nothing can be evicted |
|
||||
return false; |
|
||||
} |
|
||||
|
|
||||
private boolean evict(final ReadBuffer buf) { |
|
||||
freeList.push(buf.getBufferindex()); |
|
||||
completedReadList.remove(buf); |
|
||||
if (LOGGER.isTraceEnabled()) { |
|
||||
LOGGER.trace("Evicting buffer idx {}; was used for file {} offset {} length {}", |
|
||||
buf.getBufferindex(), buf.getStream().getPath(), buf.getOffset(), buf.getLength()); |
|
||||
} |
|
||||
return true; |
|
||||
} |
|
||||
|
|
||||
private boolean isAlreadyQueued(final SeaweedInputStream stream, final long requestedOffset) { |
|
||||
// returns true if any part of the buffer is already queued |
|
||||
return (isInList(readAheadQueue, stream, requestedOffset) |
|
||||
|| isInList(inProgressList, stream, requestedOffset) |
|
||||
|| isInList(completedReadList, stream, requestedOffset)); |
|
||||
} |
|
||||
|
|
||||
private boolean isInList(final Collection<ReadBuffer> list, final SeaweedInputStream stream, final long requestedOffset) { |
|
||||
return (getFromList(list, stream, requestedOffset) != null); |
|
||||
} |
|
||||
|
|
||||
private ReadBuffer getFromList(final Collection<ReadBuffer> list, final SeaweedInputStream stream, final long requestedOffset) { |
|
||||
for (ReadBuffer buffer : list) { |
|
||||
if (buffer.getStream() == stream) { |
|
||||
if (buffer.getStatus() == ReadBufferStatus.AVAILABLE |
|
||||
&& requestedOffset >= buffer.getOffset() |
|
||||
&& requestedOffset < buffer.getOffset() + buffer.getLength()) { |
|
||||
return buffer; |
|
||||
} else if (requestedOffset >= buffer.getOffset() |
|
||||
&& requestedOffset < buffer.getOffset() + buffer.getRequestedLength()) { |
|
||||
return buffer; |
|
||||
} |
|
||||
} |
|
||||
} |
|
||||
return null; |
|
||||
} |
|
||||
|
|
||||
private void clearFromReadAheadQueue(final SeaweedInputStream stream, final long requestedOffset) { |
|
||||
ReadBuffer buffer = getFromList(readAheadQueue, stream, requestedOffset); |
|
||||
if (buffer != null) { |
|
||||
readAheadQueue.remove(buffer); |
|
||||
notifyAll(); // lock is held in calling method |
|
||||
freeList.push(buffer.getBufferindex()); |
|
||||
} |
|
||||
} |
|
||||
|
|
||||
private int getBlockFromCompletedQueue(final SeaweedInputStream stream, final long position, final int length, |
|
||||
final byte[] buffer) { |
|
||||
ReadBuffer buf = getFromList(completedReadList, stream, position); |
|
||||
if (buf == null || position >= buf.getOffset() + buf.getLength()) { |
|
||||
return 0; |
|
||||
} |
|
||||
int cursor = (int) (position - buf.getOffset()); |
|
||||
int availableLengthInBuffer = buf.getLength() - cursor; |
|
||||
int lengthToCopy = Math.min(length, availableLengthInBuffer); |
|
||||
System.arraycopy(buf.getBuffer(), cursor, buffer, 0, lengthToCopy); |
|
||||
if (cursor == 0) { |
|
||||
buf.setFirstByteConsumed(true); |
|
||||
} |
|
||||
if (cursor + lengthToCopy == buf.getLength()) { |
|
||||
buf.setLastByteConsumed(true); |
|
||||
} |
|
||||
buf.setAnyByteConsumed(true); |
|
||||
return lengthToCopy; |
|
||||
} |
|
||||
|
|
||||
/* |
|
||||
* |
|
||||
* ReadBufferWorker-thread-facing methods |
|
||||
* |
|
||||
*/ |
|
||||
|
|
||||
/** |
|
||||
* ReadBufferWorker thread calls this to get the next buffer that it should work on. |
|
||||
* |
|
||||
* @return {@link ReadBuffer} |
|
||||
* @throws InterruptedException if thread is interrupted |
|
||||
*/ |
|
||||
ReadBuffer getNextBlockToRead() throws InterruptedException { |
|
||||
ReadBuffer buffer = null; |
|
||||
synchronized (this) { |
|
||||
//buffer = readAheadQueue.take(); // blocking method |
|
||||
while (readAheadQueue.size() == 0) { |
|
||||
wait(); |
|
||||
} |
|
||||
buffer = readAheadQueue.remove(); |
|
||||
notifyAll(); |
|
||||
if (buffer == null) { |
|
||||
return null; // should never happen |
|
||||
} |
|
||||
buffer.setStatus(ReadBufferStatus.READING_IN_PROGRESS); |
|
||||
inProgressList.add(buffer); |
|
||||
} |
|
||||
if (LOGGER.isTraceEnabled()) { |
|
||||
LOGGER.trace("ReadBufferWorker picked file {} for offset {}", |
|
||||
buffer.getStream().getPath(), buffer.getOffset()); |
|
||||
} |
|
||||
return buffer; |
|
||||
} |
|
||||
|
|
||||
/** |
|
||||
* ReadBufferWorker thread calls this method to post completion. |
|
||||
* |
|
||||
* @param buffer the buffer whose read was completed |
|
||||
* @param result the {@link ReadBufferStatus} after the read operation in the worker thread |
|
||||
* @param bytesActuallyRead the number of bytes that the worker thread was actually able to read |
|
||||
*/ |
|
||||
void doneReading(final ReadBuffer buffer, final ReadBufferStatus result, final int bytesActuallyRead) { |
|
||||
if (LOGGER.isTraceEnabled()) { |
|
||||
LOGGER.trace("ReadBufferWorker completed file {} for offset {} bytes {}", |
|
||||
buffer.getStream().getPath(), buffer.getOffset(), bytesActuallyRead); |
|
||||
} |
|
||||
synchronized (this) { |
|
||||
inProgressList.remove(buffer); |
|
||||
if (result == ReadBufferStatus.AVAILABLE && bytesActuallyRead > 0) { |
|
||||
buffer.setStatus(ReadBufferStatus.AVAILABLE); |
|
||||
buffer.setTimeStamp(currentTimeMillis()); |
|
||||
buffer.setLength(bytesActuallyRead); |
|
||||
completedReadList.add(buffer); |
|
||||
} else { |
|
||||
freeList.push(buffer.getBufferindex()); |
|
||||
// buffer should go out of scope after the end of the calling method in ReadBufferWorker, and eligible for GC |
|
||||
} |
|
||||
} |
|
||||
//outside the synchronized, since anyone receiving a wake-up from the latch must see safe-published results |
|
||||
buffer.getLatch().countDown(); // wake up waiting threads (if any) |
|
||||
} |
|
||||
|
|
||||
/** |
|
||||
* Similar to System.currentTimeMillis, except implemented with System.nanoTime(). |
|
||||
* System.currentTimeMillis can go backwards when system clock is changed (e.g., with NTP time synchronization), |
|
||||
* making it unsuitable for measuring time intervals. nanotime is strictly monotonically increasing per CPU core. |
|
||||
* Note: it is not monotonic across Sockets, and even within a CPU, its only the |
|
||||
* more recent parts which share a clock across all cores. |
|
||||
* |
|
||||
* @return current time in milliseconds |
|
||||
*/ |
|
||||
private long currentTimeMillis() { |
|
||||
return System.nanoTime() / 1000 / 1000; |
|
||||
} |
|
||||
} |
|
@ -1,29 +0,0 @@ |
|||||
/** |
|
||||
* Licensed to the Apache Software Foundation (ASF) under one |
|
||||
* or more contributor license agreements. See the NOTICE file |
|
||||
* distributed with this work for additional information |
|
||||
* regarding copyright ownership. The ASF licenses this file |
|
||||
* to you under the Apache License, Version 2.0 (the |
|
||||
* "License"); you may not use this file except in compliance |
|
||||
* with the License. You may obtain a copy of the License at |
|
||||
* |
|
||||
* http://www.apache.org/licenses/LICENSE-2.0 |
|
||||
* |
|
||||
* Unless required by applicable law or agreed to in writing, software |
|
||||
* distributed under the License is distributed on an "AS IS" BASIS, |
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|
||||
* See the License for the specific language governing permissions and |
|
||||
* limitations under the License. |
|
||||
*/ |
|
||||
|
|
||||
package seaweed.hdfs; |
|
||||
|
|
||||
/**
 * Lifecycle states of a read-ahead buffer.
 */
public enum ReadBufferStatus {
  /** Buffers sitting in the read-ahead queue have this status. */
  NOT_AVAILABLE,
  /** Reading is in progress on this buffer. Buffer should be in inProgressList. */
  READING_IN_PROGRESS,
  /** Data is available in buffer. It should be in completedList. */
  AVAILABLE,
  /** Read completed, but failed. */
  READ_FAILED
}
|
@ -1,70 +0,0 @@ |
|||||
/** |
|
||||
* Licensed to the Apache Software Foundation (ASF) under one |
|
||||
* or more contributor license agreements. See the NOTICE file |
|
||||
* distributed with this work for additional information |
|
||||
* regarding copyright ownership. The ASF licenses this file |
|
||||
* to you under the Apache License, Version 2.0 (the |
|
||||
* "License"); you may not use this file except in compliance |
|
||||
* with the License. You may obtain a copy of the License at |
|
||||
* |
|
||||
* http://www.apache.org/licenses/LICENSE-2.0 |
|
||||
* |
|
||||
* Unless required by applicable law or agreed to in writing, software |
|
||||
* distributed under the License is distributed on an "AS IS" BASIS, |
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|
||||
* See the License for the specific language governing permissions and |
|
||||
* limitations under the License. |
|
||||
*/ |
|
||||
|
|
||||
package seaweed.hdfs; |
|
||||
|
|
||||
import java.util.concurrent.CountDownLatch; |
|
||||
|
|
||||
class ReadBufferWorker implements Runnable { |
|
||||
|
|
||||
protected static final CountDownLatch UNLEASH_WORKERS = new CountDownLatch(1); |
|
||||
private int id; |
|
||||
|
|
||||
ReadBufferWorker(final int id) { |
|
||||
this.id = id; |
|
||||
} |
|
||||
|
|
||||
/** |
|
||||
* return the ID of ReadBufferWorker. |
|
||||
*/ |
|
||||
public int getId() { |
|
||||
return this.id; |
|
||||
} |
|
||||
|
|
||||
/** |
|
||||
* Waits until a buffer becomes available in ReadAheadQueue. |
|
||||
* Once a buffer becomes available, reads the file specified in it and then posts results back to buffer manager. |
|
||||
* Rinse and repeat. Forever. |
|
||||
*/ |
|
||||
public void run() { |
|
||||
try { |
|
||||
UNLEASH_WORKERS.await(); |
|
||||
} catch (InterruptedException ex) { |
|
||||
Thread.currentThread().interrupt(); |
|
||||
} |
|
||||
ReadBufferManager bufferManager = ReadBufferManager.getBufferManager(); |
|
||||
ReadBuffer buffer; |
|
||||
while (true) { |
|
||||
try { |
|
||||
buffer = bufferManager.getNextBlockToRead(); // blocks, until a buffer is available for this thread |
|
||||
} catch (InterruptedException ex) { |
|
||||
Thread.currentThread().interrupt(); |
|
||||
return; |
|
||||
} |
|
||||
if (buffer != null) { |
|
||||
try { |
|
||||
// do the actual read, from the file. |
|
||||
int bytesRead = buffer.getStream().readRemote(buffer.getOffset(), buffer.getBuffer(), 0, buffer.getRequestedLength()); |
|
||||
bufferManager.doneReading(buffer, ReadBufferStatus.AVAILABLE, bytesRead); // post result back to ReadBufferManager |
|
||||
} catch (Exception ex) { |
|
||||
bufferManager.doneReading(buffer, ReadBufferStatus.READ_FAILED, 0); |
|
||||
} |
|
||||
} |
|
||||
} |
|
||||
} |
|
||||
} |
|
Write
Preview
Loading…
Cancel
Save
Reference in new issue