Chris Lu
4 years ago
7 changed files with 317 additions and 343 deletions
-
93other/java/client/src/main/java/seaweedfs/client/SeaweedInputStream.java
-
4other/java/examples/src/main/java/com/seaweedfs/examples/UnzipFile.java
-
2other/java/hdfs2/src/main/java/seaweed/hdfs/SeaweedFileSystemStore.java
-
150other/java/hdfs2/src/main/java/seaweed/hdfs/SeaweedHadoopInputStream.java
-
2other/java/hdfs3/src/main/java/seaweed/hdfs/SeaweedFileSystemStore.java
-
150other/java/hdfs3/src/main/java/seaweed/hdfs/SeaweedHadoopInputStream.java
-
259other/java/hdfs3/src/main/java/seaweed/hdfs/SeaweedInputStream.java
@ -0,0 +1,150 @@ |
|||
package seaweed.hdfs; |
|||
|
|||
// based on org.apache.hadoop.fs.azurebfs.services.AbfsInputStream |
|||
|
|||
import org.apache.hadoop.fs.ByteBufferReadable; |
|||
import org.apache.hadoop.fs.FSInputStream; |
|||
import org.apache.hadoop.fs.FileSystem.Statistics; |
|||
import seaweedfs.client.FilerGrpcClient; |
|||
import seaweedfs.client.FilerProto; |
|||
import seaweedfs.client.SeaweedInputStream; |
|||
|
|||
import java.io.EOFException; |
|||
import java.io.IOException; |
|||
import java.nio.ByteBuffer; |
|||
|
|||
public class SeaweedHadoopInputStream extends FSInputStream implements ByteBufferReadable { |
|||
|
|||
private final SeaweedInputStream seaweedInputStream; |
|||
private final Statistics statistics; |
|||
|
|||
public SeaweedHadoopInputStream( |
|||
final FilerGrpcClient filerGrpcClient, |
|||
final Statistics statistics, |
|||
final String path, |
|||
final FilerProto.Entry entry) throws IOException { |
|||
this.seaweedInputStream = new SeaweedInputStream(filerGrpcClient, path, entry); |
|||
this.statistics = statistics; |
|||
} |
|||
|
|||
@Override |
|||
public int read() throws IOException { |
|||
return seaweedInputStream.read(); |
|||
} |
|||
|
|||
@Override |
|||
public int read(final byte[] b, final int off, final int len) throws IOException { |
|||
return seaweedInputStream.read(b, off, len); |
|||
} |
|||
|
|||
// implement ByteBufferReadable |
|||
@Override |
|||
public synchronized int read(ByteBuffer buf) throws IOException { |
|||
int bytesRead = seaweedInputStream.read(buf); |
|||
|
|||
if (bytesRead > 0) { |
|||
if (statistics != null) { |
|||
statistics.incrementBytesRead(bytesRead); |
|||
} |
|||
} |
|||
|
|||
return bytesRead; |
|||
} |
|||
|
|||
/** |
|||
* Seek to given position in stream. |
|||
* |
|||
* @param n position to seek to |
|||
* @throws IOException if there is an error |
|||
* @throws EOFException if attempting to seek past end of file |
|||
*/ |
|||
@Override |
|||
public synchronized void seek(long n) throws IOException { |
|||
seaweedInputStream.seek(n); |
|||
} |
|||
|
|||
@Override |
|||
public synchronized long skip(long n) throws IOException { |
|||
return seaweedInputStream.skip(n); |
|||
} |
|||
|
|||
/** |
|||
* Return the size of the remaining available bytes |
|||
* if the size is less than or equal to {@link Integer#MAX_VALUE}, |
|||
* otherwise, return {@link Integer#MAX_VALUE}. |
|||
* <p> |
|||
* This is to match the behavior of DFSInputStream.available(), |
|||
* which some clients may rely on (HBase write-ahead log reading in |
|||
* particular). |
|||
*/ |
|||
@Override |
|||
public synchronized int available() throws IOException { |
|||
return seaweedInputStream.available(); |
|||
} |
|||
|
|||
/** |
|||
* Returns the length of the file that this stream refers to. Note that the length returned is the length |
|||
* as of the time the Stream was opened. Specifically, if there have been subsequent appends to the file, |
|||
* they wont be reflected in the returned length. |
|||
* |
|||
* @return length of the file. |
|||
* @throws IOException if the stream is closed |
|||
*/ |
|||
public long length() throws IOException { |
|||
return seaweedInputStream.length(); |
|||
} |
|||
|
|||
/** |
|||
* Return the current offset from the start of the file |
|||
* |
|||
* @throws IOException throws {@link IOException} if there is an error |
|||
*/ |
|||
@Override |
|||
public synchronized long getPos() throws IOException { |
|||
return seaweedInputStream.getPos(); |
|||
} |
|||
|
|||
/** |
|||
* Seeks a different copy of the data. Returns true if |
|||
* found a new source, false otherwise. |
|||
* |
|||
* @throws IOException throws {@link IOException} if there is an error |
|||
*/ |
|||
@Override |
|||
public boolean seekToNewSource(long l) throws IOException { |
|||
return false; |
|||
} |
|||
|
|||
@Override |
|||
public synchronized void close() throws IOException { |
|||
seaweedInputStream.close(); |
|||
} |
|||
|
|||
/** |
|||
* Not supported by this stream. Throws {@link UnsupportedOperationException} |
|||
* |
|||
* @param readlimit ignored |
|||
*/ |
|||
@Override |
|||
public synchronized void mark(int readlimit) { |
|||
throw new UnsupportedOperationException("mark()/reset() not supported on this stream"); |
|||
} |
|||
|
|||
/** |
|||
* Not supported by this stream. Throws {@link UnsupportedOperationException} |
|||
*/ |
|||
@Override |
|||
public synchronized void reset() throws IOException { |
|||
throw new UnsupportedOperationException("mark()/reset() not supported on this stream"); |
|||
} |
|||
|
|||
/** |
|||
* gets whether mark and reset are supported by {@code ADLFileInputStream}. Always returns false. |
|||
* |
|||
* @return always {@code false} |
|||
*/ |
|||
@Override |
|||
public boolean markSupported() { |
|||
return false; |
|||
} |
|||
} |
@ -0,0 +1,150 @@ |
|||
package seaweed.hdfs; |
|||
|
|||
// based on org.apache.hadoop.fs.azurebfs.services.AbfsInputStream |
|||
|
|||
import org.apache.hadoop.fs.ByteBufferReadable; |
|||
import org.apache.hadoop.fs.FSInputStream; |
|||
import org.apache.hadoop.fs.FileSystem.Statistics; |
|||
import seaweedfs.client.FilerGrpcClient; |
|||
import seaweedfs.client.FilerProto; |
|||
import seaweedfs.client.SeaweedInputStream; |
|||
|
|||
import java.io.EOFException; |
|||
import java.io.IOException; |
|||
import java.nio.ByteBuffer; |
|||
|
|||
public class SeaweedHadoopInputStream extends FSInputStream implements ByteBufferReadable { |
|||
|
|||
private final SeaweedInputStream seaweedInputStream; |
|||
private final Statistics statistics; |
|||
|
|||
public SeaweedHadoopInputStream( |
|||
final FilerGrpcClient filerGrpcClient, |
|||
final Statistics statistics, |
|||
final String path, |
|||
final FilerProto.Entry entry) throws IOException { |
|||
this.seaweedInputStream = new SeaweedInputStream(filerGrpcClient, path, entry); |
|||
this.statistics = statistics; |
|||
} |
|||
|
|||
@Override |
|||
public int read() throws IOException { |
|||
return seaweedInputStream.read(); |
|||
} |
|||
|
|||
@Override |
|||
public int read(final byte[] b, final int off, final int len) throws IOException { |
|||
return seaweedInputStream.read(b, off, len); |
|||
} |
|||
|
|||
// implement ByteBufferReadable |
|||
@Override |
|||
public synchronized int read(ByteBuffer buf) throws IOException { |
|||
int bytesRead = seaweedInputStream.read(buf); |
|||
|
|||
if (bytesRead > 0) { |
|||
if (statistics != null) { |
|||
statistics.incrementBytesRead(bytesRead); |
|||
} |
|||
} |
|||
|
|||
return bytesRead; |
|||
} |
|||
|
|||
/** |
|||
* Seek to given position in stream. |
|||
* |
|||
* @param n position to seek to |
|||
* @throws IOException if there is an error |
|||
* @throws EOFException if attempting to seek past end of file |
|||
*/ |
|||
@Override |
|||
public synchronized void seek(long n) throws IOException { |
|||
seaweedInputStream.seek(n); |
|||
} |
|||
|
|||
@Override |
|||
public synchronized long skip(long n) throws IOException { |
|||
return seaweedInputStream.skip(n); |
|||
} |
|||
|
|||
/** |
|||
* Return the size of the remaining available bytes |
|||
* if the size is less than or equal to {@link Integer#MAX_VALUE}, |
|||
* otherwise, return {@link Integer#MAX_VALUE}. |
|||
* <p> |
|||
* This is to match the behavior of DFSInputStream.available(), |
|||
* which some clients may rely on (HBase write-ahead log reading in |
|||
* particular). |
|||
*/ |
|||
@Override |
|||
public synchronized int available() throws IOException { |
|||
return seaweedInputStream.available(); |
|||
} |
|||
|
|||
/** |
|||
* Returns the length of the file that this stream refers to. Note that the length returned is the length |
|||
* as of the time the Stream was opened. Specifically, if there have been subsequent appends to the file, |
|||
* they wont be reflected in the returned length. |
|||
* |
|||
* @return length of the file. |
|||
* @throws IOException if the stream is closed |
|||
*/ |
|||
public long length() throws IOException { |
|||
return seaweedInputStream.length(); |
|||
} |
|||
|
|||
/** |
|||
* Return the current offset from the start of the file |
|||
* |
|||
* @throws IOException throws {@link IOException} if there is an error |
|||
*/ |
|||
@Override |
|||
public synchronized long getPos() throws IOException { |
|||
return seaweedInputStream.getPos(); |
|||
} |
|||
|
|||
/** |
|||
* Seeks a different copy of the data. Returns true if |
|||
* found a new source, false otherwise. |
|||
* |
|||
* @throws IOException throws {@link IOException} if there is an error |
|||
*/ |
|||
@Override |
|||
public boolean seekToNewSource(long l) throws IOException { |
|||
return false; |
|||
} |
|||
|
|||
@Override |
|||
public synchronized void close() throws IOException { |
|||
seaweedInputStream.close(); |
|||
} |
|||
|
|||
/** |
|||
* Not supported by this stream. Throws {@link UnsupportedOperationException} |
|||
* |
|||
* @param readlimit ignored |
|||
*/ |
|||
@Override |
|||
public synchronized void mark(int readlimit) { |
|||
throw new UnsupportedOperationException("mark()/reset() not supported on this stream"); |
|||
} |
|||
|
|||
/** |
|||
* Not supported by this stream. Throws {@link UnsupportedOperationException} |
|||
*/ |
|||
@Override |
|||
public synchronized void reset() throws IOException { |
|||
throw new UnsupportedOperationException("mark()/reset() not supported on this stream"); |
|||
} |
|||
|
|||
/** |
|||
* gets whether mark and reset are supported by {@code ADLFileInputStream}. Always returns false. |
|||
* |
|||
* @return always {@code false} |
|||
*/ |
|||
@Override |
|||
public boolean markSupported() { |
|||
return false; |
|||
} |
|||
} |
@ -1,259 +0,0 @@ |
|||
package seaweed.hdfs; |
|||
|
|||
// based on org.apache.hadoop.fs.azurebfs.services.AbfsInputStream |
|||
|
|||
import org.apache.hadoop.fs.ByteBufferReadable; |
|||
import org.apache.hadoop.fs.FSExceptionMessages; |
|||
import org.apache.hadoop.fs.FSInputStream; |
|||
import org.apache.hadoop.fs.FileSystem.Statistics; |
|||
import org.slf4j.Logger; |
|||
import org.slf4j.LoggerFactory; |
|||
import seaweedfs.client.FilerGrpcClient; |
|||
import seaweedfs.client.FilerProto; |
|||
import seaweedfs.client.SeaweedRead; |
|||
|
|||
import java.io.EOFException; |
|||
import java.io.IOException; |
|||
import java.nio.ByteBuffer; |
|||
import java.util.List; |
|||
|
|||
public class SeaweedInputStream extends FSInputStream implements ByteBufferReadable { |
|||
|
|||
private static final Logger LOG = LoggerFactory.getLogger(SeaweedInputStream.class); |
|||
|
|||
private final FilerGrpcClient filerGrpcClient; |
|||
private final Statistics statistics; |
|||
private final String path; |
|||
private final FilerProto.Entry entry; |
|||
private final List<SeaweedRead.VisibleInterval> visibleIntervalList; |
|||
private final long contentLength; |
|||
|
|||
private long position = 0; // cursor of the file |
|||
|
|||
private boolean closed = false; |
|||
|
|||
public SeaweedInputStream( |
|||
final FilerGrpcClient filerGrpcClient, |
|||
final Statistics statistics, |
|||
final String path, |
|||
final FilerProto.Entry entry) throws IOException { |
|||
this.filerGrpcClient = filerGrpcClient; |
|||
this.statistics = statistics; |
|||
this.path = path; |
|||
this.entry = entry; |
|||
this.contentLength = SeaweedRead.fileSize(entry); |
|||
|
|||
this.visibleIntervalList = SeaweedRead.nonOverlappingVisibleIntervals(filerGrpcClient, entry.getChunksList()); |
|||
|
|||
LOG.debug("new path:{} entry:{} visibleIntervalList:{}", path, entry, visibleIntervalList); |
|||
|
|||
} |
|||
|
|||
public String getPath() { |
|||
return path; |
|||
} |
|||
|
|||
@Override |
|||
public int read() throws IOException { |
|||
byte[] b = new byte[1]; |
|||
int numberOfBytesRead = read(b, 0, 1); |
|||
if (numberOfBytesRead < 0) { |
|||
return -1; |
|||
} else { |
|||
return (b[0] & 0xFF); |
|||
} |
|||
} |
|||
|
|||
@Override |
|||
public int read(final byte[] b, final int off, final int len) throws IOException { |
|||
|
|||
if (b == null) { |
|||
throw new IllegalArgumentException("null byte array passed in to read() method"); |
|||
} |
|||
if (off >= b.length) { |
|||
throw new IllegalArgumentException("offset greater than length of array"); |
|||
} |
|||
if (len < 0) { |
|||
throw new IllegalArgumentException("requested read length is less than zero"); |
|||
} |
|||
if (len > (b.length - off)) { |
|||
throw new IllegalArgumentException("requested read length is more than will fit after requested offset in buffer"); |
|||
} |
|||
|
|||
ByteBuffer buf = ByteBuffer.wrap(b, off, len); |
|||
return read(buf); |
|||
|
|||
} |
|||
|
|||
// implement ByteBufferReadable |
|||
@Override |
|||
public synchronized int read(ByteBuffer buf) throws IOException { |
|||
|
|||
if (position < 0) { |
|||
throw new IllegalArgumentException("attempting to read from negative offset"); |
|||
} |
|||
if (position >= contentLength) { |
|||
return -1; // Hadoop prefers -1 to EOFException |
|||
} |
|||
|
|||
long bytesRead = 0; |
|||
int len = buf.remaining(); |
|||
int start = (int) this.position; |
|||
if (start+len <= entry.getContent().size()) { |
|||
entry.getContent().substring(start, start+len).copyTo(buf); |
|||
} else { |
|||
bytesRead = SeaweedRead.read(this.filerGrpcClient, this.visibleIntervalList, this.position, buf, SeaweedRead.fileSize(entry)); |
|||
} |
|||
|
|||
if (bytesRead > Integer.MAX_VALUE) { |
|||
throw new IOException("Unexpected Content-Length"); |
|||
} |
|||
|
|||
if (bytesRead > 0) { |
|||
this.position += bytesRead; |
|||
if (statistics != null) { |
|||
statistics.incrementBytesRead(bytesRead); |
|||
} |
|||
} |
|||
|
|||
return (int) bytesRead; |
|||
} |
|||
|
|||
/** |
|||
* Seek to given position in stream. |
|||
* |
|||
* @param n position to seek to |
|||
* @throws IOException if there is an error |
|||
* @throws EOFException if attempting to seek past end of file |
|||
*/ |
|||
@Override |
|||
public synchronized void seek(long n) throws IOException { |
|||
if (closed) { |
|||
throw new IOException(FSExceptionMessages.STREAM_IS_CLOSED); |
|||
} |
|||
if (n < 0) { |
|||
throw new EOFException(FSExceptionMessages.NEGATIVE_SEEK); |
|||
} |
|||
if (n > contentLength) { |
|||
throw new EOFException(FSExceptionMessages.CANNOT_SEEK_PAST_EOF); |
|||
} |
|||
|
|||
this.position = n; |
|||
|
|||
} |
|||
|
|||
@Override |
|||
public synchronized long skip(long n) throws IOException { |
|||
if (closed) { |
|||
throw new IOException(FSExceptionMessages.STREAM_IS_CLOSED); |
|||
} |
|||
if (this.position == contentLength) { |
|||
if (n > 0) { |
|||
throw new EOFException(FSExceptionMessages.CANNOT_SEEK_PAST_EOF); |
|||
} |
|||
} |
|||
long newPos = this.position + n; |
|||
if (newPos < 0) { |
|||
newPos = 0; |
|||
n = newPos - this.position; |
|||
} |
|||
if (newPos > contentLength) { |
|||
newPos = contentLength; |
|||
n = newPos - this.position; |
|||
} |
|||
seek(newPos); |
|||
return n; |
|||
} |
|||
|
|||
/** |
|||
* Return the size of the remaining available bytes |
|||
* if the size is less than or equal to {@link Integer#MAX_VALUE}, |
|||
* otherwise, return {@link Integer#MAX_VALUE}. |
|||
* <p> |
|||
* This is to match the behavior of DFSInputStream.available(), |
|||
* which some clients may rely on (HBase write-ahead log reading in |
|||
* particular). |
|||
*/ |
|||
@Override |
|||
public synchronized int available() throws IOException { |
|||
if (closed) { |
|||
throw new IOException( |
|||
FSExceptionMessages.STREAM_IS_CLOSED); |
|||
} |
|||
final long remaining = this.contentLength - this.getPos(); |
|||
return remaining <= Integer.MAX_VALUE |
|||
? (int) remaining : Integer.MAX_VALUE; |
|||
} |
|||
|
|||
/** |
|||
* Returns the length of the file that this stream refers to. Note that the length returned is the length |
|||
* as of the time the Stream was opened. Specifically, if there have been subsequent appends to the file, |
|||
* they wont be reflected in the returned length. |
|||
* |
|||
* @return length of the file. |
|||
* @throws IOException if the stream is closed |
|||
*/ |
|||
public long length() throws IOException { |
|||
if (closed) { |
|||
throw new IOException(FSExceptionMessages.STREAM_IS_CLOSED); |
|||
} |
|||
return contentLength; |
|||
} |
|||
|
|||
/** |
|||
* Return the current offset from the start of the file |
|||
* |
|||
* @throws IOException throws {@link IOException} if there is an error |
|||
*/ |
|||
@Override |
|||
public synchronized long getPos() throws IOException { |
|||
if (closed) { |
|||
throw new IOException(FSExceptionMessages.STREAM_IS_CLOSED); |
|||
} |
|||
return position; |
|||
} |
|||
|
|||
/** |
|||
* Seeks a different copy of the data. Returns true if |
|||
* found a new source, false otherwise. |
|||
* |
|||
* @throws IOException throws {@link IOException} if there is an error |
|||
*/ |
|||
@Override |
|||
public boolean seekToNewSource(long l) throws IOException { |
|||
return false; |
|||
} |
|||
|
|||
@Override |
|||
public synchronized void close() throws IOException { |
|||
closed = true; |
|||
} |
|||
|
|||
/** |
|||
* Not supported by this stream. Throws {@link UnsupportedOperationException} |
|||
* |
|||
* @param readlimit ignored |
|||
*/ |
|||
@Override |
|||
public synchronized void mark(int readlimit) { |
|||
throw new UnsupportedOperationException("mark()/reset() not supported on this stream"); |
|||
} |
|||
|
|||
/** |
|||
* Not supported by this stream. Throws {@link UnsupportedOperationException} |
|||
*/ |
|||
@Override |
|||
public synchronized void reset() throws IOException { |
|||
throw new UnsupportedOperationException("mark()/reset() not supported on this stream"); |
|||
} |
|||
|
|||
/** |
|||
* gets whether mark and reset are supported by {@code ADLFileInputStream}. Always returns false. |
|||
* |
|||
* @return always {@code false} |
|||
*/ |
|||
@Override |
|||
public boolean markSupported() { |
|||
return false; |
|||
} |
|||
} |
Write
Preview
Loading…
Cancel
Save
Reference in new issue