7 changed files with 317 additions and 343 deletions
			
			
		- 
					93other/java/client/src/main/java/seaweedfs/client/SeaweedInputStream.java
- 
					4other/java/examples/src/main/java/com/seaweedfs/examples/UnzipFile.java
- 
					2other/java/hdfs2/src/main/java/seaweed/hdfs/SeaweedFileSystemStore.java
- 
					150other/java/hdfs2/src/main/java/seaweed/hdfs/SeaweedHadoopInputStream.java
- 
					2other/java/hdfs3/src/main/java/seaweed/hdfs/SeaweedFileSystemStore.java
- 
					150other/java/hdfs3/src/main/java/seaweed/hdfs/SeaweedHadoopInputStream.java
- 
					259other/java/hdfs3/src/main/java/seaweed/hdfs/SeaweedInputStream.java
| @ -0,0 +1,150 @@ | |||
| package seaweed.hdfs; | |||
| 
 | |||
| // based on org.apache.hadoop.fs.azurebfs.services.AbfsInputStream | |||
| 
 | |||
| import org.apache.hadoop.fs.ByteBufferReadable; | |||
| import org.apache.hadoop.fs.FSInputStream; | |||
| import org.apache.hadoop.fs.FileSystem.Statistics; | |||
| import seaweedfs.client.FilerGrpcClient; | |||
| import seaweedfs.client.FilerProto; | |||
| import seaweedfs.client.SeaweedInputStream; | |||
| 
 | |||
| import java.io.EOFException; | |||
| import java.io.IOException; | |||
| import java.nio.ByteBuffer; | |||
| 
 | |||
| public class SeaweedHadoopInputStream extends FSInputStream implements ByteBufferReadable { | |||
| 
 | |||
|     private final SeaweedInputStream seaweedInputStream; | |||
|     private final Statistics statistics; | |||
| 
 | |||
|     public SeaweedHadoopInputStream( | |||
|             final FilerGrpcClient filerGrpcClient, | |||
|             final Statistics statistics, | |||
|             final String path, | |||
|             final FilerProto.Entry entry) throws IOException { | |||
|         this.seaweedInputStream = new SeaweedInputStream(filerGrpcClient, path, entry); | |||
|         this.statistics = statistics; | |||
|     } | |||
| 
 | |||
|     @Override | |||
|     public int read() throws IOException { | |||
|         return seaweedInputStream.read(); | |||
|     } | |||
| 
 | |||
|     @Override | |||
|     public int read(final byte[] b, final int off, final int len) throws IOException { | |||
|         return seaweedInputStream.read(b, off, len); | |||
|     } | |||
| 
 | |||
|     // implement ByteBufferReadable | |||
|     @Override | |||
|     public synchronized int read(ByteBuffer buf) throws IOException { | |||
|         int bytesRead = seaweedInputStream.read(buf); | |||
| 
 | |||
|         if (bytesRead > 0) { | |||
|             if (statistics != null) { | |||
|                 statistics.incrementBytesRead(bytesRead); | |||
|             } | |||
|         } | |||
| 
 | |||
|         return bytesRead; | |||
|     } | |||
| 
 | |||
|     /** | |||
|      * Seek to given position in stream. | |||
|      * | |||
|      * @param n position to seek to | |||
|      * @throws IOException  if there is an error | |||
|      * @throws EOFException if attempting to seek past end of file | |||
|      */ | |||
|     @Override | |||
|     public synchronized void seek(long n) throws IOException { | |||
|         seaweedInputStream.seek(n); | |||
|     } | |||
| 
 | |||
|     @Override | |||
|     public synchronized long skip(long n) throws IOException { | |||
|         return seaweedInputStream.skip(n); | |||
|     } | |||
| 
 | |||
|     /** | |||
|      * Return the size of the remaining available bytes | |||
|      * if the size is less than or equal to {@link Integer#MAX_VALUE}, | |||
|      * otherwise, return {@link Integer#MAX_VALUE}. | |||
|      * <p> | |||
|      * This is to match the behavior of DFSInputStream.available(), | |||
|      * which some clients may rely on (HBase write-ahead log reading in | |||
|      * particular). | |||
|      */ | |||
|     @Override | |||
|     public synchronized int available() throws IOException { | |||
|         return seaweedInputStream.available(); | |||
|     } | |||
| 
 | |||
|     /** | |||
|      * Returns the length of the file that this stream refers to. Note that the length returned is the length | |||
|      * as of the time the Stream was opened. Specifically, if there have been subsequent appends to the file, | |||
|      * they wont be reflected in the returned length. | |||
|      * | |||
|      * @return length of the file. | |||
|      * @throws IOException if the stream is closed | |||
|      */ | |||
|     public long length() throws IOException { | |||
|         return seaweedInputStream.length(); | |||
|     } | |||
| 
 | |||
|     /** | |||
|      * Return the current offset from the start of the file | |||
|      * | |||
|      * @throws IOException throws {@link IOException} if there is an error | |||
|      */ | |||
|     @Override | |||
|     public synchronized long getPos() throws IOException { | |||
|         return seaweedInputStream.getPos(); | |||
|     } | |||
| 
 | |||
|     /** | |||
|      * Seeks a different copy of the data.  Returns true if | |||
|      * found a new source, false otherwise. | |||
|      * | |||
|      * @throws IOException throws {@link IOException} if there is an error | |||
|      */ | |||
|     @Override | |||
|     public boolean seekToNewSource(long l) throws IOException { | |||
|         return false; | |||
|     } | |||
| 
 | |||
|     @Override | |||
|     public synchronized void close() throws IOException { | |||
|         seaweedInputStream.close(); | |||
|     } | |||
| 
 | |||
|     /** | |||
|      * Not supported by this stream. Throws {@link UnsupportedOperationException} | |||
|      * | |||
|      * @param readlimit ignored | |||
|      */ | |||
|     @Override | |||
|     public synchronized void mark(int readlimit) { | |||
|         throw new UnsupportedOperationException("mark()/reset() not supported on this stream"); | |||
|     } | |||
| 
 | |||
|     /** | |||
|      * Not supported by this stream. Throws {@link UnsupportedOperationException} | |||
|      */ | |||
|     @Override | |||
|     public synchronized void reset() throws IOException { | |||
|         throw new UnsupportedOperationException("mark()/reset() not supported on this stream"); | |||
|     } | |||
| 
 | |||
|     /** | |||
|      * gets whether mark and reset are supported by {@code ADLFileInputStream}. Always returns false. | |||
|      * | |||
|      * @return always {@code false} | |||
|      */ | |||
|     @Override | |||
|     public boolean markSupported() { | |||
|         return false; | |||
|     } | |||
| } | |||
| @ -0,0 +1,150 @@ | |||
| package seaweed.hdfs; | |||
| 
 | |||
| // based on org.apache.hadoop.fs.azurebfs.services.AbfsInputStream | |||
| 
 | |||
| import org.apache.hadoop.fs.ByteBufferReadable; | |||
| import org.apache.hadoop.fs.FSInputStream; | |||
| import org.apache.hadoop.fs.FileSystem.Statistics; | |||
| import seaweedfs.client.FilerGrpcClient; | |||
| import seaweedfs.client.FilerProto; | |||
| import seaweedfs.client.SeaweedInputStream; | |||
| 
 | |||
| import java.io.EOFException; | |||
| import java.io.IOException; | |||
| import java.nio.ByteBuffer; | |||
| 
 | |||
| public class SeaweedHadoopInputStream extends FSInputStream implements ByteBufferReadable { | |||
| 
 | |||
|     private final SeaweedInputStream seaweedInputStream; | |||
|     private final Statistics statistics; | |||
| 
 | |||
|     public SeaweedHadoopInputStream( | |||
|             final FilerGrpcClient filerGrpcClient, | |||
|             final Statistics statistics, | |||
|             final String path, | |||
|             final FilerProto.Entry entry) throws IOException { | |||
|         this.seaweedInputStream = new SeaweedInputStream(filerGrpcClient, path, entry); | |||
|         this.statistics = statistics; | |||
|     } | |||
| 
 | |||
|     @Override | |||
|     public int read() throws IOException { | |||
|         return seaweedInputStream.read(); | |||
|     } | |||
| 
 | |||
|     @Override | |||
|     public int read(final byte[] b, final int off, final int len) throws IOException { | |||
|         return seaweedInputStream.read(b, off, len); | |||
|     } | |||
| 
 | |||
|     // implement ByteBufferReadable | |||
|     @Override | |||
|     public synchronized int read(ByteBuffer buf) throws IOException { | |||
|         int bytesRead = seaweedInputStream.read(buf); | |||
| 
 | |||
|         if (bytesRead > 0) { | |||
|             if (statistics != null) { | |||
|                 statistics.incrementBytesRead(bytesRead); | |||
|             } | |||
|         } | |||
| 
 | |||
|         return bytesRead; | |||
|     } | |||
| 
 | |||
|     /** | |||
|      * Seek to given position in stream. | |||
|      * | |||
|      * @param n position to seek to | |||
|      * @throws IOException  if there is an error | |||
|      * @throws EOFException if attempting to seek past end of file | |||
|      */ | |||
|     @Override | |||
|     public synchronized void seek(long n) throws IOException { | |||
|         seaweedInputStream.seek(n); | |||
|     } | |||
| 
 | |||
|     @Override | |||
|     public synchronized long skip(long n) throws IOException { | |||
|         return seaweedInputStream.skip(n); | |||
|     } | |||
| 
 | |||
|     /** | |||
|      * Return the size of the remaining available bytes | |||
|      * if the size is less than or equal to {@link Integer#MAX_VALUE}, | |||
|      * otherwise, return {@link Integer#MAX_VALUE}. | |||
|      * <p> | |||
|      * This is to match the behavior of DFSInputStream.available(), | |||
|      * which some clients may rely on (HBase write-ahead log reading in | |||
|      * particular). | |||
|      */ | |||
|     @Override | |||
|     public synchronized int available() throws IOException { | |||
|         return seaweedInputStream.available(); | |||
|     } | |||
| 
 | |||
|     /** | |||
|      * Returns the length of the file that this stream refers to. Note that the length returned is the length | |||
|      * as of the time the Stream was opened. Specifically, if there have been subsequent appends to the file, | |||
|      * they wont be reflected in the returned length. | |||
|      * | |||
|      * @return length of the file. | |||
|      * @throws IOException if the stream is closed | |||
|      */ | |||
|     public long length() throws IOException { | |||
|         return seaweedInputStream.length(); | |||
|     } | |||
| 
 | |||
|     /** | |||
|      * Return the current offset from the start of the file | |||
|      * | |||
|      * @throws IOException throws {@link IOException} if there is an error | |||
|      */ | |||
|     @Override | |||
|     public synchronized long getPos() throws IOException { | |||
|         return seaweedInputStream.getPos(); | |||
|     } | |||
| 
 | |||
|     /** | |||
|      * Seeks a different copy of the data.  Returns true if | |||
|      * found a new source, false otherwise. | |||
|      * | |||
|      * @throws IOException throws {@link IOException} if there is an error | |||
|      */ | |||
|     @Override | |||
|     public boolean seekToNewSource(long l) throws IOException { | |||
|         return false; | |||
|     } | |||
| 
 | |||
|     @Override | |||
|     public synchronized void close() throws IOException { | |||
|         seaweedInputStream.close(); | |||
|     } | |||
| 
 | |||
|     /** | |||
|      * Not supported by this stream. Throws {@link UnsupportedOperationException} | |||
|      * | |||
|      * @param readlimit ignored | |||
|      */ | |||
|     @Override | |||
|     public synchronized void mark(int readlimit) { | |||
|         throw new UnsupportedOperationException("mark()/reset() not supported on this stream"); | |||
|     } | |||
| 
 | |||
|     /** | |||
|      * Not supported by this stream. Throws {@link UnsupportedOperationException} | |||
|      */ | |||
|     @Override | |||
|     public synchronized void reset() throws IOException { | |||
|         throw new UnsupportedOperationException("mark()/reset() not supported on this stream"); | |||
|     } | |||
| 
 | |||
|     /** | |||
|      * gets whether mark and reset are supported by {@code ADLFileInputStream}. Always returns false. | |||
|      * | |||
|      * @return always {@code false} | |||
|      */ | |||
|     @Override | |||
|     public boolean markSupported() { | |||
|         return false; | |||
|     } | |||
| } | |||
| @ -1,259 +0,0 @@ | |||
| package seaweed.hdfs; | |||
| 
 | |||
| // based on org.apache.hadoop.fs.azurebfs.services.AbfsInputStream | |||
| 
 | |||
| import org.apache.hadoop.fs.ByteBufferReadable; | |||
| import org.apache.hadoop.fs.FSExceptionMessages; | |||
| import org.apache.hadoop.fs.FSInputStream; | |||
| import org.apache.hadoop.fs.FileSystem.Statistics; | |||
| import org.slf4j.Logger; | |||
| import org.slf4j.LoggerFactory; | |||
| import seaweedfs.client.FilerGrpcClient; | |||
| import seaweedfs.client.FilerProto; | |||
| import seaweedfs.client.SeaweedRead; | |||
| 
 | |||
| import java.io.EOFException; | |||
| import java.io.IOException; | |||
| import java.nio.ByteBuffer; | |||
| import java.util.List; | |||
| 
 | |||
| public class SeaweedInputStream extends FSInputStream implements ByteBufferReadable { | |||
| 
 | |||
|     private static final Logger LOG = LoggerFactory.getLogger(SeaweedInputStream.class); | |||
| 
 | |||
|     private final FilerGrpcClient filerGrpcClient; | |||
|     private final Statistics statistics; | |||
|     private final String path; | |||
|     private final FilerProto.Entry entry; | |||
|     private final List<SeaweedRead.VisibleInterval> visibleIntervalList; | |||
|     private final long contentLength; | |||
| 
 | |||
|     private long position = 0;  // cursor of the file | |||
| 
 | |||
|     private boolean closed = false; | |||
| 
 | |||
|     public SeaweedInputStream( | |||
|             final FilerGrpcClient filerGrpcClient, | |||
|             final Statistics statistics, | |||
|             final String path, | |||
|             final FilerProto.Entry entry) throws IOException { | |||
|         this.filerGrpcClient = filerGrpcClient; | |||
|         this.statistics = statistics; | |||
|         this.path = path; | |||
|         this.entry = entry; | |||
|         this.contentLength = SeaweedRead.fileSize(entry); | |||
| 
 | |||
|         this.visibleIntervalList = SeaweedRead.nonOverlappingVisibleIntervals(filerGrpcClient, entry.getChunksList()); | |||
| 
 | |||
|         LOG.debug("new path:{} entry:{} visibleIntervalList:{}", path, entry, visibleIntervalList); | |||
| 
 | |||
|     } | |||
| 
 | |||
|     public String getPath() { | |||
|         return path; | |||
|     } | |||
| 
 | |||
|     @Override | |||
|     public int read() throws IOException { | |||
|         byte[] b = new byte[1]; | |||
|         int numberOfBytesRead = read(b, 0, 1); | |||
|         if (numberOfBytesRead < 0) { | |||
|             return -1; | |||
|         } else { | |||
|             return (b[0] & 0xFF); | |||
|         } | |||
|     } | |||
| 
 | |||
|     @Override | |||
|     public int read(final byte[] b, final int off, final int len) throws IOException { | |||
| 
 | |||
|         if (b == null) { | |||
|             throw new IllegalArgumentException("null byte array passed in to read() method"); | |||
|         } | |||
|         if (off >= b.length) { | |||
|             throw new IllegalArgumentException("offset greater than length of array"); | |||
|         } | |||
|         if (len < 0) { | |||
|             throw new IllegalArgumentException("requested read length is less than zero"); | |||
|         } | |||
|         if (len > (b.length - off)) { | |||
|             throw new IllegalArgumentException("requested read length is more than will fit after requested offset in buffer"); | |||
|         } | |||
| 
 | |||
|         ByteBuffer buf = ByteBuffer.wrap(b, off, len); | |||
|         return read(buf); | |||
| 
 | |||
|     } | |||
| 
 | |||
|     // implement ByteBufferReadable | |||
|     @Override | |||
|     public synchronized int read(ByteBuffer buf) throws IOException { | |||
| 
 | |||
|         if (position < 0) { | |||
|             throw new IllegalArgumentException("attempting to read from negative offset"); | |||
|         } | |||
|         if (position >= contentLength) { | |||
|             return -1;  // Hadoop prefers -1 to EOFException | |||
|         } | |||
| 
 | |||
|         long bytesRead = 0; | |||
|         int len = buf.remaining(); | |||
|         int start = (int) this.position; | |||
|         if (start+len <= entry.getContent().size()) { | |||
|             entry.getContent().substring(start, start+len).copyTo(buf); | |||
|         } else { | |||
|             bytesRead = SeaweedRead.read(this.filerGrpcClient, this.visibleIntervalList, this.position, buf, SeaweedRead.fileSize(entry)); | |||
|         } | |||
| 
 | |||
|         if (bytesRead > Integer.MAX_VALUE) { | |||
|             throw new IOException("Unexpected Content-Length"); | |||
|         } | |||
| 
 | |||
|         if (bytesRead > 0) { | |||
|             this.position += bytesRead; | |||
|             if (statistics != null) { | |||
|                 statistics.incrementBytesRead(bytesRead); | |||
|             } | |||
|         } | |||
| 
 | |||
|         return (int) bytesRead; | |||
|     } | |||
| 
 | |||
|     /** | |||
|      * Seek to given position in stream. | |||
|      * | |||
|      * @param n position to seek to | |||
|      * @throws IOException  if there is an error | |||
|      * @throws EOFException if attempting to seek past end of file | |||
|      */ | |||
|     @Override | |||
|     public synchronized void seek(long n) throws IOException { | |||
|         if (closed) { | |||
|             throw new IOException(FSExceptionMessages.STREAM_IS_CLOSED); | |||
|         } | |||
|         if (n < 0) { | |||
|             throw new EOFException(FSExceptionMessages.NEGATIVE_SEEK); | |||
|         } | |||
|         if (n > contentLength) { | |||
|             throw new EOFException(FSExceptionMessages.CANNOT_SEEK_PAST_EOF); | |||
|         } | |||
| 
 | |||
|         this.position = n; | |||
| 
 | |||
|     } | |||
| 
 | |||
|     @Override | |||
|     public synchronized long skip(long n) throws IOException { | |||
|         if (closed) { | |||
|             throw new IOException(FSExceptionMessages.STREAM_IS_CLOSED); | |||
|         } | |||
|         if (this.position == contentLength) { | |||
|             if (n > 0) { | |||
|                 throw new EOFException(FSExceptionMessages.CANNOT_SEEK_PAST_EOF); | |||
|             } | |||
|         } | |||
|         long newPos = this.position + n; | |||
|         if (newPos < 0) { | |||
|             newPos = 0; | |||
|             n = newPos - this.position; | |||
|         } | |||
|         if (newPos > contentLength) { | |||
|             newPos = contentLength; | |||
|             n = newPos - this.position; | |||
|         } | |||
|         seek(newPos); | |||
|         return n; | |||
|     } | |||
| 
 | |||
|     /** | |||
|      * Return the size of the remaining available bytes | |||
|      * if the size is less than or equal to {@link Integer#MAX_VALUE}, | |||
|      * otherwise, return {@link Integer#MAX_VALUE}. | |||
|      * <p> | |||
|      * This is to match the behavior of DFSInputStream.available(), | |||
|      * which some clients may rely on (HBase write-ahead log reading in | |||
|      * particular). | |||
|      */ | |||
|     @Override | |||
|     public synchronized int available() throws IOException { | |||
|         if (closed) { | |||
|             throw new IOException( | |||
|                     FSExceptionMessages.STREAM_IS_CLOSED); | |||
|         } | |||
|         final long remaining = this.contentLength - this.getPos(); | |||
|         return remaining <= Integer.MAX_VALUE | |||
|                 ? (int) remaining : Integer.MAX_VALUE; | |||
|     } | |||
| 
 | |||
|     /** | |||
|      * Returns the length of the file that this stream refers to. Note that the length returned is the length | |||
|      * as of the time the Stream was opened. Specifically, if there have been subsequent appends to the file, | |||
|      * they wont be reflected in the returned length. | |||
|      * | |||
|      * @return length of the file. | |||
|      * @throws IOException if the stream is closed | |||
|      */ | |||
|     public long length() throws IOException { | |||
|         if (closed) { | |||
|             throw new IOException(FSExceptionMessages.STREAM_IS_CLOSED); | |||
|         } | |||
|         return contentLength; | |||
|     } | |||
| 
 | |||
|     /** | |||
|      * Return the current offset from the start of the file | |||
|      * | |||
|      * @throws IOException throws {@link IOException} if there is an error | |||
|      */ | |||
|     @Override | |||
|     public synchronized long getPos() throws IOException { | |||
|         if (closed) { | |||
|             throw new IOException(FSExceptionMessages.STREAM_IS_CLOSED); | |||
|         } | |||
|         return position; | |||
|     } | |||
| 
 | |||
|     /** | |||
|      * Seeks a different copy of the data.  Returns true if | |||
|      * found a new source, false otherwise. | |||
|      * | |||
|      * @throws IOException throws {@link IOException} if there is an error | |||
|      */ | |||
|     @Override | |||
|     public boolean seekToNewSource(long l) throws IOException { | |||
|         return false; | |||
|     } | |||
| 
 | |||
|     @Override | |||
|     public synchronized void close() throws IOException { | |||
|         closed = true; | |||
|     } | |||
| 
 | |||
|     /** | |||
|      * Not supported by this stream. Throws {@link UnsupportedOperationException} | |||
|      * | |||
|      * @param readlimit ignored | |||
|      */ | |||
|     @Override | |||
|     public synchronized void mark(int readlimit) { | |||
|         throw new UnsupportedOperationException("mark()/reset() not supported on this stream"); | |||
|     } | |||
| 
 | |||
|     /** | |||
|      * Not supported by this stream. Throws {@link UnsupportedOperationException} | |||
|      */ | |||
|     @Override | |||
|     public synchronized void reset() throws IOException { | |||
|         throw new UnsupportedOperationException("mark()/reset() not supported on this stream"); | |||
|     } | |||
| 
 | |||
|     /** | |||
|      * gets whether mark and reset are supported by {@code ADLFileInputStream}. Always returns false. | |||
|      * | |||
|      * @return always {@code false} | |||
|      */ | |||
|     @Override | |||
|     public boolean markSupported() { | |||
|         return false; | |||
|     } | |||
| } | |||
						Write
						Preview
					
					
					Loading…
					
					Cancel
						Save
					
		Reference in new issue