Browse Source

Merge pull request #12 from chrislusf/master

sync
pull/1480/head
hilimd 4 years ago
committed by GitHub
parent
commit
b0d6330cf4
No known key found for this signature in database GPG Key ID: 4AEE18F83AFDEB23
  1. 2
      k8s/seaweedfs/Chart.yaml
  2. 2
      k8s/seaweedfs/values.yaml
  3. 2
      other/java/client/pom.xml
  4. 2
      other/java/client/pom.xml.deploy
  5. 2
      other/java/client/pom_debug.xml
  6. 55
      other/java/client/src/main/java/seaweedfs/client/SeaweedRead.java
  7. 2
      other/java/hdfs2/dependency-reduced-pom.xml
  8. 2
      other/java/hdfs2/pom.xml
  9. 4
      other/java/hdfs2/src/main/java/seaweed/hdfs/SeaweedFileSystemStore.java
  10. 4
      other/java/hdfs2/src/main/java/seaweed/hdfs/SeaweedInputStream.java
  11. 184
      other/java/hdfs3/dependency-reduced-pom.xml
  12. 2
      other/java/hdfs3/pom.xml
  13. 4
      other/java/hdfs3/src/main/java/seaweed/hdfs/SeaweedFileSystemStore.java
  14. 4
      other/java/hdfs3/src/main/java/seaweed/hdfs/SeaweedInputStream.java
  15. 58
      test/random_access/pom.xml
  16. 753
      test/random_access/src/main/java/seaweedfs/client/btree/BTreePersistentIndexedCache.java
  17. 59
      test/random_access/src/main/java/seaweedfs/client/btree/Block.java
  18. 51
      test/random_access/src/main/java/seaweedfs/client/btree/BlockPayload.java
  19. 75
      test/random_access/src/main/java/seaweedfs/client/btree/BlockPointer.java
  20. 68
      test/random_access/src/main/java/seaweedfs/client/btree/BlockStore.java
  21. 30
      test/random_access/src/main/java/seaweedfs/client/btree/BufferCaster.java
  22. 74
      test/random_access/src/main/java/seaweedfs/client/btree/ByteInput.java
  23. 74
      test/random_access/src/main/java/seaweedfs/client/btree/ByteOutput.java
  24. 129
      test/random_access/src/main/java/seaweedfs/client/btree/CachingBlockStore.java
  25. 22
      test/random_access/src/main/java/seaweedfs/client/btree/CorruptedCacheException.java
  26. 274
      test/random_access/src/main/java/seaweedfs/client/btree/FileBackedBlockStore.java
  27. 283
      test/random_access/src/main/java/seaweedfs/client/btree/FreeListBlockStore.java
  28. 75
      test/random_access/src/main/java/seaweedfs/client/btree/KeyHasher.java
  29. 54
      test/random_access/src/main/java/seaweedfs/client/btree/RandomAccessFileInputStream.java
  30. 48
      test/random_access/src/main/java/seaweedfs/client/btree/RandomAccessFileOutputStream.java
  31. 87
      test/random_access/src/main/java/seaweedfs/client/btree/StateCheckBlockStore.java
  32. 526
      test/random_access/src/main/java/seaweedfs/client/btree/StreamByteBuffer.java
  33. 88
      test/random_access/src/main/java/seaweedfs/client/btree/UncheckedException.java
  34. 36
      test/random_access/src/main/java/seaweedfs/client/btree/UncheckedIOException.java
  35. 133
      test/random_access/src/main/java/seaweedfs/client/btree/serialize/AbstractDecoder.java
  36. 101
      test/random_access/src/main/java/seaweedfs/client/btree/serialize/AbstractEncoder.java
  37. 40
      test/random_access/src/main/java/seaweedfs/client/btree/serialize/AbstractSerializer.java
  38. 79
      test/random_access/src/main/java/seaweedfs/client/btree/serialize/Cast.java
  39. 43
      test/random_access/src/main/java/seaweedfs/client/btree/serialize/ClassLoaderObjectInputStream.java
  40. 140
      test/random_access/src/main/java/seaweedfs/client/btree/serialize/Decoder.java
  41. 73
      test/random_access/src/main/java/seaweedfs/client/btree/serialize/DefaultSerializer.java
  42. 110
      test/random_access/src/main/java/seaweedfs/client/btree/serialize/Encoder.java
  43. 31
      test/random_access/src/main/java/seaweedfs/client/btree/serialize/FlushableEncoder.java
  44. 28
      test/random_access/src/main/java/seaweedfs/client/btree/serialize/ObjectReader.java
  45. 21
      test/random_access/src/main/java/seaweedfs/client/btree/serialize/ObjectWriter.java
  46. 33
      test/random_access/src/main/java/seaweedfs/client/btree/serialize/Serializer.java
  47. 33
      test/random_access/src/main/java/seaweedfs/client/btree/serialize/StatefulSerializer.java
  48. 210
      test/random_access/src/main/java/seaweedfs/client/btree/serialize/kryo/KryoBackedDecoder.java
  49. 134
      test/random_access/src/main/java/seaweedfs/client/btree/serialize/kryo/KryoBackedEncoder.java
  50. 188
      test/random_access/src/main/java/seaweedfs/client/btree/serialize/kryo/StringDeduplicatingKryoBackedDecoder.java
  51. 128
      test/random_access/src/main/java/seaweedfs/client/btree/serialize/kryo/StringDeduplicatingKryoBackedEncoder.java
  52. 51
      test/random_access/src/main/java/seaweedfs/client/btree/serialize/kryo/TypeSafeSerializer.java
  53. 143
      test/random_access/src/test/java/seaewedfs/mmap/MmapFileTest.java
  54. 476
      test/random_access/src/test/java/seaweedfs/client/btree/BTreePersistentIndexedCacheTest.java
  55. 21
      test/s3/basic/basic_test.go
  56. 6
      unmaintained/diff_volume_servers/diff_volume_servers.go
  57. 4
      unmaintained/fix_dat/fix_dat.go
  58. 83
      unmaintained/see_dat/see_dat_gzip.go
  59. 2
      unmaintained/see_idx/see_idx.go
  60. 19
      weed/Makefile
  61. 6
      weed/command/export.go
  62. 2
      weed/command/fix.go
  63. 2
      weed/command/server.go
  64. 2
      weed/command/volume.go
  65. 10
      weed/filer2/entry.go
  66. 2
      weed/filer2/entry_codec.go
  67. 2
      weed/filer2/filechunk_manifest.go
  68. 41
      weed/filer2/filechunks.go
  69. 135
      weed/filer2/filechunks_test.go
  70. 61
      weed/filer2/filer.go
  71. 6
      weed/filer2/filer_delete_entry.go
  72. 8
      weed/filer2/filer_deletion.go
  73. 2
      weed/filer2/leveldb/leveldb_store_test.go
  74. 2
      weed/filer2/leveldb2/leveldb2_store_test.go
  75. 96
      weed/filer2/reader_at.go
  76. 156
      weed/filer2/reader_at_test.go
  77. 6
      weed/filer2/stream.go
  78. 47
      weed/filesys/dir.go
  79. 4
      weed/filesys/dir_link.go
  80. 12
      weed/filesys/dir_rename.go
  81. 25
      weed/filesys/dirty_page.go
  82. 15
      weed/filesys/dirty_page_interval.go
  83. 51
      weed/filesys/file.go
  84. 76
      weed/filesys/filehandle.go
  85. 9
      weed/filesys/fscache.go
  86. 21
      weed/filesys/fscache_test.go
  87. 5
      weed/filesys/meta_cache/meta_cache.go
  88. 2
      weed/filesys/meta_cache/meta_cache_init.go
  89. 16
      weed/filesys/wfs.go
  90. 2
      weed/filesys/wfs_deletion.go
  91. 18
      weed/operation/upload_content.go
  92. 9
      weed/pb/filer_pb/filer_client.go
  93. 8
      weed/pb/filer_pb/filer_pb_helper.go
  94. 2
      weed/pb/filer_pb/filer_pb_helper_test.go
  95. 17
      weed/pb/volume_server.proto
  96. 2227
      weed/pb/volume_server_pb/volume_server.pb.go
  97. 4
      weed/replication/sink/azuresink/azure_sink.go
  98. 4
      weed/replication/sink/b2sink/b2_sink.go
  99. 4
      weed/replication/sink/gcssink/gcs_sink.go
  100. 2
      weed/replication/sink/s3sink/s3_sink.go

2
k8s/seaweedfs/Chart.yaml

@ -1,4 +1,4 @@
apiVersion: v1 apiVersion: v1
description: SeaweedFS description: SeaweedFS
name: seaweedfs name: seaweedfs
version: 1.88
version: 1.90

2
k8s/seaweedfs/values.yaml

@ -4,7 +4,7 @@ global:
registry: "" registry: ""
repository: "" repository: ""
imageName: chrislusf/seaweedfs imageName: chrislusf/seaweedfs
imageTag: "1.88"
imageTag: "1.90"
imagePullPolicy: IfNotPresent imagePullPolicy: IfNotPresent
imagePullSecrets: imagepullsecret imagePullSecrets: imagepullsecret
restartPolicy: Always restartPolicy: Always

2
other/java/client/pom.xml

@ -5,7 +5,7 @@
<groupId>com.github.chrislusf</groupId> <groupId>com.github.chrislusf</groupId>
<artifactId>seaweedfs-client</artifactId> <artifactId>seaweedfs-client</artifactId>
<version>1.4.5</version>
<version>1.4.6</version>
<parent> <parent>
<groupId>org.sonatype.oss</groupId> <groupId>org.sonatype.oss</groupId>

2
other/java/client/pom.xml.deploy

@ -5,7 +5,7 @@
<groupId>com.github.chrislusf</groupId> <groupId>com.github.chrislusf</groupId>
<artifactId>seaweedfs-client</artifactId> <artifactId>seaweedfs-client</artifactId>
<version>1.4.5</version>
<version>1.4.6</version>
<parent> <parent>
<groupId>org.sonatype.oss</groupId> <groupId>org.sonatype.oss</groupId>

2
other/java/client/pom_debug.xml

@ -5,7 +5,7 @@
<groupId>com.github.chrislusf</groupId> <groupId>com.github.chrislusf</groupId>
<artifactId>seaweedfs-client</artifactId> <artifactId>seaweedfs-client</artifactId>
<version>1.4.5</version>
<version>1.4.6</version>
<parent> <parent>
<groupId>org.sonatype.oss</groupId> <groupId>org.sonatype.oss</groupId>

55
other/java/client/src/main/java/seaweedfs/client/SeaweedRead.java

@ -23,7 +23,7 @@ public class SeaweedRead {
// returns bytesRead // returns bytesRead
public static long read(FilerGrpcClient filerGrpcClient, List<VisibleInterval> visibleIntervals, public static long read(FilerGrpcClient filerGrpcClient, List<VisibleInterval> visibleIntervals,
final long position, final byte[] buffer, final int bufferOffset, final long position, final byte[] buffer, final int bufferOffset,
final int bufferLength) throws IOException {
final int bufferLength, final long fileSize) throws IOException {
List<ChunkView> chunkViews = viewFromVisibles(visibleIntervals, position, bufferLength); List<ChunkView> chunkViews = viewFromVisibles(visibleIntervals, position, bufferLength);
@ -42,6 +42,14 @@ public class SeaweedRead {
long readCount = 0; long readCount = 0;
int startOffset = bufferOffset; int startOffset = bufferOffset;
for (ChunkView chunkView : chunkViews) { for (ChunkView chunkView : chunkViews) {
if (startOffset < chunkView.logicOffset) {
long gap = chunkView.logicOffset - startOffset;
LOG.debug("zero [{},{})", startOffset, startOffset + gap);
readCount += gap;
startOffset += gap;
}
FilerProto.Locations locations = vid2Locations.get(parseVolumeId(chunkView.fileId)); FilerProto.Locations locations = vid2Locations.get(parseVolumeId(chunkView.fileId));
if (locations == null || locations.getLocationsCount() == 0) { if (locations == null || locations.getLocationsCount() == 0) {
LOG.error("failed to locate {}", chunkView.fileId); LOG.error("failed to locate {}", chunkView.fileId);
@ -51,11 +59,22 @@ public class SeaweedRead {
int len = readChunkView(position, buffer, startOffset, chunkView, locations); int len = readChunkView(position, buffer, startOffset, chunkView, locations);
LOG.debug("read [{},{}) {} size {}", startOffset, startOffset + len, chunkView.fileId, chunkView.size);
readCount += len; readCount += len;
startOffset += len; startOffset += len;
} }
long limit = Math.min(bufferLength, fileSize);
if (startOffset < limit) {
long gap = limit - startOffset;
LOG.debug("zero2 [{},{})", startOffset, startOffset + gap);
readCount += gap;
startOffset += gap;
}
return readCount; return readCount;
} }
@ -71,7 +90,7 @@ public class SeaweedRead {
int len = (int) chunkView.size; int len = (int) chunkView.size;
LOG.debug("readChunkView fid:{} chunkData.length:{} chunkView.offset:{} buffer.length:{} startOffset:{} len:{}", LOG.debug("readChunkView fid:{} chunkData.length:{} chunkView.offset:{} buffer.length:{} startOffset:{} len:{}",
chunkView.fileId, chunkData.length, chunkView.offset, buffer.length, startOffset, len); chunkView.fileId, chunkData.length, chunkView.offset, buffer.length, startOffset, len);
System.arraycopy(chunkData, (int) chunkView.offset, buffer, startOffset, len);
System.arraycopy(chunkData, startOffset - (int) (chunkView.logicOffset - chunkView.offset), buffer, startOffset, len);
return len; return len;
} }
@ -93,7 +112,7 @@ public class SeaweedRead {
Header contentEncodingHeader = entity.getContentEncoding(); Header contentEncodingHeader = entity.getContentEncoding();
if (contentEncodingHeader != null) { if (contentEncodingHeader != null) {
HeaderElement[] encodings =contentEncodingHeader.getElements();
HeaderElement[] encodings = contentEncodingHeader.getElements();
for (int i = 0; i < encodings.length; i++) { for (int i = 0; i < encodings.length; i++) {
if (encodings[i].getName().equalsIgnoreCase("gzip")) { if (encodings[i].getName().equalsIgnoreCase("gzip")) {
entity = new GzipDecompressingEntity(entity); entity = new GzipDecompressingEntity(entity);
@ -134,18 +153,19 @@ public class SeaweedRead {
long stop = offset + size; long stop = offset + size;
for (VisibleInterval chunk : visibleIntervals) { for (VisibleInterval chunk : visibleIntervals) {
if (chunk.start <= offset && offset < chunk.stop && offset < stop) {
long chunkStart = Math.max(offset, chunk.start);
long chunkStop = Math.min(stop, chunk.stop);
if (chunkStart < chunkStop) {
boolean isFullChunk = chunk.isFullChunk && chunk.start == offset && chunk.stop <= stop; boolean isFullChunk = chunk.isFullChunk && chunk.start == offset && chunk.stop <= stop;
views.add(new ChunkView( views.add(new ChunkView(
chunk.fileId, chunk.fileId,
offset - chunk.start,
Math.min(chunk.stop, stop) - offset,
offset,
chunkStart - chunk.start + chunk.chunkOffset,
chunkStop - chunkStart,
chunkStart,
isFullChunk, isFullChunk,
chunk.cipherKey, chunk.cipherKey,
chunk.isCompressed chunk.isCompressed
)); ));
offset = Math.min(chunk.stop, stop);
} }
} }
return views; return views;
@ -160,7 +180,13 @@ public class SeaweedRead {
Arrays.sort(chunks, new Comparator<FilerProto.FileChunk>() { Arrays.sort(chunks, new Comparator<FilerProto.FileChunk>() {
@Override @Override
public int compare(FilerProto.FileChunk a, FilerProto.FileChunk b) { public int compare(FilerProto.FileChunk a, FilerProto.FileChunk b) {
return (int) (a.getMtime() - b.getMtime());
// if just a.getMtime() - b.getMtime(), it will overflow!
if (a.getMtime() < b.getMtime()) {
return -1;
} else if (a.getMtime() > b.getMtime()) {
return 1;
}
return 0;
} }
}); });
@ -181,6 +207,7 @@ public class SeaweedRead {
chunk.getOffset() + chunk.getSize(), chunk.getOffset() + chunk.getSize(),
chunk.getFileId(), chunk.getFileId(),
chunk.getMtime(), chunk.getMtime(),
0,
true, true,
chunk.getCipherKey().toByteArray(), chunk.getCipherKey().toByteArray(),
chunk.getIsCompressed() chunk.getIsCompressed()
@ -203,6 +230,7 @@ public class SeaweedRead {
chunk.getOffset(), chunk.getOffset(),
v.fileId, v.fileId,
v.modifiedTime, v.modifiedTime,
v.chunkOffset,
false, false,
v.cipherKey, v.cipherKey,
v.isCompressed v.isCompressed
@ -215,6 +243,7 @@ public class SeaweedRead {
v.stop, v.stop,
v.fileId, v.fileId,
v.modifiedTime, v.modifiedTime,
v.chunkOffset + (chunkStop - v.start),
false, false,
v.cipherKey, v.cipherKey,
v.isCompressed v.isCompressed
@ -247,6 +276,10 @@ public class SeaweedRead {
return fileId; return fileId;
} }
public static long fileSize(FilerProto.Entry entry) {
return Math.max(totalSize(entry.getChunksList()), entry.getAttributes().getFileSize());
}
public static long totalSize(List<FilerProto.FileChunk> chunksList) { public static long totalSize(List<FilerProto.FileChunk> chunksList) {
long size = 0; long size = 0;
for (FilerProto.FileChunk chunk : chunksList) { for (FilerProto.FileChunk chunk : chunksList) {
@ -263,15 +296,17 @@ public class SeaweedRead {
public final long stop; public final long stop;
public final long modifiedTime; public final long modifiedTime;
public final String fileId; public final String fileId;
public final long chunkOffset;
public final boolean isFullChunk; public final boolean isFullChunk;
public final byte[] cipherKey; public final byte[] cipherKey;
public final boolean isCompressed; public final boolean isCompressed;
public VisibleInterval(long start, long stop, String fileId, long modifiedTime, boolean isFullChunk, byte[] cipherKey, boolean isCompressed) {
public VisibleInterval(long start, long stop, String fileId, long modifiedTime, long chunkOffset, boolean isFullChunk, byte[] cipherKey, boolean isCompressed) {
this.start = start; this.start = start;
this.stop = stop; this.stop = stop;
this.modifiedTime = modifiedTime; this.modifiedTime = modifiedTime;
this.fileId = fileId; this.fileId = fileId;
this.chunkOffset = chunkOffset;
this.isFullChunk = isFullChunk; this.isFullChunk = isFullChunk;
this.cipherKey = cipherKey; this.cipherKey = cipherKey;
this.isCompressed = isCompressed; this.isCompressed = isCompressed;

2
other/java/hdfs2/dependency-reduced-pom.xml

@ -301,7 +301,7 @@
</snapshotRepository> </snapshotRepository>
</distributionManagement> </distributionManagement>
<properties> <properties>
<seaweedfs.client.version>1.4.5</seaweedfs.client.version>
<seaweedfs.client.version>1.4.6</seaweedfs.client.version>
<hadoop.version>2.9.2</hadoop.version> <hadoop.version>2.9.2</hadoop.version>
</properties> </properties>
</project> </project>

2
other/java/hdfs2/pom.xml

@ -5,7 +5,7 @@
<modelVersion>4.0.0</modelVersion> <modelVersion>4.0.0</modelVersion>
<properties> <properties>
<seaweedfs.client.version>1.4.5</seaweedfs.client.version>
<seaweedfs.client.version>1.4.6</seaweedfs.client.version>
<hadoop.version>2.9.2</hadoop.version> <hadoop.version>2.9.2</hadoop.version>
</properties> </properties>

4
other/java/hdfs2/src/main/java/seaweed/hdfs/SeaweedFileSystemStore.java

@ -124,7 +124,7 @@ public class SeaweedFileSystemStore {
private FileStatus doGetFileStatus(Path path, FilerProto.Entry entry) { private FileStatus doGetFileStatus(Path path, FilerProto.Entry entry) {
FilerProto.FuseAttributes attributes = entry.getAttributes(); FilerProto.FuseAttributes attributes = entry.getAttributes();
long length = SeaweedRead.totalSize(entry.getChunksList());
long length = SeaweedRead.fileSize(entry);
boolean isDir = entry.getIsDirectory(); boolean isDir = entry.getIsDirectory();
int block_replication = 1; int block_replication = 1;
int blocksize = 512; int blocksize = 512;
@ -185,7 +185,7 @@ public class SeaweedFileSystemStore {
entry.mergeFrom(existingEntry); entry.mergeFrom(existingEntry);
entry.getAttributesBuilder().setMtime(now); entry.getAttributesBuilder().setMtime(now);
LOG.debug("createFile merged entry path:{} entry:{} from:{}", path, entry, existingEntry); LOG.debug("createFile merged entry path:{} entry:{} from:{}", path, entry, existingEntry);
writePosition = SeaweedRead.totalSize(existingEntry.getChunksList());
writePosition = SeaweedRead.fileSize(existingEntry);
replication = existingEntry.getAttributes().getReplication(); replication = existingEntry.getAttributes().getReplication();
} }
} }

4
other/java/hdfs2/src/main/java/seaweed/hdfs/SeaweedInputStream.java

@ -41,7 +41,7 @@ public class SeaweedInputStream extends FSInputStream {
this.statistics = statistics; this.statistics = statistics;
this.path = path; this.path = path;
this.entry = entry; this.entry = entry;
this.contentLength = SeaweedRead.totalSize(entry.getChunksList());
this.contentLength = SeaweedRead.fileSize(entry);
this.bufferSize = bufferSize; this.bufferSize = bufferSize;
this.visibleIntervalList = SeaweedRead.nonOverlappingVisibleIntervals(filerGrpcClient, entry.getChunksList()); this.visibleIntervalList = SeaweedRead.nonOverlappingVisibleIntervals(filerGrpcClient, entry.getChunksList());
@ -87,7 +87,7 @@ public class SeaweedInputStream extends FSInputStream {
throw new IllegalArgumentException("requested read length is more than will fit after requested offset in buffer"); throw new IllegalArgumentException("requested read length is more than will fit after requested offset in buffer");
} }
long bytesRead = SeaweedRead.read(this.filerGrpcClient, this.visibleIntervalList, this.position, b, off, len);
long bytesRead = SeaweedRead.read(this.filerGrpcClient, this.visibleIntervalList, this.position, b, off, len, SeaweedRead.fileSize(entry));
if (bytesRead > Integer.MAX_VALUE) { if (bytesRead > Integer.MAX_VALUE) {
throw new IOException("Unexpected Content-Length"); throw new IOException("Unexpected Content-Length");
} }

184
other/java/hdfs3/dependency-reduced-pom.xml

@ -120,6 +120,188 @@
</plugin> </plugin>
</plugins> </plugins>
</build> </build>
<dependencies>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>3.1.1</version>
<scope>provided</scope>
<exclusions>
<exclusion>
<artifactId>hadoop-hdfs-client</artifactId>
<groupId>org.apache.hadoop</groupId>
</exclusion>
<exclusion>
<artifactId>hadoop-yarn-api</artifactId>
<groupId>org.apache.hadoop</groupId>
</exclusion>
<exclusion>
<artifactId>hadoop-yarn-client</artifactId>
<groupId>org.apache.hadoop</groupId>
</exclusion>
<exclusion>
<artifactId>hadoop-mapreduce-client-core</artifactId>
<groupId>org.apache.hadoop</groupId>
</exclusion>
<exclusion>
<artifactId>hadoop-mapreduce-client-jobclient</artifactId>
<groupId>org.apache.hadoop</groupId>
</exclusion>
<exclusion>
<artifactId>hadoop-annotations</artifactId>
<groupId>org.apache.hadoop</groupId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>3.1.1</version>
<scope>provided</scope>
<exclusions>
<exclusion>
<artifactId>commons-cli</artifactId>
<groupId>commons-cli</groupId>
</exclusion>
<exclusion>
<artifactId>commons-math3</artifactId>
<groupId>org.apache.commons</groupId>
</exclusion>
<exclusion>
<artifactId>commons-io</artifactId>
<groupId>commons-io</groupId>
</exclusion>
<exclusion>
<artifactId>commons-net</artifactId>
<groupId>commons-net</groupId>
</exclusion>
<exclusion>
<artifactId>commons-collections</artifactId>
<groupId>commons-collections</groupId>
</exclusion>
<exclusion>
<artifactId>javax.servlet-api</artifactId>
<groupId>javax.servlet</groupId>
</exclusion>
<exclusion>
<artifactId>jetty-server</artifactId>
<groupId>org.eclipse.jetty</groupId>
</exclusion>
<exclusion>
<artifactId>jetty-util</artifactId>
<groupId>org.eclipse.jetty</groupId>
</exclusion>
<exclusion>
<artifactId>jetty-servlet</artifactId>
<groupId>org.eclipse.jetty</groupId>
</exclusion>
<exclusion>
<artifactId>jetty-webapp</artifactId>
<groupId>org.eclipse.jetty</groupId>
</exclusion>
<exclusion>
<artifactId>jsp-api</artifactId>
<groupId>javax.servlet.jsp</groupId>
</exclusion>
<exclusion>
<artifactId>jersey-core</artifactId>
<groupId>com.sun.jersey</groupId>
</exclusion>
<exclusion>
<artifactId>jersey-servlet</artifactId>
<groupId>com.sun.jersey</groupId>
</exclusion>
<exclusion>
<artifactId>jersey-json</artifactId>
<groupId>com.sun.jersey</groupId>
</exclusion>
<exclusion>
<artifactId>jersey-server</artifactId>
<groupId>com.sun.jersey</groupId>
</exclusion>
<exclusion>
<artifactId>log4j</artifactId>
<groupId>log4j</groupId>
</exclusion>
<exclusion>
<artifactId>commons-lang</artifactId>
<groupId>commons-lang</groupId>
</exclusion>
<exclusion>
<artifactId>commons-beanutils</artifactId>
<groupId>commons-beanutils</groupId>
</exclusion>
<exclusion>
<artifactId>commons-configuration2</artifactId>
<groupId>org.apache.commons</groupId>
</exclusion>
<exclusion>
<artifactId>commons-lang3</artifactId>
<groupId>org.apache.commons</groupId>
</exclusion>
<exclusion>
<artifactId>slf4j-log4j12</artifactId>
<groupId>org.slf4j</groupId>
</exclusion>
<exclusion>
<artifactId>avro</artifactId>
<groupId>org.apache.avro</groupId>
</exclusion>
<exclusion>
<artifactId>re2j</artifactId>
<groupId>com.google.re2j</groupId>
</exclusion>
<exclusion>
<artifactId>hadoop-auth</artifactId>
<groupId>org.apache.hadoop</groupId>
</exclusion>
<exclusion>
<artifactId>jsch</artifactId>
<groupId>com.jcraft</groupId>
</exclusion>
<exclusion>
<artifactId>curator-client</artifactId>
<groupId>org.apache.curator</groupId>
</exclusion>
<exclusion>
<artifactId>curator-recipes</artifactId>
<groupId>org.apache.curator</groupId>
</exclusion>
<exclusion>
<artifactId>htrace-core4</artifactId>
<groupId>org.apache.htrace</groupId>
</exclusion>
<exclusion>
<artifactId>zookeeper</artifactId>
<groupId>org.apache.zookeeper</groupId>
</exclusion>
<exclusion>
<artifactId>commons-compress</artifactId>
<groupId>org.apache.commons</groupId>
</exclusion>
<exclusion>
<artifactId>kerb-simplekdc</artifactId>
<groupId>org.apache.kerby</groupId>
</exclusion>
<exclusion>
<artifactId>jackson-databind</artifactId>
<groupId>com.fasterxml.jackson.core</groupId>
</exclusion>
<exclusion>
<artifactId>stax2-api</artifactId>
<groupId>org.codehaus.woodstox</groupId>
</exclusion>
<exclusion>
<artifactId>woodstox-core</artifactId>
<groupId>com.fasterxml.woodstox</groupId>
</exclusion>
<exclusion>
<artifactId>hadoop-annotations</artifactId>
<groupId>org.apache.hadoop</groupId>
</exclusion>
</exclusions>
</dependency>
</dependencies>
<distributionManagement> <distributionManagement>
<snapshotRepository> <snapshotRepository>
<id>ossrh</id> <id>ossrh</id>
@ -127,7 +309,7 @@
</snapshotRepository> </snapshotRepository>
</distributionManagement> </distributionManagement>
<properties> <properties>
<seaweedfs.client.version>1.4.5</seaweedfs.client.version>
<seaweedfs.client.version>1.4.6</seaweedfs.client.version>
<hadoop.version>3.1.1</hadoop.version> <hadoop.version>3.1.1</hadoop.version>
</properties> </properties>
</project> </project>

2
other/java/hdfs3/pom.xml

@ -5,7 +5,7 @@
<modelVersion>4.0.0</modelVersion> <modelVersion>4.0.0</modelVersion>
<properties> <properties>
<seaweedfs.client.version>1.4.5</seaweedfs.client.version>
<seaweedfs.client.version>1.4.6</seaweedfs.client.version>
<hadoop.version>3.1.1</hadoop.version> <hadoop.version>3.1.1</hadoop.version>
</properties> </properties>

4
other/java/hdfs3/src/main/java/seaweed/hdfs/SeaweedFileSystemStore.java

@ -124,7 +124,7 @@ public class SeaweedFileSystemStore {
private FileStatus doGetFileStatus(Path path, FilerProto.Entry entry) { private FileStatus doGetFileStatus(Path path, FilerProto.Entry entry) {
FilerProto.FuseAttributes attributes = entry.getAttributes(); FilerProto.FuseAttributes attributes = entry.getAttributes();
long length = SeaweedRead.totalSize(entry.getChunksList());
long length = SeaweedRead.fileSize(entry);
boolean isDir = entry.getIsDirectory(); boolean isDir = entry.getIsDirectory();
int block_replication = 1; int block_replication = 1;
int blocksize = 512; int blocksize = 512;
@ -185,7 +185,7 @@ public class SeaweedFileSystemStore {
entry.mergeFrom(existingEntry); entry.mergeFrom(existingEntry);
entry.getAttributesBuilder().setMtime(now); entry.getAttributesBuilder().setMtime(now);
LOG.debug("createFile merged entry path:{} entry:{} from:{}", path, entry, existingEntry); LOG.debug("createFile merged entry path:{} entry:{} from:{}", path, entry, existingEntry);
writePosition = SeaweedRead.totalSize(existingEntry.getChunksList());
writePosition = SeaweedRead.fileSize(existingEntry);
replication = existingEntry.getAttributes().getReplication(); replication = existingEntry.getAttributes().getReplication();
} }
} }

4
other/java/hdfs3/src/main/java/seaweed/hdfs/SeaweedInputStream.java

@ -41,7 +41,7 @@ public class SeaweedInputStream extends FSInputStream {
this.statistics = statistics; this.statistics = statistics;
this.path = path; this.path = path;
this.entry = entry; this.entry = entry;
this.contentLength = SeaweedRead.totalSize(entry.getChunksList());
this.contentLength = SeaweedRead.fileSize(entry);
this.bufferSize = bufferSize; this.bufferSize = bufferSize;
this.visibleIntervalList = SeaweedRead.nonOverlappingVisibleIntervals(filerGrpcClient, entry.getChunksList()); this.visibleIntervalList = SeaweedRead.nonOverlappingVisibleIntervals(filerGrpcClient, entry.getChunksList());
@ -87,7 +87,7 @@ public class SeaweedInputStream extends FSInputStream {
throw new IllegalArgumentException("requested read length is more than will fit after requested offset in buffer"); throw new IllegalArgumentException("requested read length is more than will fit after requested offset in buffer");
} }
long bytesRead = SeaweedRead.read(this.filerGrpcClient, this.visibleIntervalList, this.position, b, off, len);
long bytesRead = SeaweedRead.read(this.filerGrpcClient, this.visibleIntervalList, this.position, b, off, len, SeaweedRead.fileSize(entry));
if (bytesRead > Integer.MAX_VALUE) { if (bytesRead > Integer.MAX_VALUE) {
throw new IOException("Unexpected Content-Length"); throw new IOException("Unexpected Content-Length");
} }

58
test/random_access/pom.xml

@ -0,0 +1,58 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Maven build for the random_access test module.
Packaged as a plain jar; depends on Guava, the SLF4J API, Kryo, and JUnit (test scope only).
-->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>com.seaweedfs.test</groupId>
<artifactId>random_access</artifactId>
<packaging>jar</packaging>
<version>1.0-SNAPSHOT</version>
<properties>
<!-- Referenced by the Guava dependency below as ${guava.version}. -->
<guava.version>28.0-jre</guava.version>
</properties>
<dependencies>
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
<version>${guava.version}</version>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
<version>1.7.25</version>
</dependency>
<!-- Only dependency restricted to the test classpath. -->
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.12</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>com.esotericsoftware.kryo</groupId>
<artifactId>kryo</artifactId>
<version>2.24.0</version>
</dependency>
</dependencies>
<build>
<extensions>
<!-- NOTE(review): nothing else in this POM visibly uses this extension; confirm it is still needed. -->
<extension>
<groupId>kr.motd.maven</groupId>
<artifactId>os-maven-plugin</artifactId>
<version>1.6.2</version>
</extension>
</extensions>
<plugins>
<!-- Compile with Java 8 source and target levels. NOTE(review): no plugin version is pinned here. -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<configuration>
<source>8</source>
<target>8</target>
</configuration>
</plugin>
</plugins>
</build>
</project>

753
test/random_access/src/main/java/seaweedfs/client/btree/BTreePersistentIndexedCache.java

@ -0,0 +1,753 @@
/*
* Copyright 2010 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package seaweedfs.client.btree;
import com.google.common.collect.ImmutableSet;
import seaweedfs.client.btree.serialize.Serializer;
import seaweedfs.client.btree.serialize.kryo.KryoBackedDecoder;
import seaweedfs.client.btree.serialize.kryo.KryoBackedEncoder;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
// todo - stream serialised value to file
// todo - handle hash collisions (properly, this time)
// todo - don't store null links to child blocks in leaf index blocks
// todo - align block boundaries
// todo - thread safety control
// todo - merge small values into a single data block
// todo - discard when file corrupt
// todo - include data directly in index entry when serializer can guarantee small fixed sized data
// todo - free list leaks disk space
// todo - merge adjacent free blocks
// todo - use more efficient lookup for free block with nearest size
@SuppressWarnings("unchecked")
public class BTreePersistentIndexedCache<K, V> {
private static final Logger LOGGER = LoggerFactory.getLogger(BTreePersistentIndexedCache.class);
private final File cacheFile;
private final KeyHasher<K> keyHasher;
private final Serializer<V> serializer;
private final short maxChildIndexEntries;
private final int minIndexChildNodes;
private final StateCheckBlockStore store;
private HeaderBlock header;
/**
 * Creates a cache over {@code cacheFile} by delegating to the five-argument
 * constructor, passing 512 for both {@code maxChildIndexEntries} and
 * {@code maxFreeListEntries}.
 *
 * @param cacheFile file handed to the underlying block store
 * @param keySerializer serializer for keys
 * @param valueSerializer serializer for values
 */
public BTreePersistentIndexedCache(File cacheFile, Serializer<K> keySerializer, Serializer<V> valueSerializer) {
this(cacheFile, keySerializer, valueSerializer, (short) 512, 512);
}
public BTreePersistentIndexedCache(File cacheFile, Serializer<K> keySerializer, Serializer<V> valueSerializer,
short maxChildIndexEntries, int maxFreeListEntries) {
this.cacheFile = cacheFile;
this.keyHasher = new KeyHasher<K>(keySerializer);
this.serializer = valueSerializer;
this.maxChildIndexEntries = maxChildIndexEntries;
this.minIndexChildNodes = maxChildIndexEntries / 2;
BlockStore cachingStore = new CachingBlockStore(new FileBackedBlockStore(cacheFile), ImmutableSet.of(IndexBlock.class, FreeListBlockStore.FreeListBlock.class));
this.store = new StateCheckBlockStore(new FreeListBlockStore(cachingStore, maxFreeListEntries));
try {
open();
} catch (Exception e) {
throw new UncheckedIOException(String.format("Could not open %s.", this), e);
}
}
/**
 * Describes this cache as {@code cache <file name> (<file>)}.
 */
@Override
public String toString() {
    StringBuilder text = new StringBuilder("cache ");
    text.append(cacheFile.getName());
    text.append(" (").append(cacheFile).append(")");
    return text.toString();
}
/**
 * Opens the cache, discarding and recreating its contents when the store reports corruption.
 */
private void open() throws Exception {
    LOGGER.debug("Opening {}", this);
    boolean corrupt = false;
    try {
        doOpen();
    } catch (CorruptedCacheException e) {
        corrupt = true;
    }
    if (corrupt) {
        rebuild();
    }
}
/**
 * Opens the underlying store and loads the header block.
 */
private void doOpen() throws Exception {
    // Handed to the store as its init action: writes a fresh header and an empty root index block.
    final Runnable initialise = new Runnable() {
        @Override
        public void run() {
            header = new HeaderBlock();
            store.write(header);
            header.index.newRoot();
            store.flush();
        }
    };

    // Creates empty payload instances for the block types owned by this class.
    final BlockStore.Factory payloadFactory = new BlockStore.Factory() {
        @Override
        public Object create(Class<? extends BlockPayload> type) {
            if (type == HeaderBlock.class) {
                return new HeaderBlock();
            } else if (type == IndexBlock.class) {
                return new IndexBlock();
            } else if (type == DataBlock.class) {
                return new DataBlock();
            } else {
                throw new UnsupportedOperationException();
            }
        }
    };

    store.open(initialise, payloadFactory);
    header = store.readFirst(HeaderBlock.class);
}
/**
 * Looks up the value stored for {@code key}.
 *
 * @param key the key to look up
 * @return the stored value, or {@code null} when there is no entry or the cache was found to be corrupt
 *         (in which case it is discarded)
 * @throws UncheckedIOException on any other failure
 */
public V get(K key) {
    try {
        // The inner try stays nested so that a failure inside rebuild() is still wrapped by the outer catch.
        try {
            DataBlock found = header.getRoot().get(key);
            return found == null ? null : found.getValue();
        } catch (CorruptedCacheException e) {
            rebuild();
            return null;
        }
    } catch (Exception e) {
        throw new UncheckedIOException(String.format("Could not read entry '%s' from %s.", key, this), e);
    }
}
/**
 * Stores {@code value} under {@code key}, replacing any existing value.
 *
 * An existing data block is reused when the new serialized value fits in it; otherwise the old block is
 * removed and a new one is written and linked from the index.
 *
 * @throws UncheckedIOException if the entry cannot be written
 */
public void put(K key, V value) {
    try {
        final long hash = keyHasher.getHashCode(key);
        final Lookup found = header.getRoot().find(hash);

        final DataBlock replacement;
        if (found.entry == null) {
            // No entry for this hash yet.
            replacement = new DataBlock(value);
        } else {
            DataBlock existing = store.read(found.entry.dataBlock, DataBlock.class);
            DataBlockUpdateResult outcome = existing.useNewValue(value);
            if (outcome.isFailed()) {
                // Value no longer fits: drop the old block and reuse the bytes that were already serialized.
                store.remove(existing);
                replacement = new DataBlock(value, outcome.getSerializedValue());
            } else {
                replacement = null;
            }
        }

        if (replacement != null) {
            store.write(replacement);
            found.indexBlock.put(hash, replacement.getPos());
        }
        store.flush();
    } catch (Exception e) {
        throw new UncheckedIOException(String.format("Could not add entry '%s' to %s.", key, this), e);
    }
}
/**
 * Removes the entry for {@code key}, if there is one, together with its data block.
 *
 * @throws UncheckedIOException if the entry cannot be removed
 */
public void remove(K key) {
    try {
        Lookup found = header.getRoot().find(key);
        if (found.entry != null) {
            found.indexBlock.remove(found.entry);
            DataBlock orphan = store.read(found.entry.dataBlock, DataBlock.class);
            store.remove(orphan);
            store.flush();
        }
    } catch (Exception e) {
        throw new UncheckedIOException(String.format("Could not remove entry '%s' from %s.", key, this), e);
    }
}
/**
 * Reads the index block at {@code pos} and fills in its transient navigation fields.
 *
 * @param pos position of the index block
 * @param root the index root the block belongs to
 * @param parent the parent block, or {@code null} for the root block
 * @param index position of this block among the parent's children
 */
private IndexBlock load(BlockPointer pos, IndexRoot root, IndexBlock parent, int index) {
    IndexBlock loaded = store.read(pos, IndexBlock.class);
    loaded.parentEntryIndex = index;
    loaded.parent = parent;
    loaded.root = root;
    return loaded;
}
/**
 * Closes the cache and opens it again.
 *
 * @throws UncheckedIOException if closing or reopening fails
 */
public void reset() {
    close();
    try {
        open();
    } catch (Exception reopenFailure) {
        throw new UncheckedIOException(reopenFailure);
    }
}
/**
 * Closes the underlying block store.
 *
 * @throws UncheckedIOException if the store fails to close
 */
public void close() {
    LOGGER.debug("Closing {}", this);
    try {
        store.close();
    } catch (Exception closeFailure) {
        throw new UncheckedIOException(closeFailure);
    }
}
/**
 * Reports whether the underlying block store is open.
 */
public boolean isOpen() {
    final boolean open = store.isOpen();
    return open;
}
/**
 * Discards the contents of a corrupt cache and recreates it empty.
 *
 * If clearing fails the cache is closed instead; the failure is logged, not rethrown.
 */
private void rebuild() {
    LOGGER.warn("{} is corrupt. Discarding.", this);
    try {
        clear();
    } catch (Exception e) {
        // Pass the exception as the trailing argument so the logger records why the rebuild failed;
        // previously the cause was dropped.
        LOGGER.warn("{} couldn't be rebuilt. Closing.", this, e);
        close();
    }
}
/**
 * Checks the integrity of the index and the layout of its blocks.
 *
 * @throws UncheckedIOException if any inconsistency is found
 */
public void verify() {
    try {
        doVerify();
    } catch (Exception problem) {
        String message = String.format("Some problems were found when checking the integrity of %s.", this);
        throw new UncheckedIOException(message, problem);
    }
}
/**
 * Walks the whole tree from the header, then checks that no two visited blocks overlap in the file.
 */
private void doVerify() throws Exception {
    List<BlockPayload> visited = new ArrayList<BlockPayload>();
    HeaderBlock firstBlock = store.readFirst(HeaderBlock.class);
    visited.add(firstBlock);
    verifyTree(firstBlock.getRoot(), "", visited, Long.MAX_VALUE, true);

    Comparator<BlockPayload> byPosition = new Comparator<BlockPayload>() {
        @Override
        public int compare(BlockPayload left, BlockPayload right) {
            return left.getPos().compareTo(right.getPos());
        }
    };
    Collections.sort(visited, byPosition);

    // With the blocks ordered by position, each one must end before its successor starts.
    Block previous = null;
    for (BlockPayload payload : visited) {
        Block current = payload.getBlock();
        if (previous != null && previous.getPos().getPos() + previous.getSize() > current.getPos().getPos()) {
            throw new IOException(String.format("%s overlaps with %s", previous, current));
        }
        previous = current;
    }
}
/**
 * Recursively checks one index block and everything below it.
 *
 * @param current the index block to check
 * @param prefix empty for the root block; one space longer for each level below it
 * @param blocks receives every block visited
 * @param maxValue exclusive upper bound for the hash codes in this block
 * @param loadData whether to read each entry's data block as well
 * @throws IOException if the block violates a B-tree invariant
 */
private void verifyTree(IndexBlock current, String prefix, Collection<BlockPayload> blocks, long maxValue,
                        boolean loadData) throws Exception {
    blocks.add(current);

    final boolean isRoot = prefix.equals("");
    final int entryCount = current.entries.size();
    // Only non-root blocks have a lower bound on their entry count.
    if (!isRoot && entryCount < maxChildIndexEntries / 2) {
        throw new IOException(String.format("Too few entries found in %s", current));
    }
    if (entryCount > maxChildIndexEntries) {
        throw new IOException(String.format("Too many entries found in %s", current));
    }

    // A leaf has no child pointers at all, so it must have no tail either.
    final boolean leaf = entryCount == 0 || current.entries.get(0).childIndexBlock.isNull();
    if (leaf != current.tailPos.isNull()) {
        throw new IOException(String.format("Mismatched leaf/tail-node in %s", current));
    }

    final String childPrefix = " " + prefix;
    long previousHash = Long.MIN_VALUE;
    for (IndexEntry entry : current.entries) {
        final boolean hasChild = !entry.childIndexBlock.isNull();
        if (leaf == hasChild) {
            throw new IOException(String.format("Mismatched leaf/non-leaf entry in %s", current));
        }
        if (entry.hashCode >= maxValue || entry.hashCode <= previousHash) {
            throw new IOException(String.format("Out-of-order key in %s", current));
        }
        previousHash = entry.hashCode;
        if (hasChild) {
            // Everything in the child subtree must sort below this entry.
            IndexBlock child = store.read(entry.childIndexBlock, IndexBlock.class);
            verifyTree(child, childPrefix, blocks, entry.hashCode, loadData);
        }
        if (loadData) {
            DataBlock data = store.read(entry.dataBlock, DataBlock.class);
            blocks.add(data);
        }
    }

    if (!current.tailPos.isNull()) {
        IndexBlock tail = store.read(current.tailPos, IndexBlock.class);
        verifyTree(tail, childPrefix, blocks, maxValue, loadData);
    }
}
/**
 * Discards every block in the store, then closes and reopens the cache.
 *
 * @throws UncheckedIOException if the cache cannot be closed or reopened
 */
public void clear() {
    store.clear();
    close();
    try {
        doOpen();
    } catch (Exception reopenFailure) {
        throw new UncheckedIOException(reopenFailure);
    }
}
/**
 * Tracks the position of the root index block on behalf of the header block that owns it.
 */
private class IndexRoot {
    private BlockPointer rootPos = BlockPointer.start();
    private HeaderBlock owner;

    private IndexRoot(HeaderBlock owner) {
        this.owner = owner;
    }

    /**
     * Records a new root position and writes the owning header block.
     */
    public void setRootPos(BlockPointer rootPos) {
        this.rootPos = rootPos;
        store.write(owner);
    }

    /**
     * Loads the current root index block; it has no parent.
     */
    public IndexBlock getRoot() {
        return load(rootPos, this, null, 0);
    }

    /**
     * Writes a new empty index block and makes it the root.
     */
    public IndexBlock newRoot() {
        IndexBlock fresh = new IndexBlock();
        store.write(fresh);
        setRootPos(fresh.getPos());
        return fresh;
    }
}
/**
 * Header payload: stores the position of the root index block and the index block capacity the file was
 * written with.
 */
private class HeaderBlock extends BlockPayload {
    private IndexRoot index;

    private HeaderBlock() {
        index = new IndexRoot(this);
    }

    @Override
    protected byte getType() {
        return 0x55;
    }

    /**
     * One long for the root position plus one short for the index block capacity.
     */
    @Override
    protected int getSize() {
        return Block.LONG_SIZE + Block.SHORT_SIZE;
    }

    /**
     * Reads the root position and rejects the block as corrupt when the stored capacity differs from the
     * configured one.
     */
    @Override
    protected void read(DataInputStream instr) throws Exception {
        index.rootPos = BlockPointer.pos(instr.readLong());
        final short storedCapacity = instr.readShort();
        if (storedCapacity == maxChildIndexEntries) {
            return;
        }
        throw blockCorruptedException();
    }

    @Override
    protected void write(DataOutputStream outstr) throws Exception {
        outstr.writeLong(index.rootPos.getPos());
        outstr.writeShort(maxChildIndexEntries);
    }

    /**
     * Loads the root index block.
     */
    public IndexBlock getRoot() throws Exception {
        return index.getRoot();
    }
}
/**
 * One node of the B-tree index. Holds entries sorted by hash code; each entry points at a data block and,
 * in non-leaf nodes, at the child index block holding the smaller hash codes. {@code tailPos} points at the
 * child holding hash codes greater than every entry in this block.
 */
private class IndexBlock extends BlockPayload {
// Kept sorted by hash code: lookups use binary search and inserts go in at the insertion point.
private final List<IndexEntry> entries = new ArrayList<IndexEntry>();
// Child for hash codes above the last entry; null pointer when there is no such child.
private BlockPointer tailPos = BlockPointer.start();
// Transient fields: assigned by load(), never written by write()
private IndexBlock parent;
private int parentEntryIndex;
private IndexRoot root;
@Override
protected byte getType() {
return 0x77;
}
/**
 * Fixed size: entry count (int), tail pointer (long) and three longs for each of the
 * maxChildIndexEntries possible entries.
 */
@Override
protected int getSize() {
return Block.INT_SIZE + Block.LONG_SIZE + (3 * Block.LONG_SIZE) * maxChildIndexEntries;
}
/**
 * Replaces the entries and tail pointer of this block with those read from the stream.
 */
@Override
public void read(DataInputStream instr) throws IOException {
int count = instr.readInt();
entries.clear();
for (int i = 0; i < count; i++) {
IndexEntry entry = new IndexEntry();
entry.hashCode = instr.readLong();
entry.dataBlock = BlockPointer.pos(instr.readLong());
entry.childIndexBlock = BlockPointer.pos(instr.readLong());
entries.add(entry);
}
tailPos = BlockPointer.pos(instr.readLong());
}
/**
 * Writes the entry count, each entry as (hash code, data position, child position), then the tail pointer.
 */
@Override
public void write(DataOutputStream outstr) throws IOException {
outstr.writeInt(entries.size());
for (IndexEntry entry : entries) {
outstr.writeLong(entry.hashCode);
outstr.writeLong(entry.dataBlock.getPos());
outstr.writeLong(entry.childIndexBlock.getPos());
}
outstr.writeLong(tailPos.getPos());
}
/**
 * Points the entry for {@code hashCode} at the data block at {@code pos}, inserting a new entry if this
 * block has none for that hash code, then splits the block if it has grown too large.
 */
public void put(long hashCode, BlockPointer pos) throws Exception {
int index = Collections.binarySearch(entries, new IndexEntry(hashCode));
IndexEntry entry;
if (index >= 0) {
entry = entries.get(index);
} else {
// New entries are only expected in blocks without a tail, and are created without a child pointer.
assert tailPos.isNull();
entry = new IndexEntry();
entry.hashCode = hashCode;
entry.childIndexBlock = BlockPointer.start();
// binarySearch returns -(insertion point) - 1 when the key is absent.
index = -index - 1;
entries.add(index, entry);
}
entry.dataBlock = pos;
store.write(this);
maybeSplit();
}
/**
 * Splits this block when it holds more than maxChildIndexEntries entries: the middle entry moves up to
 * the parent and the entries after it move to a new sibling block.
 */
private void maybeSplit() throws Exception {
if (entries.size() > maxChildIndexEntries) {
int splitPos = entries.size() / 2;
IndexEntry splitEntry = entries.remove(splitPos);
if (parent == null) {
// Splitting the root: create a new root block to receive the middle entry.
parent = root.newRoot();
}
IndexBlock sibling = new IndexBlock();
store.write(sibling);
// subList is a view, so clearing it removes the moved entries from this block.
List<IndexEntry> siblingEntries = entries.subList(splitPos, entries.size());
sibling.entries.addAll(siblingEntries);
siblingEntries.clear();
// The sibling takes over the old tail; the middle entry's former child becomes this block's tail.
sibling.tailPos = tailPos;
tailPos = splitEntry.childIndexBlock;
splitEntry.childIndexBlock = BlockPointer.start();
parent.add(this, splitEntry, sibling);
}
}
/**
 * Inserts {@code entry} into this block between the children {@code left} and {@code right}: whatever
 * pointed at {@code left} now points at {@code right}, and the new entry points at {@code left}.
 */
private void add(IndexBlock left, IndexEntry entry, IndexBlock right) throws Exception {
int index = left.parentEntryIndex;
if (index < entries.size()) {
IndexEntry parentEntry = entries.get(index);
assert parentEntry.childIndexBlock.equals(left.getPos());
parentEntry.childIndexBlock = right.getPos();
} else {
// left was reached through the tail pointer (or this is a freshly created root with no tail yet).
assert index == entries.size() && (tailPos.isNull() || tailPos.equals(left.getPos()));
tailPos = right.getPos();
}
entries.add(index, entry);
entry.childIndexBlock = left.getPos();
store.write(this);
maybeSplit();
}
/**
 * Returns the data block for {@code key}, searching this block and its descendants, or null if absent.
 */
public DataBlock get(K key) throws Exception {
Lookup lookup = find(key);
if (lookup.entry == null) {
return null;
}
return store.read(lookup.entry.dataBlock, DataBlock.class);
}
/**
 * Hashes {@code key} and searches for the resulting hash code.
 */
public Lookup find(K key) throws Exception {
long checksum = keyHasher.getHashCode(key);
return find(checksum);
}
/**
 * Searches this block and, if needed, the matching child for {@code hashCode}. The result holds the block
 * where the search ended and the matching entry, or a null entry when the hash code is absent.
 */
private Lookup find(long hashCode) throws Exception {
int index = Collections.binarySearch(entries, new IndexEntry(hashCode));
if (index >= 0) {
return new Lookup(this, entries.get(index));
}
// Not in this block: descend into the child at the insertion point, or the tail when past the last entry.
index = -index - 1;
BlockPointer childBlockPos;
if (index == entries.size()) {
childBlockPos = tailPos;
} else {
childBlockPos = entries.get(index).childIndexBlock;
}
if (childBlockPos.isNull()) {
return new Lookup(this, null);
}
IndexBlock childBlock = load(childBlockPos, root, this, index);
return childBlock.find(hashCode);
}
/**
 * Removes {@code entry} from this block and rebalances. An entry with a child subtree is replaced by the
 * highest entry from that subtree.
 */
public void remove(IndexEntry entry) throws Exception {
int index = entries.indexOf(entry);
assert index >= 0;
entries.remove(index);
store.write(this);
if (entry.childIndexBlock.isNull()) {
maybeMerge();
} else {
// Not a leaf node. Move up an entry from a leaf node, then possibly merge the leaf node
IndexBlock leafBlock = load(entry.childIndexBlock, root, this, index);
leafBlock = leafBlock.findHighestLeaf();
IndexEntry highestEntry = leafBlock.entries.remove(leafBlock.entries.size() - 1);
highestEntry.childIndexBlock = entry.childIndexBlock;
entries.add(index, highestEntry);
store.write(leafBlock);
leafBlock.maybeMerge();
}
}
/**
 * Restores the minimum entry count after a removal by merging with a sibling, preferring the left one.
 * When the merged block is too large it is split again, which spreads the entries across two blocks.
 */
private void maybeMerge() throws Exception {
if (parent == null) {
// This is the root block. Can have any number of children <= maxChildIndexEntries
if (entries.size() == 0 && !tailPos.isNull()) {
// This is an empty root block, discard it
header.index.setRootPos(tailPos);
store.remove(this);
}
return;
}
// This is not the root block. Must have children >= minIndexChildNodes
if (entries.size() >= minIndexChildNodes) {
return;
}
// Attempt to merge with the left sibling
IndexBlock left = parent.getPrevious(this);
if (left != null) {
assert entries.size() + left.entries.size() <= maxChildIndexEntries * 2;
if (left.entries.size() > minIndexChildNodes) {
// There are enough entries in this block and the left sibling to make up 2 blocks, so merge them
// and let the split, if one is needed, redistribute the entries between two blocks
left.mergeFrom(this);
left.maybeSplit();
return;
} else {
// There are only enough entries to make up 1 block, so move the entries of this block into the
// left sibling and discard this block. Might also need to merge the parent
left.mergeFrom(this);
parent.maybeMerge();
return;
}
}
// Attempt to merge with the right sibling
IndexBlock right = parent.getNext(this);
if (right != null) {
assert entries.size() + right.entries.size() <= maxChildIndexEntries * 2;
if (right.entries.size() > minIndexChildNodes) {
// There are enough entries in this block and the right sibling to make up 2 blocks, so merge them
// and let the split, if one is needed, redistribute the entries between two blocks
mergeFrom(right);
maybeSplit();
return;
} else {
// There are only enough entries to make up 1 block, so move the entries of the right sibling into
// this block and discard the right sibling. Might also need to merge the parent
mergeFrom(right);
parent.maybeMerge();
return;
}
}
// Should not happen
throw new IllegalStateException(String.format("%s does not have any siblings.", getBlock()));
}
/**
 * Appends the parent entry separating this block from {@code right}, followed by all of {@code right}'s
 * entries, to this block, then removes {@code right} from the store.
 */
private void mergeFrom(IndexBlock right) throws Exception {
// Pull the separating entry down out of the parent.
IndexEntry newChildEntry = parent.entries.remove(parentEntryIndex);
// Whatever pointed at right in the parent must now point at this block.
if (right.getPos().equals(parent.tailPos)) {
parent.tailPos = getPos();
} else {
IndexEntry newParentEntry = parent.entries.get(parentEntryIndex);
assert newParentEntry.childIndexBlock.equals(right.getPos());
newParentEntry.childIndexBlock = getPos();
}
entries.add(newChildEntry);
entries.addAll(right.entries);
// The pulled-down entry adopts this block's old tail as its child; right's tail becomes the new tail.
newChildEntry.childIndexBlock = tailPos;
tailPos = right.tailPos;
store.write(parent);
store.write(this);
store.remove(right);
}
/**
 * Loads the child of this block that follows {@code indexBlock}, or returns null if it is the last child.
 */
private IndexBlock getNext(IndexBlock indexBlock) throws Exception {
int index = indexBlock.parentEntryIndex + 1;
if (index > entries.size()) {
return null;
}
if (index == entries.size()) {
return load(tailPos, root, this, index);
}
return load(entries.get(index).childIndexBlock, root, this, index);
}
/**
 * Loads the child of this block that precedes {@code indexBlock}, or returns null if it is the first child.
 */
private IndexBlock getPrevious(IndexBlock indexBlock) throws Exception {
int index = indexBlock.parentEntryIndex - 1;
if (index < 0) {
return null;
}
return load(entries.get(index).childIndexBlock, root, this, index);
}
/**
 * Follows tail pointers down from this block until reaching a block without a tail.
 */
private IndexBlock findHighestLeaf() throws Exception {
if (tailPos.isNull()) {
return this;
}
return load(tailPos, root, this, entries.size()).findHighestLeaf();
}
}
/**
 * One index entry: a hash code, the position of its data block and the position of its child index block.
 * Entries are ordered by hash code only.
 */
private static class IndexEntry implements Comparable<IndexEntry> {
    long hashCode;
    BlockPointer dataBlock;
    BlockPointer childIndexBlock;

    private IndexEntry() {
    }

    /**
     * Creates an entry carrying only a hash code, used as a search probe.
     */
    private IndexEntry(long hashCode) {
        this.hashCode = hashCode;
    }

    @Override
    public int compareTo(IndexEntry indexEntry) {
        final long theirs = indexEntry.hashCode;
        if (hashCode == theirs) {
            return 0;
        }
        return hashCode < theirs ? -1 : 1;
    }
}
/**
 * Result of an index search: the index block where the search ended and the matching entry, which is
 * {@code null} when nothing matched.
 */
private class Lookup {
    final IndexBlock indexBlock;
    final IndexEntry entry;

    private Lookup(IndexBlock indexBlock, IndexEntry entry) {
        this.entry = entry;
        this.indexBlock = indexBlock;
    }
}
/**
 * Payload holding one serialized value. {@code size} is the capacity reserved for the value's bytes; the
 * value itself is deserialized on first access.
 */
private class DataBlock extends BlockPayload {
    private int size;
    private StreamByteBuffer buffer;
    private V value;

    private DataBlock() {
    }

    /**
     * Serializes {@code value} and sizes the block to fit it.
     */
    public DataBlock(V value) throws Exception {
        this.value = value;
        setValue(value);
        size = buffer.totalBytesUnread();
    }

    /**
     * Uses an already serialized {@code buffer} for {@code value} and sizes the block to fit it.
     */
    public DataBlock(V value, StreamByteBuffer buffer) throws Exception {
        this.buffer = buffer;
        this.value = value;
        size = buffer.totalBytesUnread();
    }

    /**
     * Serializes {@code value} into a fresh buffer. Neither the cached value nor {@code size} is updated.
     */
    public void setValue(V value) throws Exception {
        buffer = StreamByteBuffer.createWithChunkSizeInDefaultRange(size);
        KryoBackedEncoder sink = new KryoBackedEncoder(buffer.getOutputStream());
        serializer.write(sink, value);
        sink.flush();
    }

    /**
     * Returns the value, deserializing it from the buffer (and releasing the buffer) on first use.
     */
    public V getValue() throws Exception {
        if (value != null) {
            return value;
        }
        value = serializer.read(new KryoBackedDecoder(buffer.getInputStream()));
        buffer = null;
        return value;
    }

    @Override
    protected byte getType() {
        return 0x33;
    }

    /**
     * Two ints (capacity and byte count) followed by the reserved capacity.
     */
    @Override
    protected int getSize() {
        return 2 * Block.INT_SIZE + size;
    }

    @Override
    public void read(DataInputStream instr) throws Exception {
        size = instr.readInt();
        final int payloadLength = instr.readInt();
        buffer = StreamByteBuffer.of(instr, payloadLength);
    }

    /**
     * Writes the capacity, the number of serialized bytes and the bytes themselves, then drops the buffer.
     */
    @Override
    public void write(DataOutputStream outstr) throws Exception {
        outstr.writeInt(size);
        outstr.writeInt(buffer.totalBytesUnread());
        buffer.writeTo(outstr);
        buffer = null;
    }

    /**
     * Tries to store {@code value} in this block in place.
     *
     * @return success when the serialized value fits within the block's capacity; otherwise a failed result
     *         carrying the serialized bytes so the caller can put them in a new block
     */
    public DataBlockUpdateResult useNewValue(V value) throws Exception {
        setValue(value);
        if (buffer.totalBytesUnread() > size) {
            return DataBlockUpdateResult.failed(buffer);
        }
        this.value = value;
        store.write(this);
        return DataBlockUpdateResult.success();
    }
}
/**
 * Outcome of trying to update a data block in place. A failed result carries the value's serialized bytes.
 */
private static class DataBlockUpdateResult {
    private static final DataBlockUpdateResult SUCCESS = new DataBlockUpdateResult(true, null);

    private final boolean success;
    private final StreamByteBuffer serializedValue;

    private DataBlockUpdateResult(boolean success, StreamByteBuffer serializedValue) {
        this.serializedValue = serializedValue;
        this.success = success;
    }

    /**
     * Returns the shared success instance, which carries no serialized value.
     */
    static DataBlockUpdateResult success() {
        return SUCCESS;
    }

    /**
     * Creates a failed result holding the serialized bytes.
     */
    static DataBlockUpdateResult failed(StreamByteBuffer serializedValue) {
        return new DataBlockUpdateResult(false, serializedValue);
    }

    public boolean isFailed() {
        return success ? false : true;
    }

    public StreamByteBuffer getSerializedValue() {
        return serializedValue;
    }
}
}

59
test/random_access/src/main/java/seaweedfs/client/btree/Block.java

@ -0,0 +1,59 @@
/*
* Copyright 2009 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package seaweedfs.client.btree;
/**
 * A positioned, sized region of the store that carries a {@link BlockPayload}. The block and its payload
 * reference each other until {@link #detach()} is called.
 */
public abstract class Block {
    static final int LONG_SIZE = 8;
    static final int INT_SIZE = 4;
    static final int SHORT_SIZE = 2;

    private BlockPayload payload;

    /**
     * Binds this block to {@code payload} and registers this block on the payload.
     */
    protected Block(BlockPayload payload) {
        this.payload = payload;
        payload.setBlock(this);
    }

    public BlockPayload getPayload() {
        return payload;
    }

    /**
     * Breaks the link between this block and its payload in both directions.
     */
    protected void detach() {
        payload.setBlock(null);
        payload = null;
    }

    public abstract BlockPointer getPos();

    public abstract int getSize();

    public abstract RuntimeException blockCorruptedException();

    /**
     * Renders the payload's simple class name followed by the block position.
     */
    @Override
    public String toString() {
        final String payloadName = payload.getClass().getSimpleName();
        return payloadName + " " + getPos();
    }

    /**
     * Returns the position immediately after this block.
     */
    public BlockPointer getNextPos() {
        final long end = getPos().getPos() + getSize();
        return BlockPointer.pos(end);
    }

    public abstract boolean hasPos();

    public abstract void setPos(BlockPointer pos);

    public abstract void setSize(int size);
}

51
test/random_access/src/main/java/seaweedfs/client/btree/BlockPayload.java

@ -0,0 +1,51 @@
/*
* Copyright 2009 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package seaweedfs.client.btree;
import java.io.DataInputStream;
import java.io.DataOutputStream;
/**
 * The content of a {@link Block}. Subclasses define the payload's type tag, its size, and how it is read
 * from and written to a stream; position queries are delegated to the owning block.
 */
public abstract class BlockPayload {
    private Block block;

    public Block getBlock() {
        return block;
    }

    public void setBlock(Block block) {
        this.block = block;
    }

    /**
     * Returns the position of the owning block.
     */
    public BlockPointer getPos() {
        final Block owner = getBlock();
        return owner.getPos();
    }

    /**
     * Returns the position immediately after the owning block.
     */
    public BlockPointer getNextPos() {
        final Block owner = getBlock();
        return owner.getNextPos();
    }

    protected abstract int getSize();

    protected abstract byte getType();

    protected abstract void read(DataInputStream inputStream) throws Exception;

    protected abstract void write(DataOutputStream outputStream) throws Exception;

    /**
     * Asks the owning block for the exception that reports this block as corrupt.
     */
    protected RuntimeException blockCorruptedException() {
        final Block owner = getBlock();
        return owner.blockCorruptedException();
    }
}

75
test/random_access/src/main/java/seaweedfs/client/btree/BlockPointer.java

@ -0,0 +1,75 @@
/*
* Copyright 2009 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package seaweedfs.client.btree;
import com.google.common.primitives.Longs;
/**
 * An offset of a block within the store. The offset -1 is the null pointer, represented by a single
 * shared instance.
 */
public class BlockPointer implements Comparable<BlockPointer> {
    private static final BlockPointer NULL = new BlockPointer(-1);

    /**
     * Returns the null pointer.
     */
    public static BlockPointer start() {
        return NULL;
    }

    /**
     * Returns a pointer for the given offset.
     *
     * @param pos the offset; -1 yields the null pointer
     * @return the pointer for {@code pos}
     * @throws CorruptedCacheException if {@code pos} is less than -1
     */
    public static BlockPointer pos(long pos) {
        if (pos < -1) {
            // Space added after "was": the message previously ran the text and the value together.
            throw new CorruptedCacheException("block pointer must be >= -1, but was " + pos);
        }
        if (pos == -1) {
            return NULL;
        }
        return new BlockPointer(pos);
    }

    private final long pos;

    private BlockPointer(long pos) {
        this.pos = pos;
    }

    /**
     * Returns true when this pointer does not refer to a block (its offset is negative).
     */
    public boolean isNull() {
        return pos < 0;
    }

    public long getPos() {
        return pos;
    }

    @Override
    public String toString() {
        return String.valueOf(pos);
    }

    /**
     * Two pointers are equal when they are of the same class and hold the same offset.
     */
    @Override
    public boolean equals(Object obj) {
        if (obj == null || obj.getClass() != getClass()) {
            return false;
        }
        BlockPointer other = (BlockPointer) obj;
        return pos == other.pos;
    }

    @Override
    public int hashCode() {
        return Longs.hashCode(pos);
    }

    /**
     * Orders pointers by offset.
     */
    @Override
    public int compareTo(BlockPointer o) {
        return Longs.compare(pos, o.pos);
    }
}

68
test/random_access/src/main/java/seaweedfs/client/btree/BlockStore.java

@ -0,0 +1,68 @@
/*
* Copyright 2009 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package seaweedfs.client.btree;
/**
 * Storage for {@link BlockPayload} instances addressed by {@link BlockPointer}.
 */
public interface BlockStore {
/**
 * Opens this store, calling the given action if the store is empty.
 *
 * @param initAction run when the store is empty
 * @param factory creates payload instances for the block types read from this store
 */
void open(Runnable initAction, Factory factory);
/**
 * Closes this store.
 */
void close();
/**
 * Discards all blocks from this store.
 */
void clear();
/**
 * Removes the given block from this store.
 */
void remove(BlockPayload block);
/**
 * Reads the first block from this store.
 *
 * @param payloadType the expected payload type of the first block
 */
<T extends BlockPayload> T readFirst(Class<T> payloadType);
/**
 * Reads a block from this store.
 *
 * @param pos the position of the block
 * @param payloadType the expected payload type of the block
 */
<T extends BlockPayload> T read(BlockPointer pos, Class<T> payloadType);
/**
 * Writes a block to this store, adding the block if required.
 */
void write(BlockPayload block);
/**
 * Adds a new block to this store. Allocates space for the block, but does not write the contents of the block
 * until {@link #write(BlockPayload)} is called.
 */
void attach(BlockPayload block);
/**
 * Flushes any pending updates for this store.
 */
void flush();
/**
 * Creates an empty payload instance for a given payload type.
 */
interface Factory {
Object create(Class<? extends BlockPayload> type);
}
}

30
test/random_access/src/main/java/seaweedfs/client/btree/BufferCaster.java

@ -0,0 +1,30 @@
/*
* Copyright 2018 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package seaweedfs.client.btree;
import java.nio.Buffer;
public class BufferCaster {
/**
* Without this cast, when the code compiled by Java 9+ is executed on Java 8, it will throw
* java.lang.NoSuchMethodError: Method flip()Ljava/nio/ByteBuffer; does not exist in class java.nio.ByteBuffer
*/
@SuppressWarnings("RedundantCast")
public static <T extends Buffer> Buffer cast(T byteBuffer) {
return (Buffer) byteBuffer;
}
}

74
test/random_access/src/main/java/seaweedfs/client/btree/ByteInput.java

@ -0,0 +1,74 @@
/*
* Copyright 2014 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package seaweedfs.client.btree;
import com.google.common.io.CountingInputStream;
import java.io.BufferedInputStream;
import java.io.DataInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.RandomAccessFile;
/**
 * Reads a stream of bytes starting at a chosen offset of a {@link RandomAccessFile}, counting the bytes
 * consumed since the read began.
 */
class ByteInput {
    private final RandomAccessFile file;
    private final ResettableBufferedInputStream bufferedInputStream;
    private CountingInputStream countingInputStream;

    public ByteInput(RandomAccessFile file) {
        this.file = file;
        bufferedInputStream = new ResettableBufferedInputStream(new RandomAccessFileInputStream(file));
    }

    /**
     * Positions the file at {@code offset}, drops anything still buffered and returns a stream reading
     * from there.
     */
    public DataInputStream start(long offset) throws IOException {
        file.seek(offset);
        bufferedInputStream.clear();
        final CountingInputStream counter = new CountingInputStream(bufferedInputStream);
        countingInputStream = counter;
        return new DataInputStream(counter);
    }

    /**
     * Returns how many bytes have been read since {@link #start(long)} was called.
     */
    public long getBytesRead() {
        return countingInputStream.getCount();
    }

    /**
     * Ends the current read by releasing the byte counter.
     */
    public void done() {
        countingInputStream = null;
    }

    /**
     * A buffered stream whose buffered content can be thrown away.
     */
    private static class ResettableBufferedInputStream extends BufferedInputStream {
        ResettableBufferedInputStream(InputStream input) {
            super(input);
        }

        /**
         * Marks the buffer as empty so the next read goes to the underlying stream.
         */
        void clear() {
            pos = 0;
            count = 0;
        }
    }
}

74
test/random_access/src/main/java/seaweedfs/client/btree/ByteOutput.java

@ -0,0 +1,74 @@
/*
* Copyright 2014 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package seaweedfs.client.btree;
import com.google.common.io.CountingOutputStream;
import java.io.BufferedOutputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.io.RandomAccessFile;
/**
 * Writes a stream of bytes starting at a chosen offset of a {@link RandomAccessFile}, counting the bytes
 * written since the write began.
 */
class ByteOutput {
    private final RandomAccessFile file;
    private final ResettableBufferedOutputStream bufferedOutputStream;
    private CountingOutputStream countingOutputStream;

    public ByteOutput(RandomAccessFile file) {
        this.file = file;
        bufferedOutputStream = new ResettableBufferedOutputStream(new RandomAccessFileOutputStream(file));
    }

    /**
     * Positions the file at {@code offset}, which can be beyond the current length of the file, drops
     * anything still buffered and returns a stream writing from there.
     */
    public DataOutputStream start(long offset) throws IOException {
        file.seek(offset);
        bufferedOutputStream.clear();
        final CountingOutputStream counter = new CountingOutputStream(bufferedOutputStream);
        countingOutputStream = counter;
        return new DataOutputStream(counter);
    }

    /**
     * Returns the number of bytes written since {@link #start(long)} was called.
     */
    public long getBytesWritten() {
        return countingOutputStream.getCount();
    }

    /**
     * Ends the current write: flushes the stream, then releases the byte counter.
     */
    public void done() throws IOException {
        countingOutputStream.flush();
        countingOutputStream = null;
    }

    /**
     * A buffered stream whose pending bytes can be thrown away.
     */
    private static class ResettableBufferedOutputStream extends BufferedOutputStream {
        ResettableBufferedOutputStream(OutputStream output) {
            super(output);
        }

        /**
         * Marks the buffer as empty without writing its content.
         */
        void clear() {
            count = 0;
        }
    }
}

129
test/random_access/src/main/java/seaweedfs/client/btree/CachingBlockStore.java

@ -0,0 +1,129 @@
/*
* Copyright 2009 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package seaweedfs.client.btree;
import com.google.common.cache.Cache;
import com.google.common.cache.CacheBuilder;
import com.google.common.collect.ImmutableSet;
import javax.annotation.Nullable;
import java.util.Collection;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.Map;
/**
 * A {@link BlockStore} decorator that buffers written blocks until {@link #flush()} and keeps
 * recently seen blocks of selected payload types in a bounded in-memory cache.
 */
public class CachingBlockStore implements BlockStore {
    /** The store every operation is ultimately delegated to. */
    private final BlockStore store;
    /** Blocks written but not yet handed to {@link #store}, keyed by position, in insertion order. */
    private final Map<BlockPointer, BlockPayload> dirty = new LinkedHashMap<BlockPointer, BlockPayload>();
    /** Bounded cache (at most 100 entries) of blocks whose type is listed in {@link #cacheableBlockTypes}. */
    private final Cache<BlockPointer, BlockPayload> indexBlockCache = CacheBuilder.newBuilder().maximumSize(100).concurrencyLevel(1).build();
    /** Payload classes that are eligible for {@link #indexBlockCache}. */
    private final ImmutableSet<Class<? extends BlockPayload>> cacheableBlockTypes;

    /**
     * @param store the store to decorate
     * @param cacheableBlockTypes payload classes that may be kept in the read cache; copied defensively
     */
    public CachingBlockStore(BlockStore store, Collection<Class<? extends BlockPayload>> cacheableBlockTypes) {
        this.cacheableBlockTypes = ImmutableSet.copyOf(cacheableBlockTypes);
        this.store = store;
    }

    /** Opens the underlying store with the given arguments. */
    @Override
    public void open(Runnable initAction, Factory factory) {
        store.open(initAction, factory);
    }

    /** Flushes pending writes, empties the read cache and closes the underlying store. */
    @Override
    public void close() {
        flush();
        indexBlockCache.invalidateAll();
        store.close();
    }

    /** Discards pending writes and cached blocks, then clears the underlying store. */
    @Override
    public void clear() {
        dirty.clear();
        indexBlockCache.invalidateAll();
        store.clear();
    }

    /** Writes every pending block to the underlying store, in insertion order, then flushes it. */
    @Override
    public void flush() {
        for (Iterator<BlockPayload> pending = dirty.values().iterator(); pending.hasNext(); ) {
            BlockPayload next = pending.next();
            // The entry is dropped before the write is attempted, exactly one block at a time.
            pending.remove();
            store.write(next);
        }
        store.flush();
    }

    /** Delegates to the underlying store. */
    @Override
    public void attach(BlockPayload block) {
        store.attach(block);
    }

    /** Forgets any pending write or cached copy of the block, then removes it from the underlying store. */
    @Override
    public void remove(BlockPayload block) {
        BlockPointer pos = block.getPos();
        dirty.remove(pos);
        if (isCacheable(block)) {
            indexBlockCache.invalidate(pos);
        }
        store.remove(block);
    }

    /** Reads the first block from the underlying store and caches it when its type is cacheable. */
    @Override
    public <T extends BlockPayload> T readFirst(Class<T> payloadType) {
        T first = store.readFirst(payloadType);
        maybeCache(first);
        return first;
    }

    /**
     * Looks the block up among pending writes first, then in the read cache, and finally in the
     * underlying store (caching the result when its type is cacheable).
     */
    @Override
    public <T extends BlockPayload> T read(BlockPointer pos, Class<T> payloadType) {
        T pendingWrite = payloadType.cast(dirty.get(pos));
        if (pendingWrite != null) {
            return pendingWrite;
        }
        T cached = maybeGetFromCache(pos, payloadType);
        if (cached != null) {
            return cached;
        }
        T loaded = store.read(pos, payloadType);
        maybeCache(loaded);
        return loaded;
    }

    /** Returns the cached block at {@code pos}, or null when the type is not cacheable or nothing is cached. */
    @Nullable
    private <T extends BlockPayload> T maybeGetFromCache(BlockPointer pos, Class<T> payloadType) {
        if (!cacheableBlockTypes.contains(payloadType)) {
            return null;
        }
        return payloadType.cast(indexBlockCache.getIfPresent(pos));
    }

    /** Attaches the block to the underlying store, caches it if eligible and records it as pending. */
    @Override
    public void write(BlockPayload block) {
        store.attach(block);
        maybeCache(block);
        dirty.put(block.getPos(), block);
    }

    /** Puts the block into the read cache when its runtime class is cacheable. */
    private <T extends BlockPayload> void maybeCache(T block) {
        if (!isCacheable(block)) {
            return;
        }
        indexBlockCache.put(block.getPos(), block);
    }

    /** Tells whether the block's runtime class is one of the configured cacheable types. */
    private <T extends BlockPayload> boolean isCacheable(T block) {
        Class<?> runtimeType = block.getClass();
        return cacheableBlockTypes.contains(runtimeType);
    }
}

22
test/random_access/src/main/java/seaweedfs/client/btree/CorruptedCacheException.java

@ -0,0 +1,22 @@
/*
* Copyright 2009 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package seaweedfs.client.btree;
/**
 * Unchecked exception that carries a description of a corrupted cache or block.
 */
class CorruptedCacheException extends RuntimeException {
    /**
     * @param message description forwarded unchanged to {@link RuntimeException}
     */
    CorruptedCacheException(final String message) {
        super(message);
    }
}

274
test/random_access/src/main/java/seaweedfs/client/btree/FileBackedBlockStore.java

@ -0,0 +1,274 @@
/*
* Copyright 2009 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package seaweedfs.client.btree;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.RandomAccessFile;
/**
 * A {@link BlockStore} that keeps its blocks in a single file accessed through a
 * {@link RandomAccessFile}.
 *
 * <p>Each block is written as a header (one type byte followed by an int payload size), the
 * payload itself, and a trailing int holding the byte count reported by the output once the
 * payload has been written. After a write the file is extended, when necessary, to cover the
 * block's full reserved size.
 */
public class FileBackedBlockStore implements BlockStore {
    private final File cacheFile;
    private RandomAccessFile file;
    private ByteOutput output;
    private ByteInput input;
    /** Offset at which the next newly allocated block will be placed. */
    private long nextBlock;
    private Factory factory;
    /** Length of the file as tracked by this store. */
    private long currentFileSize;

    /**
     * @param cacheFile the file that holds (or will hold) the blocks
     */
    public FileBackedBlockStore(File cacheFile) {
        this.cacheFile = cacheFile;
    }

    @Override
    public String toString() {
        return "cache '" + cacheFile + "'";
    }

    /**
     * Opens the backing file, creating missing parent directories first, and runs
     * {@code runnable} when the file is empty.
     *
     * @param runnable initialisation action, run only when the file has length zero
     * @param factory used to instantiate payloads when blocks are read
     * @throws UncheckedIOException if the file cannot be opened or its length cannot be read
     */
    @Override
    public void open(Runnable runnable, Factory factory) {
        this.factory = factory;
        try {
            // A file given by a bare relative name has no parent: getParentFile() returns null
            // and there is no directory to create.
            File parentDir = cacheFile.getParentFile();
            if (parentDir != null) {
                parentDir.mkdirs();
            }
            file = openRandomAccessFile();
            output = new ByteOutput(file);
            input = new ByteInput(file);
            currentFileSize = file.length();
            nextBlock = currentFileSize;
            if (currentFileSize == 0) {
                runnable.run();
            }
        } catch (IOException e) {
            throw new UncheckedIOException(e);
        }
    }

    /** Opens the file read-write, falling back to read-only when that fails. */
    private RandomAccessFile openRandomAccessFile() throws FileNotFoundException {
        try {
            return randomAccessFile("rw");
        } catch (FileNotFoundException e) {
            return randomAccessFile("r");
        }
    }

    private RandomAccessFile randomAccessFile(String mode) throws FileNotFoundException {
        return new RandomAccessFile(cacheFile, mode);
    }

    /**
     * Closes the backing file. Does nothing when the file was never opened, so that a failed
     * {@link #open} can still be followed by a clean {@code close()}.
     *
     * @throws UncheckedIOException if closing the file fails
     */
    @Override
    public void close() {
        if (file == null) {
            return;
        }
        try {
            file.close();
        } catch (IOException e) {
            throw new UncheckedIOException(e);
        }
    }

    /** Truncates the backing file to zero length and resets the allocation offset. */
    @Override
    public void clear() {
        try {
            file.setLength(0);
            currentFileSize = 0;
        } catch (IOException e) {
            throw new UncheckedIOException(e);
        }
        nextBlock = 0;
    }

    /** Gives the payload a {@link BlockImpl} container if it does not have a block yet. */
    @Override
    public void attach(BlockPayload block) {
        if (block.getBlock() == null) {
            block.setBlock(new BlockImpl(block));
        }
    }

    /** Detaches the payload's block; nothing is changed in the file. */
    @Override
    public void remove(BlockPayload block) {
        BlockImpl blockImpl = (BlockImpl) block.getBlock();
        blockImpl.detach();
    }

    /** No-op: this store keeps no buffered state of its own. */
    @Override
    public void flush() {
    }

    /** Reads the block stored at offset 0. */
    @Override
    public <T extends BlockPayload> T readFirst(Class<T> payloadType) {
        return read(BlockPointer.pos(0), payloadType);
    }

    /**
     * Creates a payload of the requested type through the factory and fills it from the file.
     *
     * @throws CorruptedCacheException if the stored block fails validation
     * @throws UncheckedIOException wrapping any other failure
     */
    @Override
    public <T extends BlockPayload> T read(BlockPointer pos, Class<T> payloadType) {
        assert !pos.isNull();
        try {
            T payload = payloadType.cast(factory.create(payloadType));
            BlockImpl block = new BlockImpl(payload, pos);
            block.read();
            return payload;
        } catch (CorruptedCacheException e) {
            throw e;
        } catch (Exception e) {
            throw new UncheckedIOException(e);
        }
    }

    /**
     * Writes the payload's block to the file.
     *
     * @throws CorruptedCacheException propagated unchanged
     * @throws UncheckedIOException wrapping any other failure
     */
    @Override
    public void write(BlockPayload block) {
        BlockImpl blockImpl = (BlockImpl) block.getBlock();
        try {
            blockImpl.write();
        } catch (CorruptedCacheException e) {
            throw e;
        } catch (Exception e) {
            throw new UncheckedIOException(e);
        }
    }

    /** Reserves {@code length} bytes at the current allocation offset and returns that offset. */
    private long alloc(long length) {
        long pos = nextBlock;
        nextBlock += length;
        return pos;
    }

    /** Block container that knows how to read and write itself in the backing file. */
    private final class BlockImpl extends Block {
        private static final int HEADER_SIZE = 1 + INT_SIZE; // type, payload size
        private static final int TAIL_SIZE = INT_SIZE;
        /** Position in the file, or null until one is assigned or allocated. */
        private BlockPointer pos;
        /** Payload size in bytes, or a negative value while still unknown. */
        private int payloadSize;

        private BlockImpl(BlockPayload payload, BlockPointer pos) {
            this(payload);
            setPos(pos);
        }

        public BlockImpl(BlockPayload payload) {
            super(payload);
            pos = null;
            payloadSize = -1;
        }

        @Override
        public boolean hasPos() {
            return pos != null;
        }

        /** Returns the block's position, allocating space for it on first use. */
        @Override
        public BlockPointer getPos() {
            if (pos == null) {
                pos = BlockPointer.pos(alloc(getSize()));
            }
            return pos;
        }

        @Override
        public void setPos(BlockPointer pos) {
            assert this.pos == null && !pos.isNull();
            this.pos = pos;
        }

        /** Total size: payload plus header and tail. The payload size is queried once and remembered. */
        @Override
        public int getSize() {
            if (payloadSize < 0) {
                payloadSize = getPayload().getSize();
            }
            return payloadSize + HEADER_SIZE + TAIL_SIZE;
        }

        /** Sets the total size of the block; the payload size is derived by removing header and tail. */
        @Override
        public void setSize(int size) {
            int newPayloadSize = size - HEADER_SIZE - TAIL_SIZE;
            assert newPayloadSize >= payloadSize;
            payloadSize = newPayloadSize;
        }

        /** Writes header, payload and trailing byte count, then pads the file to the reserved size. */
        public void write() throws Exception {
            long pos = getPos().getPos();
            DataOutputStream outputStream = output.start(pos);
            BlockPayload payload = getPayload();
            // Write header
            outputStream.writeByte(payload.getType());
            outputStream.writeInt(payloadSize);
            long finalSize = pos + HEADER_SIZE + TAIL_SIZE + payloadSize;
            // Write body
            payload.write(outputStream);
            // Write count
            long bytesWritten = output.getBytesWritten();
            if (bytesWritten > Integer.MAX_VALUE) {
                throw new IllegalArgumentException("Block payload exceeds maximum size");
            }
            outputStream.writeInt((int) bytesWritten);
            output.done();
            // System.out.println(String.format("wrote [%d,%d)", pos, pos + bytesWritten + 4));
            // Pad
            if (currentFileSize < finalSize) {
                // System.out.println(String.format("pad length %d => %d", currentFileSize, finalSize));
                file.setLength(finalSize);
                currentFileSize = finalSize;
            }
        }

        /**
         * Reads the block, checking the type byte, that the declared size fits inside the file,
         * and that the trailing count matches the number of bytes actually read.
         */
        public void read() throws Exception {
            long pos = getPos().getPos();
            assert pos >= 0;
            if (pos + HEADER_SIZE >= currentFileSize) {
                throw blockCorruptedException();
            }
            DataInputStream inputStream = input.start(pos);
            BlockPayload payload = getPayload();
            // Read header
            byte type = inputStream.readByte();
            if (type != payload.getType()) {
                throw blockCorruptedException();
            }
            // Read body
            payloadSize = inputStream.readInt();
            if (pos + HEADER_SIZE + TAIL_SIZE + payloadSize > currentFileSize) {
                throw blockCorruptedException();
            }
            payload.read(inputStream);
            // Read and verify count
            long actualCount = input.getBytesRead();
            long count = inputStream.readInt();
            if (actualCount != count) {
                System.out.println(String.format("read expected %d actual %d, pos %d payloadSize %d currentFileSize %d", count, actualCount, pos, payloadSize, currentFileSize));
                throw blockCorruptedException();
            }
            input.done();
        }

        @Override
        public RuntimeException blockCorruptedException() {
            return new CorruptedCacheException(String.format("Corrupted %s found in %s.", this,
                    FileBackedBlockStore.this));
        }
    }
}

283
test/random_access/src/main/java/seaweedfs/client/btree/FreeListBlockStore.java

@ -0,0 +1,283 @@
/*
* Copyright 2009 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package seaweedfs.client.btree;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
/**
 * A {@link BlockStore} decorator that records the position and size of removed blocks in a
 * chain of {@link FreeListBlock}s and offers those regions to blocks that are attached later.
 */
public class FreeListBlockStore implements BlockStore {
private final BlockStore store;
// Free-list blocks are written and read through this decorator itself (see the constructor).
private final BlockStore freeListStore;
// Maximum number of entries a single FreeListBlock holds before it is split.
private final int maxBlockEntries;
// First block of the free list; set in open() and reset to null in close().
private FreeListBlock freeListBlock;
/**
 * @param store the store to decorate
 * @param maxBlockEntries maximum number of entries per free-list block
 */
public FreeListBlockStore(BlockStore store, int maxBlockEntries) {
this.store = store;
freeListStore = this;
this.maxBlockEntries = maxBlockEntries;
}
/**
 * Opens the underlying store. The init action is wrapped so that an empty free-list block is
 * written and flushed before the caller's action runs, and the factory is wrapped so that it
 * can create {@link FreeListBlock} instances. Afterwards the first block of the underlying
 * store is read as the head of the free list.
 */
@Override
public void open(final Runnable initAction, final Factory factory) {
Runnable freeListInitAction = new Runnable() {
@Override
public void run() {
freeListBlock = new FreeListBlock();
store.write(freeListBlock);
store.flush();
initAction.run();
}
};
Factory freeListFactory = new Factory() {
@Override
public Object create(Class<? extends BlockPayload> type) {
if (type == FreeListBlock.class) {
return new FreeListBlock();
}
return factory.create(type);
}
};
store.open(freeListInitAction, freeListFactory);
freeListBlock = store.readFirst(FreeListBlock.class);
}
/** Drops the in-memory free-list head and closes the underlying store. */
@Override
public void close() {
freeListBlock = null;
store.close();
}
/** Clears the underlying store. The in-memory free-list head is not touched here. */
@Override
public void clear() {
store.clear();
}
/** Removes the block from the underlying store and records its position and size as free. */
@Override
public void remove(BlockPayload block) {
// The container is captured before removal; its position and size are read afterwards.
Block container = block.getBlock();
store.remove(block);
freeListBlock.add(container.getPos(), container.getSize());
}
/**
 * Reads the block located at the free-list head's "next" position.
 * NOTE(review): presumably this is the first block following the free-list head; confirm
 * against BlockPayload.getNextPos().
 */
@Override
public <T extends BlockPayload> T readFirst(Class<T> payloadType) {
return store.read(freeListBlock.getNextPos(), payloadType);
}
/** Delegates to the underlying store. */
@Override
public <T extends BlockPayload> T read(BlockPointer pos, Class<T> payloadType) {
return store.read(pos, payloadType);
}
/** Attaches the block (possibly giving it a recycled position) and writes it to the underlying store. */
@Override
public void write(BlockPayload block) {
attach(block);
store.write(block);
}
/** Attaches the block to the underlying store, then lets the free list try to assign it a position. */
@Override
public void attach(BlockPayload block) {
store.attach(block);
freeListBlock.alloc(block.getBlock());
}
/** Delegates to the underlying store. */
@Override
public void flush() {
store.flush();
}
/** Consistency check of the whole free list, starting at the first block. Not invoked from within this class. */
private void verify() {
FreeListBlock block = store.readFirst(FreeListBlock.class);
verify(block, Integer.MAX_VALUE);
}
/**
 * Checks one free-list block and, recursively, the blocks after it: no size may exceed
 * {@code maxValue}, no entry may be smaller than {@code largestInNextBlock}, and entries must
 * be in non-decreasing size order.
 *
 * @throws RuntimeException with message "corrupt free list" when a check fails
 */
private void verify(FreeListBlock block, int maxValue) {
if (block.largestInNextBlock > maxValue) {
throw new RuntimeException("corrupt free list");
}
int current = 0;
for (FreeListEntry entry : block.entries) {
if (entry.size > maxValue) {
throw new RuntimeException("corrupt free list");
}
if (entry.size < block.largestInNextBlock) {
throw new RuntimeException("corrupt free list");
}
if (entry.size < current) {
throw new RuntimeException("corrupt free list");
}
current = entry.size;
}
if (!block.nextBlock.isNull()) {
verify(store.read(block.nextBlock, FreeListBlock.class), block.largestInNextBlock);
}
}
/**
 * One node of the free list: a size-ordered list of free regions plus a pointer to the next
 * node, which holds the smaller regions.
 */
public class FreeListBlock extends BlockPayload {
// Free regions held by this block, kept in ascending size order.
private List<FreeListEntry> entries = new ArrayList<FreeListEntry>();
// Size of the largest entry stored in the following block; sizes below this go to that block.
private int largestInNextBlock;
// Position of the following free-list block.
private BlockPointer nextBlock = BlockPointer.start();
// Transient fields
private FreeListBlock prev;
private FreeListBlock next;
/** Fixed payload size: next pointer, largest-in-next, entry count, and room for maxBlockEntries entries. */
@Override
protected int getSize() {
return Block.LONG_SIZE + Block.INT_SIZE + Block.INT_SIZE + maxBlockEntries * (Block.LONG_SIZE
+ Block.INT_SIZE);
}
@Override
protected byte getType() {
return 0x44;
}
/** Reads the next-block position, largestInNextBlock, the entry count and then each entry (position, size). */
@Override
protected void read(DataInputStream inputStream) throws Exception {
nextBlock = BlockPointer.pos(inputStream.readLong());
largestInNextBlock = inputStream.readInt();
int count = inputStream.readInt();
for (int i = 0; i < count; i++) {
BlockPointer pos = BlockPointer.pos(inputStream.readLong());
int size = inputStream.readInt();
entries.add(new FreeListEntry(pos, size));
}
}
/** Writes the fields in the same order that {@link #read} consumes them. */
@Override
protected void write(DataOutputStream outputStream) throws Exception {
outputStream.writeLong(nextBlock.getPos());
outputStream.writeInt(largestInNextBlock);
outputStream.writeInt(entries.size());
for (FreeListEntry entry : entries) {
outputStream.writeLong(entry.pos.getPos());
outputStream.writeInt(entry.size);
}
}
/**
 * Records a free region. Zero-sized regions are ignored; regions smaller than
 * {@code largestInNextBlock} are passed on to the next block. Otherwise the region is
 * inserted in size order, and if the block then exceeds {@code maxBlockEntries} its smaller
 * half is moved into a new block linked in directly after this one.
 */
public void add(BlockPointer pos, int size) {
assert !pos.isNull() && size >= 0;
if (size == 0) {
return;
}
if (size < largestInNextBlock) {
FreeListBlock next = getNextBlock();
next.add(pos, size);
return;
}
FreeListEntry entry = new FreeListEntry(pos, size);
int index = Collections.binarySearch(entries, entry);
if (index < 0) {
// Not found: binarySearch returns (-(insertion point) - 1).
index = -index - 1;
}
entries.add(index, entry);
if (entries.size() > maxBlockEntries) {
// Split: the new block inherits this block's successor and takes the lower half of the entries.
FreeListBlock newBlock = new FreeListBlock();
newBlock.largestInNextBlock = largestInNextBlock;
newBlock.nextBlock = nextBlock;
newBlock.prev = this;
newBlock.next = next;
next = newBlock;
List<FreeListEntry> newBlockEntries = entries.subList(0, entries.size() / 2);
newBlock.entries.addAll(newBlockEntries);
// Clearing the subList view removes those entries from this block's list.
newBlockEntries.clear();
largestInNextBlock = newBlock.entries.get(newBlock.entries.size() - 1).size;
// The new block must be written before its position is read for the nextBlock pointer.
freeListStore.write(newBlock);
nextBlock = newBlock.getPos();
}
freeListStore.write(this);
}
/** Returns the following block, reading it on first access and linking it back to this block. */
private FreeListBlock getNextBlock() {
if (next == null) {
next = freeListStore.read(nextBlock, FreeListBlock.class);
next.prev = this;
}
return next;
}
/**
 * Tries to give {@code block} a recycled position. Nothing happens when the block already
 * has a position or no suitable entry exists. When this block has no entries, or the
 * request fits within {@code largestInNextBlock}, the request is passed to the next block
 * (if there is one). Otherwise an entry at least as large as the required size is removed
 * and its position and size are assigned to the block. A block left empty is unlinked from
 * its predecessor and removed.
 */
public void alloc(Block block) {
if (block.hasPos()) {
return;
}
int requiredSize = block.getSize();
if (entries.isEmpty() || requiredSize <= largestInNextBlock) {
if (nextBlock.isNull()) {
return;
}
getNextBlock().alloc(block);
return;
}
// The probe entry carries no position; entries compare by size only.
int index = Collections.binarySearch(entries, new FreeListEntry(null, requiredSize));
if (index < 0) {
index = -index - 1;
}
if (index == entries.size()) {
// Largest free block is too small
return;
}
FreeListEntry entry = entries.remove(index);
block.setPos(entry.pos);
block.setSize(entry.size);
freeListStore.write(this);
if (entries.size() == 0 && prev != null) {
// Splice this now-empty block out of the chain and release it.
prev.nextBlock = nextBlock;
prev.largestInNextBlock = largestInNextBlock;
prev.next = next;
if (next != null) {
next.prev = prev;
}
freeListStore.write(prev);
freeListStore.remove(this);
}
}
}
/** A free region (position and size); ordering considers the size only. */
private static class FreeListEntry implements Comparable<FreeListEntry> {
final BlockPointer pos;
final int size;
private FreeListEntry(BlockPointer pos, int size) {
this.pos = pos;
this.size = size;
}
@Override
public int compareTo(FreeListEntry o) {
if (size > o.size) {
return 1;
}
if (size < o.size) {
return -1;
}
return 0;
}
}
}

75
test/random_access/src/main/java/seaweedfs/client/btree/KeyHasher.java

@ -0,0 +1,75 @@
/*
* Copyright 2014 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package seaweedfs.client.btree;
import seaweedfs.client.btree.serialize.Serializer;
import seaweedfs.client.btree.serialize.kryo.KryoBackedEncoder;
import java.io.IOException;
import java.io.OutputStream;
import java.math.BigInteger;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
/**
 * Computes a 64-bit hash for keys by serializing them into an MD5 digest.
 *
 * @param <K> the key type
 */
class KeyHasher<K> {
    private final Serializer<K> serializer;
    // Declaration order matters: the encoder is constructed around the digest stream.
    private final MessageDigestStream digestStream = new MessageDigestStream();
    private final KryoBackedEncoder encoder = new KryoBackedEncoder(digestStream);

    /**
     * @param serializer used to turn a key into bytes
     */
    public KeyHasher(Serializer<K> serializer) {
        this.serializer = serializer;
    }

    /**
     * Serializes {@code key} through the encoder into the digest stream, flushes the encoder and
     * returns the resulting checksum.
     */
    long getHashCode(K key) throws Exception {
        serializer.write(encoder, key);
        encoder.flush();
        long checksum = digestStream.getChecksum();
        return checksum;
    }

    /** An output stream that feeds everything written to it into an MD5 {@link MessageDigest}. */
    private static class MessageDigestStream extends OutputStream {
        MessageDigest messageDigest;

        private MessageDigestStream() {
            try {
                messageDigest = MessageDigest.getInstance("MD5");
            } catch (NoSuchAlgorithmException e) {
                throw UncheckedException.throwAsUncheckedException(e);
            }
        }

        @Override
        public void write(int b) throws IOException {
            byte single = (byte) b;
            messageDigest.update(single);
        }

        @Override
        public void write(byte[] b) throws IOException {
            messageDigest.update(b);
        }

        @Override
        public void write(byte[] b, int off, int len) throws IOException {
            messageDigest.update(b, off, len);
        }

        /**
         * Completes the digest and returns the {@code long} value of the digest bytes interpreted
         * as a {@link BigInteger}.
         */
        long getChecksum() {
            byte[] md5 = messageDigest.digest();
            assert md5.length == 16;
            BigInteger asNumber = new BigInteger(md5);
            return asNumber.longValue();
        }
    }
}

54
test/random_access/src/main/java/seaweedfs/client/btree/RandomAccessFileInputStream.java

@ -0,0 +1,54 @@
/*
* Copyright 2013 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package seaweedfs.client.btree;
import java.io.IOException;
import java.io.InputStream;
import java.io.RandomAccessFile;
/**
* Reads from a {@link RandomAccessFile}. Each operation reads from and advances the current position of the file.
*
* <p>Closing this stream does not close the underlying file.
*/
public class RandomAccessFileInputStream extends InputStream {
private final RandomAccessFile file;
public RandomAccessFileInputStream(RandomAccessFile file) {
this.file = file;
}
@Override
public long skip(long n) throws IOException {
file.seek(file.getFilePointer() + n);
return n;
}
@Override
public int read(byte[] bytes) throws IOException {
return file.read(bytes);
}
@Override
public int read() throws IOException {
return file.read();
}
@Override
public int read(byte[] bytes, int offset, int length) throws IOException {
return file.read(bytes, offset, length);
}
}

48
test/random_access/src/main/java/seaweedfs/client/btree/RandomAccessFileOutputStream.java

@ -0,0 +1,48 @@
/*
* Copyright 2013 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package seaweedfs.client.btree;
import java.io.IOException;
import java.io.OutputStream;
import java.io.RandomAccessFile;
/**
* Writes to a {@link RandomAccessFile}. Each operation writes to and advances the current position of the file.
*
* <p>Closing this stream does not close the underlying file. Flushing this stream does nothing.
*/
public class RandomAccessFileOutputStream extends OutputStream {
private final RandomAccessFile file;
public RandomAccessFileOutputStream(RandomAccessFile file) {
this.file = file;
}
@Override
public void write(int i) throws IOException {
file.write(i);
}
@Override
public void write(byte[] bytes) throws IOException {
file.write(bytes);
}
@Override
public void write(byte[] bytes, int offset, int length) throws IOException {
file.write(bytes, offset, length);
}
}

87
test/random_access/src/main/java/seaweedfs/client/btree/StateCheckBlockStore.java

@ -0,0 +1,87 @@
/*
* Copyright 2009 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package seaweedfs.client.btree;
/**
 * A {@link BlockStore} decorator that tracks whether the store is open and asserts that state
 * before delegating each operation.
 */
public class StateCheckBlockStore implements BlockStore {
    /** The store that performs the real work. */
    private final BlockStore blockStore;
    /** True between a call to {@link #open} and the next {@link #close}. */
    private boolean open;

    /**
     * @param blockStore the store to guard
     */
    public StateCheckBlockStore(BlockStore blockStore) {
        this.blockStore = blockStore;
    }

    /** Reports whether the store is currently marked open. */
    public boolean isOpen() {
        return open;
    }

    /** Marks the store open (asserting it was not already) and opens the underlying store. */
    @Override
    public void open(Runnable initAction, Factory factory) {
        assert !open;
        open = true;
        blockStore.open(initAction, factory);
    }

    /** Closes the underlying store if this store is marked open; otherwise does nothing. */
    @Override
    public void close() {
        if (open) {
            open = false;
            blockStore.close();
        }
    }

    @Override
    public void clear() {
        assert open;
        blockStore.clear();
    }

    @Override
    public void remove(BlockPayload block) {
        assert open;
        blockStore.remove(block);
    }

    @Override
    public <T extends BlockPayload> T readFirst(Class<T> payloadType) {
        assert open;
        T first = blockStore.readFirst(payloadType);
        return first;
    }

    @Override
    public <T extends BlockPayload> T read(BlockPointer pos, Class<T> payloadType) {
        assert open;
        T found = blockStore.read(pos, payloadType);
        return found;
    }

    @Override
    public void write(BlockPayload block) {
        assert open;
        blockStore.write(block);
    }

    @Override
    public void attach(BlockPayload block) {
        assert open;
        blockStore.attach(block);
    }

    @Override
    public void flush() {
        assert open;
        blockStore.flush();
    }
}

526
test/random_access/src/main/java/seaweedfs/client/btree/StreamByteBuffer.java

@ -0,0 +1,526 @@
/*
* Copyright 2016 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package seaweedfs.client.btree;
import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CoderResult;
import java.nio.charset.CodingErrorAction;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
/**
* An in-memory buffer that provides OutputStream and InputStream interfaces.
*
* This is more efficient than using ByteArrayOutputStream/ByteArrayInputStream.
*
* Reading the buffer will clear the buffer.
* This is not thread-safe; it is intended to be used by a single thread.
*/
public class StreamByteBuffer {
private static final int DEFAULT_CHUNK_SIZE = 4096;
private static final int MAX_CHUNK_SIZE = 1024 * 1024;
private LinkedList<StreamByteBufferChunk> chunks = new LinkedList<StreamByteBufferChunk>();
private StreamByteBufferChunk currentWriteChunk;
private StreamByteBufferChunk currentReadChunk;
private int chunkSize;
private int nextChunkSize;
private int maxChunkSize;
private StreamByteBufferOutputStream output;
private StreamByteBufferInputStream input;
private int totalBytesUnreadInList;
/** Creates a buffer whose first chunk has the default chunk size. */
public StreamByteBuffer() {
    this(DEFAULT_CHUNK_SIZE);
}

/**
 * Creates a buffer whose first chunk holds {@code chunkSize} bytes. The upper bound for later
 * chunk sizes is the larger of {@code chunkSize} and the built-in maximum.
 */
public StreamByteBuffer(int chunkSize) {
    this.chunkSize = chunkSize;
    this.nextChunkSize = chunkSize;
    this.maxChunkSize = chunkSize > MAX_CHUNK_SIZE ? chunkSize : MAX_CHUNK_SIZE;
    this.currentWriteChunk = new StreamByteBufferChunk(this.nextChunkSize);
    this.output = new StreamByteBufferOutputStream();
    this.input = new StreamByteBufferInputStream();
}
/**
 * Creates a buffer sized from {@code inputStream.available()} (clamped to the default range)
 * and fills it with everything the stream delivers until end of stream.
 */
public static StreamByteBuffer of(InputStream inputStream) throws IOException {
    int sizeHint = inputStream.available();
    StreamByteBuffer result = new StreamByteBuffer(chunkSizeInDefaultRange(sizeHint));
    result.readFully(inputStream);
    return result;
}

/**
 * Creates a buffer sized from {@code len} (clamped to the default range) and reads exactly
 * {@code len} bytes from the stream into it.
 */
public static StreamByteBuffer of(InputStream inputStream, int len) throws IOException {
    StreamByteBuffer result = new StreamByteBuffer(chunkSizeInDefaultRange(len));
    result.readFrom(inputStream, len);
    return result;
}
/** Creates a buffer whose chunk size is {@code value} clamped to the default range. */
public static StreamByteBuffer createWithChunkSizeInDefaultRange(int value) {
    int clampedChunkSize = chunkSizeInDefaultRange(value);
    return new StreamByteBuffer(clampedChunkSize);
}
/** Clamps {@code value} to the range from DEFAULT_CHUNK_SIZE to MAX_CHUNK_SIZE. */
static int chunkSizeInDefaultRange(int value) {
    final int lowest = DEFAULT_CHUNK_SIZE;
    final int highest = MAX_CHUNK_SIZE;
    return valueInRange(value, lowest, highest);
}
/** Raises {@code value} to at least {@code min}, then caps the result at {@code max}. */
private static int valueInRange(int value, int min, int max) {
    int atLeastMin = value < min ? min : value;
    return atLeastMin > max ? max : atLeastMin;
}
/** Returns the stream that appends to this buffer; the same instance on every call. */
public OutputStream getOutputStream() {
    return this.output;
}
/** Returns the stream that reads from this buffer; the same instance on every call. */
public InputStream getInputStream() {
    return this.input;
}
/** Writes every unread byte to {@code target}, one chunk at a time, until nothing is left. */
public void writeTo(OutputStream target) throws IOException {
    for (int unread = prepareRead(); unread != -1; unread = prepareRead()) {
        currentReadChunk.writeTo(target);
    }
}
/**
 * Appends exactly {@code len} bytes from {@code inputStream} to this buffer.
 *
 * @throws EOFException if the stream ends before {@code len} bytes were read
 */
public void readFrom(InputStream inputStream, int len) throws IOException {
    for (int remaining = len; remaining > 0; ) {
        int capacity = allocateSpace();
        // Never ask for more than the current chunk can take or than is still wanted.
        int request = Math.min(capacity, remaining);
        int received = currentWriteChunk.readFrom(inputStream, request);
        if (received == -1) {
            throw new EOFException("Unexpected EOF");
        }
        remaining -= received;
    }
}
/** Appends bytes from {@code inputStream} to this buffer until the stream reports end of stream. */
public void readFully(InputStream inputStream) throws IOException {
    int received;
    do {
        int capacity = allocateSpace();
        received = currentWriteChunk.readFrom(inputStream, capacity);
    } while (received != -1);
}
/** Reads all currently unread bytes into a new array sized to hold exactly that many bytes. */
public byte[] readAsByteArray() {
    int unread = totalBytesUnread();
    byte[] contents = new byte[unread];
    input.readImpl(contents, 0, contents.length);
    return contents;
}
/**
 * Drains the buffer into a list of byte arrays, taking one array per call to the input's
 * {@code readNextBuffer()} and leaving out empty arrays.
 */
public List<byte[]> readAsListOfByteArrays() {
    List<byte[]> pieces = new ArrayList<byte[]>(chunks.size() + 1);
    for (byte[] piece = input.readNextBuffer(); piece != null; piece = input.readNextBuffer()) {
        if (piece.length != 0) {
            pieces.add(piece);
        }
    }
    return pieces;
}
/** Decodes the unread bytes using the charset with the given name. */
public String readAsString(String encoding) {
    return readAsString(Charset.forName(encoding));
}

/** Decodes the unread bytes using the platform default charset. */
public String readAsString() {
    Charset platformDefault = Charset.defaultCharset();
    return readAsString(platformDefault);
}

/**
 * Decodes the unread bytes using {@code charset}.
 *
 * @throws UncheckedIOException wrapping a {@link CharacterCodingException} raised while decoding
 */
public String readAsString(Charset charset) {
    String decoded;
    try {
        decoded = doReadAsString(charset);
    } catch (CharacterCodingException e) {
        throw new UncheckedIOException(e);
    }
    return decoded;
}
/** Returns the decoded unread bytes, or the empty string when nothing is unread. */
private String doReadAsString(Charset charset) throws CharacterCodingException {
    if (totalBytesUnread() <= 0) {
        return "";
    }
    CharBuffer decoded = readAsCharBuffer(charset);
    return decoded.toString();
}
/**
 * Decodes the unread bytes, chunk by chunk, into a {@link CharBuffer} that is flipped before
 * it is returned. Malformed input and unmappable characters are replaced rather than reported.
 * Bytes the decoder could not consume from one chunk are combined with bytes from the next
 * chunk one at a time until decoding can continue. Any bytes still undecoded at the end are
 * written back through the output stream.
 *
 * @throws CharacterCodingException if the final decode or flush reports a result other than underflow
 */
private CharBuffer readAsCharBuffer(Charset charset) throws CharacterCodingException {
CharsetDecoder decoder = charset.newDecoder().onMalformedInput(
CodingErrorAction.REPLACE).onUnmappableCharacter(
CodingErrorAction.REPLACE);
// One char of capacity per unread byte.
CharBuffer charbuffer = CharBuffer.allocate(totalBytesUnread());
// buf: bytes currently offered to the decoder; nextBuf: following chunk, fetched when buf has leftovers.
ByteBuffer buf = null;
boolean wasUnderflow = false;
ByteBuffer nextBuf = null;
boolean needsFlush = false;
while (hasRemaining(nextBuf) || hasRemaining(buf) || prepareRead() != -1) {
if (hasRemaining(buf)) {
// handle decoding underflow, multi-byte unicode character at buffer chunk boundary
if (!wasUnderflow) {
throw new IllegalStateException("Unexpected state. Buffer has remaining bytes without underflow in decoding.");
}
if (!hasRemaining(nextBuf) && prepareRead() != -1) {
nextBuf = currentReadChunk.readToNioBuffer();
}
// copy one by one until the underflow has been resolved
buf = ByteBuffer.allocate(buf.remaining() + 1).put(buf);
buf.put(nextBuf.get());
BufferCaster.cast(buf).flip();
} else {
if (hasRemaining(nextBuf)) {
buf = nextBuf;
} else if (prepareRead() != -1) {
buf = currentReadChunk.readToNioBuffer();
if (!hasRemaining(buf)) {
throw new IllegalStateException("Unexpected state. Buffer is empty.");
}
}
nextBuf = null;
}
// Determined before decoding: no bytes left in nextBuf and no further chunk to read.
boolean endOfInput = !hasRemaining(nextBuf) && prepareRead() == -1;
int bufRemainingBefore = buf.remaining();
CoderResult result = decoder.decode(buf, charbuffer, false);
// A flush is only requested once the decoder has actually consumed some input.
if (bufRemainingBefore > buf.remaining()) {
needsFlush = true;
}
if (endOfInput) {
// Signal end of input to the decoder with an empty buffer.
result = decoder.decode(ByteBuffer.allocate(0), charbuffer, true);
if (!result.isUnderflow()) {
result.throwException();
}
break;
}
wasUnderflow = result.isUnderflow();
}
if (needsFlush) {
CoderResult result = decoder.flush(charbuffer);
if (!result.isUnderflow()) {
result.throwException();
}
}
// NOTE(review): clear() is defined outside this view; presumably it resets the buffer's chunks — confirm.
clear();
// push back remaining bytes of multi-byte unicode character
while (hasRemaining(buf)) {
byte b = buf.get();
try {
getOutputStream().write(b);
} catch (IOException e) {
throw new UncheckedIOException(e);
}
}
BufferCaster.cast(charbuffer).flip();
return charbuffer;
}
/** Null-safe check for whether the buffer still has bytes between its position and limit. */
private boolean hasRemaining(ByteBuffer nextBuf) {
    if (nextBuf == null) {
        return false;
    }
    return nextBuf.hasRemaining();
}
/**
 * Returns the number of unread bytes: those in queued chunks plus those in the current read
 * chunk and, when it is a different chunk, the current write chunk.
 */
public int totalBytesUnread() {
    StreamByteBufferChunk reading = currentReadChunk;
    StreamByteBufferChunk writing = currentWriteChunk;
    int sum = totalBytesUnreadInList;
    if (reading != null) {
        sum += reading.bytesUnread();
    }
    // Count the write chunk only once when it is also the chunk being read.
    if (writing != null && writing != reading) {
        sum += writing.bytesUnread();
    }
    return sum;
}
/**
 * Ensures the current write chunk has room and returns how many bytes it can take. A full
 * chunk is queued for reading and replaced by a new one; the size used for the chunk after
 * that is doubled, up to the maximum chunk size.
 */
protected int allocateSpace() {
    int available = currentWriteChunk.spaceLeft();
    if (available != 0) {
        return available;
    }
    addChunk(currentWriteChunk);
    currentWriteChunk = new StreamByteBufferChunk(nextChunkSize);
    if (nextChunkSize < maxChunkSize) {
        nextChunkSize = Math.min(nextChunkSize * 2, maxChunkSize);
    }
    return currentWriteChunk.spaceLeft();
}
/**
 * Makes sure {@code currentReadChunk} is the chunk to read from next and returns its unread
 * byte count. When the current read chunk is exhausted, the next queued chunk is taken; when
 * no chunk is queued, the write chunk is used; when the write chunk is already the read
 * chunk, -1 is returned.
 */
protected int prepareRead() {
    if (currentReadChunk != null) {
        int pending = currentReadChunk.bytesUnread();
        if (pending != 0) {
            return pending;
        }
    }
    if (!chunks.isEmpty()) {
        currentReadChunk = chunks.removeFirst();
        int fromQueue = currentReadChunk.bytesUnread();
        // These bytes are now accounted for through currentReadChunk instead of the list total.
        totalBytesUnreadInList -= fromQueue;
        return fromQueue;
    }
    if (currentReadChunk != currentWriteChunk) {
        currentReadChunk = currentWriteChunk;
        return currentReadChunk.bytesUnread();
    }
    return -1;
}
/** Creates a default-sized buffer and queues each given array as a chunk, without copying. */
public static StreamByteBuffer of(List<byte[]> listOfByteArrays) {
    StreamByteBuffer result = new StreamByteBuffer();
    result.addChunks(listOfByteArrays);
    return result;
}
/** Wraps each array in a chunk and queues the chunks in list order. */
private void addChunks(List<byte[]> listOfByteArrays) {
    for (java.util.Iterator<byte[]> arrays = listOfByteArrays.iterator(); arrays.hasNext(); ) {
        StreamByteBufferChunk wrapped = new StreamByteBufferChunk(arrays.next());
        addChunk(wrapped);
    }
}
/** Appends the chunk to the read queue and adds its unread bytes to the queued total. */
private void addChunk(StreamByteBufferChunk chunk) {
    chunks.addLast(chunk);
    int unread = chunk.bytesUnread();
    totalBytesUnreadInList += unread;
}
/**
 * A fixed-capacity byte array with a write mark ({@code used}) and a read mark
 * ({@code pointer}). Bytes between the two marks are unread.
 */
static class StreamByteBufferChunk {
    private int pointer;
    private byte[] buffer;
    private int size;
    private int used;

    /** Creates an empty chunk with room for {@code size} bytes. */
    public StreamByteBufferChunk(int size) {
        this.size = size;
        this.buffer = new byte[size];
    }

    /** Creates a full chunk backed directly by {@code buf} (no copy is made). */
    public StreamByteBufferChunk(byte[] buf) {
        this.buffer = buf;
        this.size = buf.length;
        this.used = buf.length;
    }

    /** Returns the unread bytes as a buffer over the backing array and marks them read; null when nothing is unread. */
    public ByteBuffer readToNioBuffer() {
        if (pointer >= used) {
            return null;
        }
        boolean untouchedAndFull = pointer <= 0 && used >= size;
        ByteBuffer view = untouchedAndFull
                ? ByteBuffer.wrap(buffer)
                : ByteBuffer.wrap(buffer, pointer, used - pointer);
        pointer = used;
        return view;
    }

    /** Appends one byte; returns false, storing nothing, when the chunk is full. */
    public boolean write(byte b) {
        if (used >= size) {
            return false;
        }
        buffer[used] = b;
        used++;
        return true;
    }

    /** Appends {@code len} bytes of {@code b} starting at {@code off}. */
    public void write(byte[] b, int off, int len) {
        System.arraycopy(b, off, buffer, used, len);
        used += len;
    }

    /** Copies {@code len} bytes from the read mark into {@code b} at {@code off} and advances the read mark. */
    public void read(byte[] b, int off, int len) {
        System.arraycopy(buffer, pointer, b, off, len);
        pointer += len;
    }

    /** Writes the unread bytes, if any, to {@code target} and marks them read. */
    public void writeTo(OutputStream target) throws IOException {
        if (pointer >= used) {
            return;
        }
        target.write(buffer, pointer, used - pointer);
        pointer = used;
    }

    /** Moves the read mark back to the start of the chunk. */
    public void reset() {
        pointer = 0;
    }

    public int bytesUsed() {
        return used;
    }

    public int bytesUnread() {
        return used - pointer;
    }

    /** Returns the next unread byte as a value from 0 to 255, or -1 when nothing is unread. */
    public int read() {
        if (pointer >= used) {
            return -1;
        }
        int value = buffer[pointer] & 0xff;
        pointer++;
        return value;
    }

    public int spaceLeft() {
        return size - used;
    }

    /** Reads up to {@code len} bytes from the stream into the free space; returns the stream's own result. */
    public int readFrom(InputStream inputStream, int len) throws IOException {
        int received = inputStream.read(buffer, used, len);
        if (received > 0) {
            used += received;
        }
        return received;
    }

    /** Resets both marks so the chunk is empty. */
    public void clear() {
        pointer = 0;
        used = 0;
    }

    /**
     * Returns the unread bytes and marks them read. A completely filled chunk that has not been
     * read from hands out its backing array itself; otherwise a copy (possibly empty) is returned.
     */
    public byte[] readBuffer() {
        if (pointer == 0 && used == buffer.length) {
            pointer = used;
            return buffer;
        }
        if (pointer >= used) {
            return new byte[0];
        }
        byte[] copy = new byte[used - pointer];
        read(copy, 0, copy.length);
        return copy;
    }
}
/**
 * {@link OutputStream} view of the enclosing buffer: bytes written here go into the
 * buffer's current write chunk.
 */
class StreamByteBufferOutputStream extends OutputStream {
    private boolean closed;

    /**
     * Writes {@code len} bytes of {@code b}, starting at {@code off}, piece by piece: each
     * round asks {@code allocateSpace()} how much can be written and hands that much to the
     * current write chunk.
     *
     * @throws NullPointerException if {@code b} is {@code null}
     * @throws IndexOutOfBoundsException if {@code off}/{@code len} do not describe a range of {@code b}
     */
    @Override
    public void write(byte[] b, int off, int len) throws IOException {
        if (b == null) {
            throw new NullPointerException();
        }
        int end = off + len;
        // end < 0 catches int overflow of off + len
        boolean outOfRange = off < 0 || off > b.length || len < 0 || end > b.length || end < 0;
        if (outOfRange) {
            throw new IndexOutOfBoundsException();
        }
        // A zero-length write never enters the loop, so nothing is allocated for it.
        int cursor = off;
        for (int remaining = len; remaining > 0; ) {
            int writable = Math.min(allocateSpace(), remaining);
            currentWriteChunk.write(b, cursor, writable);
            remaining -= writable;
            cursor += writable;
        }
    }

    /** Only records that the stream was closed; see {@link #isClosed()}. */
    @Override
    public void close() throws IOException {
        closed = true;
    }

    /** Returns whether {@link #close()} has been called. */
    public boolean isClosed() {
        return closed;
    }

    /** Writes the low-order byte of {@code b} to the current write chunk. */
    @Override
    public void write(int b) throws IOException {
        allocateSpace();
        currentWriteChunk.write((byte) b);
    }

    /** Returns the buffer this stream writes into. */
    public StreamByteBuffer getBuffer() {
        return StreamByteBuffer.this;
    }
}
/**
 * {@link InputStream} view of the enclosing buffer: reads consume the buffer's unread bytes.
 */
class StreamByteBufferInputStream extends InputStream {
    /** Reads a single byte from the current read chunk, or returns {@code -1} when it has none. */
    @Override
    public int read() throws IOException {
        prepareRead();
        return currentReadChunk.read();
    }

    @Override
    public int read(byte[] b, int off, int len) throws IOException {
        return readImpl(b, off, len);
    }

    /**
     * Copies up to {@code len} unread bytes into {@code b} starting at {@code off}.
     *
     * @return the number of bytes copied, {@code 0} when {@code len} is zero, or {@code -1}
     *         when no byte could be copied
     * @throws NullPointerException if {@code b} is {@code null}
     * @throws IndexOutOfBoundsException if {@code off}/{@code len} do not describe a range of {@code b}
     */
    int readImpl(byte[] b, int off, int len) {
        if (b == null) {
            throw new NullPointerException();
        }
        int end = off + len;
        // end < 0 catches int overflow of off + len
        boolean outOfRange = off < 0 || off > b.length || len < 0 || end > b.length || end < 0;
        if (outOfRange) {
            throw new IndexOutOfBoundsException();
        }
        if (len == 0) {
            return 0;
        }
        int copied = 0;
        int unreadInChunk = prepareRead();
        // prepareRead() answering -1 ends the loop: nothing more is available.
        while (copied < len && unreadInChunk != -1) {
            int step = Math.min(unreadInChunk, len - copied);
            currentReadChunk.read(b, off + copied, step);
            copied += step;
            unreadInChunk = prepareRead();
        }
        return copied > 0 ? copied : -1;
    }

    /** Returns the buffer's total number of unread bytes. */
    @Override
    public int available() throws IOException {
        return totalBytesUnread();
    }

    /** Returns the buffer this stream reads from. */
    public StreamByteBuffer getBuffer() {
        return StreamByteBuffer.this;
    }

    /**
     * Returns the unread content of the current read chunk as one array.
     *
     * @return the chunk's unread bytes, or {@code null} when {@code prepareRead()} reports {@code -1}
     */
    public byte[] readNextBuffer() {
        return prepareRead() != -1 ? currentReadChunk.readBuffer() : null;
    }
}
/**
 * Discards all buffered content: empties the chunk list, zeroes the unread counter,
 * forgets the current read chunk and resets the cursors of the current write chunk.
 */
public void clear() {
    chunks.clear();
    totalBytesUnreadInList = 0;
    currentReadChunk = null;
    currentWriteChunk.clear();
}
}

88
test/random_access/src/main/java/seaweedfs/client/btree/UncheckedException.java

@ -0,0 +1,88 @@
/*
* Copyright 2010 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package seaweedfs.client.btree;
import java.io.IOException;
import java.lang.reflect.InvocationTargetException;
import java.util.concurrent.Callable;
/**
 * Unchecked wrapper around a checked exception. It adds no context of its own.
 */
public final class UncheckedException extends RuntimeException {
    private UncheckedException(Throwable cause) {
        super(cause);
    }

    private UncheckedException(String message, Throwable cause) {
        super(message, cause);
    }

    /**
     * Rethrows {@code t} without preserving its message on the wrapper.
     *
     * Note: this never returns normally. The declared return type only exists so callers
     * can write {@code throw throwAsUncheckedException(e)} and satisfy the compiler.
     */
    public static RuntimeException throwAsUncheckedException(Throwable t) {
        return throwAsUncheckedException(t, false);
    }

    /**
     * Rethrows {@code t} as an unchecked throwable: runtime exceptions and errors are thrown
     * as they are, an {@link IOException} is wrapped in an {@link UncheckedIOException} and
     * anything else in an {@link UncheckedException}.
     *
     * Note: this never returns normally. The declared return type only exists to keep the
     * compiler happy at call sites.
     *
     * @param t the failure to rethrow
     * @param preserveMessage whether the wrapper should be created with {@code t}'s message
     */
    public static RuntimeException throwAsUncheckedException(Throwable t, boolean preserveMessage) {
        if (t instanceof InterruptedException) {
            // Restore the interrupt flag before the exception gets wrapped.
            Thread.currentThread().interrupt();
        }
        if (t instanceof RuntimeException) {
            throw (RuntimeException) t;
        }
        if (t instanceof Error) {
            throw (Error) t;
        }
        if (t instanceof IOException) {
            throw preserveMessage
                ? new UncheckedIOException(t.getMessage(), t)
                : new UncheckedIOException(t);
        }
        throw preserveMessage
            ? new UncheckedException(t.getMessage(), t)
            : new UncheckedException(t);
    }

    /**
     * Runs the callable and returns its result; any exception it throws is rethrown through
     * {@link #throwAsUncheckedException(Throwable)}.
     */
    public static <T> T callUnchecked(Callable<T> callable) {
        try {
            return callable.call();
        } catch (Exception failure) {
            throw throwAsUncheckedException(failure);
        }
    }

    /**
     * Unwraps the given InvocationTargetException and rethrows its target, which keeps the
     * chain of exceptions shorter without losing information.
     *
     * Note: this never returns normally. The declared return type only exists to keep the
     * compiler happy at call sites.
     *
     * @param e to be unwrapped
     * @return an instance of RuntimeException based on the target exception of the parameter.
     */
    public static RuntimeException unwrapAndRethrow(InvocationTargetException e) {
        Throwable target = e.getTargetException();
        return UncheckedException.throwAsUncheckedException(target);
    }
}

36
test/random_access/src/main/java/seaweedfs/client/btree/UncheckedIOException.java

@ -0,0 +1,36 @@
/*
* Copyright 2012 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package seaweedfs.client.btree;
/**
 * Unchecked exception used to wrap an {@link java.io.IOException}.
 */
public class UncheckedIOException extends RuntimeException {
    /** Creates an exception with neither message nor cause. */
    public UncheckedIOException() {
        super();
    }

    /** Creates an exception wrapping {@code cause}. */
    public UncheckedIOException(Throwable cause) {
        super(cause);
    }

    /** Creates an exception with the given message and no cause. */
    public UncheckedIOException(String message) {
        super(message);
    }

    /** Creates an exception with the given message wrapping {@code cause}. */
    public UncheckedIOException(String message, Throwable cause) {
        super(message, cause);
    }
}

133
test/random_access/src/main/java/seaweedfs/client/btree/serialize/AbstractDecoder.java

@ -0,0 +1,133 @@
/*
* Copyright 2013 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package seaweedfs.client.btree.serialize;
import javax.annotation.Nullable;
import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;
/**
 * Skeleton {@link Decoder}: implements the convenience reads in terms of the primitive reads
 * and of the two {@code maybe*} hooks that subclasses provide.
 */
public abstract class AbstractDecoder implements Decoder {
    private DecoderStream stream;

    /** Returns the raw-byte stream view of this decoder, creating it on first use. */
    @Override
    public InputStream getInputStream() {
        if (stream != null) {
            return stream;
        }
        stream = new DecoderStream();
        return stream;
    }

    /** Fills the whole buffer by delegating to {@code readBytes(buffer, 0, buffer.length)}. */
    @Override
    public void readBytes(byte[] buffer) throws IOException {
        readBytes(buffer, 0, buffer.length);
    }

    /** Reads a length via {@link #readSmallInt()} and then that many bytes. */
    @Override
    public byte[] readBinary() throws EOFException, IOException {
        byte[] payload = new byte[readSmallInt()];
        readBytes(payload);
        return payload;
    }

    /** Default: same encoding as {@code readInt()}. */
    @Override
    public int readSmallInt() throws EOFException, IOException {
        return readInt();
    }

    /** Default: same encoding as {@code readLong()}. */
    @Override
    public long readSmallLong() throws EOFException, IOException {
        return readLong();
    }

    /** Reads a presence flag and, when it is set, the value via {@link #readSmallInt()}. */
    @Nullable
    @Override
    public Integer readNullableSmallInt() throws IOException {
        if (!readBoolean()) {
            return null;
        }
        return readSmallInt();
    }

    /** Reads a presence flag and, when it is set, the value via {@code readString()}. */
    @Override
    public String readNullableString() throws EOFException, IOException {
        if (!readBoolean()) {
            return null;
        }
        return readString();
    }

    /**
     * Skips exactly {@code count} bytes by calling {@link #maybeSkip(long)} repeatedly.
     *
     * @throws EOFException if {@code maybeSkip} stops making progress before {@code count}
     *                      bytes have been skipped
     */
    @Override
    public void skipBytes(long count) throws EOFException, IOException {
        long remaining = count;
        while (remaining > 0) {
            long skipped = maybeSkip(remaining);
            if (skipped <= 0) {
                // No progress while bytes are still owed.
                throw new EOFException();
            }
            remaining -= skipped;
        }
    }

    /** Not supported by this base class. */
    @Override
    public <T> T decodeChunked(DecodeAction<Decoder, T> decodeAction) throws EOFException, Exception {
        throw new UnsupportedOperationException();
    }

    /** Not supported by this base class. */
    @Override
    public void skipChunked() throws EOFException, IOException {
        throw new UnsupportedOperationException();
    }

    /** Hook used by the stream view to read up to {@code count} bytes; the result is passed through to the caller. */
    protected abstract int maybeReadBytes(byte[] buffer, int offset, int count) throws IOException;

    /** Hook used to skip up to {@code count} bytes; a result of zero or less is treated by {@link #skipBytes(long)} as no progress. */
    protected abstract long maybeSkip(long count) throws IOException;

    /** {@link InputStream} adapter that forwards every operation to the {@code maybe*} hooks. */
    private class DecoderStream extends InputStream {
        byte[] buffer = new byte[1];

        @Override
        public long skip(long n) throws IOException {
            return maybeSkip(n);
        }

        /** Reads one byte through the one-element scratch buffer; a non-positive hook result is returned unchanged. */
        @Override
        public int read() throws IOException {
            int count = maybeReadBytes(buffer, 0, 1);
            return count > 0 ? (buffer[0] & 0xff) : count;
        }

        @Override
        public int read(byte[] buffer) throws IOException {
            return read(buffer, 0, buffer.length);
        }

        @Override
        public int read(byte[] buffer, int offset, int count) throws IOException {
            return maybeReadBytes(buffer, offset, count);
        }
    }
}

101
test/random_access/src/main/java/seaweedfs/client/btree/serialize/AbstractEncoder.java

@ -0,0 +1,101 @@
/*
* Copyright 2013 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package seaweedfs.client.btree.serialize;
import javax.annotation.Nullable;
import java.io.IOException;
import java.io.OutputStream;
/**
 * Skeleton {@link Encoder}: implements the convenience writes in terms of the primitive
 * writes that subclasses provide.
 */
public abstract class AbstractEncoder implements Encoder {
    private EncoderStream stream;

    /** Returns the raw-byte stream view of this encoder, creating it on first use. */
    @Override
    public OutputStream getOutputStream() {
        if (stream != null) {
            return stream;
        }
        stream = new EncoderStream();
        return stream;
    }

    /** Writes the whole array by delegating to {@code writeBytes(bytes, 0, bytes.length)}. */
    @Override
    public void writeBytes(byte[] bytes) throws IOException {
        writeBytes(bytes, 0, bytes.length);
    }

    /** Writes the whole array, length-prefixed, via {@link #writeBinary(byte[], int, int)}. */
    @Override
    public void writeBinary(byte[] bytes) throws IOException {
        writeBinary(bytes, 0, bytes.length);
    }

    /** Writes {@code count} via {@link #writeSmallInt(int)}, followed by the selected bytes. */
    @Override
    public void writeBinary(byte[] bytes, int offset, int count) throws IOException {
        writeSmallInt(count);
        writeBytes(bytes, offset, count);
    }

    /** Not supported by this base class. */
    @Override
    public void encodeChunked(EncodeAction<Encoder> writeAction) throws Exception {
        throw new UnsupportedOperationException();
    }

    /** Default: same encoding as {@code writeInt(int)}. */
    @Override
    public void writeSmallInt(int value) throws IOException {
        writeInt(value);
    }

    /** Default: same encoding as {@code writeLong(long)}. */
    @Override
    public void writeSmallLong(long value) throws IOException {
        writeLong(value);
    }

    /** Writes a presence flag, followed by the value via {@link #writeSmallInt(int)} when it is non-null. */
    @Override
    public void writeNullableSmallInt(@Nullable Integer value) throws IOException {
        boolean present = value != null;
        writeBoolean(present);
        if (present) {
            writeSmallInt(value);
        }
    }

    /** Writes a presence flag, followed by the value's string form via {@code writeString} when it is non-null. */
    @Override
    public void writeNullableString(@Nullable CharSequence value) throws IOException {
        boolean present = value != null;
        writeBoolean(present);
        if (present) {
            writeString(value.toString());
        }
    }

    /** {@link OutputStream} adapter that forwards every write to the enclosing encoder. */
    private class EncoderStream extends OutputStream {
        @Override
        public void write(int b) throws IOException {
            writeByte((byte) b);
        }

        @Override
        public void write(byte[] buffer) throws IOException {
            writeBytes(buffer);
        }

        @Override
        public void write(byte[] buffer, int offset, int length) throws IOException {
            writeBytes(buffer, offset, length);
        }
    }
}

40
test/random_access/src/main/java/seaweedfs/client/btree/serialize/AbstractSerializer.java

@ -0,0 +1,40 @@
/*
* Copyright 2016 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package seaweedfs.client.btree.serialize;
import com.google.common.base.Objects;
/**
 * This abstract class provides a sensible default implementation of {@code Serializer} equality:
 * two serializers are equal when they are instances of exactly the same class. This equality
 * implementation is required to enable cache instance reuse within the same Gradle runtime.
 * Serializers are used as cache parameters, which need to be compared to determine whether a
 * cache is compatible.
 */
public abstract class AbstractSerializer<T> implements Serializer<T> {
    /** Returns {@code true} only for a non-null object of exactly this serializer's class. */
    @Override
    public boolean equals(Object obj) {
        return obj != null && Objects.equal(obj.getClass(), getClass());
    }

    /** Hash derived from the runtime class only, consistent with {@link #equals(Object)}. */
    @Override
    public int hashCode() {
        Class<?> type = getClass();
        return Objects.hashCode(type);
    }
}

79
test/random_access/src/main/java/seaweedfs/client/btree/serialize/Cast.java

@ -0,0 +1,79 @@
/*
* Copyright 2012 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package seaweedfs.client.btree.serialize;
import javax.annotation.Nullable;
/**
 * Static helpers for casting objects.
 */
public abstract class Cast {
    /**
     * Casts the given object to the given type, providing a better error message than the default.
     *
     * The standard {@link Class#cast(Object)} method produces unsatisfactory error messages on some platforms
     * when it fails. All this method does is provide a better, consistent, error message.
     *
     * This should be used whenever there is a chance the cast could fail. If in doubt, use this.
     *
     * @param outputType The type to cast the input to
     * @param object The object to be cast (must not be {@code null})
     * @param <O> The type to be cast to
     * @param <I> The type of the object to be cast
     * @return The input object, cast to the output type
     */
    public static <O, I> O cast(Class<O> outputType, I object) {
        try {
            return outputType.cast(object);
        } catch (ClassCastException e) {
            String message = String.format(
                "Failed to cast object %s of type %s to target type %s",
                object, object.getClass().getName(), outputType.getName());
            throw new ClassCastException(message);
        }
    }

    /**
     * Null-tolerant variant of {@link #cast(Class, Object)}: a {@code null} input is returned
     * as {@code null}, anything else is cast with the same improved error message.
     *
     * @param outputType The type to cast the input to
     * @param object The object to be cast
     * @param <O> The type to be cast to
     * @param <I> The type of the object to be cast
     * @return The input object, cast to the output type
     */
    @Nullable
    public static <O, I> O castNullable(Class<O> outputType, @Nullable I object) {
        return object == null ? null : cast(outputType, object);
    }

    /** Casts to whatever type the call site expects, without any runtime check at this point. */
    @SuppressWarnings("unchecked")
    @Nullable
    public static <T> T uncheckedCast(@Nullable Object object) {
        return (T) object;
    }

    /** Same as {@link #uncheckedCast(Object)} but without the {@code @Nullable} annotations. */
    @SuppressWarnings("unchecked")
    public static <T> T uncheckedNonnullCast(Object object) {
        return (T) object;
    }
}

43
test/random_access/src/main/java/seaweedfs/client/btree/serialize/ClassLoaderObjectInputStream.java

@ -0,0 +1,43 @@
/*
* Copyright 2010 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package seaweedfs.client.btree.serialize;
import java.io.IOException;
import java.io.InputStream;
import java.io.ObjectInputStream;
import java.io.ObjectStreamClass;
/**
 * {@link ObjectInputStream} that resolves classes through a given class loader first and
 * falls back to the default resolution when that loader cannot find the class.
 */
public class ClassLoaderObjectInputStream extends ObjectInputStream {
    private final ClassLoader loader;

    /**
     * @param in the stream to deserialize from
     * @param loader the class loader tried first when resolving classes
     * @throws IOException if the {@link ObjectInputStream} constructor fails
     */
    public ClassLoaderObjectInputStream(InputStream in, ClassLoader loader) throws IOException {
        super(in);
        this.loader = loader;
    }

    /** Returns the class loader given at construction time. */
    public ClassLoader getClassLoader() {
        return loader;
    }

    /**
     * Looks the class up by name in the configured loader without initializing it; when that
     * fails with {@link ClassNotFoundException}, defers to {@link ObjectInputStream#resolveClass}.
     */
    @Override
    protected Class<?> resolveClass(ObjectStreamClass desc) throws IOException, ClassNotFoundException {
        Class<?> resolved;
        try {
            resolved = Class.forName(desc.getName(), false, loader);
        } catch (ClassNotFoundException notInLoader) {
            resolved = super.resolveClass(desc);
        }
        return resolved;
    }
}

140
test/random_access/src/main/java/seaweedfs/client/btree/serialize/Decoder.java

@ -0,0 +1,140 @@
/*
* Copyright 2013 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package seaweedfs.client.btree.serialize;
import javax.annotation.Nullable;
import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;
/**
* Provides a way to decode structured data from a backing byte stream. Implementations may buffer incoming bytes read
* from the backing stream prior to decoding.
*/
public interface Decoder {
/**
* Returns an InputStream which can be used to read raw bytes.
*/
InputStream getInputStream();
/**
* Reads a signed 64 bit long value. Can read any value that was written using {@link Encoder#writeLong(long)}.
*
* @throws EOFException when the end of the byte stream is reached before the long value can be fully read.
*/
long readLong() throws EOFException, IOException;
/**
* Reads a signed 64 bit long value. Can read any value that was written using {@link Encoder#writeSmallLong(long)}.
*
* @throws EOFException when the end of the byte stream is reached before the long value can be fully read.
*/
long readSmallLong() throws EOFException, IOException;
/**
* Reads a signed 32 bit int value. Can read any value that was written using {@link Encoder#writeInt(int)}.
*
* @throws EOFException when the end of the byte stream is reached before the int value can be fully read.
*/
int readInt() throws EOFException, IOException;
/**
* Reads a signed 32 bit int value. Can read any value that was written using {@link Encoder#writeSmallInt(int)}.
*
* @throws EOFException when the end of the byte stream is reached before the int value can be fully read.
*/
int readSmallInt() throws EOFException, IOException;
/**
* Reads a nullable signed 32 bit int value.
*
* @see #readSmallInt()
*/
@Nullable
Integer readNullableSmallInt() throws EOFException, IOException;
/**
* Reads a boolean value. Can read any value that was written using {@link Encoder#writeBoolean(boolean)}.
*
* @throws EOFException when the end of the byte stream is reached before the boolean value can be fully read.
*/
boolean readBoolean() throws EOFException, IOException;
/**
* Reads a non-null string value. Can read any value that was written using {@link Encoder#writeString(CharSequence)}.
*
* @throws EOFException when the end of the byte stream is reached before the string can be fully read.
*/
String readString() throws EOFException, IOException;
/**
* Reads a nullable string value. Can read any value that was written using {@link Encoder#writeNullableString(CharSequence)}.
*
* @throws EOFException when the end of the byte stream is reached before the string can be fully read.
*/
@Nullable
String readNullableString() throws EOFException, IOException;
/**
* Reads a byte value. Can read any byte value that was written using one of the raw byte methods on {@link Encoder}, such as {@link Encoder#writeByte(byte)} or {@link Encoder#getOutputStream()}
*
* @throws EOFException when the end of the byte stream is reached.
*/
byte readByte() throws EOFException, IOException;
/**
* Reads bytes into the given buffer, filling the buffer. Can read any byte values that were written using one of the raw byte methods on {@link Encoder}, such as {@link
* Encoder#writeBytes(byte[])} or {@link Encoder#getOutputStream()}
*
* @throws EOFException when the end of the byte stream is reached before the buffer is full.
*/
void readBytes(byte[] buffer) throws EOFException, IOException;
/**
* Reads the specified number of bytes into the given buffer. Can read any byte values that were written using one of the raw byte methods on {@link Encoder}, such as {@link
* Encoder#writeBytes(byte[])} or {@link Encoder#getOutputStream()}
*
* @throws EOFException when the end of the byte stream is reached before the specified number of bytes were read.
*/
void readBytes(byte[] buffer, int offset, int count) throws EOFException, IOException;
/**
* Reads a byte array. Can read any byte array written using {@link Encoder#writeBinary(byte[])} or {@link Encoder#writeBinary(byte[], int, int)}.
*
* @throws EOFException when the end of the byte stream is reached before the byte array was fully read.
*/
byte[] readBinary() throws EOFException, IOException;
/**
* Skips the given number of bytes. Can skip over any byte values that were written using one of the raw byte methods on {@link Encoder}.
*/
void skipBytes(long count) throws EOFException, IOException;
/**
* Reads a byte stream written using {@link Encoder#encodeChunked(Encoder.EncodeAction)}.
*/
<T> T decodeChunked(DecodeAction<Decoder, T> decodeAction) throws EOFException, Exception;
/**
* Skips over a byte stream written using {@link Encoder#encodeChunked(Encoder.EncodeAction)}, discarding its content.
*/
void skipChunked() throws EOFException, IOException;
/**
* Callback passed to {@link #decodeChunked(DecodeAction)}: produces a value of type {@code OUT} from a source of type {@code IN}.
*/
interface DecodeAction<IN, OUT> {
OUT read(IN source) throws Exception;
}
}

73
test/random_access/src/main/java/seaweedfs/client/btree/serialize/DefaultSerializer.java

@ -0,0 +1,73 @@
/*
* Copyright 2009 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package seaweedfs.client.btree.serialize;
import com.google.common.base.Objects;
import java.io.IOException;
import java.io.ObjectOutputStream;
import java.io.StreamCorruptedException;
/**
 * Serializer based on Java object streams: values are written with an
 * {@link ObjectOutputStream} and read back with a {@code ClassLoaderObjectInputStream}
 * that resolves classes through this serializer's class loader.
 */
public class DefaultSerializer<T> extends AbstractSerializer<T> {
    private ClassLoader classLoader;

    /** Uses the class loader that loaded this serializer's class. */
    public DefaultSerializer() {
        classLoader = getClass().getClassLoader();
    }

    /** Uses the given class loader, or this class's own loader when {@code classLoader} is {@code null}. */
    public DefaultSerializer(ClassLoader classLoader) {
        if (classLoader != null) {
            this.classLoader = classLoader;
        } else {
            this.classLoader = getClass().getClassLoader();
        }
    }

    public ClassLoader getClassLoader() {
        return classLoader;
    }

    /** Replaces the class loader as given; unlike the constructor, no fallback is applied for {@code null}. */
    public void setClassLoader(ClassLoader classLoader) {
        this.classLoader = classLoader;
    }

    /**
     * Deserializes one object from the decoder's raw input stream.
     *
     * @return the object read, or {@code null} when the stream is reported as corrupted
     */
    @Override
    public T read(Decoder decoder) throws Exception {
        try {
            ClassLoaderObjectInputStream objectStream =
                new ClassLoaderObjectInputStream(decoder.getInputStream(), classLoader);
            Object decoded = objectStream.readObject();
            return Cast.uncheckedNonnullCast(decoded);
        } catch (StreamCorruptedException e) {
            return null;
        }
    }

    /** Serializes {@code value} to the encoder's raw output stream and flushes the object stream. */
    @Override
    public void write(Encoder encoder, T value) throws IOException {
        ObjectOutputStream objectStream = new ObjectOutputStream(encoder.getOutputStream());
        objectStream.writeObject(value);
        objectStream.flush();
    }

    /** Equal when the superclass considers the objects equal and both use an equal class loader. */
    @Override
    public boolean equals(Object obj) {
        // super.equals() only accepts objects of this exact class, so the cast below is safe.
        return super.equals(obj)
            && Objects.equal(classLoader, ((DefaultSerializer<?>) obj).classLoader);
    }

    @Override
    public int hashCode() {
        int classHash = super.hashCode();
        return Objects.hashCode(classHash, classLoader);
    }
}

110
test/random_access/src/main/java/seaweedfs/client/btree/serialize/Encoder.java

@ -0,0 +1,110 @@
/*
* Copyright 2013 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package seaweedfs.client.btree.serialize;
import javax.annotation.Nullable;
import java.io.IOException;
import java.io.OutputStream;
/**
* Provides a way to encode structured data to a backing byte stream. Implementations may buffer outgoing encoded bytes prior
* to writing to the backing byte stream.
*/
public interface Encoder {
/**
* Returns an {@link OutputStream} that can be used to write raw bytes to the stream.
*/
OutputStream getOutputStream();
/**
* Writes a raw byte value to the stream.
*/
void writeByte(byte value) throws IOException;
/**
* Writes the given raw bytes to the stream. Does not encode any length information.
*/
void writeBytes(byte[] bytes) throws IOException;
/**
* Writes the given raw bytes to the stream. Does not encode any length information.
*/
void writeBytes(byte[] bytes, int offset, int count) throws IOException;
/**
* Writes the given byte array to the stream. Encodes the bytes and length information.
*/
void writeBinary(byte[] bytes) throws IOException;
/**
* Writes the given byte array to the stream. Encodes the bytes and length information.
*/
void writeBinary(byte[] bytes, int offset, int count) throws IOException;
/**
* Appends an encoded stream to this stream. Encodes the stream as a series of chunks with length information.
*/
void encodeChunked(EncodeAction<Encoder> writeAction) throws Exception;
/**
* Writes a signed 64 bit long value. The implementation may encode the value as a variable number of bytes, not necessarily as 8 bytes.
*/
void writeLong(long value) throws IOException;
/**
* Writes a signed 64 bit long value whose value is likely to be small and positive but may not be. The implementation may encode the value in a way that is more efficient for small positive
* values.
*/
void writeSmallLong(long value) throws IOException;
/**
* Writes a signed 32 bit int value. The implementation may encode the value as a variable number of bytes, not necessarily as 4 bytes.
*/
void writeInt(int value) throws IOException;
/**
* Writes a signed 32 bit int value whose value is likely to be small and positive but may not be. The implementation may encode the value in a way that
* is more efficient for small positive values.
*/
void writeSmallInt(int value) throws IOException;
/**
* Writes a nullable signed 32 bit int value whose value is likely to be small and positive but may not be.
*
* @see #writeSmallInt(int)
*/
void writeNullableSmallInt(@Nullable Integer value) throws IOException;
/**
* Writes a boolean value.
*/
void writeBoolean(boolean value) throws IOException;
/**
* Writes a non-null string value.
*/
void writeString(CharSequence value) throws IOException;
/**
* Writes a nullable string value.
*/
void writeNullableString(@Nullable CharSequence value) throws IOException;
/**
* Callback passed to {@link #encodeChunked(EncodeAction)}: writes content to a target of type {@code T}.
*/
interface EncodeAction<T> {
void write(T target) throws Exception;
}
}

31
test/random_access/src/main/java/seaweedfs/client/btree/serialize/FlushableEncoder.java

@ -0,0 +1,31 @@
/*
* Copyright 2013 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package seaweedfs.client.btree.serialize;
import java.io.Flushable;
import java.io.IOException;
/**
* Represents an {@link Encoder} that buffers encoded data prior to writing to the backing stream.
*/
public interface FlushableEncoder extends Encoder, Flushable {
/**
* Ensures that all buffered data has been written to the backing stream. Does not flush the backing stream.
*
* @throws IOException as declared by {@link Flushable#flush()}; the exact failure conditions depend on the implementation.
*/
@Override
void flush() throws IOException;
}

28
test/random_access/src/main/java/seaweedfs/client/btree/serialize/ObjectReader.java

@ -0,0 +1,28 @@
/*
* Copyright 2012 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package seaweedfs.client.btree.serialize;
import java.io.EOFException;
/**
* Reads objects of type {@code T} one at a time.
*/
public interface ObjectReader<T> {
/**
* Reads the next object from the stream.
*
* @return the object that was read
* @throws EOFException When the next object cannot be fully read due to reaching the end of stream.
*/
T read() throws EOFException, Exception;
}

21
test/random_access/src/main/java/seaweedfs/client/btree/serialize/ObjectWriter.java

@ -0,0 +1,21 @@
/*
* Copyright 2012 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package seaweedfs.client.btree.serialize;
/**
* Writes objects of type {@code T} one at a time.
*/
public interface ObjectWriter<T> {
/**
* Writes the given object.
*
* @param value the object to write
* @throws Exception as declared by the signature; the failure conditions depend on the implementation.
*/
void write(T value) throws Exception;
}

33
test/random_access/src/main/java/seaweedfs/client/btree/serialize/Serializer.java

@ -0,0 +1,33 @@
/*
* Copyright 2009 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package seaweedfs.client.btree.serialize;
import java.io.EOFException;
/**
 * Converts objects of type {@code T} to and from the binary form handled by a {@link Decoder} / {@link Encoder} pair.
 *
 * @param <T> the type of object being serialized
 */
public interface Serializer<T> {
/**
 * Reads the next object from the given stream. The implementation must not perform any buffering, so that it reads only those bytes from the input stream that are
 * required to deserialize the next object.
 *
 * @param decoder the stream to read from
 * @return the deserialized object
 * @throws EOFException When the next object cannot be fully read due to reaching the end of stream.
 * @throws Exception when the object cannot be read for any other reason
 */
T read(Decoder decoder) throws EOFException, Exception;
/**
 * Writes the given object to the given stream. The implementation must not perform any buffering.
 *
 * @param encoder the stream to write to
 * @param value the object to serialize
 * @throws Exception when the object cannot be written
 */
void write(Encoder encoder, T value) throws Exception;
}

33
test/random_access/src/main/java/seaweedfs/client/btree/serialize/StatefulSerializer.java

@ -0,0 +1,33 @@
/*
* Copyright 2012 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package seaweedfs.client.btree.serialize;
/**
 * Factory for {@link ObjectReader} / {@link ObjectWriter} instances bound to a particular {@link Decoder} or {@link Encoder}.
 *
 * Implementations must allow concurrent reading and writing, so that a thread can read and a thread can write at the same time.
 * Implementations do not need to support multiple read threads or multiple write threads.
 *
 * @param <T> the type of object being serialized
 */
public interface StatefulSerializer<T> {
/**
 * Creates a reader that takes its input from the given decoder.
 * Should not perform any buffering
 *
 * @param decoder the stream the returned reader reads from
 * @return a new reader
 */
ObjectReader<T> newReader(Decoder decoder);
/**
 * Creates a writer that sends its output to the given encoder.
 * Should not perform any buffering
 *
 * @param encoder the stream the returned writer writes to
 * @return a new writer
 */
ObjectWriter<T> newWriter(Encoder encoder);
}

210
test/random_access/src/main/java/seaweedfs/client/btree/serialize/kryo/KryoBackedDecoder.java

@ -0,0 +1,210 @@
/*
* Copyright 2013 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package seaweedfs.client.btree.serialize.kryo;
import com.esotericsoftware.kryo.KryoException;
import com.esotericsoftware.kryo.io.Input;
import seaweedfs.client.btree.serialize.AbstractDecoder;
import seaweedfs.client.btree.serialize.Decoder;
import java.io.Closeable;
import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;
/**
 * {@link Decoder} implementation that reads its data through a Kryo {@link Input}.
 *
 * <p>Note that this decoder uses buffering, so will attempt to read beyond the end of the encoded data. This means you should use this type only when this decoder will be used to decode the entire
 * stream.
 */
public class KryoBackedDecoder extends AbstractDecoder implements Decoder, Closeable {
    private final Input input;
    private final InputStream inputStream;
    // Bytes skipped directly on inputStream; added to input.total() by getReadPosition().
    private long extraSkipped;
    // Created on the first decodeChunked() call and reused afterwards.
    private KryoBackedDecoder nested;

    /**
     * Creates a decoder over the given stream using a 4096 byte buffer.
     */
    public KryoBackedDecoder(InputStream inputStream) {
        this(inputStream, 4096);
    }

    /**
     * Creates a decoder over the given stream.
     *
     * @param inputStream the stream to read from
     * @param bufferSize size of the Kryo {@link Input} buffer
     */
    public KryoBackedDecoder(InputStream inputStream, int bufferSize) {
        this.inputStream = inputStream;
        input = new Input(this.inputStream, bufferSize);
    }

    @Override
    protected int maybeReadBytes(byte[] buffer, int offset, int count) {
        return input.read(buffer, offset, count);
    }

    /**
     * Skips up to {@code count} bytes and returns how many were skipped. Buffered bytes are skipped by moving the
     * buffer position; only when the buffer is empty is the underlying stream asked to skip.
     */
    @Override
    protected long maybeSkip(long count) throws IOException {
        // Work around some bugs in Input.skip()
        int remaining = input.limit() - input.position();
        if (remaining == 0) {
            long skipped = inputStream.skip(count);
            if (skipped > 0) {
                extraSkipped += skipped;
            }
            return skipped;
        } else if (count <= remaining) {
            // count fits in an int here because it does not exceed remaining.
            input.setPosition(input.position() + (int) count);
            return count;
        } else {
            input.setPosition(input.limit());
            return remaining;
        }
    }

    /**
     * Translates a Kryo "Buffer underflow." failure into an {@link EOFException}; any other failure is rethrown as is.
     * This method never returns normally; the return type only lets callers write {@code throw maybeEndOfStream(e)}.
     */
    private RuntimeException maybeEndOfStream(KryoException e) throws EOFException {
        // Constant-first comparison: an exception without a message must be rethrown, not masked by a NullPointerException.
        if ("Buffer underflow.".equals(e.getMessage())) {
            throw (EOFException) (new EOFException().initCause(e));
        }
        throw e;
    }

    @Override
    public byte readByte() throws EOFException {
        try {
            return input.readByte();
        } catch (KryoException e) {
            throw maybeEndOfStream(e);
        }
    }

    @Override
    public void readBytes(byte[] buffer, int offset, int count) throws EOFException {
        try {
            input.readBytes(buffer, offset, count);
        } catch (KryoException e) {
            throw maybeEndOfStream(e);
        }
    }

    @Override
    public long readLong() throws EOFException {
        try {
            return input.readLong();
        } catch (KryoException e) {
            throw maybeEndOfStream(e);
        }
    }

    @Override
    public long readSmallLong() throws EOFException, IOException {
        try {
            return input.readLong(true);
        } catch (KryoException e) {
            throw maybeEndOfStream(e);
        }
    }

    @Override
    public int readInt() throws EOFException {
        try {
            return input.readInt();
        } catch (KryoException e) {
            throw maybeEndOfStream(e);
        }
    }

    @Override
    public int readSmallInt() throws EOFException {
        try {
            return input.readInt(true);
        } catch (KryoException e) {
            throw maybeEndOfStream(e);
        }
    }

    @Override
    public boolean readBoolean() throws EOFException {
        try {
            return input.readBoolean();
        } catch (KryoException e) {
            throw maybeEndOfStream(e);
        }
    }

    @Override
    public String readString() throws EOFException {
        return readNullableString();
    }

    @Override
    public String readNullableString() throws EOFException {
        try {
            return input.readString();
        } catch (KryoException e) {
            throw maybeEndOfStream(e);
        }
    }

    /**
     * Skips a chunked section: repeatedly reads a chunk length and skips that many bytes until a zero length is read.
     */
    @Override
    public void skipChunked() throws EOFException, IOException {
        while (true) {
            int count = readSmallInt();
            if (count == 0) {
                break;
            }
            skipBytes(count);
        }
    }

    /**
     * Runs {@code decodeAction} against a nested decoder whose input is the chunked section at the current position,
     * then requires the zero-length terminator to follow.
     *
     * @throws IllegalStateException when the action returns and the next chunk length is not zero
     */
    @Override
    public <T> T decodeChunked(DecodeAction<Decoder, T> decodeAction) throws EOFException, Exception {
        if (nested == null) {
            nested = new KryoBackedDecoder(new InputStream() {
                @Override
                public int read() throws IOException {
                    throw new UnsupportedOperationException();
                }

                @Override
                public int read(byte[] buffer, int offset, int length) throws IOException {
                    // Each call hands out exactly one chunk read from the enclosing decoder.
                    int count = readSmallInt();
                    if (count == 0) {
                        // End of stream has been reached
                        return -1;
                    }
                    if (count > length) {
                        // For now, assume same size buffers used to read and write
                        throw new UnsupportedOperationException();
                    }
                    readBytes(buffer, offset, count);
                    return count;
                }
            });
        }
        T value = decodeAction.read(nested);
        if (readSmallInt() != 0) {
            throw new IllegalStateException("Expecting the end of nested stream.");
        }
        return value;
    }

    /**
     * Returns the total number of bytes consumed by this decoder. Some additional bytes may also be buffered by this decoder but have not been consumed.
     */
    public long getReadPosition() {
        return input.total() + extraSkipped;
    }

    @Override
    public void close() throws IOException {
        input.close();
    }
}

134
test/random_access/src/main/java/seaweedfs/client/btree/serialize/kryo/KryoBackedEncoder.java

@ -0,0 +1,134 @@
/*
* Copyright 2013 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package seaweedfs.client.btree.serialize.kryo;
import com.esotericsoftware.kryo.io.Output;
import seaweedfs.client.btree.serialize.AbstractEncoder;
import seaweedfs.client.btree.serialize.Encoder;
import seaweedfs.client.btree.serialize.FlushableEncoder;
import javax.annotation.Nullable;
import java.io.Closeable;
import java.io.IOException;
import java.io.OutputStream;
/**
 * {@link FlushableEncoder} implementation that writes its data through a Kryo {@link Output}.
 */
public class KryoBackedEncoder extends AbstractEncoder implements FlushableEncoder, Closeable {
    private final Output output;
    // Created on the first encodeChunked() call and reused afterwards.
    private KryoBackedEncoder nested;

    /**
     * Creates an encoder over the given stream using a 4096 byte buffer.
     */
    public KryoBackedEncoder(OutputStream outputStream) {
        this(outputStream, 4096);
    }

    /**
     * Creates an encoder over the given stream.
     *
     * @param outputStream the stream to write to
     * @param bufferSize size of the Kryo {@link Output} buffer
     */
    public KryoBackedEncoder(OutputStream outputStream, int bufferSize) {
        this.output = new Output(outputStream, bufferSize);
    }

    @Override
    public void writeByte(byte value) {
        output.writeByte(value);
    }

    @Override
    public void writeBytes(byte[] bytes, int offset, int count) {
        output.writeBytes(bytes, offset, count);
    }

    @Override
    public void writeLong(long value) {
        output.writeLong(value);
    }

    @Override
    public void writeSmallLong(long value) {
        output.writeLong(value, true);
    }

    @Override
    public void writeInt(int value) {
        output.writeInt(value);
    }

    @Override
    public void writeSmallInt(int value) {
        output.writeInt(value, true);
    }

    @Override
    public void writeBoolean(boolean value) {
        output.writeBoolean(value);
    }

    /**
     * Writes a string that must not be null.
     *
     * @throws IllegalArgumentException when {@code value} is null
     */
    @Override
    public void writeString(CharSequence value) {
        if (value != null) {
            output.writeString(value);
            return;
        }
        throw new IllegalArgumentException("Cannot encode a null string.");
    }

    @Override
    public void writeNullableString(@Nullable CharSequence value) {
        output.writeString(value);
    }

    /**
     * Runs {@code writeAction} against a nested encoder whose output is written to this encoder as length-prefixed
     * chunks, then writes a zero length to mark the end of the chunked section.
     */
    @Override
    public void encodeChunked(EncodeAction<Encoder> writeAction) throws Exception {
        KryoBackedEncoder chunked = nested;
        if (chunked == null) {
            chunked = newChunkingEncoder();
            nested = chunked;
        }
        writeAction.write(chunked);
        chunked.flush();
        writeSmallInt(0);
    }

    /**
     * Builds the nested encoder: every non-empty buffer it flushes is forwarded to this encoder as a chunk length
     * followed by the chunk bytes.
     */
    private KryoBackedEncoder newChunkingEncoder() {
        return new KryoBackedEncoder(new OutputStream() {
            @Override
            public void write(byte[] buffer, int offset, int length) {
                if (length != 0) {
                    writeSmallInt(length);
                    writeBytes(buffer, offset, length);
                }
            }

            @Override
            public void write(byte[] buffer) throws IOException {
                write(buffer, 0, buffer.length);
            }

            @Override
            public void write(int b) {
                throw new UnsupportedOperationException();
            }
        });
    }

    /**
     * Returns the total number of bytes written by this encoder, some of which may still be buffered.
     */
    public long getWritePosition() {
        return output.total();
    }

    @Override
    public void flush() {
        output.flush();
    }

    @Override
    public void close() {
        output.close();
    }
}

188
test/random_access/src/main/java/seaweedfs/client/btree/serialize/kryo/StringDeduplicatingKryoBackedDecoder.java

@ -0,0 +1,188 @@
/*
* Copyright 2018 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package seaweedfs.client.btree.serialize.kryo;
import com.esotericsoftware.kryo.KryoException;
import com.esotericsoftware.kryo.io.Input;
import seaweedfs.client.btree.serialize.AbstractDecoder;
import seaweedfs.client.btree.serialize.Decoder;
import java.io.Closeable;
import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;
/**
 * {@link Decoder} implementation that reads its data through a Kryo {@link Input} and reads strings by index:
 * every string is preceded by an int index, and the string body follows only the first time an index is seen.
 *
 * <p>Note that this decoder uses buffering, so will attempt to read beyond the end of the encoded data. This means you should use this type only when this decoder will be used to decode the entire
 * stream.
 */
public class StringDeduplicatingKryoBackedDecoder extends AbstractDecoder implements Decoder, Closeable {
    public static final int INITIAL_CAPACITY = 32;
    private final Input input;
    private final InputStream inputStream;
    // Strings already read, by index; allocated on the first non-null string.
    private String[] strings;
    // Bytes skipped directly on inputStream; added to input.total() by getReadPosition().
    private long extraSkipped;

    /**
     * Creates a decoder over the given stream using a 4096 byte buffer.
     */
    public StringDeduplicatingKryoBackedDecoder(InputStream inputStream) {
        this(inputStream, 4096);
    }

    /**
     * Creates a decoder over the given stream.
     *
     * @param inputStream the stream to read from
     * @param bufferSize size of the Kryo {@link Input} buffer
     */
    public StringDeduplicatingKryoBackedDecoder(InputStream inputStream, int bufferSize) {
        this.inputStream = inputStream;
        input = new Input(this.inputStream, bufferSize);
    }

    @Override
    protected int maybeReadBytes(byte[] buffer, int offset, int count) {
        return input.read(buffer, offset, count);
    }

    /**
     * Skips up to {@code count} bytes and returns how many were skipped. Buffered bytes are skipped by moving the
     * buffer position; only when the buffer is empty is the underlying stream asked to skip.
     */
    @Override
    protected long maybeSkip(long count) throws IOException {
        // Work around some bugs in Input.skip()
        int remaining = input.limit() - input.position();
        if (remaining == 0) {
            long skipped = inputStream.skip(count);
            if (skipped > 0) {
                extraSkipped += skipped;
            }
            return skipped;
        } else if (count <= remaining) {
            // count fits in an int here because it does not exceed remaining.
            input.setPosition(input.position() + (int) count);
            return count;
        } else {
            input.setPosition(input.limit());
            return remaining;
        }
    }

    /**
     * Translates a Kryo "Buffer underflow." failure into an {@link EOFException}; any other failure is rethrown as is.
     * This method never returns normally; the return type only lets callers write {@code throw maybeEndOfStream(e)}.
     */
    private RuntimeException maybeEndOfStream(KryoException e) throws EOFException {
        // Constant-first comparison: an exception without a message must be rethrown, not masked by a NullPointerException.
        if ("Buffer underflow.".equals(e.getMessage())) {
            throw (EOFException) (new EOFException().initCause(e));
        }
        throw e;
    }

    @Override
    public byte readByte() throws EOFException {
        try {
            return input.readByte();
        } catch (KryoException e) {
            throw maybeEndOfStream(e);
        }
    }

    @Override
    public void readBytes(byte[] buffer, int offset, int count) throws EOFException {
        try {
            input.readBytes(buffer, offset, count);
        } catch (KryoException e) {
            throw maybeEndOfStream(e);
        }
    }

    @Override
    public long readLong() throws EOFException {
        try {
            return input.readLong();
        } catch (KryoException e) {
            throw maybeEndOfStream(e);
        }
    }

    @Override
    public long readSmallLong() throws EOFException, IOException {
        try {
            return input.readLong(true);
        } catch (KryoException e) {
            throw maybeEndOfStream(e);
        }
    }

    @Override
    public int readInt() throws EOFException {
        try {
            return input.readInt();
        } catch (KryoException e) {
            throw maybeEndOfStream(e);
        }
    }

    @Override
    public int readSmallInt() throws EOFException {
        try {
            return input.readInt(true);
        } catch (KryoException e) {
            throw maybeEndOfStream(e);
        }
    }

    @Override
    public boolean readBoolean() throws EOFException {
        try {
            return input.readBoolean();
        } catch (KryoException e) {
            throw maybeEndOfStream(e);
        }
    }

    @Override
    public String readString() throws EOFException {
        return readNullableString();
    }

    /**
     * Reads a string index and resolves it: {@code -1} means null, an index already in the table returns the
     * remembered string, and any other index is followed by the string body, which is read and remembered.
     */
    @Override
    public String readNullableString() throws EOFException {
        try {
            int idx = readInt();
            if (idx == -1) {
                return null;
            }
            if (strings == null) {
                strings = new String[INITIAL_CAPACITY];
            }
            String string = null;
            if (idx >= strings.length) {
                // Table is full: grow it by half; the index cannot have been seen before.
                String[] grow = new String[strings.length * 3 / 2];
                System.arraycopy(strings, 0, grow, 0, strings.length);
                strings = grow;
            } else {
                string = strings[idx];
            }
            if (string == null) {
                string = input.readString();
                strings[idx] = string;
            }
            return string;
        } catch (KryoException e) {
            throw maybeEndOfStream(e);
        }
    }

    /**
     * Returns the total number of bytes consumed by this decoder. Some additional bytes may also be buffered by this decoder but have not been consumed.
     */
    public long getReadPosition() {
        return input.total() + extraSkipped;
    }

    /**
     * Drops the string table and closes the underlying Kryo input.
     */
    @Override
    public void close() throws IOException {
        strings = null;
        input.close();
    }
}

128
test/random_access/src/main/java/seaweedfs/client/btree/serialize/kryo/StringDeduplicatingKryoBackedEncoder.java

@ -0,0 +1,128 @@
/*
* Copyright 2018 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package seaweedfs.client.btree.serialize.kryo;
import com.esotericsoftware.kryo.io.Output;
import com.google.common.collect.Maps;
import seaweedfs.client.btree.serialize.AbstractEncoder;
import seaweedfs.client.btree.serialize.FlushableEncoder;
import javax.annotation.Nullable;
import java.io.Closeable;
import java.io.OutputStream;
import java.util.Map;
/**
 * {@link FlushableEncoder} implementation that writes its data through a Kryo {@link Output} and writes each
 * distinct string body only once: every string is written as an int index, followed by the body the first time
 * that string is seen.
 */
public class StringDeduplicatingKryoBackedEncoder extends AbstractEncoder implements FlushableEncoder, Closeable {
    // Index assigned to each string written so far; allocated on the first non-null string.
    private Map<String, Integer> strings;
    private final Output output;

    /**
     * Creates an encoder over the given stream using a 4096 byte buffer.
     */
    public StringDeduplicatingKryoBackedEncoder(OutputStream outputStream) {
        this(outputStream, 4096);
    }

    /**
     * Creates an encoder over the given stream.
     *
     * @param outputStream the stream to write to
     * @param bufferSize size of the Kryo {@link Output} buffer
     */
    public StringDeduplicatingKryoBackedEncoder(OutputStream outputStream, int bufferSize) {
        this.output = new Output(outputStream, bufferSize);
    }

    @Override
    public void writeByte(byte value) {
        output.writeByte(value);
    }

    @Override
    public void writeBytes(byte[] bytes, int offset, int count) {
        output.writeBytes(bytes, offset, count);
    }

    @Override
    public void writeLong(long value) {
        output.writeLong(value);
    }

    @Override
    public void writeSmallLong(long value) {
        output.writeLong(value, true);
    }

    @Override
    public void writeInt(int value) {
        output.writeInt(value);
    }

    @Override
    public void writeSmallInt(int value) {
        output.writeInt(value, true);
    }

    @Override
    public void writeBoolean(boolean value) {
        output.writeBoolean(value);
    }

    /**
     * Writes a string that must not be null.
     *
     * @throws IllegalArgumentException when {@code value} is null
     */
    @Override
    public void writeString(CharSequence value) {
        if (value != null) {
            writeNullableString(value);
            return;
        }
        throw new IllegalArgumentException("Cannot encode a null string.");
    }

    /**
     * Writes {@code -1} for null; otherwise writes the string's index, followed by the string body when the
     * string has not been written before.
     */
    @Override
    public void writeNullableString(@Nullable CharSequence value) {
        if (value == null) {
            output.writeInt(-1);
            return;
        }
        if (strings == null) {
            strings = Maps.newHashMapWithExpectedSize(1024);
        }
        String text = value.toString();
        Integer known = strings.get(text);
        if (known != null) {
            output.writeInt(known);
            return;
        }
        // First occurrence: the next free index is the current table size.
        Integer assigned = strings.size();
        output.writeInt(assigned);
        strings.put(text, assigned);
        output.writeString(text);
    }

    /**
     * Returns the total number of bytes written by this encoder, some of which may still be buffered.
     */
    public long getWritePosition() {
        return output.total();
    }

    @Override
    public void flush() {
        output.flush();
    }

    @Override
    public void close() {
        output.close();
    }

    /**
     * Drops the string table.
     */
    public void done() {
        strings = null;
    }
}

51
test/random_access/src/main/java/seaweedfs/client/btree/serialize/kryo/TypeSafeSerializer.java

@ -0,0 +1,51 @@
/*
* Copyright 2012 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package seaweedfs.client.btree.serialize.kryo;
import seaweedfs.client.btree.serialize.*;
/**
 * Adapts a {@code StatefulSerializer<T>} to a {@code StatefulSerializer<Object>}. Values handed to the writer are
 * cast to {@code T} with {@link Class#cast(Object)} before being passed to the wrapped serializer.
 *
 * @param <T> the type handled by the wrapped serializer
 */
public class TypeSafeSerializer<T> implements StatefulSerializer<Object> {
    private final Class<T> type;
    private final StatefulSerializer<T> serializer;

    /**
     * @param type the class used to cast values before they are written
     * @param serializer the serializer to delegate to
     */
    public TypeSafeSerializer(Class<T> type, StatefulSerializer<T> serializer) {
        this.serializer = serializer;
        this.type = type;
    }

    /**
     * Returns a reader that forwards to a reader of the wrapped serializer.
     */
    @Override
    public ObjectReader<Object> newReader(Decoder decoder) {
        final ObjectReader<T> typedReader = serializer.newReader(decoder);
        ObjectReader<Object> untyped = new ObjectReader<Object>() {
            @Override
            public Object read() throws Exception {
                return typedReader.read();
            }
        };
        return untyped;
    }

    /**
     * Returns a writer that casts each value to {@code T} and forwards it to a writer of the wrapped serializer.
     */
    @Override
    public ObjectWriter<Object> newWriter(Encoder encoder) {
        final ObjectWriter<T> typedWriter = serializer.newWriter(encoder);
        ObjectWriter<Object> untyped = new ObjectWriter<Object>() {
            @Override
            public void write(Object value) throws Exception {
                T typedValue = type.cast(value);
                typedWriter.write(typedValue);
            }
        };
        return untyped;
    }
}

143
test/random_access/src/test/java/seaewedfs/mmap/MmapFileTest.java

@ -0,0 +1,143 @@
package seaewedfs.mmap;
import org.junit.Test;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.MappedByteBuffer;
import java.nio.channels.FileChannel;
/**
 * Exercises memory-mapped file access on files under {@link #dir}.
 * NOTE(review): {@code dir} is a developer-specific absolute path; judging by the log excerpt in
 * {@link #testBigMmap()} it is presumably a mounted SeaweedFS directory — confirm before running elsewhere.
 */
public class MmapFileTest {
    File dir = new File("/Users/chris/tmp/mm/dev");

    /**
     * Maps {@code mmap_file.txt} read-only, closes the channel and file, then overwrites the file through a
     * {@link FileOutputStream}. Any failure now fails the test instead of only being printed.
     *
     * @throws IOException when the file cannot be opened, mapped or written
     */
    @Test
    public void testMmap() throws IOException {
        System.out.println("starting ...");
        File f = new File(dir, "mmap_file.txt");
        // try-with-resources closes the channel first and then the file, also when mapping fails.
        try (RandomAccessFile raf = new RandomAccessFile(f, "rw");
             FileChannel fc = raf.getChannel()) {
            // The mapping itself is never read; the test only checks that the file can be mapped and rewritten.
            MappedByteBuffer mbf = fc.map(FileChannel.MapMode.READ_ONLY, 0, fc.size());
        }
        try (FileOutputStream fos = new FileOutputStream(f)) {
            fos.write("abcdefg".getBytes());
        }
        System.out.println("completed!");
    }

    /**
     * Maps the first {@code 4096 * 8 * 8} bytes of {@code mmap_big.txt} read-write and puts a short string at the
     * start of the mapping. The comments below record a log and a metadata dump captured from one run.
     *
     * @throws IOException when the file cannot be opened or mapped
     */
    @Test
    public void testBigMmap() throws IOException {
        /*
        // new file
        I0817 09:48:02 25175 dir.go:147] create /dev/mmap_big.txt: OpenReadWrite+OpenCreate
        I0817 09:48:02 25175 wfs.go:116] AcquireHandle /dev/mmap_big.txt uid=502 gid=20
        I0817 09:48:02 25175 file.go:62] file Attr /dev/mmap_big.txt, open:1, size: 0
        I0817 09:48:02 25175 meta_cache_subscribe.go:32] creating /dev/mmap_big.txt
        //get channel
        I0817 09:48:26 25175 file.go:62] file Attr /dev/mmap_big.txt, open:1, size: 0
        I0817 09:48:32 25175 file.go:62] file Attr /dev/mmap_big.txt, open:1, size: 0
        I0817 09:48:32 25175 wfs.go:116] AcquireHandle /dev/mmap_big.txt uid=0 gid=0
        I0817 09:48:32 25175 filehandle.go:160] Release /dev/mmap_big.txt fh 14968871991130164560
        //fileChannel.map
        I0817 09:49:18 25175 file.go:62] file Attr /dev/mmap_big.txt, open:1, size: 0
        I0817 09:49:18 25175 file.go:112] /dev/mmap_big.txt file setattr set size=262144 chunks=0
        I0817 09:49:18 25175 file.go:62] file Attr /dev/mmap_big.txt, open:1, size: 262144
        I0817 09:49:18 25175 file.go:62] file Attr /dev/mmap_big.txt, open:1, size: 262144
        I0817 09:49:18 25175 file.go:62] file Attr /dev/mmap_big.txt, open:1, size: 262144
        // buffer.put
        I0817 09:49:49 25175 filehandle.go:57] /dev/mmap_big.txt read fh 14968871991130164560: [0,32768) size 32768 resp.Data len=0 cap=32768
        I0817 09:49:49 25175 reader_at.go:113] zero2 [0,32768)
        I0817 09:49:50 25175 file.go:62] file Attr /dev/mmap_big.txt, open:1, size: 262144
        I0817 09:49:53 25175 file.go:233] /dev/mmap_big.txt fsync file Fsync [ID=0x4 Node=0xe Uid=0 Gid=0 Pid=0] Handle 0x2 Flags 1
        //close
        I0817 09:50:14 25175 file.go:62] file Attr /dev/mmap_big.txt, open:1, size: 262144
        I0817 09:50:14 25175 dirty_page.go:130] saveToStorage /dev/mmap_big.txt 1,315b69812039e5 [0,4096) of 262144 bytes
        I0817 09:50:14 25175 file.go:274] /dev/mmap_big.txt existing 0 chunks adds 1 more
        I0817 09:50:14 25175 filehandle.go:218] /dev/mmap_big.txt set chunks: 1
        I0817 09:50:14 25175 filehandle.go:220] /dev/mmap_big.txt chunks 0: 1,315b69812039e5 [0,4096)
        I0817 09:50:14 25175 meta_cache_subscribe.go:23] deleting /dev/mmap_big.txt
        I0817 09:50:14 25175 meta_cache_subscribe.go:32] creating /dev/mmap_big.txt
        // end of test
        I0817 09:50:41 25175 file.go:62] file Attr /dev/mmap_big.txt, open:1, size: 262144
        I0817 09:50:41 25175 filehandle.go:160] Release /dev/mmap_big.txt fh 14968871991130164560
        */
        // Create file object
        File file = new File(dir, "mmap_big.txt");
        try (RandomAccessFile randomAccessFile = new RandomAccessFile(file, "rw")) {
            // Get file channel in read-write mode
            FileChannel fileChannel = randomAccessFile.getChannel();
            // Get direct byte buffer access using channel.map() operation
            MappedByteBuffer buffer = fileChannel.map(FileChannel.MapMode.READ_WRITE, 0, 4096 * 8 * 8);
            //Write the content using put methods
            buffer.put("howtodoinjava.com".getBytes());
        }
        /*
        > meta.cat /dev/mmap_big.txt
        {
        "name": "mmap_big.txt",
        "isDirectory": false,
        "chunks": [
        {
        "fileId": "1,315b69812039e5",
        "offset": "0",
        "size": "4096",
        "mtime": "1597683014026365000",
        "eTag": "985ab0ac",
        "sourceFileId": "",
        "fid": {
        "volumeId": 1,
        "fileKey": "3234665",
        "cookie": 2166372837
        },
        "sourceFid": null,
        "cipherKey": null,
        "isCompressed": true,
        "isChunkManifest": false
        }
        ],
        "attributes": {
        "fileSize": "262144",
        "mtime": "1597683014",
        "fileMode": 420,
        "uid": 502,
        "gid": 20,
        "crtime": "1597682882",
        "mime": "application/octet-stream",
        "replication": "",
        "collection": "",
        "ttlSec": 0,
        "userName": "",
        "groupName": [
        ],
        "symlinkTarget": "",
        "md5": null
        },
        "extended": {
        }
        }
        */
    }
}

476
test/random_access/src/test/java/seaweedfs/client/btree/BTreePersistentIndexedCacheTest.java

@ -0,0 +1,476 @@
/*
* Copyright 2010 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package seaweedfs.client.btree;
import seaweedfs.client.btree.serialize.DefaultSerializer;
import seaweedfs.client.btree.serialize.Serializer;
import org.junit.Before;
import org.junit.Rule;
import org.junit.Test;
import java.io.File;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import static org.hamcrest.CoreMatchers.*;
import static org.junit.Assert.assertNull;
import static org.hamcrest.MatcherAssert.assertThat;
import static org.junit.Assert.assertTrue;
public class BTreePersistentIndexedCacheTest {
private final Serializer<String> stringSerializer = new DefaultSerializer<String>();
private final Serializer<Integer> integerSerializer = new DefaultSerializer<Integer>();
private BTreePersistentIndexedCache<String, Integer> cache;
private File cacheFile;
/**
 * Points {@code cacheFile} at {@code cache.bin} inside the test directory before each test.
 */
@Before
public void setup() {
cacheFile = tmpDirFile("cache.bin");
}
/**
 * Resolves {@code filename} inside the directory that holds the cache files of this test, creating that
 * directory first when it does not exist.
 *
 * @param filename name of the file inside the test directory
 * @return the file, which is not created by this method
 * @throws IllegalStateException when the test directory does not exist and cannot be created
 */
public File tmpDirFile(String filename) {
    File f = new File("/Users/chris/tmp/mm/dev/btree_test");
    // File f = new File("/tmp/btree_test");
    // mkdirs() returns false both on failure and when the directory already exists, so re-check before giving up.
    if (!f.mkdirs() && !f.isDirectory()) {
        throw new IllegalStateException("Cannot create test directory " + f);
    }
    return new File(f, filename);
}
/**
 * Opens {@code cache} over {@code cacheFile} with String keys and Integer values.
 * NOTE(review): the meaning of the trailing {@code (short) 4} and {@code 100} arguments is defined by the
 * BTreePersistentIndexedCache constructor, which is not visible here — confirm there.
 */
private void createCache() {
cache = new BTreePersistentIndexedCache<String, Integer>(cacheFile, stringSerializer, integerSerializer, (short) 4, 100);
}
/**
 * Runs the cache's {@code verify()} check and then closes the cache.
 */
private void verifyAndCloseCache() {
cache.verify();
cache.close();
}
/**
 * Looking up a key that was never put returns null.
 */
@Test
public void getReturnsNullWhenEntryDoesNotExist() {
createCache();
assertNull(cache.get("unknown"));
verifyAndCloseCache();
}
/**
 * Adds the values 1 to 5 in ascending order via {@code checkAdds}, then verifies and closes the cache.
 */
@Test
public void persistsAddedEntries() {
createCache();
checkAdds(1, 2, 3, 4, 5);
verifyAndCloseCache();
}
/**
 * Adds the values 5 down to 1 in descending order via {@code checkAdds}, then verifies and closes the cache.
 */
@Test
public void persistsAddedEntriesInReverseOrder() {
createCache();
checkAdds(5, 4, 3, 2, 1);
verifyAndCloseCache();
}
/**
 * Adds twelve values in shuffled order via {@code checkAdds}, then verifies and closes the cache.
 * Per the test name, twelve entries are meant to spread over more than one index block.
 */
@Test
public void persistsAddedEntriesOverMultipleIndexBlocks() {
createCache();
checkAdds(3, 2, 11, 5, 7, 1, 10, 8, 9, 4, 6, 0);
verifyAndCloseCache();
}
/**
 * Runs {@code checkUpdates} over twelve values in shuffled order, then verifies and closes the cache.
 */
@Test
public void persistsUpdates() {
createCache();
checkUpdates(3, 2, 11, 5, 7, 1, 10, 8, 9, 4, 6, 0);
verifyAndCloseCache();
}
/**
 * Repeatedly overwrites twelve keys with lists that get shorter every round (20 elements down to 2) and checks
 * after every round, and again after {@code reset()}, that each key returns the list stored last.
 */
@Test
public void handlesUpdatesWhenBlockSizeDecreases() {
    BTreePersistentIndexedCache<String, List<Integer>> listCache =
            new BTreePersistentIndexedCache<String, List<Integer>>(
                    tmpDirFile("listcache.bin"), stringSerializer,
                    new DefaultSerializer<List<Integer>>(), (short) 4, 100);
    List<Integer> ids = Arrays.asList(3, 2, 11, 5, 7, 1, 10, 8, 9, 4, 6, 0);
    Map<Integer, List<Integer>> expected = new LinkedHashMap<Integer, List<Integer>>();
    for (int round = 10; round > 0; round--) {
        int elementCount = round * 2;
        for (Integer id : ids) {
            List<Integer> payload = new ArrayList<Integer>(round);
            for (int element = 0; element < elementCount; element++) {
                payload.add(element);
            }
            listCache.put(String.format("key_%d", id), payload);
            expected.put(id, payload);
        }
        checkListEntries(listCache, expected);
    }
    listCache.reset();
    checkListEntries(listCache, expected);
    listCache.verify();
    listCache.close();
}
/**
 * Asserts that, for every entry of {@code updated}, the cache returns an equal list under the key
 * {@code key_<entry key>}.
 */
private void checkListEntries(BTreePersistentIndexedCache<String, List<Integer>> cache, Map<Integer, List<Integer>> updated) {
    for (Map.Entry<Integer, List<Integer>> expected : updated.entrySet()) {
        List<Integer> actual = cache.get(String.format("key_%d", expected.getKey()));
        assertThat(actual, equalTo(expected.getValue()));
    }
}
/**
 * Repeatedly overwrites twelve keys with lists that get longer every round (2 elements up to 18) and checks
 * after every round, and again after {@code reset()}, that each key returns the list stored last.
 */
@Test
public void handlesUpdatesWhenBlockSizeIncreases() {
    BTreePersistentIndexedCache<String, List<Integer>> listCache =
            new BTreePersistentIndexedCache<String, List<Integer>>(
                    tmpDirFile("listcache.bin"), stringSerializer,
                    new DefaultSerializer<List<Integer>>(), (short) 4, 100);
    List<Integer> ids = Arrays.asList(3, 2, 11, 5, 7, 1, 10, 8, 9, 4, 6, 0);
    Map<Integer, List<Integer>> expected = new LinkedHashMap<Integer, List<Integer>>();
    for (int round = 1; round < 10; round++) {
        int elementCount = round * 2;
        for (Integer id : ids) {
            List<Integer> payload = new ArrayList<Integer>(round);
            for (int element = 0; element < elementCount; element++) {
                payload.add(element);
            }
            listCache.put(String.format("key_%d", id), payload);
            expected.put(id, payload);
        }
        checkListEntries(listCache, expected);
    }
    listCache.reset();
    checkListEntries(listCache, expected);
    listCache.verify();
    listCache.close();
}
/**
 * Adds 1 to 4, calls {@code reset()} on the cache, then adds 5 to 8 and verifies.
 * NOTE(review): per the test name, {@code reset()} presumably reopens the cache — confirm in BTreePersistentIndexedCache.
 */
@Test
public void persistsAddedEntriesAfterReopen() {
createCache();
checkAdds(1, 2, 3, 4);
cache.reset();
checkAdds(5, 6, 7, 8);
verifyAndCloseCache();
}
/**
 * Puts five entries, re-puts {@code key_1} with the same value and {@code key_4} with a new value, and checks
 * that every key returns its latest value both before and after {@code reset()}.
 */
@Test
public void persistsReplacedEntries() {
createCache();
cache.put("key_1", 1);
cache.put("key_2", 2);
cache.put("key_3", 3);
cache.put("key_4", 4);
cache.put("key_5", 5);
// Overwrite two existing keys: one with an unchanged value, one with a different value.
cache.put("key_1", 1);
cache.put("key_4", 12);
assertThat(cache.get("key_1"), equalTo(1));
assertThat(cache.get("key_2"), equalTo(2));
assertThat(cache.get("key_3"), equalTo(3));
assertThat(cache.get("key_4"), equalTo(12));
assertThat(cache.get("key_5"), equalTo(5));
cache.reset();
// Same expectations after reset().
assertThat(cache.get("key_1"), equalTo(1));
assertThat(cache.get("key_2"), equalTo(2));
assertThat(cache.get("key_3"), equalTo(3));
assertThat(cache.get("key_4"), equalTo(12));
assertThat(cache.get("key_5"), equalTo(5));
verifyAndCloseCache();
}
/**
 * Checks that replacing or re-adding values of the same size does not change the length of the cache file.
 * The file-length assertions depend on the exact order of the put/remove calls below.
 */
@Test
public void reusesEmptySpaceWhenPuttingEntries() {
BTreePersistentIndexedCache<String, String> cache = new BTreePersistentIndexedCache<String, String>(cacheFile, stringSerializer, stringSerializer, (short) 4, 100);
long beforeLen = cacheFile.length();
// A non-empty file means the cache file already existed; print what it holds for key_new.
if (beforeLen>0){
System.out.println(String.format("cache %s: %s", "key_new", cache.get("key_new")));
}
cache.put("key_1", "abcd");
cache.put("key_2", "abcd");
cache.put("key_3", "abcd");
cache.put("key_4", "abcd");
cache.put("key_5", "abcd");
long len = cacheFile.length();
assertTrue(len > 0L);
System.out.println(String.format("cache file size %d => %d", beforeLen, len));
// Replacing a value with one of the same length must not grow the file.
cache.put("key_1", "1234");
assertThat(cacheFile.length(), equalTo(len));
// Removing an entry and adding a new one with a same-length value must not grow the file either.
cache.remove("key_1");
cache.put("key_new", "a1b2");
assertThat(cacheFile.length(), equalTo(len));
// A longer value: the resulting size is only printed, the growth assertion below is disabled.
cache.put("key_new", "longer value assertThat(cacheFile.length(), equalTo(len))");
System.out.println(String.format("cache file size %d beforeLen %d", cacheFile.length(), len));
// assertTrue(cacheFile.length() > len);
len = cacheFile.length();
cache.put("key_1", "1234");
assertThat(cacheFile.length(), equalTo(len));
cache.close();
}
/**
 * Runs add/remove cycles over 2000 values and checks that the cache file stays below 1.4 times (after the
 * second cycle) and 1.4 * 1.4 times (after the final adds) its length after the first cycle.
 */
@Test
public void canHandleLargeNumberOfEntries() {
    createCache();
    final int entryCount = 2000;
    List<Integer> values = new ArrayList<Integer>();
    int next = 0;
    while (next < entryCount) {
        values.add(next);
        next++;
    }

    checkAddsAndRemoves(null, values);
    long baseline = cacheFile.length();

    checkAddsAndRemoves(Collections.reverseOrder(), values);
    // need to make this better
    long limitAfterSecondCycle = (long) (1.4 * baseline);
    assertTrue(cacheFile.length() < limitAfterSecondCycle);

    checkAdds(values);
    // need to make this better
    long limitAfterFinalAdds = (long) (1.4 * 1.4 * baseline);
    assertTrue(cacheFile.length() < limitAfterFinalAdds);
    cache.close();
}
/**
 * Runs {@code checkAddsAndRemoves} over the values 1 to 5, then verifies and closes the cache.
 */
@Test
public void persistsRemovalOfEntries() {
createCache();
checkAddsAndRemoves(1, 2, 3, 4, 5);
verifyAndCloseCache();
}
/**
 * Runs {@code checkAddsAndRemoves} over the values 1 to 5 with a reverse-order comparator, then verifies and
 * closes the cache.
 */
@Test
public void persistsRemovalOfEntriesInReverse() {
createCache();
checkAddsAndRemoves(Collections.<Integer>reverseOrder(), 1, 2, 3, 4, 5);
verifyAndCloseCache();
}
/**
 * Runs {@code checkAddsAndRemoves} over twelve values in shuffled order, then verifies and closes the cache.
 * Per the test name, twelve entries are meant to spread over more than one index block.
 */
@Test
public void persistsRemovalOfEntriesOverMultipleIndexBlocks() {
createCache();
checkAddsAndRemoves(4, 12, 9, 1, 3, 10, 11, 7, 8, 2, 5, 6);
verifyAndCloseCache();
}
/**
 * Builds the layout described in the comment below, removes {@code key_5} from the right-hand side, and
 * verifies the cache before and after the removal.
 */
@Test
public void removalRedistributesRemainingEntriesWithLeftSibling() {
createCache();
// Ends up with: 1 2 3 -> 4 <- 5 6
checkAdds(1, 2, 5, 6, 4, 3);
cache.verify();
cache.remove("key_5");
verifyAndCloseCache();
}
/**
 * Builds the layout described in the comment below, removes {@code key_4} from the right-hand side, and
 * verifies the cache before and after the removal.
 */
@Test
public void removalMergesRemainingEntriesIntoLeftSibling() {
createCache();
// Ends up with: 1 2 -> 3 <- 4 5
checkAdds(1, 2, 4, 5, 3);
cache.verify();
cache.remove("key_4");
verifyAndCloseCache();
}
/**
 * Builds the layout described in the comment below, removes {@code key_2} from the left-hand side, and
 * verifies the cache before and after the removal.
 */
@Test
public void removalRedistributesRemainingEntriesWithRightSibling() {
createCache();
// Ends up with: 1 2 -> 3 <- 4 5 6
checkAdds(1, 2, 4, 5, 3, 6);
cache.verify();
cache.remove("key_2");
verifyAndCloseCache();
}
/**
 * Inserts five entries in an order chosen to produce the tree shape noted
 * below, verifies the cache, removes {@code key_2} and then calls
 * {@code verifyAndCloseCache()}.
 * NOTE(review): the merge into the right sibling named in the method title is
 * not asserted directly; only verification is run.
 */
@Test
public void removalMergesRemainingEntriesIntoRightSibling() {
createCache();
// Ends up with: 1 2 -> 3 <- 4 5
checkAdds(1, 2, 4, 5, 3);
cache.verify();
cache.remove("key_2");
verifyAndCloseCache();
}
/**
 * Checks that the cache copes with its backing file being cut short.
 *
 * One entry is stored, the last 10 bytes of the file are chopped off behind
 * the cache's back, and the cache is reset. Afterwards the entry must be
 * gone and {@code verify()} must pass.
 *
 * @throws IOException if the backing file cannot be opened or truncated
 */
@Test
public void handlesOpeningATruncatedCacheFile() throws IOException {
    BTreePersistentIndexedCache<String, Integer> cache = new BTreePersistentIndexedCache<String, Integer>(cacheFile, stringSerializer, integerSerializer);
    assertNull(cache.get("key_1"));
    cache.put("key_1", 99);

    RandomAccessFile file = new RandomAccessFile(cacheFile, "rw");
    try {
        file.setLength(file.length() - 10);
    } finally {
        // Release the handle even if truncation fails, so a failing run does
        // not leave the cache file open.
        file.close();
    }

    cache.reset();
    assertNull(cache.get("key_1"));
    cache.verify();
    cache.close();
}
/**
 * Uses {@link File} objects as keys, serialized with {@code DefaultSerializer},
 * and checks that three distinct paths (differing by directory and by case)
 * each map back to their own value when looked up with fresh, equal
 * {@code File} instances.
 */
@Test
public void canUseFileAsKey() {
    BTreePersistentIndexedCache<File, Integer> fileCache =
        new BTreePersistentIndexedCache<File, Integer>(cacheFile, new DefaultSerializer<File>(), integerSerializer);

    File plain = new File("file");
    File nested = new File("dir/file");
    File capitalized = new File("File");
    fileCache.put(plain, 1);
    fileCache.put(nested, 2);
    fileCache.put(capitalized, 3);

    // Look up with newly constructed keys, exactly as separate instances.
    Integer plainValue = fileCache.get(new File("file"));
    assertThat(plainValue, equalTo(1));
    Integer nestedValue = fileCache.get(new File("dir/file"));
    assertThat(nestedValue, equalTo(2));
    Integer capitalizedValue = fileCache.get(new File("File"));
    assertThat(capitalizedValue, equalTo(3));

    fileCache.close();
}
/**
 * Stores two different keys that share a {@code String.hashCode()} and checks
 * that each one still resolves to its own value.
 *
 * The keys are built from explicit chars instead of decoding bytes, so the
 * collision does not depend on the platform default charset:
 * {@code 2*31 + 31 == 1*31 + 62 == 93}.
 */
@Test
public void handlesKeysWithSameHashCode() {
    createCache();
    String key1 = new String(new char[]{2, 31});
    String key2 = new String(new char[]{1, 62});
    cache.put(key1, 1);
    cache.put(key2, 2);
    assertThat(cache.get(key1), equalTo(1));
    assertThat(cache.get(key2), equalTo(2));
    cache.close();
}
/**
 * Varargs convenience overload: forwards the values as a list to
 * {@code checkAdds(Iterable)} and discards the returned map.
 */
private void checkAdds(Integer... values) {
checkAdds(Arrays.asList(values));
}
/**
 * Stores each value under the key {@code "key_<value>"} and asserts that all
 * of them can be read back, both straight away and again after
 * {@code cache.reset()}.
 *
 * @param values the values to store, in insertion order
 * @return the key/value pairs that were stored, in insertion order
 */
private Map<String, Integer> checkAdds(Iterable<Integer> values) {
    Map<String, Integer> expected = new LinkedHashMap<String, Integer>();
    for (Integer value : values) {
        String key = String.format("key_%d", value);
        cache.put(key, value);
        expected.put(key, value);
    }

    // Pass 0 reads from the live cache, pass 1 reads after a reset.
    for (int pass = 0; pass < 2; pass++) {
        if (pass == 1) {
            cache.reset();
        }
        for (String key : expected.keySet()) {
            assertThat(cache.get(key), equalTo(expected.get(key)));
        }
    }
    return expected;
}
/**
 * Varargs convenience overload: forwards the values as a list to
 * {@code checkUpdates(Iterable)} and discards the returned map.
 */
private void checkUpdates(Integer... values) {
checkUpdates(Arrays.asList(values));
}
/**
 * Overwrites the entry for each value ten times. In round {@code i} the key
 * {@code "key_<value>"} is set to {@code value + i * 100}. After every round,
 * and once more after {@code cache.reset()}, all keys written so far must
 * return their most recent value.
 *
 * @param values the base values whose keys are updated
 * @return the latest stored value for each base value, in first-write order
 */
private Map<Integer, Integer> checkUpdates(Iterable<Integer> values) {
    Map<Integer, Integer> latest = new LinkedHashMap<Integer, Integer>();

    for (int round = 0; round < 10; round++) {
        int increment = round * 100;
        for (Integer value : values) {
            int replacement = value + increment;
            cache.put(String.format("key_%d", value), replacement);
            latest.put(value, replacement);
        }
        for (Integer base : latest.keySet()) {
            String key = String.format("key_%d", base);
            assertThat(cache.get(key), equalTo(latest.get(base)));
        }
    }

    cache.reset();
    for (Integer base : latest.keySet()) {
        String key = String.format("key_%d", base);
        assertThat(cache.get(key), equalTo(latest.get(base)));
    }
    return latest;
}
/**
 * Varargs convenience overload that uses a null comparator, i.e. the values
 * are removed in their natural ordering.
 */
private void checkAddsAndRemoves(Integer... values) {
checkAddsAndRemoves(null, values);
}
/**
 * Varargs convenience overload: wraps the values in a list and delegates to
 * the {@code Collection} variant with the given removal-order comparator.
 */
private void checkAddsAndRemoves(Comparator<Integer> comparator, Integer... values) {
checkAddsAndRemoves(comparator, Arrays.asList(values));
}
/**
 * Adds every value via {@code checkAdds}, then removes the entries one at a
 * time in the order given by {@code comparator}. Each key must be present
 * just before its removal and absent right after it. Finally the cache is
 * reset and verified, and every key must still be absent.
 *
 * @param comparator ordering for the removals; null means natural ordering
 * @param values     the values to add and then remove
 */
private void checkAddsAndRemoves(Comparator<Integer> comparator, Collection<Integer> values) {
    checkAdds(values);

    List<Integer> removalOrder = new ArrayList<Integer>(values);
    Collections.sort(removalOrder, comparator);

    // Derive the keys once; both loops below walk them in removal order.
    List<String> keys = new ArrayList<String>();
    for (Integer value : removalOrder) {
        keys.add(String.format("key_%d", value));
    }

    for (String key : keys) {
        assertThat(cache.get(key), notNullValue());
        cache.remove(key);
        assertThat(cache.get(key), nullValue());
    }

    cache.reset();
    cache.verify();

    for (String key : keys) {
        assertThat(cache.get(key), nullValue());
    }
}
}

21
test/s3/basic/basic_test.go

@ -61,7 +61,7 @@ func TestCreateBucket(t *testing.T) {
} }
func TestListBuckets(t *testing.T) {
func TestPutObject(t *testing.T) {
input := &s3.PutObjectInput{ input := &s3.PutObjectInput{
ACL: aws.String("authenticated-read"), ACL: aws.String("authenticated-read"),
@ -89,7 +89,7 @@ func TestListBuckets(t *testing.T) {
} }
func TestPutObject(t *testing.T) {
func TestListBucket(t *testing.T) {
result, err := svc.ListBuckets(nil) result, err := svc.ListBuckets(nil)
if err != nil { if err != nil {
@ -105,6 +105,23 @@ func TestPutObject(t *testing.T) {
} }
// TestListObjectV2 lists the objects in Bucket whose keys start with "foo",
// using "/" as the delimiter, and prints each returned key followed by the
// whole response.
//
// A listing error fails this test through t.Fatalf instead of exiting the
// process, so the failure is attributed to this test and the rest of the
// test binary keeps running.
func TestListObjectV2(t *testing.T) {
	listObj, err := svc.ListObjectsV2(&s3.ListObjectsV2Input{
		Bucket:    aws.String(Bucket),
		Prefix:    aws.String("foo"),
		Delimiter: aws.String("/"),
	})
	if err != nil {
		t.Fatalf("Unable to list objects, %v", err)
	}

	for _, content := range listObj.Contents {
		fmt.Println(aws.StringValue(content.Key))
	}
	fmt.Printf("list: %s\n", listObj)
}
func exitErrorf(msg string, args ...interface{}) { func exitErrorf(msg string, args ...interface{}) {
fmt.Fprintf(os.Stderr, msg+"\n", args...) fmt.Fprintf(os.Stderr, msg+"\n", args...)
os.Exit(1) os.Exit(1)

6
unmaintained/diff_volume_servers/diff_volume_servers.go

@ -118,7 +118,7 @@ const (
type needleState struct { type needleState struct {
state uint8 state uint8
size uint32
size types.Size
} }
func getVolumeFiles(v uint32, addr string) (map[types.NeedleId]needleState, int64, error) { func getVolumeFiles(v uint32, addr string) (map[types.NeedleId]needleState, int64, error) {
@ -154,8 +154,8 @@ func getVolumeFiles(v uint32, addr string) (map[types.NeedleId]needleState, int6
var maxOffset int64 var maxOffset int64
files := map[types.NeedleId]needleState{} files := map[types.NeedleId]needleState{}
err = idx.WalkIndexFile(idxFile, func(key types.NeedleId, offset types.Offset, size uint32) error {
if offset.IsZero() || size == types.TombstoneFileSize {
err = idx.WalkIndexFile(idxFile, func(key types.NeedleId, offset types.Offset, size types.Size) error {
if offset.IsZero() || size.IsDeleted() {
files[key] = needleState{ files[key] = needleState{
state: stateDeleted, state: stateDeleted,
size: size, size: size,

4
unmaintained/fix_dat/fix_dat.go

@ -98,7 +98,7 @@ func iterateEntries(datBackend backend.BackendStorageFile, idxFile *os.File, vis
// parse index file entry // parse index file entry
key := util.BytesToUint64(bytes[0:8]) key := util.BytesToUint64(bytes[0:8])
offsetFromIndex := util.BytesToUint32(bytes[8:12]) offsetFromIndex := util.BytesToUint32(bytes[8:12])
sizeFromIndex := util.BytesToUint32(bytes[12:16])
sizeFromIndex := types.BytesToSize(bytes[12:16])
count, _ = idxFile.ReadAt(bytes, readerOffset) count, _ = idxFile.ReadAt(bytes, readerOffset)
readerOffset += int64(count) readerOffset += int64(count)
@ -123,7 +123,7 @@ func iterateEntries(datBackend backend.BackendStorageFile, idxFile *os.File, vis
} }
}() }()
if n.Size <= n.DataSize {
if n.Size <= types.Size(n.DataSize) {
continue continue
} }
visitNeedle(n, offset) visitNeedle(n, offset)

83
unmaintained/see_dat/see_dat_gzip.go

@ -1,83 +0,0 @@
package main
import (
"bytes"
"compress/gzip"
"crypto/md5"
"flag"
"io"
"io/ioutil"
"net/http"
"time"
"github.com/chrislusf/seaweedfs/weed/glog"
"github.com/chrislusf/seaweedfs/weed/storage"
"github.com/chrislusf/seaweedfs/weed/storage/needle"
"github.com/chrislusf/seaweedfs/weed/storage/super_block"
"github.com/chrislusf/seaweedfs/weed/util"
)
// VolumeFileScanner4SeeDat is the scanner passed to storage.ScanVolumeFile by
// main. It records the needle version reported by the volume's super block.
type VolumeFileScanner4SeeDat struct {
version needle.Version
}
// VisitSuperBlock stores the super block's version on the scanner and never
// returns an error.
func (scanner *VolumeFileScanner4SeeDat) VisitSuperBlock(superBlock super_block.SuperBlock) error {
scanner.version = superBlock.Version
return nil
}
// ReadNeedleBody always reports true. VisitNeedle inspects n.Data, so the
// scan presumably needs needle bodies loaded (confirm against
// storage.ScanVolumeFile).
func (scanner *VolumeFileScanner4SeeDat) ReadNeedleBody() bool {
return true
}
// Package-level counters. Only diffbytes is updated in this file: it is the
// running total of (stored size - decompressed size) over all gzip needles
// seen by VisitNeedle. files and filebytes are declared but not written here.
var (
files = int64(0)
filebytes = int64(0)
diffbytes = int64(0)
)
// Compresssion gzips the first 128 bytes of data and returns the compressed
// length as a percentage of those 128 bytes. Inputs of 128 bytes or fewer are
// not sampled and report 100.0. An error from util.GzipData is ignored; the
// ratio is then computed from whatever slice was returned.
func Compresssion(data []byte) float64 {
	const sampleSize = 128
	if len(data) <= sampleSize {
		return 100.0
	}

	compressed, _ := util.GzipData(data[:sampleSize])
	scaled := len(compressed) * 10
	return float64(scaled) / 1280.0
}
// VisitNeedle logs a summary line for the needle and then tries to open its
// data as a gzip stream. When that succeeds it decompresses the data, adds
// the size difference (stored minus decompressed) to diffbytes, and logs the
// sizes, detected content type, the 128-byte compression ratio and the MD5 of
// the decompressed bytes. When the data is not gzip it logs that instead.
// Decompression errors after the header are ignored. It always returns nil.
func (scanner *VolumeFileScanner4SeeDat) VisitNeedle(n *needle.Needle, offset int64, needleHeader, needleBody []byte) error {
	appendNs := int64(n.AppendAtNs)
	t := time.Unix(appendNs/int64(time.Second), appendNs%int64(time.Second))

	glog.V(0).Info("----------------------------------------------------------------------------------")
	glog.V(0).Infof("%d,%s%x offset %d size %d(%s) cookie %x appendedAt %v hasmime[%t] mime[%s] (len: %d)",
		*volumeId, n.Id, n.Cookie, offset, n.Size, util.BytesToHumanReadable(uint64(n.Size)), n.Cookie, t, n.HasMime(), string(n.Mime), len(n.Mime))

	r, err := gzip.NewReader(bytes.NewReader(n.Data))
	if err != nil {
		glog.V(0).Infof("no gzip!")
		return nil
	}

	var decompressed bytes.Buffer
	c, _ := io.Copy(&decompressed, r)
	d := decompressed.Bytes()

	h := md5.New()
	h.Write(d)

	diff := int64(n.DataSize) - int64(c)
	diffbytes += diff
	glog.V(0).Infof("was gzip! stored_size: %d orig_size: %d diff: %d(%d) mime:%s compression-of-128: %.2f md5: %x", n.DataSize, c, diff, diffbytes, http.DetectContentType(d), Compresssion(d), h.Sum(nil))
	return nil
}
// Command-line flags, plus a blank reference that keeps the io/ioutil import
// in use.
var (
_ = ioutil.ReadAll
// -dir: directory passed to storage.ScanVolumeFile; defaults to /tmp.
volumePath = flag.String("dir", "/tmp", "data directory to store files")
// -collection: collection name passed to storage.ScanVolumeFile; empty by default.
volumeCollection = flag.String("collection", "", "the volume collection name")
// -volumeId: id of the volume to scan; defaults to -1.
volumeId = flag.Int("volumeId", -1, "a volume id. The volume should already exist in the dir. The volume index file should not exist.")
)
func main() {
flag.Parse()
vid := needle.VolumeId(*volumeId)
glog.V(0).Info("Starting")
scanner := &VolumeFileScanner4SeeDat{}
err := storage.ScanVolumeFile(*volumePath, *volumeCollection, vid, storage.NeedleMapInMemory, scanner)
if err != nil {
glog.Fatalf("Reading Volume File [ERROR] %s\n", err)
}
}

2
unmaintained/see_idx/see_idx.go

@ -36,7 +36,7 @@ func main() {
} }
defer indexFile.Close() defer indexFile.Close()
idx.WalkIndexFile(indexFile, func(key types.NeedleId, offset types.Offset, size uint32) error {
idx.WalkIndexFile(indexFile, func(key types.NeedleId, offset types.Offset, size types.Size) error {
fmt.Printf("key:%v offset:%v size:%v(%v)\n", key, offset, size, util.BytesToHumanReadable(uint64(size))) fmt.Printf("key:%v offset:%v size:%v(%v)\n", key, offset, size, util.BytesToHumanReadable(uint64(size)))
return nil return nil
}) })

19
weed/Makefile

@ -0,0 +1,19 @@
# Name of the binary that `go build` produces in this directory.
BINARY = weed

SOURCE_DIR = .

# Default target: build with debug flags and start a mount under Delve.
all: debug_mount

# None of these targets produce a file with the target's name, so all of them
# are declared phony (including all and debug_server).
.PHONY: all clean debug_mount debug_server

clean:
	go clean $(SOURCE_DIR)
	rm -f $(BINARY)

# Build with optimizations and inlining disabled (-N -l), then run
# `weed mount` under a headless Delve server listening on :2345.
debug_mount:
	go build -gcflags="all=-N -l"
	dlv --listen=:2345 --headless=true --api-version=2 --accept-multiclient exec weed -- mount -dir=~/tmp/mm

# Same debug build, but runs `weed server` with the filer and S3 enabled.
debug_server:
	go build -gcflags="all=-N -l"
	dlv --listen=:2345 --headless=true --api-version=2 --accept-multiclient exec weed -- server -dir=/Volumes/mobile_disk/99 -filer -volume.port=8343 -s3 -volume.max=0

6
weed/command/export.go

@ -72,9 +72,9 @@ var (
func printNeedle(vid needle.VolumeId, n *needle.Needle, version needle.Version, deleted bool) { func printNeedle(vid needle.VolumeId, n *needle.Needle, version needle.Version, deleted bool) {
key := needle.NewFileIdFromNeedle(vid, n).String() key := needle.NewFileIdFromNeedle(vid, n).String()
size := n.DataSize
size := int32(n.DataSize)
if version == needle.Version1 { if version == needle.Version1 {
size = n.Size
size = int32(n.Size)
} }
fmt.Printf("%s\t%s\t%d\t%t\t%s\t%s\t%s\t%t\n", fmt.Printf("%s\t%s\t%d\t%t\t%s\t%s\t%s\t%t\n",
key, key,
@ -111,7 +111,7 @@ func (scanner *VolumeFileScanner4Export) VisitNeedle(n *needle.Needle, offset in
nv, ok := needleMap.Get(n.Id) nv, ok := needleMap.Get(n.Id)
glog.V(3).Infof("key %d offset %d size %d disk_size %d compressed %v ok %v nv %+v", glog.V(3).Infof("key %d offset %d size %d disk_size %d compressed %v ok %v nv %+v",
n.Id, offset, n.Size, n.DiskSize(scanner.version), n.IsCompressed(), ok, nv) n.Id, offset, n.Size, n.DiskSize(scanner.version), n.IsCompressed(), ok, nv)
if ok && nv.Size > 0 && nv.Size != types.TombstoneFileSize && nv.Offset.ToAcutalOffset() == offset {
if ok && nv.Size.IsValid() && nv.Offset.ToAcutalOffset() == offset {
if newerThanUnix >= 0 && n.HasLastModifiedDate() && n.LastModified < uint64(newerThanUnix) { if newerThanUnix >= 0 && n.HasLastModifiedDate() && n.LastModified < uint64(newerThanUnix) {
glog.V(3).Infof("Skipping this file, as it's old enough: LastModified %d vs %d", glog.V(3).Infof("Skipping this file, as it's old enough: LastModified %d vs %d",
n.LastModified, newerThanUnix) n.LastModified, newerThanUnix)

2
weed/command/fix.go

@ -48,7 +48,7 @@ func (scanner *VolumeFileScanner4Fix) ReadNeedleBody() bool {
func (scanner *VolumeFileScanner4Fix) VisitNeedle(n *needle.Needle, offset int64, needleHeader, needleBody []byte) error { func (scanner *VolumeFileScanner4Fix) VisitNeedle(n *needle.Needle, offset int64, needleHeader, needleBody []byte) error {
glog.V(2).Infof("key %d offset %d size %d disk_size %d compressed %v", n.Id, offset, n.Size, n.DiskSize(scanner.version), n.IsCompressed()) glog.V(2).Infof("key %d offset %d size %d disk_size %d compressed %v", n.Id, offset, n.Size, n.DiskSize(scanner.version), n.IsCompressed())
if n.Size > 0 && n.Size != types.TombstoneFileSize {
if n.Size.IsValid() {
pe := scanner.nm.Set(n.Id, types.ToOffset(offset), n.Size) pe := scanner.nm.Set(n.Id, types.ToOffset(offset), n.Size)
glog.V(2).Infof("saved %d with error %v", n.Size, pe) glog.V(2).Infof("saved %d with error %v", n.Size, pe)
} else { } else {

2
weed/command/server.go

@ -96,7 +96,7 @@ func init() {
serverOptions.v.fixJpgOrientation = cmdServer.Flag.Bool("volume.images.fix.orientation", false, "Adjust jpg orientation when uploading.") serverOptions.v.fixJpgOrientation = cmdServer.Flag.Bool("volume.images.fix.orientation", false, "Adjust jpg orientation when uploading.")
serverOptions.v.readRedirect = cmdServer.Flag.Bool("volume.read.redirect", true, "Redirect moved or non-local volumes.") serverOptions.v.readRedirect = cmdServer.Flag.Bool("volume.read.redirect", true, "Redirect moved or non-local volumes.")
serverOptions.v.compactionMBPerSecond = cmdServer.Flag.Int("volume.compactionMBps", 0, "limit compaction speed in mega bytes per second") serverOptions.v.compactionMBPerSecond = cmdServer.Flag.Int("volume.compactionMBps", 0, "limit compaction speed in mega bytes per second")
serverOptions.v.fileSizeLimitMB = cmdServer.Flag.Int("volume.fileSizeLimitMB", 256, "limit file size to avoid out of memory")
serverOptions.v.fileSizeLimitMB = cmdServer.Flag.Int("volume.fileSizeLimitMB", 1024, "limit file size to avoid out of memory")
serverOptions.v.publicUrl = cmdServer.Flag.String("volume.publicUrl", "", "publicly accessible address") serverOptions.v.publicUrl = cmdServer.Flag.String("volume.publicUrl", "", "publicly accessible address")
serverOptions.v.pprof = &False serverOptions.v.pprof = &False

2
weed/command/volume.go

@ -76,7 +76,7 @@ func init() {
v.cpuProfile = cmdVolume.Flag.String("cpuprofile", "", "cpu profile output file") v.cpuProfile = cmdVolume.Flag.String("cpuprofile", "", "cpu profile output file")
v.memProfile = cmdVolume.Flag.String("memprofile", "", "memory profile output file") v.memProfile = cmdVolume.Flag.String("memprofile", "", "memory profile output file")
v.compactionMBPerSecond = cmdVolume.Flag.Int("compactionMBps", 0, "limit background compaction or copying speed in mega bytes per second") v.compactionMBPerSecond = cmdVolume.Flag.Int("compactionMBps", 0, "limit background compaction or copying speed in mega bytes per second")
v.fileSizeLimitMB = cmdVolume.Flag.Int("fileSizeLimitMB", 256, "limit file size to avoid out of memory")
v.fileSizeLimitMB = cmdVolume.Flag.Int("fileSizeLimitMB", 1024, "limit file size to avoid out of memory")
v.pprof = cmdVolume.Flag.Bool("pprof", false, "enable pprof http handlers. precludes --memprofile and --cpuprofile") v.pprof = cmdVolume.Flag.Bool("pprof", false, "enable pprof http handlers. precludes --memprofile and --cpuprofile")
} }

10
weed/filer2/entry.go

@ -22,6 +22,7 @@ type Attr struct {
GroupNames []string GroupNames []string
SymlinkTarget string SymlinkTarget string
Md5 []byte Md5 []byte
FileSize uint64
} }
func (attr Attr) IsDirectory() bool { func (attr Attr) IsDirectory() bool {
@ -39,7 +40,7 @@ type Entry struct {
} }
func (entry *Entry) Size() uint64 { func (entry *Entry) Size() uint64 {
return TotalSize(entry.Chunks)
return maxUint64(TotalSize(entry.Chunks), entry.FileSize)
} }
func (entry *Entry) Timestamp() time.Time { func (entry *Entry) Timestamp() time.Time {
@ -81,3 +82,10 @@ func FromPbEntry(dir string, entry *filer_pb.Entry) *Entry {
Chunks: entry.Chunks, Chunks: entry.Chunks,
} }
} }
// maxUint64 returns the larger of x and y.
func maxUint64(x, y uint64) uint64 {
	if y >= x {
		return y
	}
	return x
}

2
weed/filer2/entry_codec.go

@ -53,6 +53,7 @@ func EntryAttributeToPb(entry *Entry) *filer_pb.FuseAttributes {
GroupName: entry.Attr.GroupNames, GroupName: entry.Attr.GroupNames,
SymlinkTarget: entry.Attr.SymlinkTarget, SymlinkTarget: entry.Attr.SymlinkTarget,
Md5: entry.Attr.Md5, Md5: entry.Attr.Md5,
FileSize: entry.Attr.FileSize,
} }
} }
@ -73,6 +74,7 @@ func PbToEntryAttribute(attr *filer_pb.FuseAttributes) Attr {
t.GroupNames = attr.GroupName t.GroupNames = attr.GroupName
t.SymlinkTarget = attr.SymlinkTarget t.SymlinkTarget = attr.SymlinkTarget
t.Md5 = attr.Md5 t.Md5 = attr.Md5
t.FileSize = attr.FileSize
return t return t
} }

2
weed/filer2/filechunk_manifest.go

@ -64,7 +64,7 @@ func fetchChunk(lookupFileIdFn LookupFileIdFunctionType, fileId string, cipherKe
return nil, err return nil, err
} }
var buffer bytes.Buffer var buffer bytes.Buffer
err = util.ReadUrlAsStream(urlString, cipherKey, isGzipped, true, 0, 0, func(data []byte) {
err = util.ReadUrlAsStream(urlString+"?readDeleted=true", cipherKey, isGzipped, true, 0, 0, func(data []byte) {
buffer.Write(data) buffer.Write(data)
}) })
if err != nil { if err != nil {

41
weed/filer2/filechunks.go

@ -20,6 +20,10 @@ func TotalSize(chunks []*filer_pb.FileChunk) (size uint64) {
return return
} }
// FileSize returns the size of entry: the larger of the total size of its
// chunks and the FileSize recorded in its attributes.
//
// A nil entry yields 0. An entry without attributes falls back to the chunk
// total alone instead of dereferencing a nil pointer.
func FileSize(entry *filer_pb.Entry) (size uint64) {
	if entry == nil {
		return 0
	}
	chunkTotal := TotalSize(entry.Chunks)
	if entry.Attributes == nil {
		return chunkTotal
	}
	return maxUint64(chunkTotal, entry.Attributes.FileSize)
}
func ETag(entry *filer_pb.Entry) (etag string) { func ETag(entry *filer_pb.Entry) (etag string) {
if entry.Attributes == nil || entry.Attributes.Md5 == nil { if entry.Attributes == nil || entry.Attributes.Md5 == nil {
return ETagChunks(entry.Chunks) return ETagChunks(entry.Chunks)
@ -100,7 +104,7 @@ type ChunkView struct {
FileId string FileId string
Offset int64 Offset int64
Size uint64 Size uint64
LogicOffset int64
LogicOffset int64 // actual offset in the file, for the data specified via [offset, offset+size) in current chunk
ChunkSize uint64 ChunkSize uint64
CipherKey []byte CipherKey []byte
IsGzipped bool IsGzipped bool
@ -130,17 +134,18 @@ func ViewFromVisibleIntervals(visibles []VisibleInterval, offset int64, size int
for _, chunk := range visibles { for _, chunk := range visibles {
if chunk.start <= offset && offset < chunk.stop && offset < stop {
chunkStart, chunkStop := max(offset, chunk.start), min(stop, chunk.stop)
if chunkStart < chunkStop {
views = append(views, &ChunkView{ views = append(views, &ChunkView{
FileId: chunk.fileId, FileId: chunk.fileId,
Offset: offset - chunk.start, // offset is the data starting location in this file id
Size: uint64(min(chunk.stop, stop) - offset),
LogicOffset: offset,
Offset: chunkStart - chunk.start + chunk.chunkOffset,
Size: uint64(chunkStop - chunkStart),
LogicOffset: chunkStart,
ChunkSize: chunk.chunkSize, ChunkSize: chunk.chunkSize,
CipherKey: chunk.cipherKey, CipherKey: chunk.cipherKey,
IsGzipped: chunk.isGzipped, IsGzipped: chunk.isGzipped,
}) })
offset = min(chunk.stop, stop)
} }
} }
@ -149,10 +154,11 @@ func ViewFromVisibleIntervals(visibles []VisibleInterval, offset int64, size int
} }
func logPrintf(name string, visibles []VisibleInterval) { func logPrintf(name string, visibles []VisibleInterval) {
/* /*
log.Printf("%s len %d", name, len(visibles))
glog.V(0).Infof("%s len %d", name, len(visibles))
for _, v := range visibles { for _, v := range visibles {
log.Printf("%s: => %+v", name, v)
glog.V(0).Infof("%s: [%d,%d)", name, v.start, v.stop)
} }
*/ */
} }
@ -165,7 +171,7 @@ var bufPool = sync.Pool{
func MergeIntoVisibles(visibles, newVisibles []VisibleInterval, chunk *filer_pb.FileChunk) []VisibleInterval { func MergeIntoVisibles(visibles, newVisibles []VisibleInterval, chunk *filer_pb.FileChunk) []VisibleInterval {
newV := newVisibleInterval(chunk.Offset, chunk.Offset+int64(chunk.Size), chunk.GetFileIdString(), chunk.Mtime, chunk.Size, chunk.CipherKey, chunk.IsCompressed)
newV := newVisibleInterval(chunk.Offset, chunk.Offset+int64(chunk.Size), chunk.GetFileIdString(), chunk.Mtime, 0, chunk.Size, chunk.CipherKey, chunk.IsCompressed)
length := len(visibles) length := len(visibles)
if length == 0 { if length == 0 {
@ -177,13 +183,13 @@ func MergeIntoVisibles(visibles, newVisibles []VisibleInterval, chunk *filer_pb.
} }
logPrintf(" before", visibles) logPrintf(" before", visibles)
chunkStop := chunk.Offset + int64(chunk.Size)
for _, v := range visibles { for _, v := range visibles {
if v.start < chunk.Offset && chunk.Offset < v.stop { if v.start < chunk.Offset && chunk.Offset < v.stop {
newVisibles = append(newVisibles, newVisibleInterval(v.start, chunk.Offset, v.fileId, v.modifiedTime, chunk.Size, v.cipherKey, v.isGzipped))
newVisibles = append(newVisibles, newVisibleInterval(v.start, chunk.Offset, v.fileId, v.modifiedTime, v.chunkOffset, v.chunkSize, v.cipherKey, v.isGzipped))
} }
chunkStop := chunk.Offset + int64(chunk.Size)
if v.start < chunkStop && chunkStop < v.stop { if v.start < chunkStop && chunkStop < v.stop {
newVisibles = append(newVisibles, newVisibleInterval(chunkStop, v.stop, v.fileId, v.modifiedTime, chunk.Size, v.cipherKey, v.isGzipped))
newVisibles = append(newVisibles, newVisibleInterval(chunkStop, v.stop, v.fileId, v.modifiedTime, v.chunkOffset+(chunkStop-v.start), v.chunkSize, v.cipherKey, v.isGzipped))
} }
if chunkStop <= v.start || v.stop <= chunk.Offset { if chunkStop <= v.start || v.stop <= chunk.Offset {
newVisibles = append(newVisibles, v) newVisibles = append(newVisibles, v)
@ -219,6 +225,7 @@ func NonOverlappingVisibleIntervals(lookupFileIdFn LookupFileIdFunctionType, chu
var newVisibles []VisibleInterval var newVisibles []VisibleInterval
for _, chunk := range chunks { for _, chunk := range chunks {
// glog.V(0).Infof("merge [%d,%d)", chunk.Offset, chunk.Offset+int64(chunk.Size))
newVisibles = MergeIntoVisibles(visibles, newVisibles, chunk) newVisibles = MergeIntoVisibles(visibles, newVisibles, chunk)
t := visibles[:0] t := visibles[:0]
visibles = newVisibles visibles = newVisibles
@ -239,17 +246,19 @@ type VisibleInterval struct {
stop int64 stop int64
modifiedTime int64 modifiedTime int64
fileId string fileId string
chunkOffset int64
chunkSize uint64 chunkSize uint64
cipherKey []byte cipherKey []byte
isGzipped bool isGzipped bool
} }
func newVisibleInterval(start, stop int64, fileId string, modifiedTime int64, chunkSize uint64, cipherKey []byte, isGzipped bool) VisibleInterval {
func newVisibleInterval(start, stop int64, fileId string, modifiedTime int64, chunkOffset int64, chunkSize uint64, cipherKey []byte, isGzipped bool) VisibleInterval {
return VisibleInterval{ return VisibleInterval{
start: start, start: start,
stop: stop, stop: stop,
fileId: fileId, fileId: fileId,
modifiedTime: modifiedTime, modifiedTime: modifiedTime,
chunkOffset: chunkOffset, // the starting position in the chunk
chunkSize: chunkSize, chunkSize: chunkSize,
cipherKey: cipherKey, cipherKey: cipherKey,
isGzipped: isGzipped, isGzipped: isGzipped,
@ -262,3 +271,9 @@ func min(x, y int64) int64 {
} }
return y return y
} }
// max returns the larger of x and y.
func max(x, y int64) int64 {
	if x > y {
		return x
	}
	return y
}

135
weed/filer2/filechunks_test.go

@ -1,10 +1,13 @@
package filer2 package filer2
import ( import (
"fmt"
"log" "log"
"math"
"testing" "testing"
"fmt"
"github.com/stretchr/testify/assert"
"github.com/chrislusf/seaweedfs/weed/pb/filer_pb" "github.com/chrislusf/seaweedfs/weed/pb/filer_pb"
) )
@ -91,12 +94,12 @@ func TestIntervalMerging(t *testing.T) {
// case 2: updates overwrite part of previous chunks // case 2: updates overwrite part of previous chunks
{ {
Chunks: []*filer_pb.FileChunk{ Chunks: []*filer_pb.FileChunk{
{Offset: 0, Size: 100, FileId: "abc", Mtime: 123},
{Offset: 0, Size: 50, FileId: "asdf", Mtime: 134},
{Offset: 0, Size: 100, FileId: "a", Mtime: 123},
{Offset: 0, Size: 70, FileId: "b", Mtime: 134},
}, },
Expected: []*VisibleInterval{ Expected: []*VisibleInterval{
{start: 0, stop: 50, fileId: "asdf"},
{start: 50, stop: 100, fileId: "abc"},
{start: 0, stop: 70, fileId: "b"},
{start: 70, stop: 100, fileId: "a", chunkOffset: 70},
}, },
}, },
// case 3: updates overwrite full chunks // case 3: updates overwrite full chunks
@ -126,14 +129,14 @@ func TestIntervalMerging(t *testing.T) {
// case 5: updates overwrite full chunks // case 5: updates overwrite full chunks
{ {
Chunks: []*filer_pb.FileChunk{ Chunks: []*filer_pb.FileChunk{
{Offset: 0, Size: 100, FileId: "abc", Mtime: 123},
{Offset: 0, Size: 200, FileId: "asdf", Mtime: 184},
{Offset: 70, Size: 150, FileId: "abc", Mtime: 143},
{Offset: 80, Size: 100, FileId: "xxxx", Mtime: 134},
{Offset: 0, Size: 100, FileId: "a", Mtime: 123},
{Offset: 0, Size: 200, FileId: "d", Mtime: 184},
{Offset: 70, Size: 150, FileId: "c", Mtime: 143},
{Offset: 80, Size: 100, FileId: "b", Mtime: 134},
}, },
Expected: []*VisibleInterval{ Expected: []*VisibleInterval{
{start: 0, stop: 200, fileId: "asdf"},
{start: 200, stop: 220, fileId: "abc"},
{start: 0, stop: 200, fileId: "d"},
{start: 200, stop: 220, fileId: "c", chunkOffset: 130},
}, },
}, },
// case 6: same updates // case 6: same updates
@ -204,6 +207,10 @@ func TestIntervalMerging(t *testing.T) {
t.Fatalf("failed on test case %d, interval %d, chunkId %s, expect %s", t.Fatalf("failed on test case %d, interval %d, chunkId %s, expect %s",
i, x, interval.fileId, testcase.Expected[x].fileId) i, x, interval.fileId, testcase.Expected[x].fileId)
} }
if interval.chunkOffset != testcase.Expected[x].chunkOffset {
t.Fatalf("failed on test case %d, interval %d, chunkOffset %d, expect %d",
i, x, interval.chunkOffset, testcase.Expected[x].chunkOffset)
}
} }
if len(intervals) != len(testcase.Expected) { if len(intervals) != len(testcase.Expected) {
t.Fatalf("failed to compact test case %d, len %d expected %d", i, len(intervals), len(testcase.Expected)) t.Fatalf("failed to compact test case %d, len %d expected %d", i, len(intervals), len(testcase.Expected))
@ -251,14 +258,14 @@ func TestChunksReading(t *testing.T) {
// case 2: updates overwrite part of previous chunks // case 2: updates overwrite part of previous chunks
{ {
Chunks: []*filer_pb.FileChunk{ Chunks: []*filer_pb.FileChunk{
{Offset: 0, Size: 100, FileId: "abc", Mtime: 123},
{Offset: 0, Size: 50, FileId: "asdf", Mtime: 134},
{Offset: 3, Size: 100, FileId: "a", Mtime: 123},
{Offset: 10, Size: 50, FileId: "b", Mtime: 134},
}, },
Offset: 25,
Size: 50,
Offset: 30,
Size: 40,
Expected: []*ChunkView{ Expected: []*ChunkView{
{Offset: 25, Size: 25, FileId: "asdf", LogicOffset: 25},
{Offset: 0, Size: 25, FileId: "abc", LogicOffset: 50},
{Offset: 20, Size: 30, FileId: "b", LogicOffset: 30},
{Offset: 57, Size: 10, FileId: "a", LogicOffset: 60},
}, },
}, },
// case 3: updates overwrite full chunks // case 3: updates overwrite full chunks
@ -286,22 +293,22 @@ func TestChunksReading(t *testing.T) {
Size: 400, Size: 400,
Expected: []*ChunkView{ Expected: []*ChunkView{
{Offset: 0, Size: 200, FileId: "asdf", LogicOffset: 0}, {Offset: 0, Size: 200, FileId: "asdf", LogicOffset: 0},
// {Offset: 0, Size: 150, FileId: "xxxx"}, // missing intervals should not happen
{Offset: 0, Size: 150, FileId: "xxxx", LogicOffset: 250},
}, },
}, },
// case 5: updates overwrite full chunks // case 5: updates overwrite full chunks
{ {
Chunks: []*filer_pb.FileChunk{ Chunks: []*filer_pb.FileChunk{
{Offset: 0, Size: 100, FileId: "abc", Mtime: 123},
{Offset: 0, Size: 200, FileId: "asdf", Mtime: 184},
{Offset: 70, Size: 150, FileId: "abc", Mtime: 143},
{Offset: 0, Size: 100, FileId: "a", Mtime: 123},
{Offset: 0, Size: 200, FileId: "c", Mtime: 184},
{Offset: 70, Size: 150, FileId: "b", Mtime: 143},
{Offset: 80, Size: 100, FileId: "xxxx", Mtime: 134}, {Offset: 80, Size: 100, FileId: "xxxx", Mtime: 134},
}, },
Offset: 0, Offset: 0,
Size: 220, Size: 220,
Expected: []*ChunkView{ Expected: []*ChunkView{
{Offset: 0, Size: 200, FileId: "asdf", LogicOffset: 0},
{Offset: 0, Size: 20, FileId: "abc", LogicOffset: 200},
{Offset: 0, Size: 200, FileId: "c", LogicOffset: 0},
{Offset: 130, Size: 20, FileId: "b", LogicOffset: 200},
}, },
}, },
// case 6: same updates // case 6: same updates
@ -370,18 +377,21 @@ func TestChunksReading(t *testing.T) {
} }
for i, testcase := range testcases { for i, testcase := range testcases {
if i != 2 {
// continue
}
log.Printf("++++++++++ read test case %d ++++++++++++++++++++", i) log.Printf("++++++++++ read test case %d ++++++++++++++++++++", i)
chunks := ViewFromChunks(nil, testcase.Chunks, testcase.Offset, testcase.Size) chunks := ViewFromChunks(nil, testcase.Chunks, testcase.Offset, testcase.Size)
for x, chunk := range chunks { for x, chunk := range chunks {
log.Printf("read case %d, chunk %d, offset=%d, size=%d, fileId=%s", log.Printf("read case %d, chunk %d, offset=%d, size=%d, fileId=%s",
i, x, chunk.Offset, chunk.Size, chunk.FileId) i, x, chunk.Offset, chunk.Size, chunk.FileId)
if chunk.Offset != testcase.Expected[x].Offset { if chunk.Offset != testcase.Expected[x].Offset {
t.Fatalf("failed on read case %d, chunk %d, Offset %d, expect %d",
i, x, chunk.Offset, testcase.Expected[x].Offset)
t.Fatalf("failed on read case %d, chunk %s, Offset %d, expect %d",
i, chunk.FileId, chunk.Offset, testcase.Expected[x].Offset)
} }
if chunk.Size != testcase.Expected[x].Size { if chunk.Size != testcase.Expected[x].Size {
t.Fatalf("failed on read case %d, chunk %d, Size %d, expect %d",
i, x, chunk.Size, testcase.Expected[x].Size)
t.Fatalf("failed on read case %d, chunk %s, Size %d, expect %d",
i, chunk.FileId, chunk.Size, testcase.Expected[x].Size)
} }
if chunk.FileId != testcase.Expected[x].FileId { if chunk.FileId != testcase.Expected[x].FileId {
t.Fatalf("failed on read case %d, chunk %d, FileId %s, expect %s", t.Fatalf("failed on read case %d, chunk %d, FileId %s, expect %s",
@ -418,3 +428,74 @@ func BenchmarkCompactFileChunks(b *testing.B) {
CompactFileChunks(nil, chunks) CompactFileChunks(nil, chunks)
} }
} }
// TestViewFromVisibleIntervals requests the range [0, math.MaxInt32) over
// three visible intervals separated by gaps and expects one view per interval.
func TestViewFromVisibleIntervals(t *testing.T) {
	visibles := []VisibleInterval{
		{start: 0, stop: 25, fileId: "fid1"},
		{start: 4096, stop: 8192, fileId: "fid2"},
		{start: 16384, stop: 18551, fileId: "fid3"},
	}

	views := ViewFromVisibleIntervals(visibles, 0, math.MaxInt32)

	assert.Equal(t, len(visibles), len(views), "ViewFromVisibleIntervals error")
}
// TestViewFromVisibleIntervals2 requests the range [0, math.MaxInt32) over two
// adjacent visible intervals that start well past offset 0 and expects one
// view per interval.
func TestViewFromVisibleIntervals2(t *testing.T) {
	visibles := []VisibleInterval{
		{start: 344064, stop: 348160, fileId: "fid1"},
		{start: 348160, stop: 356352, fileId: "fid2"},
	}

	views := ViewFromVisibleIntervals(visibles, 0, math.MaxInt32)

	assert.Equal(t, len(visibles), len(views), "ViewFromVisibleIntervals error")
}
// TestViewFromVisibleIntervals3 requests 1500 bytes starting at offset 1700,
// a range that begins inside the first interval and ends inside the second,
// and expects one view per interval.
func TestViewFromVisibleIntervals3(t *testing.T) {
	visibles := []VisibleInterval{
		{start: 1000, stop: 2000, fileId: "fid1"},
		{start: 3000, stop: 4000, fileId: "fid2"},
	}

	views := ViewFromVisibleIntervals(visibles, 1700, 1500)

	assert.Equal(t, len(visibles), len(views), "ViewFromVisibleIntervals error")
}

61
weed/filer2/filer.go

@ -9,8 +9,6 @@ import (
"google.golang.org/grpc" "google.golang.org/grpc"
"github.com/karlseguin/ccache"
"github.com/chrislusf/seaweedfs/weed/glog" "github.com/chrislusf/seaweedfs/weed/glog"
"github.com/chrislusf/seaweedfs/weed/pb/filer_pb" "github.com/chrislusf/seaweedfs/weed/pb/filer_pb"
"github.com/chrislusf/seaweedfs/weed/util" "github.com/chrislusf/seaweedfs/weed/util"
@ -27,7 +25,6 @@ var (
type Filer struct { type Filer struct {
Store *FilerStoreWrapper Store *FilerStoreWrapper
directoryCache *ccache.Cache
MasterClient *wdclient.MasterClient MasterClient *wdclient.MasterClient
fileIdDeletionQueue *util.UnboundedQueue fileIdDeletionQueue *util.UnboundedQueue
GrpcDialOption grpc.DialOption GrpcDialOption grpc.DialOption
@ -44,7 +41,6 @@ type Filer struct {
func NewFiler(masters []string, grpcDialOption grpc.DialOption, func NewFiler(masters []string, grpcDialOption grpc.DialOption,
filerHost string, filerGrpcPort uint32, collection string, replication string, notifyFn func()) *Filer { filerHost string, filerGrpcPort uint32, collection string, replication string, notifyFn func()) *Filer {
f := &Filer{ f := &Filer{
directoryCache: ccache.New(ccache.Configure().MaxSize(1000).ItemsToPrune(100)),
MasterClient: wdclient.NewMasterClient(grpcDialOption, "filer", filerHost, filerGrpcPort, masters), MasterClient: wdclient.NewMasterClient(grpcDialOption, "filer", filerHost, filerGrpcPort, masters),
fileIdDeletionQueue: util.NewUnboundedQueue(), fileIdDeletionQueue: util.NewUnboundedQueue(),
GrpcDialOption: grpcDialOption, GrpcDialOption: grpcDialOption,
@ -77,10 +73,6 @@ func (f *Filer) GetStore() (store FilerStore) {
return f.Store return f.Store
} }
// DisableDirectoryCache switches off the directory cache by setting
// f.directoryCache to nil.
func (f *Filer) DisableDirectoryCache() {
f.directoryCache = nil
}
func (fs *Filer) GetMaster() string { func (fs *Filer) GetMaster() string {
return fs.MasterClient.GetMaster() return fs.MasterClient.GetMaster()
} }
@ -117,16 +109,9 @@ func (f *Filer) CreateEntry(ctx context.Context, entry *Entry, o_excl bool, isFr
dirPath := "/" + util.Join(dirParts[:i]...) dirPath := "/" + util.Join(dirParts[:i]...)
// fmt.Printf("%d directory: %+v\n", i, dirPath) // fmt.Printf("%d directory: %+v\n", i, dirPath)
// first check local cache
dirEntry := f.cacheGetDirectory(dirPath)
// not found, check the store directly
if dirEntry == nil {
// check the store directly
glog.V(4).Infof("find uncached directory: %s", dirPath) glog.V(4).Infof("find uncached directory: %s", dirPath)
dirEntry, _ = f.FindEntry(ctx, util.FullPath(dirPath))
} else {
// glog.V(4).Infof("found cached directory: %s", dirPath)
}
dirEntry, _ := f.FindEntry(ctx, util.FullPath(dirPath))
// no such existing directory // no such existing directory
if dirEntry == nil { if dirEntry == nil {
@ -166,9 +151,6 @@ func (f *Filer) CreateEntry(ctx context.Context, entry *Entry, o_excl bool, isFr
return fmt.Errorf("%s is a file", dirPath) return fmt.Errorf("%s is a file", dirPath)
} }
// cache the directory entry
f.cacheSetDirectory(dirPath, dirEntry, i)
// remember the direct parent directory entry // remember the direct parent directory entry
if i == len(dirParts)-1 { if i == len(dirParts)-1 {
lastDirectoryEntry = dirEntry lastDirectoryEntry = dirEntry
@ -295,45 +277,6 @@ func (f *Filer) doListDirectoryEntries(ctx context.Context, p util.FullPath, sta
return return
} }
// cacheDelDirectory evicts dirpath from the directory cache.
// It does nothing for the root path "/" or when the cache is nil.
func (f *Filer) cacheDelDirectory(dirpath string) {
	if dirpath == "/" || f.directoryCache == nil {
		return
	}

	f.directoryCache.Delete(dirpath)
}
// cacheGetDirectory returns the cached *Entry stored under dirpath.
// It returns nil when the cache is nil or holds no item for dirpath.
func (f *Filer) cacheGetDirectory(dirpath string) *Entry {
	if f.directoryCache == nil {
		return nil
	}

	if cached := f.directoryCache.Get(dirpath); cached != nil {
		return cached.Value().(*Entry)
	}
	return nil
}
// cacheSetDirectory stores dirEntry under dirpath in the directory cache.
// It is a no-op when the cache is nil.
//
// The time-to-live starts at 60 minutes; for level below 10 it is reduced by
// 6 minutes per level. Levels of 10 and above keep the full 60 minutes.
func (f *Filer) cacheSetDirectory(dirpath string, dirEntry *Entry, level int) {
	if f.directoryCache == nil {
		return
	}

	ttl := 60 * time.Minute
	if level < 10 {
		ttl -= time.Duration(level*6) * time.Minute
	}

	f.directoryCache.Set(dirpath, dirEntry, ttl)
}
func (f *Filer) Shutdown() { func (f *Filer) Shutdown() {
f.LocalMetaLogBuffer.Shutdown() f.LocalMetaLogBuffer.Shutdown()
f.Store.Shutdown() f.Store.Shutdown()

6
weed/filer2/filer_delete_entry.go

@ -65,6 +65,7 @@ func (f *Filer) doBatchDeleteFolderMetaAndData(ctx context.Context, entry *Entry
} }
if lastFileName == "" && !isRecursive && len(entries) > 0 { if lastFileName == "" && !isRecursive && len(entries) > 0 {
// only for first iteration in the loop // only for first iteration in the loop
glog.Errorf("deleting a folder %s has children: %+v ...", entry.FullPath, entries[0].Name())
return nil, fmt.Errorf("fail to delete non-empty folder: %s", entry.FullPath) return nil, fmt.Errorf("fail to delete non-empty folder: %s", entry.FullPath)
} }
@ -73,7 +74,6 @@ func (f *Filer) doBatchDeleteFolderMetaAndData(ctx context.Context, entry *Entry
var dirChunks []*filer_pb.FileChunk var dirChunks []*filer_pb.FileChunk
if sub.IsDirectory() { if sub.IsDirectory() {
dirChunks, err = f.doBatchDeleteFolderMetaAndData(ctx, sub, isRecursive, ignoreRecursiveError, shouldDeleteChunks, false) dirChunks, err = f.doBatchDeleteFolderMetaAndData(ctx, sub, isRecursive, ignoreRecursiveError, shouldDeleteChunks, false)
f.cacheDelDirectory(string(sub.FullPath))
chunks = append(chunks, dirChunks...) chunks = append(chunks, dirChunks...)
} else { } else {
f.NotifyUpdateEvent(ctx, sub, nil, shouldDeleteChunks, isFromOtherCluster) f.NotifyUpdateEvent(ctx, sub, nil, shouldDeleteChunks, isFromOtherCluster)
@ -107,9 +107,7 @@ func (f *Filer) doDeleteEntryMetaAndData(ctx context.Context, entry *Entry, shou
if storeDeletionErr := f.Store.DeleteEntry(ctx, entry.FullPath); storeDeletionErr != nil { if storeDeletionErr := f.Store.DeleteEntry(ctx, entry.FullPath); storeDeletionErr != nil {
return fmt.Errorf("filer store delete: %v", storeDeletionErr) return fmt.Errorf("filer store delete: %v", storeDeletionErr)
} }
if entry.IsDirectory() {
f.cacheDelDirectory(string(entry.FullPath))
} else {
if !entry.IsDirectory() {
f.NotifyUpdateEvent(ctx, entry, nil, shouldDeleteChunks, isFromOtherCluster) f.NotifyUpdateEvent(ctx, entry, nil, shouldDeleteChunks, isFromOtherCluster)
} }

8
weed/filer2/filer_deletion.go

@ -1,6 +1,7 @@
package filer2 package filer2
import ( import (
"strings"
"time" "time"
"github.com/chrislusf/seaweedfs/weed/glog" "github.com/chrislusf/seaweedfs/weed/glog"
@ -50,15 +51,14 @@ func (f *Filer) loopProcessingDeletion() {
fileIds = fileIds[:0] fileIds = fileIds[:0]
} }
deletionCount = len(toDeleteFileIds) deletionCount = len(toDeleteFileIds)
deleteResults, err := operation.DeleteFilesWithLookupVolumeId(f.GrpcDialOption, toDeleteFileIds, lookupFunc)
_, err := operation.DeleteFilesWithLookupVolumeId(f.GrpcDialOption, toDeleteFileIds, lookupFunc)
if err != nil { if err != nil {
if !strings.Contains(err.Error(), "already deleted") {
glog.V(0).Infof("deleting fileIds len=%d error: %v", deletionCount, err) glog.V(0).Infof("deleting fileIds len=%d error: %v", deletionCount, err)
}
} else { } else {
glog.V(1).Infof("deleting fileIds len=%d", deletionCount) glog.V(1).Infof("deleting fileIds len=%d", deletionCount)
} }
if len(deleteResults) != deletionCount {
glog.V(0).Infof("delete %d fileIds actual %d", deletionCount, len(deleteResults))
}
} }
}) })

2
weed/filer2/leveldb/leveldb_store_test.go

@ -17,7 +17,6 @@ func TestCreateAndFind(t *testing.T) {
store := &LevelDBStore{} store := &LevelDBStore{}
store.initialize(dir) store.initialize(dir)
filer.SetStore(store) filer.SetStore(store)
filer.DisableDirectoryCache()
fullpath := util.FullPath("/home/chris/this/is/one/file1.jpg") fullpath := util.FullPath("/home/chris/this/is/one/file1.jpg")
@ -72,7 +71,6 @@ func TestEmptyRoot(t *testing.T) {
store := &LevelDBStore{} store := &LevelDBStore{}
store.initialize(dir) store.initialize(dir)
filer.SetStore(store) filer.SetStore(store)
filer.DisableDirectoryCache()
ctx := context.Background() ctx := context.Background()

2
weed/filer2/leveldb2/leveldb2_store_test.go

@ -17,7 +17,6 @@ func TestCreateAndFind(t *testing.T) {
store := &LevelDB2Store{} store := &LevelDB2Store{}
store.initialize(dir, 2) store.initialize(dir, 2)
filer.SetStore(store) filer.SetStore(store)
filer.DisableDirectoryCache()
fullpath := util.FullPath("/home/chris/this/is/one/file1.jpg") fullpath := util.FullPath("/home/chris/this/is/one/file1.jpg")
@ -72,7 +71,6 @@ func TestEmptyRoot(t *testing.T) {
store := &LevelDB2Store{} store := &LevelDB2Store{}
store.initialize(dir, 2) store.initialize(dir, 2)
filer.SetStore(store) filer.SetStore(store)
filer.DisableDirectoryCache()
ctx := context.Background() ctx := context.Background()

96
weed/filer2/reader_at.go

@ -15,12 +15,11 @@ import (
type ChunkReadAt struct { type ChunkReadAt struct {
masterClient *wdclient.MasterClient masterClient *wdclient.MasterClient
chunkViews []*ChunkView chunkViews []*ChunkView
buffer []byte
bufferOffset int64
lookupFileId func(fileId string) (targetUrl string, err error) lookupFileId func(fileId string) (targetUrl string, err error)
readerLock sync.Mutex readerLock sync.Mutex
fileSize int64
chunkCache *chunk_cache.ChunkCache
chunkCache chunk_cache.ChunkCache
} }
// var _ = io.ReaderAt(&ChunkReadAt{}) // var _ = io.ReaderAt(&ChunkReadAt{})
@ -54,13 +53,13 @@ func LookupFn(filerClient filer_pb.FilerClient) LookupFileIdFunctionType {
} }
} }
func NewChunkReaderAtFromClient(filerClient filer_pb.FilerClient, chunkViews []*ChunkView, chunkCache *chunk_cache.ChunkCache) *ChunkReadAt {
func NewChunkReaderAtFromClient(filerClient filer_pb.FilerClient, chunkViews []*ChunkView, chunkCache chunk_cache.ChunkCache, fileSize int64) *ChunkReadAt {
return &ChunkReadAt{ return &ChunkReadAt{
chunkViews: chunkViews, chunkViews: chunkViews,
lookupFileId: LookupFn(filerClient), lookupFileId: LookupFn(filerClient),
bufferOffset: -1,
chunkCache: chunkCache, chunkCache: chunkCache,
fileSize: fileSize,
} }
} }
@ -69,75 +68,78 @@ func (c *ChunkReadAt) ReadAt(p []byte, offset int64) (n int, err error) {
c.readerLock.Lock() c.readerLock.Lock()
defer c.readerLock.Unlock() defer c.readerLock.Unlock()
for n < len(p) && err == nil {
readCount, readErr := c.doReadAt(p[n:], offset+int64(n))
n += readCount
err = readErr
if readCount == 0 {
return n, io.EOF
}
}
return
glog.V(4).Infof("ReadAt [%d,%d) of total file size %d bytes %d chunk views", offset, offset+int64(len(p)), c.fileSize, len(c.chunkViews))
return c.doReadAt(p[n:], offset+int64(n))
} }
func (c *ChunkReadAt) doReadAt(p []byte, offset int64) (n int, err error) { func (c *ChunkReadAt) doReadAt(p []byte, offset int64) (n int, err error) {
var found bool
for _, chunk := range c.chunkViews {
if chunk.LogicOffset <= offset && offset < chunk.LogicOffset+int64(chunk.Size) {
found = true
if c.bufferOffset != chunk.LogicOffset {
c.buffer, err = c.fetchChunkData(chunk)
if err != nil {
glog.Errorf("fetching chunk %+v: %v\n", chunk, err)
}
c.bufferOffset = chunk.LogicOffset
var buffer []byte
startOffset, remaining := offset, int64(len(p))
for i, chunk := range c.chunkViews {
if remaining <= 0 {
break
} }
if startOffset < chunk.LogicOffset {
gap := int(chunk.LogicOffset - startOffset)
glog.V(4).Infof("zero [%d,%d)", startOffset, startOffset+int64(gap))
n += int(min(int64(gap), remaining))
startOffset, remaining = chunk.LogicOffset, remaining-int64(gap)
if remaining <= 0 {
break break
} }
} }
if !found {
return 0, io.EOF
// fmt.Printf(">>> doReadAt [%d,%d), chunk[%d,%d)\n", offset, offset+int64(len(p)), chunk.LogicOffset, chunk.LogicOffset+int64(chunk.Size))
chunkStart, chunkStop := max(chunk.LogicOffset, startOffset), min(chunk.LogicOffset+int64(chunk.Size), startOffset+remaining)
if chunkStart >= chunkStop {
continue
}
glog.V(4).Infof("read [%d,%d), %d/%d chunk %s [%d,%d)", chunkStart, chunkStop, i, len(c.chunkViews), chunk.FileId, chunk.LogicOffset-chunk.Offset, chunk.LogicOffset-chunk.Offset+int64(chunk.Size))
buffer, err = c.readFromWholeChunkData(chunk)
if err != nil {
glog.Errorf("fetching chunk %+v: %v\n", chunk, err)
return
} }
bufferOffset := chunkStart - chunk.LogicOffset + chunk.Offset
copied := copy(p[startOffset-offset:chunkStop-chunkStart+startOffset-offset], buffer[bufferOffset:bufferOffset+chunkStop-chunkStart])
n += copied
startOffset, remaining = startOffset+int64(copied), remaining-int64(copied)
}
glog.V(4).Infof("doReadAt [%d,%d), n:%v, err:%v", offset, offset+int64(len(p)), n, err)
if err == nil {
n = copy(p, c.buffer[offset-c.bufferOffset:])
if err == nil && remaining > 0 && c.fileSize > startOffset {
delta := int(min(remaining, c.fileSize - startOffset))
glog.V(4).Infof("zero2 [%d,%d) of file size %d bytes", startOffset, startOffset+int64(delta), c.fileSize)
n += delta
} }
// fmt.Printf("> doReadAt [%d,%d), buffer:[%d,%d)\n", offset, offset+int64(n), c.bufferOffset, c.bufferOffset+int64(len(c.buffer)))
if err == nil && offset+int64(len(p)) > c.fileSize {
err = io.EOF
}
// fmt.Printf("~~~ filled %d, err: %v\n\n", n, err)
return return
} }
func (c *ChunkReadAt) fetchChunkData(chunkView *ChunkView) (data []byte, err error) {
func (c *ChunkReadAt) readFromWholeChunkData(chunkView *ChunkView) (chunkData []byte, err error) {
glog.V(4).Infof("fetchChunkData %s [%d,%d)\n", chunkView.FileId, chunkView.LogicOffset, chunkView.LogicOffset+int64(chunkView.Size))
glog.V(4).Infof("readFromWholeChunkData %s offset %d [%d,%d) size at least %d", chunkView.FileId, chunkView.Offset, chunkView.LogicOffset, chunkView.LogicOffset+int64(chunkView.Size), chunkView.ChunkSize)
hasDataInCache := false
chunkData := c.chunkCache.GetChunk(chunkView.FileId, chunkView.ChunkSize)
chunkData = c.chunkCache.GetChunk(chunkView.FileId, chunkView.ChunkSize)
if chunkData != nil { if chunkData != nil {
glog.V(3).Infof("cache hit %s [%d,%d)", chunkView.FileId, chunkView.LogicOffset, chunkView.LogicOffset+int64(chunkView.Size))
hasDataInCache = true
glog.V(5).Infof("cache hit %s [%d,%d)", chunkView.FileId, chunkView.LogicOffset-chunkView.Offset, chunkView.LogicOffset-chunkView.Offset+int64(len(chunkData)))
} else { } else {
glog.V(4).Infof("doFetchFullChunkData %s", chunkView.FileId)
chunkData, err = c.doFetchFullChunkData(chunkView.FileId, chunkView.CipherKey, chunkView.IsGzipped) chunkData, err = c.doFetchFullChunkData(chunkView.FileId, chunkView.CipherKey, chunkView.IsGzipped)
if err != nil { if err != nil {
return nil, err
}
}
if int64(len(chunkData)) < chunkView.Offset+int64(chunkView.Size) {
glog.Errorf("unexpected larger cached:%v chunk %s [%d,%d) than %d", hasDataInCache, chunkView.FileId, chunkView.Offset, chunkView.Offset+int64(chunkView.Size), len(chunkData))
return nil, fmt.Errorf("unexpected larger cached:%v chunk %s [%d,%d) than %d", hasDataInCache, chunkView.FileId, chunkView.Offset, chunkView.Offset+int64(chunkView.Size), len(chunkData))
return
} }
data = chunkData[chunkView.Offset : chunkView.Offset+int64(chunkView.Size)]
if !hasDataInCache {
c.chunkCache.SetChunk(chunkView.FileId, chunkData) c.chunkCache.SetChunk(chunkView.FileId, chunkData)
} }
return data, nil
return
} }
func (c *ChunkReadAt) doFetchFullChunkData(fileId string, cipherKey []byte, isGzipped bool) ([]byte, error) { func (c *ChunkReadAt) doFetchFullChunkData(fileId string, cipherKey []byte, isGzipped bool) ([]byte, error) {

156
weed/filer2/reader_at_test.go

@ -0,0 +1,156 @@
package filer2
import (
"fmt"
"io"
"math"
"strconv"
"sync"
"testing"
)
// mockChunkCache is a stateless stand-in for a chunk cache used by the
// reader tests: every lookup is a "hit" whose content is derived from the
// file id.
type mockChunkCache struct{}

// GetChunk returns a slice of minSize bytes, each set to the numeric value of
// fileId converted to a byte. A fileId that does not parse as an integer
// produces zero bytes, because the parse error is deliberately ignored.
func (m *mockChunkCache) GetChunk(fileId string, minSize uint64) (data []byte) {
	fill, _ := strconv.Atoi(fileId) // a parse failure leaves fill at 0

	data = make([]byte, minSize)
	for idx := range data {
		data[idx] = byte(fill)
	}
	return data
}

// SetChunk discards the data; the mock stores nothing.
func (m *mockChunkCache) SetChunk(fileId string, data []byte) {
}
// TestReaderAt reads from a 10-byte file described by five visible intervals
// that leave offsets 0, 2, 4 and 6 uncovered. Each case checks only the byte
// count and the error returned by ReadAt.
func TestReaderAt(t *testing.T) {
	visibles := []VisibleInterval{
		{start: 1, stop: 2, fileId: "1", chunkSize: 9},
		{start: 3, stop: 4, fileId: "3", chunkSize: 1},
		{start: 5, stop: 6, fileId: "5", chunkSize: 2},
		{start: 7, stop: 9, fileId: "7", chunkSize: 2},
		{start: 9, stop: 10, fileId: "9", chunkSize: 2},
	}

	readerAt := &ChunkReadAt{
		chunkViews:   ViewFromVisibleIntervals(visibles, 0, math.MaxInt64),
		lookupFileId: nil,
		readerLock:   sync.Mutex{},
		fileSize:     10,
		chunkCache:   &mockChunkCache{},
	}

	cases := []struct {
		offset      int64
		size        int
		expected    int
		expectedErr error
	}{
		{0, 10, 10, nil},
		{0, 12, 10, io.EOF}, // asks for bytes beyond the file size
		{2, 8, 8, nil},
		{3, 6, 6, nil},
	}
	for _, tc := range cases {
		testReadAt(t, readerAt, tc.offset, tc.size, tc.expected, tc.expectedErr)
	}
}
// testReadAt performs one ReadAt of size bytes at offset on readerAt and
// verifies the outcome.
//
// It reports a test error when the number of bytes read differs from expected
// or when the returned error is not identical to expectedErr. The buffer is
// also printed to stdout as unpadded hex digits, one value per byte, followed
// by a newline.
func testReadAt(t *testing.T, readerAt *ChunkReadAt, offset int64, size int, expected int, expectedErr error) {
	// Mark this function as a helper so failures are attributed to the
	// calling test case instead of to the lines below.
	t.Helper()

	data := make([]byte, size)
	n, err := readerAt.ReadAt(data, offset)

	for _, d := range data {
		fmt.Printf("%x", d)
	}
	fmt.Println()

	if expected != n {
		t.Errorf("unexpected read size: %d, expect: %d", n, expected)
	}
	if err != expectedErr {
		t.Errorf("unexpected read error: %v, expect: %v", err, expectedErr)
	}
}
// TestReaderAt0 reads from a 10-byte file whose visible intervals cover only
// [2,5) and [7,9). It includes reads that start at or beyond the file size,
// which are expected to return 0 bytes and io.EOF.
func TestReaderAt0(t *testing.T) {
	visibles := []VisibleInterval{
		{start: 2, stop: 5, fileId: "1", chunkSize: 9},
		{start: 7, stop: 9, fileId: "2", chunkSize: 9},
	}

	readerAt := &ChunkReadAt{
		chunkViews:   ViewFromVisibleIntervals(visibles, 0, math.MaxInt64),
		lookupFileId: nil,
		readerLock:   sync.Mutex{},
		fileSize:     10,
		chunkCache:   &mockChunkCache{},
	}

	cases := []struct {
		offset      int64
		size        int
		expected    int
		expectedErr error
	}{
		{0, 10, 10, nil},
		{3, 16, 7, io.EOF}, // request runs past the file size
		{3, 5, 5, nil},
		{11, 5, 0, io.EOF}, // starts beyond the file size
		{10, 5, 0, io.EOF}, // starts exactly at the file size
	}
	for _, tc := range cases {
		testReadAt(t, readerAt, tc.offset, tc.size, tc.expected, tc.expectedErr)
	}
}
// TestReaderAt1 reads from a 20-byte file whose only visible interval is
// [2,5), so most of the file has no chunk behind it. Reads that stay within
// the file size expect a full byte count; reads that run past it expect a
// short count and io.EOF.
func TestReaderAt1(t *testing.T) {
	visibles := []VisibleInterval{
		{start: 2, stop: 5, fileId: "1", chunkSize: 9},
	}

	readerAt := &ChunkReadAt{
		chunkViews:   ViewFromVisibleIntervals(visibles, 0, math.MaxInt64),
		lookupFileId: nil,
		readerLock:   sync.Mutex{},
		fileSize:     20,
		chunkCache:   &mockChunkCache{},
	}

	cases := []struct {
		offset      int64
		size        int
		expected    int
		expectedErr error
	}{
		{0, 20, 20, nil},
		{1, 7, 7, nil},
		{0, 1, 1, nil},
		{18, 4, 2, io.EOF},
		{12, 4, 4, nil},
		{4, 20, 16, io.EOF},
		{4, 10, 10, nil},
		{1, 10, 10, nil},
	}
	for _, tc := range cases {
		testReadAt(t, readerAt, tc.offset, tc.size, tc.expected, tc.expectedErr)
	}
}

6
weed/filer2/stream.go

@ -32,7 +32,7 @@ func StreamContent(masterClient *wdclient.MasterClient, w io.Writer, chunks []*f
for _, chunkView := range chunkViews { for _, chunkView := range chunkViews {
urlString := fileId2Url[chunkView.FileId] urlString := fileId2Url[chunkView.FileId]
err := util.ReadUrlAsStream(urlString, chunkView.CipherKey, chunkView.IsGzipped, chunkView.IsFullChunk(), chunkView.Offset, int(chunkView.Size), func(data []byte) {
err := util.ReadUrlAsStream(urlString+"?readDeleted=true", chunkView.CipherKey, chunkView.IsGzipped, chunkView.IsFullChunk(), chunkView.Offset, int(chunkView.Size), func(data []byte) {
w.Write(data) w.Write(data)
}) })
if err != nil { if err != nil {
@ -63,7 +63,7 @@ func ReadAll(masterClient *wdclient.MasterClient, chunks []*filer_pb.FileChunk)
glog.V(1).Infof("operation LookupFileId %s failed, err: %v", chunkView.FileId, err) glog.V(1).Infof("operation LookupFileId %s failed, err: %v", chunkView.FileId, err)
return nil, err return nil, err
} }
err = util.ReadUrlAsStream(urlString, chunkView.CipherKey, chunkView.IsGzipped, chunkView.IsFullChunk(), chunkView.Offset, int(chunkView.Size), func(data []byte) {
err = util.ReadUrlAsStream(urlString+"?readDeleted=true", chunkView.CipherKey, chunkView.IsGzipped, chunkView.IsFullChunk(), chunkView.Offset, int(chunkView.Size), func(data []byte) {
buffer.Write(data) buffer.Write(data)
}) })
if err != nil { if err != nil {
@ -175,7 +175,7 @@ func (c *ChunkStreamReader) fetchChunkToBuffer(chunkView *ChunkView) error {
return err return err
} }
var buffer bytes.Buffer var buffer bytes.Buffer
err = util.ReadUrlAsStream(urlString, chunkView.CipherKey, chunkView.IsGzipped, chunkView.IsFullChunk(), chunkView.Offset, int(chunkView.Size), func(data []byte) {
err = util.ReadUrlAsStream(urlString+"?readDeleted=true", chunkView.CipherKey, chunkView.IsGzipped, chunkView.IsFullChunk(), chunkView.Offset, int(chunkView.Size), func(data []byte) {
buffer.Write(data) buffer.Write(data)
}) })
if err != nil { if err != nil {

47
weed/filesys/dir.go

@ -63,7 +63,7 @@ func (dir *Dir) Attr(ctx context.Context, attr *fuse.Attr) error {
attr.Gid = dir.entry.Attributes.Gid attr.Gid = dir.entry.Attributes.Gid
attr.Uid = dir.entry.Attributes.Uid attr.Uid = dir.entry.Attributes.Uid
glog.V(4).Infof("dir Attr %s, attr: %+v", dir.FullPath(), attr)
glog.V(5).Infof("dir Attr %s, attr: %+v", dir.FullPath(), attr)
return nil return nil
} }
@ -101,7 +101,7 @@ func (dir *Dir) Fsync(ctx context.Context, req *fuse.FsyncRequest) error {
} }
func (dir *Dir) newFile(name string, entry *filer_pb.Entry) fs.Node { func (dir *Dir) newFile(name string, entry *filer_pb.Entry) fs.Node {
return dir.wfs.fsNodeCache.EnsureFsNode(util.NewFullPath(dir.FullPath(), name), func() fs.Node {
f := dir.wfs.fsNodeCache.EnsureFsNode(util.NewFullPath(dir.FullPath(), name), func() fs.Node {
return &File{ return &File{
Name: name, Name: name,
dir: dir, dir: dir,
@ -110,14 +110,17 @@ func (dir *Dir) newFile(name string, entry *filer_pb.Entry) fs.Node {
entryViewCache: nil, entryViewCache: nil,
} }
}) })
f.(*File).dir = dir // in case dir node was created later
return f
} }
func (dir *Dir) newDirectory(fullpath util.FullPath, entry *filer_pb.Entry) fs.Node { func (dir *Dir) newDirectory(fullpath util.FullPath, entry *filer_pb.Entry) fs.Node {
return dir.wfs.fsNodeCache.EnsureFsNode(fullpath, func() fs.Node {
d := dir.wfs.fsNodeCache.EnsureFsNode(fullpath, func() fs.Node {
return &Dir{name: entry.Name, wfs: dir.wfs, entry: entry, parent: dir} return &Dir{name: entry.Name, wfs: dir.wfs, entry: entry, parent: dir}
}) })
d.(*Dir).parent = dir // in case dir node was created later
return d
} }
func (dir *Dir) Create(ctx context.Context, req *fuse.CreateRequest, func (dir *Dir) Create(ctx context.Context, req *fuse.CreateRequest,
@ -218,7 +221,7 @@ func (dir *Dir) Mkdir(ctx context.Context, req *fuse.MkdirRequest) (fs.Node, err
func (dir *Dir) Lookup(ctx context.Context, req *fuse.LookupRequest, resp *fuse.LookupResponse) (node fs.Node, err error) { func (dir *Dir) Lookup(ctx context.Context, req *fuse.LookupRequest, resp *fuse.LookupResponse) (node fs.Node, err error) {
glog.V(4).Infof("dir Lookup %s: %s by %s", dir.FullPath(), req.Name, req.Header.String())
glog.V(5).Infof("dir Lookup %s: %s by %s", dir.FullPath(), req.Name, req.Header.String())
fullFilePath := util.NewFullPath(dir.FullPath(), req.Name) fullFilePath := util.NewFullPath(dir.FullPath(), req.Name)
dirPath := util.FullPath(dir.FullPath()) dirPath := util.FullPath(dir.FullPath())
@ -237,7 +240,7 @@ func (dir *Dir) Lookup(ctx context.Context, req *fuse.LookupRequest, resp *fuse.
return nil, fuse.ENOENT return nil, fuse.ENOENT
} }
} else { } else {
glog.V(4).Infof("dir Lookup cache hit %s", fullFilePath)
glog.V(5).Infof("dir Lookup cache hit %s", fullFilePath)
} }
if entry != nil { if entry != nil {
@ -265,7 +268,7 @@ func (dir *Dir) Lookup(ctx context.Context, req *fuse.LookupRequest, resp *fuse.
func (dir *Dir) ReadDirAll(ctx context.Context) (ret []fuse.Dirent, err error) { func (dir *Dir) ReadDirAll(ctx context.Context) (ret []fuse.Dirent, err error) {
glog.V(3).Infof("dir ReadDirAll %s", dir.FullPath())
glog.V(5).Infof("dir ReadDirAll %s", dir.FullPath())
processEachEntryFn := func(entry *filer_pb.Entry, isLast bool) error { processEachEntryFn := func(entry *filer_pb.Entry, isLast bool) error {
fullpath := util.NewFullPath(dir.FullPath(), entry.Name) fullpath := util.NewFullPath(dir.FullPath(), entry.Name)
@ -314,12 +317,8 @@ func (dir *Dir) removeOneFile(req *fuse.RemoveRequest) error {
return nil return nil
} }
dir.wfs.deleteFileChunks(entry.Chunks)
dir.wfs.fsNodeCache.DeleteFsNode(filePath)
dir.wfs.metaCache.DeleteEntry(context.Background(), filePath)
// first, ensure the filer store can correctly delete
glog.V(3).Infof("remove file: %v", req) glog.V(3).Infof("remove file: %v", req)
err = filer_pb.Remove(dir.wfs, dir.FullPath(), req.Name, false, false, false, false) err = filer_pb.Remove(dir.wfs, dir.FullPath(), req.Name, false, false, false, false)
if err != nil { if err != nil {
@ -327,34 +326,40 @@ func (dir *Dir) removeOneFile(req *fuse.RemoveRequest) error {
return fuse.ENOENT return fuse.ENOENT
} }
// then, delete meta cache and fsNode cache
dir.wfs.metaCache.DeleteEntry(context.Background(), filePath)
dir.wfs.fsNodeCache.DeleteFsNode(filePath)
// delete the chunks last
dir.wfs.deleteFileChunks(entry.Chunks)
return nil return nil
} }
func (dir *Dir) removeFolder(req *fuse.RemoveRequest) error { func (dir *Dir) removeFolder(req *fuse.RemoveRequest) error {
t := util.NewFullPath(dir.FullPath(), req.Name)
dir.wfs.fsNodeCache.DeleteFsNode(t)
dir.wfs.metaCache.DeleteEntry(context.Background(), t)
glog.V(3).Infof("remove directory entry: %v", req) glog.V(3).Infof("remove directory entry: %v", req)
err := filer_pb.Remove(dir.wfs, dir.FullPath(), req.Name, true, false, false, false) err := filer_pb.Remove(dir.wfs, dir.FullPath(), req.Name, true, false, false, false)
if err != nil { if err != nil {
glog.V(3).Infof("remove %s/%s: %v", dir.FullPath(), req.Name, err)
glog.V(0).Infof("remove %s/%s: %v", dir.FullPath(), req.Name, err)
if strings.Contains(err.Error(), "non-empty"){ if strings.Contains(err.Error(), "non-empty"){
return fuse.EEXIST return fuse.EEXIST
} }
return fuse.ENOENT return fuse.ENOENT
} }
t := util.NewFullPath(dir.FullPath(), req.Name)
dir.wfs.metaCache.DeleteEntry(context.Background(), t)
dir.wfs.fsNodeCache.DeleteFsNode(t)
return nil return nil
} }
func (dir *Dir) Setattr(ctx context.Context, req *fuse.SetattrRequest, resp *fuse.SetattrResponse) error { func (dir *Dir) Setattr(ctx context.Context, req *fuse.SetattrRequest, resp *fuse.SetattrResponse) error {
glog.V(3).Infof("%v dir setattr %+v", dir.FullPath(), req)
glog.V(4).Infof("%v dir setattr %+v", dir.FullPath(), req)
if err := dir.maybeLoadEntry(); err != nil { if err := dir.maybeLoadEntry(); err != nil {
return err return err
@ -429,7 +434,7 @@ func (dir *Dir) Listxattr(ctx context.Context, req *fuse.ListxattrRequest, resp
} }
func (dir *Dir) Forget() { func (dir *Dir) Forget() {
glog.V(3).Infof("Forget dir %s", dir.FullPath())
glog.V(5).Infof("Forget dir %s", dir.FullPath())
dir.wfs.fsNodeCache.DeleteFsNode(util.FullPath(dir.FullPath())) dir.wfs.fsNodeCache.DeleteFsNode(util.FullPath(dir.FullPath()))
} }
@ -460,7 +465,7 @@ func (dir *Dir) saveEntry() error {
glog.V(1).Infof("save dir entry: %v", request) glog.V(1).Infof("save dir entry: %v", request)
_, err := client.UpdateEntry(context.Background(), request) _, err := client.UpdateEntry(context.Background(), request)
if err != nil { if err != nil {
glog.V(0).Infof("UpdateEntry dir %s/%s: %v", parentDir, name, err)
glog.Errorf("UpdateEntry dir %s/%s: %v", parentDir, name, err)
return fuse.EIO return fuse.EIO
} }

4
weed/filesys/dir_link.go

@ -18,7 +18,7 @@ var _ = fs.NodeReadlinker(&File{})
func (dir *Dir) Symlink(ctx context.Context, req *fuse.SymlinkRequest) (fs.Node, error) { func (dir *Dir) Symlink(ctx context.Context, req *fuse.SymlinkRequest) (fs.Node, error) {
glog.V(3).Infof("Symlink: %v/%v to %v", dir.FullPath(), req.NewName, req.Target)
glog.V(4).Infof("Symlink: %v/%v to %v", dir.FullPath(), req.NewName, req.Target)
request := &filer_pb.CreateEntryRequest{ request := &filer_pb.CreateEntryRequest{
Directory: dir.FullPath(), Directory: dir.FullPath(),
@ -63,7 +63,7 @@ func (file *File) Readlink(ctx context.Context, req *fuse.ReadlinkRequest) (stri
return "", fuse.Errno(syscall.EINVAL) return "", fuse.Errno(syscall.EINVAL)
} }
glog.V(3).Infof("Readlink: %v/%v => %v", file.dir.FullPath(), file.Name, file.entry.Attributes.SymlinkTarget)
glog.V(4).Infof("Readlink: %v/%v => %v", file.dir.FullPath(), file.Name, file.entry.Attributes.SymlinkTarget)
return file.entry.Attributes.SymlinkTarget, nil return file.entry.Attributes.SymlinkTarget, nil

12
weed/filesys/dir_rename.go

@ -63,7 +63,17 @@ func (dir *Dir) Rename(ctx context.Context, req *fuse.RenameRequest, newDirector
// fmt.Printf("rename path: %v => %v\n", oldPath, newPath) // fmt.Printf("rename path: %v => %v\n", oldPath, newPath)
dir.wfs.fsNodeCache.Move(oldPath, newPath) dir.wfs.fsNodeCache.Move(oldPath, newPath)
delete(dir.wfs.handles, oldPath.AsInode())
// change file handle
dir.wfs.handlesLock.Lock()
defer dir.wfs.handlesLock.Unlock()
inodeId := oldPath.AsInode()
existingHandle, found := dir.wfs.handles[inodeId]
if !found || existingHandle == nil {
return err
}
delete(dir.wfs.handles, inodeId)
dir.wfs.handles[newPath.AsInode()] = existingHandle
return err return err
} }

25
weed/filesys/dirty_page.go

@ -25,9 +25,6 @@ func newDirtyPages(file *File) *ContinuousDirtyPages {
} }
} }
func (pages *ContinuousDirtyPages) releaseResource() {
}
var counter = int32(0) var counter = int32(0)
func (pages *ContinuousDirtyPages) AddPage(offset int64, data []byte) (chunks []*filer_pb.FileChunk, err error) { func (pages *ContinuousDirtyPages) AddPage(offset int64, data []byte) (chunks []*filer_pb.FileChunk, err error) {
@ -35,7 +32,7 @@ func (pages *ContinuousDirtyPages) AddPage(offset int64, data []byte) (chunks []
pages.lock.Lock() pages.lock.Lock()
defer pages.lock.Unlock() defer pages.lock.Unlock()
glog.V(3).Infof("%s AddPage [%d,%d)", pages.f.fullpath(), offset, offset+int64(len(data)))
glog.V(5).Infof("%s AddPage [%d,%d) of %d bytes", pages.f.fullpath(), offset, offset+int64(len(data)), pages.f.entry.Attributes.FileSize)
if len(data) > int(pages.f.wfs.option.ChunkSizeLimit) { if len(data) > int(pages.f.wfs.option.ChunkSizeLimit) {
// this is more than what buffer can hold. // this is more than what buffer can hold.
@ -121,14 +118,16 @@ func (pages *ContinuousDirtyPages) saveExistingLargestPageToStorage() (chunk *fi
return nil, false, nil return nil, false, nil
} }
fileSize := int64(pages.f.entry.Attributes.FileSize)
for { for {
chunk, err = pages.saveToStorage(maxList.ToReader(), maxList.Offset(), maxList.Size())
chunkSize := min(maxList.Size(), fileSize-maxList.Offset())
chunk, err = pages.saveToStorage(maxList.ToReader(), maxList.Offset(), chunkSize)
if err == nil { if err == nil {
hasSavedData = true hasSavedData = true
glog.V(3).Infof("%s saveToStorage [%d,%d) %s", pages.f.fullpath(), maxList.Offset(), maxList.Offset()+maxList.Size(), chunk.FileId)
glog.V(4).Infof("saveToStorage %s %s [%d,%d) of %d bytes", pages.f.fullpath(), chunk.GetFileIdString(), maxList.Offset(), maxList.Offset()+chunkSize, fileSize)
return return
} else { } else {
glog.V(0).Infof("%s saveToStorage [%d,%d): %v", pages.f.fullpath(), maxList.Offset(), maxList.Offset()+maxList.Size(), err)
glog.V(0).Infof("%s saveToStorage [%d,%d): %v", pages.f.fullpath(), maxList.Offset(), maxList.Offset()+chunkSize, err)
time.Sleep(5 * time.Second) time.Sleep(5 * time.Second)
} }
} }
@ -139,6 +138,7 @@ func (pages *ContinuousDirtyPages) saveToStorage(reader io.Reader, offset int64,
dir, _ := pages.f.fullpath().DirAndName() dir, _ := pages.f.fullpath().DirAndName()
reader = io.LimitReader(reader, size)
chunk, collection, replication, err := pages.f.wfs.saveDataAsChunk(dir)(reader, pages.f.Name, offset) chunk, collection, replication, err := pages.f.wfs.saveDataAsChunk(dir)(reader, pages.f.Name, offset)
if err != nil { if err != nil {
return nil, err return nil, err
@ -149,6 +149,13 @@ func (pages *ContinuousDirtyPages) saveToStorage(reader io.Reader, offset int64,
} }
// maxUint64 returns the larger of x and y.
func maxUint64(x, y uint64) uint64 {
	if y >= x {
		return y
	}
	return x
}
func max(x, y int64) int64 { func max(x, y int64) int64 {
if x > y { if x > y {
return x return x
@ -162,11 +169,11 @@ func min(x, y int64) int64 {
return y return y
} }
func (pages *ContinuousDirtyPages) ReadDirtyData(data []byte, startOffset int64) (offset int64, size int) {
func (pages *ContinuousDirtyPages) ReadDirtyDataAt(data []byte, startOffset int64) (maxStop int64) {
pages.lock.Lock() pages.lock.Lock()
defer pages.lock.Unlock() defer pages.lock.Unlock()
return pages.intervals.ReadData(data, startOffset)
return pages.intervals.ReadDataAt(data, startOffset)
} }

15
weed/filesys/dirty_page_interval.go

@ -3,7 +3,6 @@ package filesys
import ( import (
"bytes" "bytes"
"io" "io"
"math"
) )
type IntervalNode struct { type IntervalNode struct {
@ -186,25 +185,15 @@ func (c *ContinuousIntervals) removeList(target *IntervalLinkedList) {
} }
func (c *ContinuousIntervals) ReadData(data []byte, startOffset int64) (offset int64, size int) {
var minOffset int64 = math.MaxInt64
var maxStop int64
func (c *ContinuousIntervals) ReadDataAt(data []byte, startOffset int64) (maxStop int64) {
for _, list := range c.lists { for _, list := range c.lists {
start := max(startOffset, list.Offset()) start := max(startOffset, list.Offset())
stop := min(startOffset+int64(len(data)), list.Offset()+list.Size()) stop := min(startOffset+int64(len(data)), list.Offset()+list.Size())
if start <= stop {
if start < stop {
list.ReadData(data[start-startOffset:], start, stop) list.ReadData(data[start-startOffset:], start, stop)
minOffset = min(minOffset, start)
maxStop = max(maxStop, stop) maxStop = max(maxStop, stop)
} }
} }
if minOffset == math.MaxInt64 {
return 0, 0
}
offset = minOffset
size = int(maxStop - offset)
return return
} }

51
weed/filesys/file.go

@ -7,12 +7,13 @@ import (
"sort" "sort"
"time" "time"
"github.com/seaweedfs/fuse"
"github.com/seaweedfs/fuse/fs"
"github.com/chrislusf/seaweedfs/weed/filer2" "github.com/chrislusf/seaweedfs/weed/filer2"
"github.com/chrislusf/seaweedfs/weed/glog" "github.com/chrislusf/seaweedfs/weed/glog"
"github.com/chrislusf/seaweedfs/weed/pb/filer_pb" "github.com/chrislusf/seaweedfs/weed/pb/filer_pb"
"github.com/chrislusf/seaweedfs/weed/util" "github.com/chrislusf/seaweedfs/weed/util"
"github.com/seaweedfs/fuse"
"github.com/seaweedfs/fuse/fs"
) )
const blockSize = 512 const blockSize = 512
@ -35,6 +36,7 @@ type File struct {
entryViewCache []filer2.VisibleInterval entryViewCache []filer2.VisibleInterval
isOpen int isOpen int
reader io.ReaderAt reader io.ReaderAt
dirtyMetadata bool
} }
func (file *File) fullpath() util.FullPath { func (file *File) fullpath() util.FullPath {
@ -43,7 +45,7 @@ func (file *File) fullpath() util.FullPath {
func (file *File) Attr(ctx context.Context, attr *fuse.Attr) error { func (file *File) Attr(ctx context.Context, attr *fuse.Attr) error {
glog.V(4).Infof("file Attr %s, open:%v, existing attr: %+v", file.fullpath(), file.isOpen, attr)
glog.V(5).Infof("file Attr %s, open:%v, existing attr: %+v", file.fullpath(), file.isOpen, attr)
if file.isOpen <= 0 { if file.isOpen <= 0 {
if err := file.maybeLoadEntry(ctx); err != nil { if err := file.maybeLoadEntry(ctx); err != nil {
@ -54,7 +56,7 @@ func (file *File) Attr(ctx context.Context, attr *fuse.Attr) error {
attr.Inode = file.fullpath().AsInode() attr.Inode = file.fullpath().AsInode()
attr.Valid = time.Second attr.Valid = time.Second
attr.Mode = os.FileMode(file.entry.Attributes.FileMode) attr.Mode = os.FileMode(file.entry.Attributes.FileMode)
attr.Size = filer2.TotalSize(file.entry.Chunks)
attr.Size = filer2.FileSize(file.entry)
if file.isOpen > 0 { if file.isOpen > 0 {
attr.Size = file.entry.Attributes.FileSize attr.Size = file.entry.Attributes.FileSize
glog.V(4).Infof("file Attr %s, open:%v, size: %d", file.fullpath(), file.isOpen, attr.Size) glog.V(4).Infof("file Attr %s, open:%v, size: %d", file.fullpath(), file.isOpen, attr.Size)
@ -91,7 +93,7 @@ func (file *File) Open(ctx context.Context, req *fuse.OpenRequest, resp *fuse.Op
resp.Handle = fuse.HandleID(handle.handle) resp.Handle = fuse.HandleID(handle.handle)
glog.V(3).Infof("%v file open handle id = %d", file.fullpath(), handle.handle)
glog.V(4).Infof("%v file open handle id = %d", file.fullpath(), handle.handle)
return handle, nil return handle, nil
@ -99,7 +101,7 @@ func (file *File) Open(ctx context.Context, req *fuse.OpenRequest, resp *fuse.Op
func (file *File) Setattr(ctx context.Context, req *fuse.SetattrRequest, resp *fuse.SetattrResponse) error { func (file *File) Setattr(ctx context.Context, req *fuse.SetattrRequest, resp *fuse.SetattrResponse) error {
glog.V(3).Infof("%v file setattr %+v, old:%+v", file.fullpath(), req, file.entry.Attributes)
glog.V(5).Infof("%v file setattr %+v", file.fullpath(), req)
if err := file.maybeLoadEntry(ctx); err != nil { if err := file.maybeLoadEntry(ctx); err != nil {
return err return err
@ -107,49 +109,72 @@ func (file *File) Setattr(ctx context.Context, req *fuse.SetattrRequest, resp *f
if req.Valid.Size() { if req.Valid.Size() {
glog.V(3).Infof("%v file setattr set size=%v", file.fullpath(), req.Size)
glog.V(4).Infof("%v file setattr set size=%v chunks=%d", file.fullpath(), req.Size, len(file.entry.Chunks))
if req.Size < filer2.TotalSize(file.entry.Chunks) { if req.Size < filer2.TotalSize(file.entry.Chunks) {
// fmt.Printf("truncate %v \n", fullPath) // fmt.Printf("truncate %v \n", fullPath)
var chunks []*filer_pb.FileChunk var chunks []*filer_pb.FileChunk
var truncatedChunks []*filer_pb.FileChunk
for _, chunk := range file.entry.Chunks { for _, chunk := range file.entry.Chunks {
int64Size := int64(chunk.Size) int64Size := int64(chunk.Size)
if chunk.Offset+int64Size > int64(req.Size) { if chunk.Offset+int64Size > int64(req.Size) {
// this chunk is truncated
int64Size = int64(req.Size) - chunk.Offset int64Size = int64(req.Size) - chunk.Offset
}
if int64Size > 0 { if int64Size > 0 {
chunks = append(chunks, chunk) chunks = append(chunks, chunk)
glog.V(4).Infof("truncated chunk %+v from %d to %d\n", chunk.GetFileIdString(), chunk.Size, int64Size)
chunk.Size = uint64(int64Size)
} else {
glog.V(4).Infof("truncated whole chunk %+v\n", chunk.GetFileIdString())
truncatedChunks = append(truncatedChunks, chunk)
}
} }
} }
file.wfs.deleteFileChunks(truncatedChunks)
file.entry.Chunks = chunks file.entry.Chunks = chunks
file.entryViewCache = nil file.entryViewCache = nil
file.reader = nil file.reader = nil
} }
file.entry.Attributes.FileSize = req.Size file.entry.Attributes.FileSize = req.Size
file.dirtyMetadata = true
} }
if req.Valid.Mode() { if req.Valid.Mode() {
file.entry.Attributes.FileMode = uint32(req.Mode) file.entry.Attributes.FileMode = uint32(req.Mode)
file.dirtyMetadata = true
} }
if req.Valid.Uid() { if req.Valid.Uid() {
file.entry.Attributes.Uid = req.Uid file.entry.Attributes.Uid = req.Uid
file.dirtyMetadata = true
} }
if req.Valid.Gid() { if req.Valid.Gid() {
file.entry.Attributes.Gid = req.Gid file.entry.Attributes.Gid = req.Gid
file.dirtyMetadata = true
} }
if req.Valid.Crtime() { if req.Valid.Crtime() {
file.entry.Attributes.Crtime = req.Crtime.Unix() file.entry.Attributes.Crtime = req.Crtime.Unix()
file.dirtyMetadata = true
} }
if req.Valid.Mtime() { if req.Valid.Mtime() {
file.entry.Attributes.Mtime = req.Mtime.Unix() file.entry.Attributes.Mtime = req.Mtime.Unix()
file.dirtyMetadata = true
}
if req.Valid.Handle() {
// fmt.Printf("file handle => %d\n", req.Handle)
} }
if file.isOpen > 0 { if file.isOpen > 0 {
return nil return nil
} }
if !file.dirtyMetadata {
return nil
}
return file.saveEntry() return file.saveEntry()
} }
@ -205,14 +230,14 @@ func (file *File) Listxattr(ctx context.Context, req *fuse.ListxattrRequest, res
func (file *File) Fsync(ctx context.Context, req *fuse.FsyncRequest) error { func (file *File) Fsync(ctx context.Context, req *fuse.FsyncRequest) error {
// fsync works at OS level // fsync works at OS level
// write the file chunks to the filerGrpcAddress // write the file chunks to the filerGrpcAddress
glog.V(3).Infof("%s/%s fsync file %+v", file.dir.FullPath(), file.Name, req)
glog.V(4).Infof("%s/%s fsync file %+v", file.dir.FullPath(), file.Name, req)
return nil return nil
} }
func (file *File) Forget() { func (file *File) Forget() {
t := util.NewFullPath(file.dir.FullPath(), file.Name) t := util.NewFullPath(file.dir.FullPath(), file.Name)
glog.V(3).Infof("Forget file %s", t)
glog.V(5).Infof("Forget file %s", t)
file.wfs.fsNodeCache.DeleteFsNode(t) file.wfs.fsNodeCache.DeleteFsNode(t)
} }
@ -246,7 +271,7 @@ func (file *File) addChunks(chunks []*filer_pb.FileChunk) {
file.reader = nil file.reader = nil
glog.V(3).Infof("%s existing %d chunks adds %d more", file.fullpath(), len(file.entry.Chunks), len(chunks))
glog.V(4).Infof("%s existing %d chunks adds %d more", file.fullpath(), len(file.entry.Chunks), len(chunks))
file.entry.Chunks = append(file.entry.Chunks, chunks...) file.entry.Chunks = append(file.entry.Chunks, chunks...)
} }
@ -265,10 +290,10 @@ func (file *File) saveEntry() error {
Entry: file.entry, Entry: file.entry,
} }
glog.V(1).Infof("save file entry: %v", request)
glog.V(4).Infof("save file entry: %v", request)
_, err := client.UpdateEntry(context.Background(), request) _, err := client.UpdateEntry(context.Background(), request)
if err != nil { if err != nil {
glog.V(0).Infof("UpdateEntry file %s/%s: %v", file.dir.FullPath(), file.Name, err)
glog.Errorf("UpdateEntry file %s/%s: %v", file.dir.FullPath(), file.Name, err)
return fuse.EIO return fuse.EIO
} }

76
weed/filesys/filehandle.go

@ -21,7 +21,6 @@ type FileHandle struct {
// cache file has been written to // cache file has been written to
dirtyPages *ContinuousDirtyPages dirtyPages *ContinuousDirtyPages
contentType string contentType string
dirtyMetadata bool
handle uint64 handle uint64
f *File f *File
@ -40,7 +39,7 @@ func newFileHandle(file *File, uid, gid uint32) *FileHandle {
Gid: gid, Gid: gid,
} }
if fh.f.entry != nil { if fh.f.entry != nil {
fh.f.entry.Attributes.FileSize = filer2.TotalSize(fh.f.entry.Chunks)
fh.f.entry.Attributes.FileSize = filer2.FileSize(fh.f.entry)
} }
return fh return fh
} }
@ -55,38 +54,45 @@ var _ = fs.HandleReleaser(&FileHandle{})
func (fh *FileHandle) Read(ctx context.Context, req *fuse.ReadRequest, resp *fuse.ReadResponse) error { func (fh *FileHandle) Read(ctx context.Context, req *fuse.ReadRequest, resp *fuse.ReadResponse) error {
glog.V(4).Infof("%s read fh %d: [%d,%d)", fh.f.fullpath(), fh.handle, req.Offset, req.Offset+int64(req.Size))
glog.V(4).Infof("%s read fh %d: [%d,%d) size %d resp.Data cap=%d", fh.f.fullpath(), fh.handle, req.Offset, req.Offset+int64(req.Size), req.Size, cap(resp.Data))
buff := make([]byte, req.Size)
buff := resp.Data[:cap(resp.Data)]
if req.Size > cap(resp.Data) {
// should not happen
buff = make([]byte, req.Size)
}
totalRead, err := fh.readFromChunks(buff, req.Offset) totalRead, err := fh.readFromChunks(buff, req.Offset)
if err == nil { if err == nil {
dirtyOffset, dirtySize := fh.readFromDirtyPages(buff, req.Offset)
if totalRead+req.Offset < dirtyOffset+int64(dirtySize) {
totalRead = dirtyOffset + int64(dirtySize) - req.Offset
}
maxStop := fh.readFromDirtyPages(buff, req.Offset)
totalRead = max(maxStop - req.Offset, totalRead)
} }
resp.Data = buff[:totalRead]
if err != nil { if err != nil {
glog.Errorf("file handle read %s: %v", fh.f.fullpath(), err) glog.Errorf("file handle read %s: %v", fh.f.fullpath(), err)
return fuse.EIO return fuse.EIO
} }
if totalRead > int64(len(buff)) {
glog.Warningf("%s FileHandle Read %d: [%d,%d) size %d totalRead %d", fh.f.fullpath(), fh.handle, req.Offset, req.Offset+int64(req.Size), req.Size, totalRead)
totalRead = min(int64(len(buff)), totalRead)
}
resp.Data = buff[:totalRead]
return err return err
} }
func (fh *FileHandle) readFromDirtyPages(buff []byte, startOffset int64) (offset int64, size int) {
return fh.dirtyPages.ReadDirtyData(buff, startOffset)
func (fh *FileHandle) readFromDirtyPages(buff []byte, startOffset int64) (maxStop int64) {
return fh.dirtyPages.ReadDirtyDataAt(buff, startOffset)
} }
func (fh *FileHandle) readFromChunks(buff []byte, offset int64) (int64, error) { func (fh *FileHandle) readFromChunks(buff []byte, offset int64) (int64, error) {
// this value should come from the filer instead of the old f
if len(fh.f.entry.Chunks) == 0 {
fileSize := int64(filer2.FileSize(fh.f.entry))
if fileSize == 0 {
glog.V(1).Infof("empty fh %v", fh.f.fullpath()) glog.V(1).Infof("empty fh %v", fh.f.fullpath())
return 0, nil
return 0, io.EOF
} }
var chunkResolveErr error var chunkResolveErr error
@ -99,8 +105,8 @@ func (fh *FileHandle) readFromChunks(buff []byte, offset int64) (int64, error) {
} }
if fh.f.reader == nil { if fh.f.reader == nil {
chunkViews := filer2.ViewFromVisibleIntervals(fh.f.entryViewCache, 0, math.MaxInt32)
fh.f.reader = filer2.NewChunkReaderAtFromClient(fh.f.wfs, chunkViews, fh.f.wfs.chunkCache)
chunkViews := filer2.ViewFromVisibleIntervals(fh.f.entryViewCache, 0, math.MaxInt64)
fh.f.reader = filer2.NewChunkReaderAtFromClient(fh.f.wfs, chunkViews, fh.f.wfs.chunkCache, fileSize)
} }
totalRead, err := fh.f.reader.ReadAt(buff, offset) totalRead, err := fh.f.reader.ReadAt(buff, offset)
@ -113,7 +119,7 @@ func (fh *FileHandle) readFromChunks(buff []byte, offset int64) (int64, error) {
glog.Errorf("file handle read %s: %v", fh.f.fullpath(), err) glog.Errorf("file handle read %s: %v", fh.f.fullpath(), err)
} }
// glog.V(0).Infof("file handle read %s [%d,%d] %d : %v", fh.f.fullpath(), offset, offset+int64(totalRead), totalRead, err)
glog.V(4).Infof("file handle read %s [%d,%d] %d : %v", fh.f.fullpath(), offset, offset+int64(totalRead), totalRead, err)
return int64(totalRead), err return int64(totalRead), err
} }
@ -126,7 +132,7 @@ func (fh *FileHandle) Write(ctx context.Context, req *fuse.WriteRequest, resp *f
copy(data, req.Data) copy(data, req.Data)
fh.f.entry.Attributes.FileSize = uint64(max(req.Offset+int64(len(data)), int64(fh.f.entry.Attributes.FileSize))) fh.f.entry.Attributes.FileSize = uint64(max(req.Offset+int64(len(data)), int64(fh.f.entry.Attributes.FileSize)))
// glog.V(0).Infof("%v write [%d,%d)", fh.f.fullpath(), req.Offset, req.Offset+int64(len(req.Data)))
glog.V(4).Infof("%v write [%d,%d) %d", fh.f.fullpath(), req.Offset, req.Offset+int64(len(req.Data)), len(req.Data))
chunks, err := fh.dirtyPages.AddPage(req.Offset, data) chunks, err := fh.dirtyPages.AddPage(req.Offset, data)
if err != nil { if err != nil {
@ -139,14 +145,14 @@ func (fh *FileHandle) Write(ctx context.Context, req *fuse.WriteRequest, resp *f
if req.Offset == 0 { if req.Offset == 0 {
// detect mime type // detect mime type
fh.contentType = http.DetectContentType(data) fh.contentType = http.DetectContentType(data)
fh.dirtyMetadata = true
fh.f.dirtyMetadata = true
} }
if len(chunks) > 0 { if len(chunks) > 0 {
fh.f.addChunks(chunks) fh.f.addChunks(chunks)
fh.dirtyMetadata = true
fh.f.dirtyMetadata = true
} }
return nil return nil
@ -154,24 +160,28 @@ func (fh *FileHandle) Write(ctx context.Context, req *fuse.WriteRequest, resp *f
func (fh *FileHandle) Release(ctx context.Context, req *fuse.ReleaseRequest) error { func (fh *FileHandle) Release(ctx context.Context, req *fuse.ReleaseRequest) error {
glog.V(4).Infof("%v release fh %d", fh.f.fullpath(), fh.handle)
glog.V(4).Infof("Release %v fh %d", fh.f.fullpath(), fh.handle)
fh.f.isOpen-- fh.f.isOpen--
if fh.f.isOpen <= 0 { if fh.f.isOpen <= 0 {
fh.dirtyPages.releaseResource()
fh.doFlush(ctx, req.Header)
fh.f.wfs.ReleaseHandle(fh.f.fullpath(), fuse.HandleID(fh.handle)) fh.f.wfs.ReleaseHandle(fh.f.fullpath(), fuse.HandleID(fh.handle))
}
fh.f.entryViewCache = nil fh.f.entryViewCache = nil
fh.f.reader = nil fh.f.reader = nil
}
return nil return nil
} }
func (fh *FileHandle) Flush(ctx context.Context, req *fuse.FlushRequest) error { func (fh *FileHandle) Flush(ctx context.Context, req *fuse.FlushRequest) error {
return fh.doFlush(ctx, req.Header)
}
func (fh *FileHandle) doFlush(ctx context.Context, header fuse.Header) error {
// fflush works at fh level // fflush works at fh level
// send the data to the OS // send the data to the OS
glog.V(4).Infof("%s fh %d flush %v", fh.f.fullpath(), fh.handle, req)
glog.V(4).Infof("doFlush %s fh %d %v", fh.f.fullpath(), fh.handle, header)
chunks, err := fh.dirtyPages.FlushToStorage() chunks, err := fh.dirtyPages.FlushToStorage()
if err != nil { if err != nil {
@ -181,10 +191,10 @@ func (fh *FileHandle) Flush(ctx context.Context, req *fuse.FlushRequest) error {
if len(chunks) > 0 { if len(chunks) > 0 {
fh.f.addChunks(chunks) fh.f.addChunks(chunks)
fh.dirtyMetadata = true
fh.f.dirtyMetadata = true
} }
if !fh.dirtyMetadata {
if !fh.f.dirtyMetadata {
return nil return nil
} }
@ -193,10 +203,10 @@ func (fh *FileHandle) Flush(ctx context.Context, req *fuse.FlushRequest) error {
if fh.f.entry.Attributes != nil { if fh.f.entry.Attributes != nil {
fh.f.entry.Attributes.Mime = fh.contentType fh.f.entry.Attributes.Mime = fh.contentType
if fh.f.entry.Attributes.Uid == 0 { if fh.f.entry.Attributes.Uid == 0 {
fh.f.entry.Attributes.Uid = req.Uid
fh.f.entry.Attributes.Uid = header.Uid
} }
if fh.f.entry.Attributes.Gid == 0 { if fh.f.entry.Attributes.Gid == 0 {
fh.f.entry.Attributes.Gid = req.Gid
fh.f.entry.Attributes.Gid = header.Gid
} }
if fh.f.entry.Attributes.Crtime == 0 { if fh.f.entry.Attributes.Crtime == 0 {
fh.f.entry.Attributes.Crtime = time.Now().Unix() fh.f.entry.Attributes.Crtime = time.Now().Unix()
@ -212,9 +222,9 @@ func (fh *FileHandle) Flush(ctx context.Context, req *fuse.FlushRequest) error {
Entry: fh.f.entry, Entry: fh.f.entry,
} }
glog.V(3).Infof("%s set chunks: %v", fh.f.fullpath(), len(fh.f.entry.Chunks))
glog.V(4).Infof("%s set chunks: %v", fh.f.fullpath(), len(fh.f.entry.Chunks))
for i, chunk := range fh.f.entry.Chunks { for i, chunk := range fh.f.entry.Chunks {
glog.V(3).Infof("%s chunks %d: %v [%d,%d)", fh.f.fullpath(), i, chunk.FileId, chunk.Offset, chunk.Offset+int64(chunk.Size))
glog.V(4).Infof("%s chunks %d: %v [%d,%d)", fh.f.fullpath(), i, chunk.GetFileIdString(), chunk.Offset, chunk.Offset+int64(chunk.Size))
} }
chunks, garbages := filer2.CompactFileChunks(filer2.LookupFn(fh.f.wfs), fh.f.entry.Chunks) chunks, garbages := filer2.CompactFileChunks(filer2.LookupFn(fh.f.wfs), fh.f.entry.Chunks)
@ -239,14 +249,14 @@ func (fh *FileHandle) Flush(ctx context.Context, req *fuse.FlushRequest) error {
fh.f.wfs.deleteFileChunks(garbages) fh.f.wfs.deleteFileChunks(garbages)
for i, chunk := range garbages { for i, chunk := range garbages {
glog.V(3).Infof("garbage %s chunks %d: %v [%d,%d)", fh.f.fullpath(), i, chunk.FileId, chunk.Offset, chunk.Offset+int64(chunk.Size))
glog.V(4).Infof("garbage %s chunks %d: %v [%d,%d)", fh.f.fullpath(), i, chunk.GetFileIdString(), chunk.Offset, chunk.Offset+int64(chunk.Size))
} }
return nil return nil
}) })
if err == nil { if err == nil {
fh.dirtyMetadata = false
fh.f.dirtyMetadata = false
} }
if err != nil { if err != nil {

9
weed/filesys/fscache.go

@ -3,8 +3,9 @@ package filesys
import ( import (
"sync" "sync"
"github.com/chrislusf/seaweedfs/weed/util"
"github.com/seaweedfs/fuse/fs" "github.com/seaweedfs/fuse/fs"
"github.com/chrislusf/seaweedfs/weed/util"
) )
type FsCache struct { type FsCache struct {
@ -118,7 +119,6 @@ func (c *FsCache) Move(oldPath util.FullPath, newPath util.FullPath) *FsNode {
target = target.ensureChild(p) target = target.ensureChild(p)
} }
parent := target.parent parent := target.parent
src.name = target.name
if dir, ok := src.node.(*Dir); ok { if dir, ok := src.node.(*Dir); ok {
dir.name = target.name // target is not Dir, but a shortcut dir.name = target.name // target is not Dir, but a shortcut
} }
@ -132,6 +132,7 @@ func (c *FsCache) Move(oldPath util.FullPath, newPath util.FullPath) *FsNode {
target.deleteSelf() target.deleteSelf()
src.name = target.name
src.connectToParent(parent) src.connectToParent(parent)
return src return src
@ -144,11 +145,15 @@ func (n *FsNode) connectToParent(parent *FsNode) {
oldNode.deleteSelf() oldNode.deleteSelf()
} }
if dir, ok := n.node.(*Dir); ok { if dir, ok := n.node.(*Dir); ok {
if parent.node != nil {
dir.parent = parent.node.(*Dir) dir.parent = parent.node.(*Dir)
} }
}
if f, ok := n.node.(*File); ok { if f, ok := n.node.(*File); ok {
if parent.node != nil {
f.dir = parent.node.(*Dir) f.dir = parent.node.(*Dir)
} }
}
n.childrenLock.Lock() n.childrenLock.Lock()
parent.children[n.name] = n parent.children[n.name] = n
n.childrenLock.Unlock() n.childrenLock.Unlock()

21
weed/filesys/fscache_test.go

@ -94,3 +94,24 @@ func TestFsCacheMove(t *testing.T) {
} }
} }
// TestFsCacheMove2 registers two file nodes under /a/b, moves /a/b/d onto the
// already occupied path /a/b/e, and checks that the node found at /a/b/e
// afterwards is a *File whose Name is "e".
func TestFsCacheMove2(t *testing.T) {
	cache := newFsCache(nil)

	cache.SetFsNode(util.FullPath("/a/b/d"), &File{Name: "dd"})
	cache.SetFsNode(util.FullPath("/a/b/e"), &File{Name: "ee"})

	cache.Move(util.FullPath("/a/b/d"), util.FullPath("/a/b/e"))

	d := cache.GetFsNode(util.FullPath("/a/b/e"))
	if d == nil {
		// Fatalf, not Errorf: continuing would hit the type assertion below
		// with a nil value and panic instead of reporting a test failure.
		t.Fatalf("unexpected nil node!")
	}

	// The comma-ok form keeps a wrong node type from panicking the test run.
	f, ok := d.(*File)
	if !ok {
		t.Fatalf("unexpected node type %T", d)
	}
	if f.Name != "e" {
		t.Errorf("unexpected node!")
	}
}

5
weed/filesys/meta_cache/meta_cache.go

@ -61,10 +61,15 @@ func (mc *MetaCache) AtomicUpdateEntry(ctx context.Context, oldPath util.FullPat
oldDir, _ := oldPath.DirAndName() oldDir, _ := oldPath.DirAndName()
if mc.visitedBoundary.HasVisited(util.FullPath(oldDir)) { if mc.visitedBoundary.HasVisited(util.FullPath(oldDir)) {
if oldPath != "" { if oldPath != "" {
if newEntry != nil && oldPath == newEntry.FullPath {
// skip the unnecessary deletion
// leave the update to the following InsertEntry operation
} else {
if err := mc.actualStore.DeleteEntry(ctx, oldPath); err != nil { if err := mc.actualStore.DeleteEntry(ctx, oldPath); err != nil {
return err return err
} }
} }
}
} else { } else {
// println("unknown old directory:", oldDir) // println("unknown old directory:", oldDir)
} }

2
weed/filesys/meta_cache/meta_cache_init.go

@ -14,7 +14,7 @@ func EnsureVisited(mc *MetaCache, client filer_pb.FilerClient, dirPath util.Full
mc.visitedBoundary.EnsureVisited(dirPath, func(path util.FullPath) (childDirectories []string, err error) { mc.visitedBoundary.EnsureVisited(dirPath, func(path util.FullPath) (childDirectories []string, err error) {
glog.V(2).Infof("ReadDirAllEntries %s ...", path)
glog.V(5).Infof("ReadDirAllEntries %s ...", path)
err = filer_pb.ReadDirAllEntries(client, dirPath, "", func(pbEntry *filer_pb.Entry, isLast bool) error { err = filer_pb.ReadDirAllEntries(client, dirPath, "", func(pbEntry *filer_pb.Entry, isLast bool) error {
entry := filer2.FromPbEntry(string(dirPath), pbEntry) entry := filer2.FromPbEntry(string(dirPath), pbEntry)

16
weed/filesys/wfs.go

@ -65,7 +65,7 @@ type WFS struct {
root fs.Node root fs.Node
fsNodeCache *FsCache fsNodeCache *FsCache
chunkCache *chunk_cache.ChunkCache
chunkCache *chunk_cache.TieredChunkCache
metaCache *meta_cache.MetaCache metaCache *meta_cache.MetaCache
} }
type statsCache struct { type statsCache struct {
@ -87,10 +87,7 @@ func NewSeaweedFileSystem(option *Option) *WFS {
cacheDir := path.Join(option.CacheDir, cacheUniqueId) cacheDir := path.Join(option.CacheDir, cacheUniqueId)
if option.CacheSizeMB > 0 { if option.CacheSizeMB > 0 {
os.MkdirAll(cacheDir, 0755) os.MkdirAll(cacheDir, 0755)
wfs.chunkCache = chunk_cache.NewChunkCache(256, cacheDir, option.CacheSizeMB)
grace.OnInterrupt(func() {
wfs.chunkCache.Shutdown()
})
wfs.chunkCache = chunk_cache.NewTieredChunkCache(256, cacheDir, option.CacheSizeMB)
} }
wfs.metaCache = meta_cache.NewMetaCache(path.Join(cacheDir, "meta")) wfs.metaCache = meta_cache.NewMetaCache(path.Join(cacheDir, "meta"))
@ -113,7 +110,7 @@ func (wfs *WFS) Root() (fs.Node, error) {
func (wfs *WFS) AcquireHandle(file *File, uid, gid uint32) (fileHandle *FileHandle) { func (wfs *WFS) AcquireHandle(file *File, uid, gid uint32) (fileHandle *FileHandle) {
fullpath := file.fullpath() fullpath := file.fullpath()
glog.V(4).Infof("%s AcquireHandle uid=%d gid=%d", fullpath, uid, gid)
glog.V(4).Infof("AcquireHandle %s uid=%d gid=%d", fullpath, uid, gid)
wfs.handlesLock.Lock() wfs.handlesLock.Lock()
defer wfs.handlesLock.Unlock() defer wfs.handlesLock.Unlock()
@ -127,7 +124,6 @@ func (wfs *WFS) AcquireHandle(file *File, uid, gid uint32) (fileHandle *FileHand
fileHandle = newFileHandle(file, uid, gid) fileHandle = newFileHandle(file, uid, gid)
wfs.handles[inodeId] = fileHandle wfs.handles[inodeId] = fileHandle
fileHandle.handle = inodeId fileHandle.handle = inodeId
glog.V(4).Infof("%s new fh %d", fullpath, fileHandle.handle)
return return
} }
@ -146,7 +142,7 @@ func (wfs *WFS) ReleaseHandle(fullpath util.FullPath, handleId fuse.HandleID) {
// Statfs is called to obtain file system metadata. Implements fuse.FSStatfser // Statfs is called to obtain file system metadata. Implements fuse.FSStatfser
func (wfs *WFS) Statfs(ctx context.Context, req *fuse.StatfsRequest, resp *fuse.StatfsResponse) error { func (wfs *WFS) Statfs(ctx context.Context, req *fuse.StatfsRequest, resp *fuse.StatfsResponse) error {
glog.V(4).Infof("reading fs stats: %+v", req)
glog.V(5).Infof("reading fs stats: %+v", req)
if wfs.stats.lastChecked < time.Now().Unix()-20 { if wfs.stats.lastChecked < time.Now().Unix()-20 {
@ -158,13 +154,13 @@ func (wfs *WFS) Statfs(ctx context.Context, req *fuse.StatfsRequest, resp *fuse.
Ttl: fmt.Sprintf("%ds", wfs.option.TtlSec), Ttl: fmt.Sprintf("%ds", wfs.option.TtlSec),
} }
glog.V(4).Infof("reading filer stats: %+v", request)
glog.V(5).Infof("reading filer stats: %+v", request)
resp, err := client.Statistics(context.Background(), request) resp, err := client.Statistics(context.Background(), request)
if err != nil { if err != nil {
glog.V(0).Infof("reading filer stats %v: %v", request, err) glog.V(0).Infof("reading filer stats %v: %v", request, err)
return err return err
} }
glog.V(4).Infof("read filer stats: %+v", resp)
glog.V(5).Infof("read filer stats: %+v", resp)
wfs.stats.TotalSize = resp.TotalSize wfs.stats.TotalSize = resp.TotalSize
wfs.stats.UsedSize = resp.UsedSize wfs.stats.UsedSize = resp.UsedSize

2
weed/filesys/wfs_deletion.go

@ -38,7 +38,7 @@ func (wfs *WFS) deleteFileIds(grpcDialOption grpc.DialOption, client filer_pb.Se
m := make(map[string]operation.LookupResult) m := make(map[string]operation.LookupResult)
glog.V(4).Infof("remove file lookup volume id locations: %v", vids)
glog.V(5).Infof("deleteFileIds lookup volume id locations: %v", vids)
resp, err := client.LookupVolume(context.Background(), &filer_pb.LookupVolumeRequest{ resp, err := client.LookupVolume(context.Background(), &filer_pb.LookupVolumeRequest{
VolumeIds: vids, VolumeIds: vids,
}) })

18
weed/operation/upload_content.go

@ -33,6 +33,7 @@ type UploadResult struct {
} }
func (uploadResult *UploadResult) ToPbFileChunk(fileId string, offset int64) *filer_pb.FileChunk { func (uploadResult *UploadResult) ToPbFileChunk(fileId string, offset int64) *filer_pb.FileChunk {
fid, _ := filer_pb.ToFileIdObject(fileId)
return &filer_pb.FileChunk{ return &filer_pb.FileChunk{
FileId: fileId, FileId: fileId,
Offset: offset, Offset: offset,
@ -41,6 +42,7 @@ func (uploadResult *UploadResult) ToPbFileChunk(fileId string, offset int64) *fi
ETag: uploadResult.ETag, ETag: uploadResult.ETag,
CipherKey: uploadResult.CipherKey, CipherKey: uploadResult.CipherKey,
IsCompressed: uploadResult.Gzip > 0, IsCompressed: uploadResult.Gzip > 0,
Fid: fid,
} }
} }
@ -63,7 +65,7 @@ var fileNameEscaper = strings.NewReplacer("\\", "\\\\", "\"", "\\\"")
// Upload sends a POST request to a volume server to upload the content with adjustable compression level // Upload sends a POST request to a volume server to upload the content with adjustable compression level
func UploadData(uploadUrl string, filename string, cipher bool, data []byte, isInputCompressed bool, mtype string, pairMap map[string]string, jwt security.EncodedJwt) (uploadResult *UploadResult, err error) { func UploadData(uploadUrl string, filename string, cipher bool, data []byte, isInputCompressed bool, mtype string, pairMap map[string]string, jwt security.EncodedJwt) (uploadResult *UploadResult, err error) {
uploadResult, err = doUploadData(uploadUrl, filename, cipher, data, isInputCompressed, mtype, pairMap, jwt)
uploadResult, err = retriedUploadData(uploadUrl, filename, cipher, data, isInputCompressed, mtype, pairMap, jwt)
return return
} }
@ -79,10 +81,22 @@ func doUpload(uploadUrl string, filename string, cipher bool, reader io.Reader,
err = fmt.Errorf("read input: %v", err) err = fmt.Errorf("read input: %v", err)
return return
} }
uploadResult, uploadErr := doUploadData(uploadUrl, filename, cipher, data, isInputCompressed, mtype, pairMap, jwt)
uploadResult, uploadErr := retriedUploadData(uploadUrl, filename, cipher, data, isInputCompressed, mtype, pairMap, jwt)
return uploadResult, uploadErr, data return uploadResult, uploadErr, data
} }
// retriedUploadData calls doUploadData with the given arguments up to three
// times. It returns as soon as one attempt succeeds; every failed attempt is
// logged as a warning. If all attempts fail, the result and error of the last
// attempt are returned. Attempts run back to back with no delay between them.
func retriedUploadData(uploadUrl string, filename string, cipher bool, data []byte, isInputCompressed bool, mtype string, pairMap map[string]string, jwt security.EncodedJwt) (uploadResult *UploadResult, err error) {
	const maxAttempts = 3
	for attempt := 0; attempt < maxAttempts; attempt++ {
		uploadResult, err = doUploadData(uploadUrl, filename, cipher, data, isInputCompressed, mtype, pairMap, jwt)
		if err != nil {
			glog.Warningf("uploading to %s: %v", uploadUrl, err)
			continue
		}
		return uploadResult, nil
	}
	return uploadResult, err
}
func doUploadData(uploadUrl string, filename string, cipher bool, data []byte, isInputCompressed bool, mtype string, pairMap map[string]string, jwt security.EncodedJwt) (uploadResult *UploadResult, err error) { func doUploadData(uploadUrl string, filename string, cipher bool, data []byte, isInputCompressed bool, mtype string, pairMap map[string]string, jwt security.EncodedJwt) (uploadResult *UploadResult, err error) {
contentIsGzipped := isInputCompressed contentIsGzipped := isInputCompressed
shouldGzipNow := false shouldGzipNow := false

9
weed/pb/filer_pb/filer_client.go

@ -7,6 +7,7 @@ import (
"io" "io"
"math" "math"
"os" "os"
"strings"
"time" "time"
"github.com/chrislusf/seaweedfs/weed/glog" "github.com/chrislusf/seaweedfs/weed/glog"
@ -82,7 +83,7 @@ func doList(filerClient FilerClient, fullDirPath util.FullPath, prefix string, f
InclusiveStartFrom: inclusive, InclusiveStartFrom: inclusive,
} }
glog.V(3).Infof("read directory: %v", request)
glog.V(5).Infof("read directory: %v", request)
ctx, cancel := context.WithCancel(context.Background()) ctx, cancel := context.WithCancel(context.Background())
stream, err := client.ListEntries(ctx, request) stream, err := client.ListEntries(ctx, request)
if err != nil { if err != nil {
@ -224,9 +225,15 @@ func Remove(filerClient FilerClient, parentDirectoryPath, name string, isDeleteD
IgnoreRecursiveError: ignoreRecursiveErr, IgnoreRecursiveError: ignoreRecursiveErr,
IsFromOtherCluster: isFromOtherCluster, IsFromOtherCluster: isFromOtherCluster,
}); err != nil { }); err != nil {
if strings.Contains(err.Error(), ErrNotFound.Error()){
return nil
}
return err return err
} else { } else {
if resp.Error != "" { if resp.Error != "" {
if strings.Contains(resp.Error, ErrNotFound.Error()){
return nil
}
return errors.New(resp.Error) return errors.New(resp.Error)
} }
} }

8
weed/pb/filer_pb/filer_pb_helper.go

@ -10,7 +10,7 @@ import (
"github.com/chrislusf/seaweedfs/weed/storage/needle" "github.com/chrislusf/seaweedfs/weed/storage/needle"
) )
func toFileIdObject(fileIdStr string) (*FileId, error) {
func ToFileIdObject(fileIdStr string) (*FileId, error) {
t, err := needle.ParseFileIdFromString(fileIdStr) t, err := needle.ParseFileIdFromString(fileIdStr)
if err != nil { if err != nil {
return nil, err return nil, err
@ -43,14 +43,14 @@ func BeforeEntrySerialization(chunks []*FileChunk) {
for _, chunk := range chunks { for _, chunk := range chunks {
if chunk.FileId != "" { if chunk.FileId != "" {
if fid, err := toFileIdObject(chunk.FileId); err == nil {
if fid, err := ToFileIdObject(chunk.FileId); err == nil {
chunk.Fid = fid chunk.Fid = fid
chunk.FileId = "" chunk.FileId = ""
} }
} }
if chunk.SourceFileId != "" { if chunk.SourceFileId != "" {
if fid, err := toFileIdObject(chunk.SourceFileId); err == nil {
if fid, err := ToFileIdObject(chunk.SourceFileId); err == nil {
chunk.SourceFid = fid chunk.SourceFid = fid
chunk.SourceFileId = "" chunk.SourceFileId = ""
} }
@ -81,7 +81,7 @@ func CreateEntry(client SeaweedFilerClient, request *CreateEntryRequest) error {
return fmt.Errorf("CreateEntry: %v", err) return fmt.Errorf("CreateEntry: %v", err)
} }
if resp.Error != "" { if resp.Error != "" {
glog.V(1).Infof("create entry %s/%s %v: %v", request.Directory, request.Entry.Name, request.OExcl, err)
glog.V(1).Infof("create entry %s/%s %v: %v", request.Directory, request.Entry.Name, request.OExcl, resp.Error)
return fmt.Errorf("CreateEntry : %v", resp.Error) return fmt.Errorf("CreateEntry : %v", resp.Error)
} }
return nil return nil

2
weed/pb/filer_pb/filer_pb_helper_test.go

@ -9,7 +9,7 @@ import (
func TestFileIdSize(t *testing.T) { func TestFileIdSize(t *testing.T) {
fileIdStr := "11745,0293434534cbb9892b" fileIdStr := "11745,0293434534cbb9892b"
fid, _ := toFileIdObject(fileIdStr)
fid, _ := ToFileIdObject(fileIdStr)
bytes, _ := proto.Marshal(fid) bytes, _ := proto.Marshal(fid)
println(len(fileIdStr)) println(len(fileIdStr))

17
weed/pb/volume_server.proto

@ -37,8 +37,12 @@ service VolumeServer {
} }
rpc VolumeMarkReadonly (VolumeMarkReadonlyRequest) returns (VolumeMarkReadonlyResponse) { rpc VolumeMarkReadonly (VolumeMarkReadonlyRequest) returns (VolumeMarkReadonlyResponse) {
} }
rpc VolumeMarkWritable (VolumeMarkWritableRequest) returns (VolumeMarkWritableResponse) {
}
rpc VolumeConfigure (VolumeConfigureRequest) returns (VolumeConfigureResponse) { rpc VolumeConfigure (VolumeConfigureRequest) returns (VolumeConfigureResponse) {
} }
rpc VolumeStatus (VolumeStatusRequest) returns (VolumeStatusResponse) {
}
// copy the .idx .dat files, and mount this volume // copy the .idx .dat files, and mount this volume
rpc VolumeCopy (VolumeCopyRequest) returns (VolumeCopyResponse) { rpc VolumeCopy (VolumeCopyRequest) returns (VolumeCopyResponse) {
@ -200,6 +204,12 @@ message VolumeMarkReadonlyRequest {
message VolumeMarkReadonlyResponse { message VolumeMarkReadonlyResponse {
} }
message VolumeMarkWritableRequest {
uint32 volume_id = 1;
}
message VolumeMarkWritableResponse {
}
message VolumeConfigureRequest { message VolumeConfigureRequest {
uint32 volume_id = 1; uint32 volume_id = 1;
string replication = 2; string replication = 2;
@ -208,6 +218,13 @@ message VolumeConfigureResponse {
string error = 1; string error = 1;
} }
message VolumeStatusRequest {
uint32 volume_id = 1;
}
message VolumeStatusResponse {
bool is_read_only = 1;
}
message VolumeCopyRequest { message VolumeCopyRequest {
uint32 volume_id = 1; uint32 volume_id = 1;
string collection = 2; string collection = 2;

2227
weed/pb/volume_server_pb/volume_server.pb.go
File diff suppressed because it is too large
View File

4
weed/replication/sink/azuresink/azure_sink.go

@ -95,7 +95,7 @@ func (g *AzureSink) CreateEntry(key string, entry *filer_pb.Entry) error {
return nil return nil
} }
totalSize := filer2.TotalSize(entry.Chunks)
totalSize := filer2.FileSize(entry)
chunkViews := filer2.ViewFromChunks(g.filerSource.LookupFileId, entry.Chunks, 0, int64(totalSize)) chunkViews := filer2.ViewFromChunks(g.filerSource.LookupFileId, entry.Chunks, 0, int64(totalSize))
// Create a URL that references a to-be-created blob in your // Create a URL that references a to-be-created blob in your
@ -115,7 +115,7 @@ func (g *AzureSink) CreateEntry(key string, entry *filer_pb.Entry) error {
} }
var writeErr error var writeErr error
readErr := util.ReadUrlAsStream(fileUrl, nil, false, chunk.IsFullChunk(), chunk.Offset, int(chunk.Size), func(data []byte) {
readErr := util.ReadUrlAsStream(fileUrl+"?readDeleted=true", nil, false, chunk.IsFullChunk(), chunk.Offset, int(chunk.Size), func(data []byte) {
_, writeErr = appendBlobURL.AppendBlock(context.Background(), bytes.NewReader(data), azblob.AppendBlobAccessConditions{}, nil) _, writeErr = appendBlobURL.AppendBlock(context.Background(), bytes.NewReader(data), azblob.AppendBlobAccessConditions{}, nil)
}) })

4
weed/replication/sink/b2sink/b2_sink.go

@ -84,7 +84,7 @@ func (g *B2Sink) CreateEntry(key string, entry *filer_pb.Entry) error {
return nil return nil
} }
totalSize := filer2.TotalSize(entry.Chunks)
totalSize := filer2.FileSize(entry)
chunkViews := filer2.ViewFromChunks(g.filerSource.LookupFileId, entry.Chunks, 0, int64(totalSize)) chunkViews := filer2.ViewFromChunks(g.filerSource.LookupFileId, entry.Chunks, 0, int64(totalSize))
bucket, err := g.client.Bucket(context.Background(), g.bucket) bucket, err := g.client.Bucket(context.Background(), g.bucket)
@ -103,7 +103,7 @@ func (g *B2Sink) CreateEntry(key string, entry *filer_pb.Entry) error {
} }
var writeErr error var writeErr error
readErr := util.ReadUrlAsStream(fileUrl, nil, false, chunk.IsFullChunk(), chunk.Offset, int(chunk.Size), func(data []byte) {
readErr := util.ReadUrlAsStream(fileUrl+"?readDeleted=true", nil, false, chunk.IsFullChunk(), chunk.Offset, int(chunk.Size), func(data []byte) {
_, err := writer.Write(data) _, err := writer.Write(data)
if err != nil { if err != nil {
writeErr = err writeErr = err

4
weed/replication/sink/gcssink/gcs_sink.go

@ -89,7 +89,7 @@ func (g *GcsSink) CreateEntry(key string, entry *filer_pb.Entry) error {
return nil return nil
} }
totalSize := filer2.TotalSize(entry.Chunks)
totalSize := filer2.FileSize(entry)
chunkViews := filer2.ViewFromChunks(g.filerSource.LookupFileId, entry.Chunks, 0, int64(totalSize)) chunkViews := filer2.ViewFromChunks(g.filerSource.LookupFileId, entry.Chunks, 0, int64(totalSize))
wc := g.client.Bucket(g.bucket).Object(key).NewWriter(context.Background()) wc := g.client.Bucket(g.bucket).Object(key).NewWriter(context.Background())
@ -101,7 +101,7 @@ func (g *GcsSink) CreateEntry(key string, entry *filer_pb.Entry) error {
return err return err
} }
err = util.ReadUrlAsStream(fileUrl, nil, false, chunk.IsFullChunk(), chunk.Offset, int(chunk.Size), func(data []byte) {
err = util.ReadUrlAsStream(fileUrl+"?readDeleted=true", nil, false, chunk.IsFullChunk(), chunk.Offset, int(chunk.Size), func(data []byte) {
wc.Write(data) wc.Write(data)
}) })

2
weed/replication/sink/s3sink/s3_sink.go

@ -107,7 +107,7 @@ func (s3sink *S3Sink) CreateEntry(key string, entry *filer_pb.Entry) error {
return err return err
} }
totalSize := filer2.TotalSize(entry.Chunks)
totalSize := filer2.FileSize(entry)
chunkViews := filer2.ViewFromChunks(s3sink.filerSource.LookupFileId, entry.Chunks, 0, int64(totalSize)) chunkViews := filer2.ViewFromChunks(s3sink.filerSource.LookupFileId, entry.Chunks, 0, int64(totalSize))
parts := make([]*s3.CompletedPart, len(chunkViews)) parts := make([]*s3.CompletedPart, len(chunkViews))

Some files were not shown because too many files changed in this diff

Loading…
Cancel
Save