Merge pull request #2 from chrislusf/master

merge

Branch: pull/1392/head
Author: yourchanges, 4 years ago (committed by GitHub)
Parent commit: 64df5207db
78 changed files (changed line counts in parentheses):

  1. README.md (2)
  2. k8s/seaweedfs/Chart.yaml (2)
  3. k8s/seaweedfs/values.yaml (2)
  4. other/java/client/pom.xml (2)
  5. other/java/client/pom.xml.deploy (170)
  6. other/java/client/pom_debug.xml (2)
  7. other/java/client/src/main/java/seaweedfs/client/ByteBufferPool.java (22)
  8. other/java/client/src/main/java/seaweedfs/client/ChunkCache.java (11)
  9. other/java/client/src/main/java/seaweedfs/client/FilerClient.java (5)
  10. other/java/client/src/main/java/seaweedfs/client/FilerGrpcClient.java (4)
  11. other/java/client/src/main/java/seaweedfs/client/SeaweedRead.java (41)
  12. other/java/client/src/main/java/seaweedfs/client/SeaweedUtil.java (27)
  13. other/java/client/src/main/java/seaweedfs/client/SeaweedWrite.java (20)
  14. other/java/client/src/main/proto/filer.proto (5)
  15. other/java/hdfs2/dependency-reduced-pom.xml (2)
  16. other/java/hdfs2/pom.xml (2)
  17. other/java/hdfs2/src/main/java/seaweed/hdfs/ReadBuffer.java (137)
  18. other/java/hdfs2/src/main/java/seaweed/hdfs/ReadBufferManager.java (394)
  19. other/java/hdfs2/src/main/java/seaweed/hdfs/ReadBufferWorker.java (70)
  20. other/java/hdfs2/src/main/java/seaweed/hdfs/SeaweedAbstractFileSystem.java (22)
  21. other/java/hdfs2/src/main/java/seaweed/hdfs/SeaweedFileSystem.java (28)
  22. other/java/hdfs2/src/main/java/seaweed/hdfs/SeaweedFileSystemStore.java (12)
  23. other/java/hdfs2/src/main/java/seaweed/hdfs/SeaweedInputStream.java (183)
  24. other/java/hdfs2/src/main/java/seaweed/hdfs/SeaweedOutputStream.java (89)
  25. other/java/hdfs3/dependency-reduced-pom.xml (2)
  26. other/java/hdfs3/pom.xml (2)
  27. other/java/hdfs3/src/main/java/seaweed/hdfs/ReadBuffer.java (137)
  28. other/java/hdfs3/src/main/java/seaweed/hdfs/ReadBufferManager.java (394)
  29. other/java/hdfs3/src/main/java/seaweed/hdfs/ReadBufferWorker.java (70)
  30. other/java/hdfs3/src/main/java/seaweed/hdfs/SeaweedAbstractFileSystem.java (22)
  31. other/java/hdfs3/src/main/java/seaweed/hdfs/SeaweedFileSystem.java (26)
  32. other/java/hdfs3/src/main/java/seaweed/hdfs/SeaweedFileSystemStore.java (12)
  33. other/java/hdfs3/src/main/java/seaweed/hdfs/SeaweedInputStream.java (183)
  34. other/java/hdfs3/src/main/java/seaweed/hdfs/SeaweedOutputStream.java (98)
  35. test/data/sample.idx (0)
  36. test/s3/basic/basic_test.go (111)
  37. weed/command/backup.go (4)
  38. weed/command/compact.go (3)
  39. weed/command/download.go (2)
  40. weed/command/export.go (4)
  41. weed/command/filer.go (2)
  42. weed/command/fix.go (5)
  43. weed/command/master.go (31)
  44. weed/command/mount_std.go (10)
  45. weed/command/server.go (2)
  46. weed/command/upload.go (2)
  47. weed/command/volume.go (2)
  48. weed/command/webdav.go (2)
  49. weed/filer2/filer.go (43)
  50. weed/filer2/filer_delete_entry.go (11)
  51. weed/filer2/filer_notify.go (9)
  52. weed/filer2/filerstore.go (57)
  53. weed/filer2/leveldb2/leveldb2_local_store.go (43)
  54. weed/filer2/meta_aggregator.go (46)
  55. weed/filer2/meta_replay.go (37)
  56. weed/filesys/meta_cache/meta_cache.go (2)
  57. weed/filesys/meta_cache/meta_cache_init.go (12)
  58. weed/filesys/meta_cache/meta_cache_subscribe.go (4)
  59. weed/filesys/unimplemented.go (2)
  60. weed/filesys/wfs.go (20)
  61. weed/operation/needle_parse_test.go (5)
  62. weed/operation/upload_content.go (8)
  63. weed/pb/filer.proto (5)
  64. weed/pb/filer_pb/filer.pb.go (1162)
  65. weed/s3api/auth_credentials.go (8)
  66. weed/server/common.go (4)
  67. weed/server/filer_grpc_server_sub_meta.go (23)
  68. weed/server/filer_server.go (12)
  69. weed/server/master_server.go (12)
  70. weed/storage/erasure_coding/ec_decoder.go (2)
  71. weed/storage/erasure_coding/ec_volume_test.go (4)
  72. weed/storage/needle_map/compact_map_perf_test.go (2)
  73. weed/storage/store.go (9)
  74. weed/util/bounded_tree/bounded_tree_test.go (5)
  75. weed/util/compression.go (1)
  76. weed/util/constants.go (2)
  77. weed/util/file_util.go (20)
  78. weed/util/log_buffer/log_buffer.go (13)

README.md (2)

@@ -123,7 +123,7 @@ On top of the object store, optional [Filer] can support directories and POSIX a
 * [Kubernetes CSI Driver][SeaweedFsCsiDriver] A Container Storage Interface (CSI) Driver. [![Docker Pulls](https://img.shields.io/docker/pulls/chrislusf/seaweedfs-csi-driver.svg?maxAge=604800)](https://hub.docker.com/r/chrislusf/seaweedfs-csi-driver/)
 [Filer]: https://github.com/chrislusf/seaweedfs/wiki/Directories-and-Files
-[Mount]: https://github.com/chrislusf/seaweedfs/wiki/Mount
+[Mount]: https://github.com/chrislusf/seaweedfs/wiki/FUSE-Mount
 [AmazonS3API]: https://github.com/chrislusf/seaweedfs/wiki/Amazon-S3-API
 [BackupToCloud]: https://github.com/chrislusf/seaweedfs/wiki/Backup-to-Cloud
 [Hadoop]: https://github.com/chrislusf/seaweedfs/wiki/Hadoop-Compatible-File-System

k8s/seaweedfs/Chart.yaml (2)

@@ -1,4 +1,4 @@
 apiVersion: v1
 description: SeaweedFS
 name: seaweedfs
-version: 1.84
+version: 1.85

k8s/seaweedfs/values.yaml (2)

@@ -4,7 +4,7 @@ global:
   registry: ""
   repository: ""
   imageName: chrislusf/seaweedfs
-  imageTag: "1.84"
+  imageTag: "1.85"
   imagePullPolicy: IfNotPresent
   imagePullSecrets: imagepullsecret
   restartPolicy: Always

other/java/client/pom.xml (2)

@@ -5,7 +5,7 @@
     <groupId>com.github.chrislusf</groupId>
     <artifactId>seaweedfs-client</artifactId>
-    <version>1.2.9</version>
+    <version>1.3.6</version>
     <parent>
         <groupId>org.sonatype.oss</groupId>

other/java/client/pom.xml.deploy (170, new file)

@@ -0,0 +1,170 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>com.github.chrislusf</groupId>
<artifactId>seaweedfs-client</artifactId>
<version>1.3.6</version>
<parent>
<groupId>org.sonatype.oss</groupId>
<artifactId>oss-parent</artifactId>
<version>9</version>
</parent>
<properties>
<protobuf.version>3.9.1</protobuf.version>
<!-- follow https://github.com/grpc/grpc-java -->
<grpc.version>1.23.0</grpc.version>
<guava.version>28.0-jre</guava.version>
</properties>
<dependencies>
<dependency>
<groupId>com.moandjiezana.toml</groupId>
<artifactId>toml4j</artifactId>
<version>0.7.2</version>
</dependency>
<!-- https://mvnrepository.com/artifact/com.google.protobuf/protobuf-java -->
<dependency>
<groupId>com.google.protobuf</groupId>
<artifactId>protobuf-java</artifactId>
<version>${protobuf.version}</version>
</dependency>
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
<version>${guava.version}</version>
</dependency>
<dependency>
<groupId>io.grpc</groupId>
<artifactId>grpc-netty-shaded</artifactId>
<version>${grpc.version}</version>
</dependency>
<dependency>
<groupId>io.grpc</groupId>
<artifactId>grpc-protobuf</artifactId>
<version>${grpc.version}</version>
</dependency>
<dependency>
<groupId>io.grpc</groupId>
<artifactId>grpc-stub</artifactId>
<version>${grpc.version}</version>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
<version>1.7.25</version>
</dependency>
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpmime</artifactId>
<version>4.5.6</version>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.12</version>
<scope>test</scope>
</dependency>
</dependencies>
<distributionManagement>
<snapshotRepository>
<id>ossrh</id>
<url>https://oss.sonatype.org/content/repositories/snapshots</url>
</snapshotRepository>
</distributionManagement>
<build>
<extensions>
<extension>
<groupId>kr.motd.maven</groupId>
<artifactId>os-maven-plugin</artifactId>
<version>1.6.2</version>
</extension>
</extensions>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<configuration>
<source>8</source>
<target>8</target>
</configuration>
</plugin>
<plugin>
<groupId>org.xolstice.maven.plugins</groupId>
<artifactId>protobuf-maven-plugin</artifactId>
<version>0.6.1</version>
<configuration>
<protocArtifact>com.google.protobuf:protoc:${protobuf.version}:exe:${os.detected.classifier}
</protocArtifact>
<pluginId>grpc-java</pluginId>
<pluginArtifact>io.grpc:protoc-gen-grpc-java:${grpc.version}:exe:${os.detected.classifier}
</pluginArtifact>
</configuration>
<executions>
<execution>
<goals>
<goal>compile</goal>
<goal>compile-custom</goal>
</goals>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-gpg-plugin</artifactId>
<version>1.5</version>
<executions>
<execution>
<id>sign-artifacts</id>
<phase>verify</phase>
<goals>
<goal>sign</goal>
</goals>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.sonatype.plugins</groupId>
<artifactId>nexus-staging-maven-plugin</artifactId>
<version>1.6.7</version>
<extensions>true</extensions>
<configuration>
<serverId>ossrh</serverId>
<nexusUrl>https://oss.sonatype.org/</nexusUrl>
<autoReleaseAfterClose>true</autoReleaseAfterClose>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-source-plugin</artifactId>
<version>2.2.1</version>
<executions>
<execution>
<id>attach-sources</id>
<goals>
<goal>jar-no-fork</goal>
</goals>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-javadoc-plugin</artifactId>
<version>2.9.1</version>
<executions>
<execution>
<id>attach-javadocs</id>
<goals>
<goal>jar</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>

other/java/client/pom_debug.xml (2)

@@ -5,7 +5,7 @@
     <groupId>com.github.chrislusf</groupId>
     <artifactId>seaweedfs-client</artifactId>
-    <version>1.2.9</version>
+    <version>1.3.6</version>
     <parent>
         <groupId>org.sonatype.oss</groupId>

other/java/client/src/main/java/seaweedfs/client/ByteBufferPool.java (22, new file)

@@ -0,0 +1,22 @@
package seaweedfs.client;

import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.List;

public class ByteBufferPool {

    static List<ByteBuffer> bufferList = new ArrayList<>();

    public static synchronized ByteBuffer request(int bufferSize) {
        if (bufferList.isEmpty()) {
            return ByteBuffer.allocate(bufferSize);
        }
        return bufferList.remove(bufferList.size() - 1);
    }

    public static synchronized void release(ByteBuffer obj) {
        bufferList.add(obj);
    }
}
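
The pool is a plain LIFO free list guarded by the class lock: request() reuses the most recently released buffer when one is available, and only allocates when the list is empty. A minimal usage sketch (the 8 MiB size and the try/finally pairing are illustrative, not taken from this commit):

import java.nio.ByteBuffer;
import seaweedfs.client.ByteBufferPool;

public class ByteBufferPoolExample { // hypothetical caller
    public static void main(String[] args) {
        // Borrow a buffer; the pool allocates only when its free list is empty.
        ByteBuffer buf = ByteBufferPool.request(8 * 1024 * 1024);
        try {
            buf.clear();
            // ... fill and drain the buffer ...
        } finally {
            ByteBufferPool.release(buf); // return it so later requests can reuse it
        }
    }
}

Note that release() does not record the buffer's capacity, so the pool implicitly assumes all callers request the same bufferSize; mixing sizes could hand back an undersized buffer.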

other/java/client/src/main/java/seaweedfs/client/ChunkCache.java (11)

@@ -7,9 +7,12 @@ import java.util.concurrent.TimeUnit;

 public class ChunkCache {

-    private final Cache<String, byte[]> cache;
+    private Cache<String, byte[]> cache = null;

     public ChunkCache(int maxEntries) {
+        if (maxEntries == 0) {
+            return;
+        }
         this.cache = CacheBuilder.newBuilder()
                 .maximumSize(maxEntries)
                 .expireAfterAccess(1, TimeUnit.HOURS)
@@ -17,10 +20,16 @@ public class ChunkCache {
     }

     public byte[] getChunk(String fileId) {
+        if (this.cache == null) {
+            return null;
+        }
         return this.cache.getIfPresent(fileId);
     }

     public void setChunk(String fileId, byte[] data) {
+        if (this.cache == null) {
+            return;
+        }
         this.cache.put(fileId, data);
     }
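
With the new null guards, a ChunkCache built with maxEntries == 0 degrades to a no-op instead of throwing a NullPointerException, which is how caching can be switched off entirely. A small hedged sketch (the file id value is illustrative):

import seaweedfs.client.ChunkCache;

public class ChunkCacheExample { // hypothetical caller
    public static void main(String[] args) {
        ChunkCache disabled = new ChunkCache(0);                  // caching turned off
        disabled.setChunk("3,01637037d6", new byte[]{1, 2, 3});   // silently ignored
        assert disabled.getChunk("3,01637037d6") == null;         // always a miss

        ChunkCache enabled = new ChunkCache(16);                  // cache up to 16 chunks
        enabled.setChunk("3,01637037d6", new byte[]{1, 2, 3});
        assert enabled.getChunk("3,01637037d6") != null;
    }
}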

other/java/client/src/main/java/seaweedfs/client/FilerClient.java (5)

@@ -156,7 +156,7 @@ public class FilerClient {
         List<FilerProto.Entry> results = new ArrayList<FilerProto.Entry>();
         String lastFileName = "";
         for (int limit = Integer.MAX_VALUE; limit > 0; ) {
-            List<FilerProto.Entry> t = listEntries(path, "", lastFileName, 1024);
+            List<FilerProto.Entry> t = listEntries(path, "", lastFileName, 1024, false);
             if (t == null) {
                 break;
             }
@@ -173,11 +173,12 @@ public class FilerClient {
         return results;
     }

-    public List<FilerProto.Entry> listEntries(String path, String entryPrefix, String lastEntryName, int limit) {
+    public List<FilerProto.Entry> listEntries(String path, String entryPrefix, String lastEntryName, int limit, boolean includeLastEntry) {
         Iterator<FilerProto.ListEntriesResponse> iter = filerGrpcClient.getBlockingStub().listEntries(FilerProto.ListEntriesRequest.newBuilder()
                 .setDirectory(path)
                 .setPrefix(entryPrefix)
                 .setStartFromFileName(lastEntryName)
                 .setInclusiveStartFrom(includeLastEntry)
                 .setLimit(limit)
                 .build());
         List<FilerProto.Entry> entries = new ArrayList<>();
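
The new includeLastEntry flag maps to inclusive_start_from in the gRPC request, so a paginated scan can choose whether to restart at or strictly after the last name it saw. A hedged sketch of such a scan (the directory path and the surrounding loop are assumptions, not from this commit):

import seaweedfs.client.FilerClient;
import seaweedfs.client.FilerProto;

import java.util.List;

public class ListEntriesExample { // illustrative pagination loop
    static void scan(FilerClient filerClient) {
        String lastName = "";
        while (true) {
            // false => start strictly after lastName, so pages never overlap
            List<FilerProto.Entry> page =
                    filerClient.listEntries("/buckets/b1", "", lastName, 1024, false);
            if (page == null || page.isEmpty()) {
                break;
            }
            for (FilerProto.Entry entry : page) {
                lastName = entry.getName();
                // ... process entry ...
            }
        }
    }
}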

other/java/client/src/main/java/seaweedfs/client/FilerGrpcClient.java (4)

@@ -39,8 +39,10 @@ public class FilerGrpcClient {

     public FilerGrpcClient(String host, int grpcPort, SslContext sslContext) {
         this(sslContext == null ?
-                ManagedChannelBuilder.forAddress(host, grpcPort).usePlaintext() :
+                ManagedChannelBuilder.forAddress(host, grpcPort).usePlaintext()
+                        .maxInboundMessageSize(1024 * 1024 * 1024) :
                 NettyChannelBuilder.forAddress(host, grpcPort)
+                        .maxInboundMessageSize(1024 * 1024 * 1024)
                         .negotiationType(NegotiationType.TLS)
                         .sslContext(sslContext));
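
Both the plaintext and the TLS channel now accept inbound messages up to 1 GiB instead of grpc-java's 4 MiB default, presumably so that large filer replies are not rejected. A hedged sketch of the equivalent standalone setup (host and port are placeholders):

import io.grpc.ManagedChannel;
import io.grpc.ManagedChannelBuilder;

public class ChannelExample { // illustrative, mirrors the plaintext branch of the diff
    public static void main(String[] args) {
        ManagedChannel channel = ManagedChannelBuilder.forAddress("localhost", 18888)
                .usePlaintext()
                .maxInboundMessageSize(1024 * 1024 * 1024) // raise the 4 MiB gRPC default to 1 GiB
                .build();
        // ... create stubs from the channel ...
        channel.shutdown();
    }
}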

other/java/client/src/main/java/seaweedfs/client/SeaweedRead.java (41)

@@ -4,6 +4,7 @@ import org.apache.http.HttpEntity;
 import org.apache.http.HttpHeaders;
 import org.apache.http.HttpResponse;
 import org.apache.http.client.HttpClient;
+import org.apache.http.client.methods.CloseableHttpResponse;
 import org.apache.http.client.methods.HttpGet;
 import org.apache.http.impl.client.DefaultHttpClient;
 import org.apache.http.util.EntityUtils;
@@ -18,7 +19,7 @@ public class SeaweedRead {

     private static final Logger LOG = LoggerFactory.getLogger(SeaweedRead.class);

-    static ChunkCache chunkCache = new ChunkCache(1000);
+    static ChunkCache chunkCache = new ChunkCache(16);

     // returns bytesRead
     public static long read(FilerGrpcClient filerGrpcClient, List<VisibleInterval> visibleIntervals,
@@ -78,7 +79,6 @@ public class SeaweedRead {

     private static byte[] doFetchFullChunkData(ChunkView chunkView, FilerProto.Locations locations) throws IOException {

-        HttpClient client = new DefaultHttpClient();
         HttpGet request = new HttpGet(
                 String.format("http://%s/%s", locations.getLocations(0).getUrl(), chunkView.fileId));
@@ -86,20 +86,21 @@ public class SeaweedRead {

         byte[] data = null;

+        CloseableHttpResponse response = SeaweedUtil.getClosableHttpClient().execute(request);
         try {
-            HttpResponse response = client.execute(request);
             HttpEntity entity = response.getEntity();
             data = EntityUtils.toByteArray(entity);
+            EntityUtils.consume(entity);
         } finally {
-            if (client instanceof Closeable) {
-                Closeable t = (Closeable) client;
-                t.close();
-            }
+            response.close();
+            request.releaseConnection();
         }

-        if (chunkView.isGzipped) {
+        if (chunkView.isCompressed) {
             data = Gzip.decompress(data);
         }
@@ -129,7 +130,7 @@ public class SeaweedRead {
                     offset,
                     isFullChunk,
                     chunk.cipherKey,
-                    chunk.isGzipped
+                    chunk.isCompressed
             ));
             offset = Math.min(chunk.stop, stop);
         }
@@ -165,7 +166,7 @@ public class SeaweedRead {
                 chunk.getMtime(),
                 true,
                 chunk.getCipherKey().toByteArray(),
-                chunk.getIsGzipped()
+                chunk.getIsCompressed()
         );

         // easy cases to speed up
@@ -187,7 +188,7 @@ public class SeaweedRead {
                     v.modifiedTime,
                     false,
                     v.cipherKey,
-                    v.isGzipped
+                    v.isCompressed
             ));
         }
         long chunkStop = chunk.getOffset() + chunk.getSize();
@@ -199,7 +200,7 @@ public class SeaweedRead {
                     v.modifiedTime,
                     false,
                     v.cipherKey,
-                    v.isGzipped
+                    v.isCompressed
             ));
         }
         if (chunkStop <= v.start || v.stop <= chunk.getOffset()) {
@@ -247,16 +248,16 @@ public class SeaweedRead {
         public final String fileId;
         public final boolean isFullChunk;
         public final byte[] cipherKey;
-        public final boolean isGzipped;
+        public final boolean isCompressed;

-        public VisibleInterval(long start, long stop, String fileId, long modifiedTime, boolean isFullChunk, byte[] cipherKey, boolean isGzipped) {
+        public VisibleInterval(long start, long stop, String fileId, long modifiedTime, boolean isFullChunk, byte[] cipherKey, boolean isCompressed) {
             this.start = start;
             this.stop = stop;
             this.modifiedTime = modifiedTime;
             this.fileId = fileId;
             this.isFullChunk = isFullChunk;
             this.cipherKey = cipherKey;
-            this.isGzipped = isGzipped;
+            this.isCompressed = isCompressed;
         }

         @Override
@@ -268,7 +269,7 @@ public class SeaweedRead {
                     ", fileId='" + fileId + '\'' +
                     ", isFullChunk=" + isFullChunk +
                     ", cipherKey=" + Arrays.toString(cipherKey) +
-                    ", isGzipped=" + isGzipped +
+                    ", isCompressed=" + isCompressed +
                     '}';
         }
     }
@@ -280,16 +281,16 @@ public class SeaweedRead {
         public final long logicOffset;
         public final boolean isFullChunk;
         public final byte[] cipherKey;
-        public final boolean isGzipped;
+        public final boolean isCompressed;

-        public ChunkView(String fileId, long offset, long size, long logicOffset, boolean isFullChunk, byte[] cipherKey, boolean isGzipped) {
+        public ChunkView(String fileId, long offset, long size, long logicOffset, boolean isFullChunk, byte[] cipherKey, boolean isCompressed) {
             this.fileId = fileId;
             this.offset = offset;
             this.size = size;
             this.logicOffset = logicOffset;
             this.isFullChunk = isFullChunk;
             this.cipherKey = cipherKey;
-            this.isGzipped = isGzipped;
+            this.isCompressed = isCompressed;
         }

         @Override
@@ -301,7 +302,7 @@ public class SeaweedRead {
                     ", logicOffset=" + logicOffset +
                     ", isFullChunk=" + isFullChunk +
                     ", cipherKey=" + Arrays.toString(cipherKey) +
-                    ", isGzipped=" + isGzipped +
+                    ", isCompressed=" + isCompressed +
                     '}';
         }
     }

other/java/client/src/main/java/seaweedfs/client/SeaweedUtil.java (27, new file)

@@ -0,0 +1,27 @@
package seaweedfs.client;

import org.apache.http.impl.DefaultConnectionReuseStrategy;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.DefaultConnectionKeepAliveStrategy;
import org.apache.http.impl.client.HttpClientBuilder;
import org.apache.http.impl.conn.PoolingHttpClientConnectionManager;

public class SeaweedUtil {

    static PoolingHttpClientConnectionManager cm = new PoolingHttpClientConnectionManager();

    static {
        // Increase max total connection to 200
        cm.setMaxTotal(200);
        // Increase default max connection per route to 20
        cm.setDefaultMaxPerRoute(20);
    }

    public static CloseableHttpClient getClosableHttpClient() {
        return HttpClientBuilder.create()
                .setConnectionManager(cm)
                .setConnectionReuseStrategy(DefaultConnectionReuseStrategy.INSTANCE)
                .setKeepAliveStrategy(DefaultConnectionKeepAliveStrategy.INSTANCE)
                .build();
    }
}
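
This pooled client is what SeaweedRead and SeaweedWrite now call instead of constructing a DefaultHttpClient per request. A hedged sketch of the intended request pattern, mirroring the SeaweedRead diff above (the wrapper class and URL are illustrative):

import org.apache.http.HttpEntity;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.util.EntityUtils;
import seaweedfs.client.SeaweedUtil;

import java.io.IOException;

public class PooledFetchExample { // hypothetical caller, not part of this commit
    public static byte[] fetch(String url) throws IOException {
        HttpGet request = new HttpGet(url);
        CloseableHttpResponse response = SeaweedUtil.getClosableHttpClient().execute(request);
        try {
            HttpEntity entity = response.getEntity();
            byte[] data = EntityUtils.toByteArray(entity);
            EntityUtils.consume(entity); // fully drain so the connection can be reused
            return data;
        } finally {
            response.close();
            request.releaseConnection(); // return the connection to the pool
        }
    }
}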

other/java/client/src/main/java/seaweedfs/client/SeaweedWrite.java (20)

@@ -3,10 +3,11 @@ package seaweedfs.client;

 import com.google.protobuf.ByteString;
 import org.apache.http.HttpResponse;
 import org.apache.http.client.HttpClient;
+import org.apache.http.client.methods.CloseableHttpResponse;
 import org.apache.http.client.methods.HttpPost;
 import org.apache.http.entity.mime.HttpMultipartMode;
 import org.apache.http.entity.mime.MultipartEntityBuilder;
-import org.apache.http.impl.client.DefaultHttpClient;
+import org.apache.http.util.EntityUtils;

 import java.io.ByteArrayInputStream;
 import java.io.Closeable;
@@ -16,7 +17,7 @@ import java.security.SecureRandom;

 public class SeaweedWrite {

-    private static SecureRandom random = new SecureRandom();
+    private static final SecureRandom random = new SecureRandom();

     public static void writeData(FilerProto.Entry.Builder entry,
                                  final String replication,
@@ -63,7 +64,7 @@ public class SeaweedWrite {
     public static void writeMeta(final FilerGrpcClient filerGrpcClient,
                                  final String parentDirectory, final FilerProto.Entry.Builder entry) {
-        synchronized (entry){
+        synchronized (entry) {
             filerGrpcClient.getBlockingStub().createEntry(
                     FilerProto.CreateEntryRequest.newBuilder()
                             .setDirectory(parentDirectory)
@@ -79,8 +80,6 @@ public class SeaweedWrite {
                                       final long bytesOffset, final long bytesLength,
                                       byte[] cipherKey) throws IOException {

-        HttpClient client = new DefaultHttpClient();
-
         InputStream inputStream = null;
         if (cipherKey == null || cipherKey.length == 0) {
             inputStream = new ByteArrayInputStream(bytes, (int) bytesOffset, (int) bytesLength);
@@ -103,8 +102,9 @@ public class SeaweedWrite {
                 .addBinaryBody("upload", inputStream)
                 .build());

+        CloseableHttpResponse response = SeaweedUtil.getClosableHttpClient().execute(post);
+
         try {
-            HttpResponse response = client.execute(post);

             String etag = response.getLastHeader("ETag").getValue();
@@ -112,12 +112,12 @@ public class SeaweedWrite {
                 etag = etag.substring(1, etag.length() - 1);
             }

+            EntityUtils.consume(response.getEntity());
+
             return etag;
         } finally {
-            if (client instanceof Closeable) {
-                Closeable t = (Closeable) client;
-                t.close();
-            }
+            response.close();
+            post.releaseConnection();
         }
     }

other/java/client/src/main/proto/filer.proto (5)

@@ -115,6 +115,11 @@ message FileChunk {
     FileId source_fid = 8;
     bytes cipher_key = 9;
     bool is_compressed = 10;
+    bool is_chunk_manifest = 11; // content is a list of FileChunks
+}
+
+message FileChunkManifest {
+    repeated FileChunk chunks = 1;
 }

 message FileId {
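
The new is_chunk_manifest flag marks a chunk whose stored bytes are themselves a serialized FileChunkManifest, letting very large files nest their chunk lists. Once the Java classes are regenerated from this proto, the manifest can be decoded with the standard protobuf API; a hedged sketch (the expand helper and its caller are hypothetical, and chunkBytes is assumed to be the already-fetched, decompressed manifest content):

import seaweedfs.client.FilerProto;

import java.util.List;

public class ManifestExample { // illustrative only
    // Expand a manifest chunk into its underlying data chunks.
    static List<FilerProto.FileChunk> expand(FilerProto.FileChunk chunk, byte[] chunkBytes) throws Exception {
        if (!chunk.getIsChunkManifest()) {
            throw new IllegalArgumentException("not a manifest chunk");
        }
        FilerProto.FileChunkManifest manifest = FilerProto.FileChunkManifest.parseFrom(chunkBytes);
        return manifest.getChunksList();
    }
}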

other/java/hdfs2/dependency-reduced-pom.xml (2)

@@ -127,7 +127,7 @@
     </snapshotRepository>
   </distributionManagement>
   <properties>
-    <seaweedfs.client.version>1.2.9</seaweedfs.client.version>
+    <seaweedfs.client.version>1.3.6</seaweedfs.client.version>
     <hadoop.version>2.9.2</hadoop.version>
   </properties>
 </project>

other/java/hdfs2/pom.xml (2)

@@ -5,7 +5,7 @@
     <modelVersion>4.0.0</modelVersion>

     <properties>
-        <seaweedfs.client.version>1.2.9</seaweedfs.client.version>
+        <seaweedfs.client.version>1.3.6</seaweedfs.client.version>
         <hadoop.version>2.9.2</hadoop.version>
     </properties>

other/java/hdfs2/src/main/java/seaweed/hdfs/ReadBuffer.java (137, deleted)

@@ -1,137 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package seaweed.hdfs;
import java.util.concurrent.CountDownLatch;
class ReadBuffer {
private SeaweedInputStream stream;
private long offset; // offset within the file for the buffer
private int length; // actual length, set after the buffer is filled
private int requestedLength; // requested length of the read
private byte[] buffer; // the buffer itself
private int bufferindex = -1; // index in the buffers array in Buffer manager
private ReadBufferStatus status; // status of the buffer
private CountDownLatch latch = null; // signaled when the buffer is done reading, so any client
// waiting on this buffer gets unblocked
// fields to help with eviction logic
private long timeStamp = 0; // tick at which buffer became available to read
private boolean isFirstByteConsumed = false;
private boolean isLastByteConsumed = false;
private boolean isAnyByteConsumed = false;
public SeaweedInputStream getStream() {
return stream;
}
public void setStream(SeaweedInputStream stream) {
this.stream = stream;
}
public long getOffset() {
return offset;
}
public void setOffset(long offset) {
this.offset = offset;
}
public int getLength() {
return length;
}
public void setLength(int length) {
this.length = length;
}
public int getRequestedLength() {
return requestedLength;
}
public void setRequestedLength(int requestedLength) {
this.requestedLength = requestedLength;
}
public byte[] getBuffer() {
return buffer;
}
public void setBuffer(byte[] buffer) {
this.buffer = buffer;
}
public int getBufferindex() {
return bufferindex;
}
public void setBufferindex(int bufferindex) {
this.bufferindex = bufferindex;
}
public ReadBufferStatus getStatus() {
return status;
}
public void setStatus(ReadBufferStatus status) {
this.status = status;
}
public CountDownLatch getLatch() {
return latch;
}
public void setLatch(CountDownLatch latch) {
this.latch = latch;
}
public long getTimeStamp() {
return timeStamp;
}
public void setTimeStamp(long timeStamp) {
this.timeStamp = timeStamp;
}
public boolean isFirstByteConsumed() {
return isFirstByteConsumed;
}
public void setFirstByteConsumed(boolean isFirstByteConsumed) {
this.isFirstByteConsumed = isFirstByteConsumed;
}
public boolean isLastByteConsumed() {
return isLastByteConsumed;
}
public void setLastByteConsumed(boolean isLastByteConsumed) {
this.isLastByteConsumed = isLastByteConsumed;
}
public boolean isAnyByteConsumed() {
return isAnyByteConsumed;
}
public void setAnyByteConsumed(boolean isAnyByteConsumed) {
this.isAnyByteConsumed = isAnyByteConsumed;
}
}

other/java/hdfs2/src/main/java/seaweed/hdfs/ReadBufferManager.java (394, deleted)

@@ -1,394 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
* <p>
* http://www.apache.org/licenses/LICENSE-2.0
* <p>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package seaweed.hdfs;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.Collection;
import java.util.LinkedList;
import java.util.Queue;
import java.util.Stack;
import java.util.concurrent.CountDownLatch;
/**
* The Read Buffer Manager for Rest AbfsClient.
*/
final class ReadBufferManager {
private static final Logger LOGGER = LoggerFactory.getLogger(ReadBufferManager.class);
private static final int NUM_BUFFERS = 16;
private static final int BLOCK_SIZE = 4 * 1024 * 1024;
private static final int NUM_THREADS = 8;
private static final int THRESHOLD_AGE_MILLISECONDS = 3000; // have to see if 3 seconds is a good threshold
private Thread[] threads = new Thread[NUM_THREADS];
private byte[][] buffers; // array of byte[] buffers, to hold the data that is read
private Stack<Integer> freeList = new Stack<>(); // indices in buffers[] array that are available
private Queue<ReadBuffer> readAheadQueue = new LinkedList<>(); // queue of requests that are not picked up by any worker thread yet
private LinkedList<ReadBuffer> inProgressList = new LinkedList<>(); // requests being processed by worker threads
private LinkedList<ReadBuffer> completedReadList = new LinkedList<>(); // buffers available for reading
private static final ReadBufferManager BUFFER_MANAGER; // singleton, initialized in static initialization block
static {
BUFFER_MANAGER = new ReadBufferManager();
BUFFER_MANAGER.init();
}
static ReadBufferManager getBufferManager() {
return BUFFER_MANAGER;
}
private void init() {
buffers = new byte[NUM_BUFFERS][];
for (int i = 0; i < NUM_BUFFERS; i++) {
buffers[i] = new byte[BLOCK_SIZE]; // same buffers are reused. The byte array never goes back to GC
freeList.add(i);
}
for (int i = 0; i < NUM_THREADS; i++) {
Thread t = new Thread(new ReadBufferWorker(i));
t.setDaemon(true);
threads[i] = t;
t.setName("SeaweedFS-prefetch-" + i);
t.start();
}
ReadBufferWorker.UNLEASH_WORKERS.countDown();
}
// hide instance constructor
private ReadBufferManager() {
}
/*
*
* SeaweedInputStream-facing methods
*
*/
/**
* {@link SeaweedInputStream} calls this method to queue read-aheads.
*
* @param stream The {@link SeaweedInputStream} for which to do the read-ahead
* @param requestedOffset The offset in the file which should be read
* @param requestedLength The length to read
*/
void queueReadAhead(final SeaweedInputStream stream, final long requestedOffset, final int requestedLength) {
if (LOGGER.isTraceEnabled()) {
LOGGER.trace("Start Queueing readAhead for {} offset {} length {}",
stream.getPath(), requestedOffset, requestedLength);
}
ReadBuffer buffer;
synchronized (this) {
if (isAlreadyQueued(stream, requestedOffset)) {
return; // already queued, do not queue again
}
if (freeList.isEmpty() && !tryEvict()) {
return; // no buffers available, cannot queue anything
}
buffer = new ReadBuffer();
buffer.setStream(stream);
buffer.setOffset(requestedOffset);
buffer.setLength(0);
buffer.setRequestedLength(requestedLength);
buffer.setStatus(ReadBufferStatus.NOT_AVAILABLE);
buffer.setLatch(new CountDownLatch(1));
Integer bufferIndex = freeList.pop(); // will return a value, since we have checked size > 0 already
buffer.setBuffer(buffers[bufferIndex]);
buffer.setBufferindex(bufferIndex);
readAheadQueue.add(buffer);
notifyAll();
}
if (LOGGER.isTraceEnabled()) {
LOGGER.trace("Done q-ing readAhead for file {} offset {} buffer idx {}",
stream.getPath(), requestedOffset, buffer.getBufferindex());
}
}
/**
* {@link SeaweedInputStream} calls this method read any bytes already available in a buffer (thereby saving a
* remote read). This returns the bytes if the data already exists in buffer. If there is a buffer that is reading
* the requested offset, then this method blocks until that read completes. If the data is queued in a read-ahead
* but not picked up by a worker thread yet, then it cancels that read-ahead and reports cache miss. This is because
* depending on worker thread availability, the read-ahead may take a while - the calling thread can do its own
* read to get the data faster (compared to the read waiting in queue for an indeterminate amount of time).
*
* @param stream the file to read bytes for
* @param position the offset in the file to do a read for
* @param length the length to read
* @param buffer the buffer to read data into. Note that the buffer will be written into from offset 0.
* @return the number of bytes read
*/
int getBlock(final SeaweedInputStream stream, final long position, final int length, final byte[] buffer) {
// not synchronized, so have to be careful with locking
if (LOGGER.isTraceEnabled()) {
LOGGER.trace("getBlock for file {} position {} thread {}",
stream.getPath(), position, Thread.currentThread().getName());
}
waitForProcess(stream, position);
int bytesRead = 0;
synchronized (this) {
bytesRead = getBlockFromCompletedQueue(stream, position, length, buffer);
}
if (bytesRead > 0) {
if (LOGGER.isTraceEnabled()) {
LOGGER.trace("Done read from Cache for {} position {} length {}",
stream.getPath(), position, bytesRead);
}
return bytesRead;
}
// otherwise, just say we got nothing - calling thread can do its own read
return 0;
}
/*
*
* Internal methods
*
*/
private void waitForProcess(final SeaweedInputStream stream, final long position) {
ReadBuffer readBuf;
synchronized (this) {
clearFromReadAheadQueue(stream, position);
readBuf = getFromList(inProgressList, stream, position);
}
if (readBuf != null) { // if in in-progress queue, then block for it
try {
if (LOGGER.isTraceEnabled()) {
LOGGER.trace("got a relevant read buffer for file {} offset {} buffer idx {}",
stream.getPath(), readBuf.getOffset(), readBuf.getBufferindex());
}
readBuf.getLatch().await(); // blocking wait on the caller stream's thread
// Note on correctness: readBuf gets out of inProgressList only in 1 place: after worker thread
// is done processing it (in doneReading). There, the latch is set after removing the buffer from
// inProgressList. So this latch is safe to be outside the synchronized block.
// Putting it in synchronized would result in a deadlock, since this thread would be holding the lock
// while waiting, so no one will be able to change any state. If this becomes more complex in the future,
// then the latch can be removed and replaced with wait/notify whenever inProgressList is touched.
} catch (InterruptedException ex) {
Thread.currentThread().interrupt();
}
if (LOGGER.isTraceEnabled()) {
LOGGER.trace("latch done for file {} buffer idx {} length {}",
stream.getPath(), readBuf.getBufferindex(), readBuf.getLength());
}
}
}
/**
* If any buffer in the completedlist can be reclaimed then reclaim it and return the buffer to free list.
* The objective is to find just one buffer - there is no advantage to evicting more than one.
*
* @return whether the eviction succeeded - i.e., were we able to free up one buffer
*/
private synchronized boolean tryEvict() {
ReadBuffer nodeToEvict = null;
if (completedReadList.size() <= 0) {
return false; // there are no evict-able buffers
}
// first, try buffers where all bytes have been consumed (approximated as first and last bytes consumed)
for (ReadBuffer buf : completedReadList) {
if (buf.isFirstByteConsumed() && buf.isLastByteConsumed()) {
nodeToEvict = buf;
break;
}
}
if (nodeToEvict != null) {
return evict(nodeToEvict);
}
// next, try buffers where any bytes have been consumed (may be a bad idea? have to experiment and see)
for (ReadBuffer buf : completedReadList) {
if (buf.isAnyByteConsumed()) {
nodeToEvict = buf;
break;
}
}
if (nodeToEvict != null) {
return evict(nodeToEvict);
}
// next, try any old nodes that have not been consumed
long earliestBirthday = Long.MAX_VALUE;
for (ReadBuffer buf : completedReadList) {
if (buf.getTimeStamp() < earliestBirthday) {
nodeToEvict = buf;
earliestBirthday = buf.getTimeStamp();
}
}
if ((currentTimeMillis() - earliestBirthday > THRESHOLD_AGE_MILLISECONDS) && (nodeToEvict != null)) {
return evict(nodeToEvict);
}
// nothing can be evicted
return false;
}
private boolean evict(final ReadBuffer buf) {
freeList.push(buf.getBufferindex());
completedReadList.remove(buf);
if (LOGGER.isTraceEnabled()) {
LOGGER.trace("Evicting buffer idx {}; was used for file {} offset {} length {}",
buf.getBufferindex(), buf.getStream().getPath(), buf.getOffset(), buf.getLength());
}
return true;
}
private boolean isAlreadyQueued(final SeaweedInputStream stream, final long requestedOffset) {
// returns true if any part of the buffer is already queued
return (isInList(readAheadQueue, stream, requestedOffset)
|| isInList(inProgressList, stream, requestedOffset)
|| isInList(completedReadList, stream, requestedOffset));
}
private boolean isInList(final Collection<ReadBuffer> list, final SeaweedInputStream stream, final long requestedOffset) {
return (getFromList(list, stream, requestedOffset) != null);
}
private ReadBuffer getFromList(final Collection<ReadBuffer> list, final SeaweedInputStream stream, final long requestedOffset) {
for (ReadBuffer buffer : list) {
if (buffer.getStream() == stream) {
if (buffer.getStatus() == ReadBufferStatus.AVAILABLE
&& requestedOffset >= buffer.getOffset()
&& requestedOffset < buffer.getOffset() + buffer.getLength()) {
return buffer;
} else if (requestedOffset >= buffer.getOffset()
&& requestedOffset < buffer.getOffset() + buffer.getRequestedLength()) {
return buffer;
}
}
}
return null;
}
private void clearFromReadAheadQueue(final SeaweedInputStream stream, final long requestedOffset) {
ReadBuffer buffer = getFromList(readAheadQueue, stream, requestedOffset);
if (buffer != null) {
readAheadQueue.remove(buffer);
notifyAll(); // lock is held in calling method
freeList.push(buffer.getBufferindex());
}
}
private int getBlockFromCompletedQueue(final SeaweedInputStream stream, final long position, final int length,
final byte[] buffer) {
ReadBuffer buf = getFromList(completedReadList, stream, position);
if (buf == null || position >= buf.getOffset() + buf.getLength()) {
return 0;
}
int cursor = (int) (position - buf.getOffset());
int availableLengthInBuffer = buf.getLength() - cursor;
int lengthToCopy = Math.min(length, availableLengthInBuffer);
System.arraycopy(buf.getBuffer(), cursor, buffer, 0, lengthToCopy);
if (cursor == 0) {
buf.setFirstByteConsumed(true);
}
if (cursor + lengthToCopy == buf.getLength()) {
buf.setLastByteConsumed(true);
}
buf.setAnyByteConsumed(true);
return lengthToCopy;
}
/*
*
* ReadBufferWorker-thread-facing methods
*
*/
/**
* ReadBufferWorker thread calls this to get the next buffer that it should work on.
*
* @return {@link ReadBuffer}
* @throws InterruptedException if thread is interrupted
*/
ReadBuffer getNextBlockToRead() throws InterruptedException {
ReadBuffer buffer = null;
synchronized (this) {
//buffer = readAheadQueue.take(); // blocking method
while (readAheadQueue.size() == 0) {
wait();
}
buffer = readAheadQueue.remove();
notifyAll();
if (buffer == null) {
return null; // should never happen
}
buffer.setStatus(ReadBufferStatus.READING_IN_PROGRESS);
inProgressList.add(buffer);
}
if (LOGGER.isTraceEnabled()) {
LOGGER.trace("ReadBufferWorker picked file {} for offset {}",
buffer.getStream().getPath(), buffer.getOffset());
}
return buffer;
}
/**
* ReadBufferWorker thread calls this method to post completion.
*
* @param buffer the buffer whose read was completed
* @param result the {@link ReadBufferStatus} after the read operation in the worker thread
* @param bytesActuallyRead the number of bytes that the worker thread was actually able to read
*/
void doneReading(final ReadBuffer buffer, final ReadBufferStatus result, final int bytesActuallyRead) {
if (LOGGER.isTraceEnabled()) {
LOGGER.trace("ReadBufferWorker completed file {} for offset {} bytes {}",
buffer.getStream().getPath(), buffer.getOffset(), bytesActuallyRead);
}
synchronized (this) {
inProgressList.remove(buffer);
if (result == ReadBufferStatus.AVAILABLE && bytesActuallyRead > 0) {
buffer.setStatus(ReadBufferStatus.AVAILABLE);
buffer.setTimeStamp(currentTimeMillis());
buffer.setLength(bytesActuallyRead);
completedReadList.add(buffer);
} else {
freeList.push(buffer.getBufferindex());
// buffer should go out of scope after the end of the calling method in ReadBufferWorker, and eligible for GC
}
}
//outside the synchronized, since anyone receiving a wake-up from the latch must see safe-published results
buffer.getLatch().countDown(); // wake up waiting threads (if any)
}
/**
* Similar to System.currentTimeMillis, except implemented with System.nanoTime().
* System.currentTimeMillis can go backwards when system clock is changed (e.g., with NTP time synchronization),
* making it unsuitable for measuring time intervals. nanoTime is strictly monotonically increasing per CPU core.
* Note: it is not monotonic across sockets, and even within a CPU, it's only the
* more recent parts which share a clock across all cores.
*
* @return current time in milliseconds
*/
private long currentTimeMillis() {
return System.nanoTime() / 1000 / 1000;
}
}

other/java/hdfs2/src/main/java/seaweed/hdfs/ReadBufferWorker.java (70, deleted)

@@ -1,70 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package seaweed.hdfs;
import java.util.concurrent.CountDownLatch;
class ReadBufferWorker implements Runnable {
protected static final CountDownLatch UNLEASH_WORKERS = new CountDownLatch(1);
private int id;
ReadBufferWorker(final int id) {
this.id = id;
}
/**
* return the ID of ReadBufferWorker.
*/
public int getId() {
return this.id;
}
/**
* Waits until a buffer becomes available in ReadAheadQueue.
* Once a buffer becomes available, reads the file specified in it and then posts results back to buffer manager.
* Rinse and repeat. Forever.
*/
public void run() {
try {
UNLEASH_WORKERS.await();
} catch (InterruptedException ex) {
Thread.currentThread().interrupt();
}
ReadBufferManager bufferManager = ReadBufferManager.getBufferManager();
ReadBuffer buffer;
while (true) {
try {
buffer = bufferManager.getNextBlockToRead(); // blocks, until a buffer is available for this thread
} catch (InterruptedException ex) {
Thread.currentThread().interrupt();
return;
}
if (buffer != null) {
try {
// do the actual read, from the file.
int bytesRead = buffer.getStream().readRemote(buffer.getOffset(), buffer.getBuffer(), 0, buffer.getRequestedLength());
bufferManager.doneReading(buffer, ReadBufferStatus.AVAILABLE, bytesRead); // post result back to ReadBufferManager
} catch (Exception ex) {
bufferManager.doneReading(buffer, ReadBufferStatus.READ_FAILED, 0);
}
}
}
}
}

other/java/hdfs3/src/main/java/seaweed/hdfs/ReadBufferStatus.java → other/java/hdfs2/src/main/java/seaweed/hdfs/SeaweedAbstractFileSystem.java (22)

@@ -18,12 +18,18 @@

 package seaweed.hdfs;

-/**
- * The ReadBufferStatus for Rest AbfsClient
- */
-public enum ReadBufferStatus {
-    NOT_AVAILABLE, // buffers sitting in readaheadqueue have this stats
-    READING_IN_PROGRESS, // reading is in progress on this buffer. Buffer should be in inProgressList
-    AVAILABLE, // data is available in buffer. It should be in completedList
-    READ_FAILED // read completed, but failed.
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.DelegateToFileSystem;
+
+import java.io.IOException;
+import java.net.URI;
+import java.net.URISyntaxException;
+
+public class SeaweedAbstractFileSystem extends DelegateToFileSystem {
+
+    SeaweedAbstractFileSystem(final URI uri, final Configuration conf)
+            throws IOException, URISyntaxException {
+        super(uri, new SeaweedFileSystem(), conf, "seaweedfs", false);
+    }
 }

other/java/hdfs2/src/main/java/seaweed/hdfs/SeaweedFileSystem.java (28)

@@ -10,6 +10,7 @@ import org.apache.hadoop.security.UserGroupInformation;
 import org.apache.hadoop.util.Progressable;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
+import seaweedfs.client.FilerProto;

 import java.io.FileNotFoundException;
 import java.io.IOException;
@@ -22,7 +23,7 @@ import java.util.Map;

 import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.IO_FILE_BUFFER_SIZE_KEY;

-public class SeaweedFileSystem extends org.apache.hadoop.fs.FileSystem {
+public class SeaweedFileSystem extends FileSystem {

     public static final int FS_SEAWEED_DEFAULT_PORT = 8888;
     public static final String FS_SEAWEED_FILER_HOST = "fs.seaweed.filer.host";
@@ -144,7 +145,7 @@ public class SeaweedFileSystem extends FileSystem {
     }

     @Override
-    public boolean rename(Path src, Path dst) {
+    public boolean rename(Path src, Path dst) throws IOException {

         LOG.debug("rename path: {} => {}", src, dst);
@@ -155,12 +156,13 @@
         if (src.equals(dst)) {
             return true;
         }
-        FileStatus dstFileStatus = getFileStatus(dst);
+        FilerProto.Entry entry = seaweedFileSystemStore.lookupEntry(dst);

-        String sourceFileName = src.getName();
         Path adjustedDst = dst;

-        if (dstFileStatus != null) {
+        if (entry != null) {
+            FileStatus dstFileStatus = getFileStatus(dst);
+            String sourceFileName = src.getName();
             if (!dstFileStatus.isDirectory()) {
                 return false;
             }
@@ -175,18 +177,20 @@
     }

     @Override
-    public boolean delete(Path path, boolean recursive) {
+    public boolean delete(Path path, boolean recursive) throws IOException {

         LOG.debug("delete path: {} recursive:{}", path, recursive);

         path = qualify(path);

-        FileStatus fileStatus = getFileStatus(path);
+        FilerProto.Entry entry = seaweedFileSystemStore.lookupEntry(path);

-        if (fileStatus == null) {
+        if (entry == null) {
             return true;
         }

+        FileStatus fileStatus = getFileStatus(path);
+
         return seaweedFileSystemStore.deleteEntries(path, fileStatus.isDirectory(), recursive);
     }
@@ -222,9 +226,9 @@
         path = qualify(path);

-        FileStatus fileStatus = getFileStatus(path);
+        FilerProto.Entry entry = seaweedFileSystemStore.lookupEntry(path);

-        if (fileStatus == null) {
+        if (entry == null) {

             UserGroupInformation currentUser = UserGroupInformation.getCurrentUser();
             return seaweedFileSystemStore.createDirectory(path, currentUser,
@@ -233,6 +237,8 @@
         }

+        FileStatus fileStatus = getFileStatus(path);
+
         if (fileStatus.isDirectory()) {
             return true;
         } else {
@@ -241,7 +247,7 @@
     }

     @Override
-    public FileStatus getFileStatus(Path path) {
+    public FileStatus getFileStatus(Path path) throws IOException {

         LOG.debug("getFileStatus path: {}", path);

other/java/hdfs2/src/main/java/seaweed/hdfs/SeaweedFileSystemStore.java (12)

@@ -61,7 +61,7 @@ public class SeaweedFileSystemStore {
         );
     }

-    public FileStatus[] listEntries(final Path path) {
+    public FileStatus[] listEntries(final Path path) throws IOException {
         LOG.debug("listEntries path: {}", path);

         FileStatus pathStatus = getFileStatus(path);
@@ -89,11 +89,11 @@ public class SeaweedFileSystemStore {
     }

-    public FileStatus getFileStatus(final Path path) {
+    public FileStatus getFileStatus(final Path path) throws IOException {

         FilerProto.Entry entry = lookupEntry(path);
         if (entry == null) {
-            return null;
+            throw new FileNotFoundException("File does not exist: " + path);
         }
         LOG.debug("doGetFileStatus path:{} entry:{}", path, entry);
@@ -136,7 +136,7 @@ public class SeaweedFileSystemStore {
             modification_time, access_time, permission, owner, group, null, path);
     }

-    private FilerProto.Entry lookupEntry(Path path) {
+    public FilerProto.Entry lookupEntry(Path path) {

         return filerClient.lookupEntry(getParentDirectory(path), path.getName());
@@ -212,7 +212,6 @@ public class SeaweedFileSystemStore {
         LOG.debug("openFileForRead path:{} bufferSize:{}", path, bufferSize);

-        int readAheadQueueDepth = 2;
         FilerProto.Entry entry = lookupEntry(path);

         if (entry == null) {
@@ -223,8 +222,7 @@ public class SeaweedFileSystemStore {
             statistics,
             path.toUri().getPath(),
             entry,
-            bufferSize,
-            readAheadQueueDepth);
+            bufferSize);
     }

     public void setOwner(Path path, String owner, String group) {
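
Because getFileStatus now throws FileNotFoundException instead of returning null, callers either switch to lookupEntry for existence checks (as SeaweedFileSystem does above) or catch the exception. A hedged sketch of the exception-based pattern (the exists helper is hypothetical):

import org.apache.hadoop.fs.Path;

import java.io.FileNotFoundException;
import java.io.IOException;

public class ExistsExample { // illustrative caller, not part of this commit
    static boolean exists(SeaweedFileSystemStore store, Path path) throws IOException {
        try {
            store.getFileStatus(path); // now throws FileNotFoundException when absent
            return true;
        } catch (FileNotFoundException e) {
            return false; // the new contract: absence is signaled by exception, not null
        }
    }
}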

other/java/hdfs2/src/main/java/seaweed/hdfs/SeaweedInputStream.java (183)

@@ -27,33 +27,23 @@ public class SeaweedInputStream extends FSInputStream {
     private final List<SeaweedRead.VisibleInterval> visibleIntervalList;
     private final long contentLength;
     private final int bufferSize; // default buffer size
-    private final int readAheadQueueDepth; // initialized in constructor
-    private final boolean readAheadEnabled; // whether enable readAhead;

-    private byte[] buffer = null; // will be initialized on first use
+    private long position = 0; // cursor of the file

-    private long fCursor = 0; // cursor of buffer within file - offset of next byte to read from remote server
-    private long fCursorAfterLastRead = -1;
-    private int bCursor = 0; // cursor of read within buffer - offset of next byte to be returned from buffer
-    private int limit = 0; // offset of next byte to be read into buffer from service (i.e., upper marker+1
-    // of valid bytes in buffer)
     private boolean closed = false;

     public SeaweedInputStream(
-            final FilerGrpcClient filerGrpcClient,
-            final Statistics statistics,
-            final String path,
-            final FilerProto.Entry entry,
-            final int bufferSize,
-            final int readAheadQueueDepth) {
+            final FilerGrpcClient filerGrpcClient,
+            final Statistics statistics,
+            final String path,
+            final FilerProto.Entry entry,
+            final int bufferSize) {
         this.filerGrpcClient = filerGrpcClient;
         this.statistics = statistics;
         this.path = path;
         this.entry = entry;
         this.contentLength = SeaweedRead.totalSize(entry.getChunksList());
         this.bufferSize = bufferSize;
-        this.readAheadQueueDepth = (readAheadQueueDepth >= 0) ? readAheadQueueDepth : Runtime.getRuntime().availableProcessors();
-        this.readAheadEnabled = true;
         this.visibleIntervalList = SeaweedRead.nonOverlappingVisibleIntervals(entry.getChunksList());
@@ -78,122 +68,7 @@ public class SeaweedInputStream extends FSInputStream {

     @Override
     public synchronized int read(final byte[] b, final int off, final int len) throws IOException {
-        int currentOff = off;
-        int currentLen = len;
-        int lastReadBytes;
-        int totalReadBytes = 0;
-        do {
-            lastReadBytes = readOneBlock(b, currentOff, currentLen);
-            if (lastReadBytes > 0) {
-                currentOff += lastReadBytes;
-                currentLen -= lastReadBytes;
-                totalReadBytes += lastReadBytes;
-            }
-            if (currentLen <= 0 || currentLen > b.length - currentOff) {
-                break;
-            }
-        } while (lastReadBytes > 0);
-        return totalReadBytes > 0 ? totalReadBytes : lastReadBytes;
-    }
-
-    private int readOneBlock(final byte[] b, final int off, final int len) throws IOException {
-        if (closed) {
-            throw new IOException(FSExceptionMessages.STREAM_IS_CLOSED);
-        }
-
-        Preconditions.checkNotNull(b);
-
-        if (len == 0) {
-            return 0;
-        }
-
-        if (this.available() == 0) {
-            return -1;
-        }
-
-        if (off < 0 || len < 0 || len > b.length - off) {
-            throw new IndexOutOfBoundsException();
-        }
-
-        //If buffer is empty, then fill the buffer.
-        if (bCursor == limit) {
-            //If EOF, then return -1
-            if (fCursor >= contentLength) {
-                return -1;
-            }
-
-            long bytesRead = 0;
-            //reset buffer to initial state - i.e., throw away existing data
-            bCursor = 0;
-            limit = 0;
-            if (buffer == null) {
-                buffer = new byte[bufferSize];
-            }
-
-            // Enable readAhead when reading sequentially
-            if (-1 == fCursorAfterLastRead || fCursorAfterLastRead == fCursor || b.length >= bufferSize) {
-                bytesRead = readInternal(fCursor, buffer, 0, bufferSize, false);
-            } else {
-                bytesRead = readInternal(fCursor, buffer, 0, b.length, true);
-            }
-
-            if (bytesRead == -1) {
-                return -1;
-            }
-
-            limit += bytesRead;
-            fCursor += bytesRead;
-            fCursorAfterLastRead = fCursor;
-        }
-
-        //If there is anything in the buffer, then return lesser of (requested bytes) and (bytes in buffer)
-        //(bytes returned may be less than requested)
-        int bytesRemaining = limit - bCursor;
-        int bytesToRead = Math.min(len, bytesRemaining);
-        System.arraycopy(buffer, bCursor, b, off, bytesToRead);
-        bCursor += bytesToRead;
-        if (statistics != null) {
-            statistics.incrementBytesRead(bytesToRead);
-        }
-        return bytesToRead;
-    }
-
-    private int readInternal(final long position, final byte[] b, final int offset, final int length,
-                             final boolean bypassReadAhead) throws IOException {
-        if (readAheadEnabled && !bypassReadAhead) {
-            // try reading from read-ahead
-            if (offset != 0) {
-                throw new IllegalArgumentException("readahead buffers cannot have non-zero buffer offsets");
-            }
-            int receivedBytes;
-
-            // queue read-aheads
-            int numReadAheads = this.readAheadQueueDepth;
-            long nextSize;
-            long nextOffset = position;
-            while (numReadAheads > 0 && nextOffset < contentLength) {
-                nextSize = Math.min((long) bufferSize, contentLength - nextOffset);
-                ReadBufferManager.getBufferManager().queueReadAhead(this, nextOffset, (int) nextSize);
-                nextOffset = nextOffset + nextSize;
-                numReadAheads--;
-            }
-
-            // try reading from buffers first
-            receivedBytes = ReadBufferManager.getBufferManager().getBlock(this, position, length, b);
-            if (receivedBytes > 0) {
-                return receivedBytes;
-            }
-
-            // got nothing from read-ahead, do our own read now
-            receivedBytes = readRemote(position, b, offset, length);
-            return receivedBytes;
-        } else {
-            return readRemote(position, b, offset, length);
-        }
-    }
-
-    int readRemote(long position, byte[] b, int offset, int length) throws IOException {
         if (position < 0) {
             throw new IllegalArgumentException("attempting to read from negative offset");
         }
@@ -203,21 +78,30 @@ public class SeaweedInputStream extends FSInputStream {
         if (b == null) {
             throw new IllegalArgumentException("null byte array passed in to read() method");
         }
-        if (offset >= b.length) {
+        if (off >= b.length) {
             throw new IllegalArgumentException("offset greater than length of array");
         }
-        if (length < 0) {
+        if (len < 0) {
             throw new IllegalArgumentException("requested read length is less than zero");
         }
-        if (length > (b.length - offset)) {
+        if (len > (b.length - off)) {
             throw new IllegalArgumentException("requested read length is more than will fit after requested offset in buffer");
         }

-        long bytesRead = SeaweedRead.read(filerGrpcClient, visibleIntervalList, position, b, offset, length);
+        long bytesRead = SeaweedRead.read(this.filerGrpcClient, this.visibleIntervalList, this.position, b, off, len);
if (bytesRead > Integer.MAX_VALUE) { if (bytesRead > Integer.MAX_VALUE) {
throw new IOException("Unexpected Content-Length"); throw new IOException("Unexpected Content-Length");
} }
return (int) bytesRead;
if (bytesRead > 0) {
this.position += bytesRead;
if (statistics != null) {
statistics.incrementBytesRead(bytesRead);
}
}
return (int)bytesRead;
} }
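Taken together, this change (applied identically to the hdfs3 module below) strips the local buffer and its three cursors (fCursor, bCursor, limit) out of SeaweedInputStream: every read now goes straight to SeaweedRead.read() against the visible chunk intervals, and a single position cursor tracks the file offset. Consolidating the added lines above, the new read path amounts to the following sketch (not a verbatim file excerpt):

public synchronized int read(final byte[] b, final int off, final int len) throws IOException {
    // delegate directly to the chunk-aware reader at the current file offset
    long bytesRead = SeaweedRead.read(this.filerGrpcClient, this.visibleIntervalList, this.position, b, off, len);
    if (bytesRead > Integer.MAX_VALUE) {
        throw new IOException("Unexpected Content-Length");
    }
    if (bytesRead > 0) {
        this.position += bytesRead;            // the only cursor left to maintain
        if (statistics != null) {
            statistics.incrementBytesRead(bytesRead);
        }
    }
    return (int) bytesRead;
}

seek() and skip() shrink accordingly: with no buffer to invalidate, seeking is just an assignment to position.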
/** /**
@ -239,17 +123,8 @@ public class SeaweedInputStream extends FSInputStream {
throw new EOFException(FSExceptionMessages.CANNOT_SEEK_PAST_EOF); throw new EOFException(FSExceptionMessages.CANNOT_SEEK_PAST_EOF);
} }
if (n >= fCursor - limit && n <= fCursor) { // within buffer
bCursor = (int) (n - (fCursor - limit));
return;
}
// next read will read from here
fCursor = n;
this.position = n;
//invalidate buffer
limit = 0;
bCursor = 0;
} }
@Override @Override
@ -257,20 +132,19 @@ public class SeaweedInputStream extends FSInputStream {
if (closed) { if (closed) {
throw new IOException(FSExceptionMessages.STREAM_IS_CLOSED); throw new IOException(FSExceptionMessages.STREAM_IS_CLOSED);
} }
long currentPos = getPos();
if (currentPos == contentLength) {
if (this.position == contentLength) {
if (n > 0) { if (n > 0) {
throw new EOFException(FSExceptionMessages.CANNOT_SEEK_PAST_EOF); throw new EOFException(FSExceptionMessages.CANNOT_SEEK_PAST_EOF);
} }
} }
long newPos = currentPos + n;
long newPos = this.position + n;
if (newPos < 0) { if (newPos < 0) {
newPos = 0; newPos = 0;
n = newPos - currentPos;
n = newPos - this.position;
} }
if (newPos > contentLength) { if (newPos > contentLength) {
newPos = contentLength; newPos = contentLength;
n = newPos - currentPos;
n = newPos - this.position;
} }
seek(newPos); seek(newPos);
return n; return n;
@ -289,11 +163,11 @@ public class SeaweedInputStream extends FSInputStream {
public synchronized int available() throws IOException { public synchronized int available() throws IOException {
if (closed) { if (closed) {
throw new IOException( throw new IOException(
FSExceptionMessages.STREAM_IS_CLOSED);
FSExceptionMessages.STREAM_IS_CLOSED);
} }
final long remaining = this.contentLength - this.getPos(); final long remaining = this.contentLength - this.getPos();
return remaining <= Integer.MAX_VALUE return remaining <= Integer.MAX_VALUE
? (int) remaining : Integer.MAX_VALUE;
? (int) remaining : Integer.MAX_VALUE;
} }
/** /**
@ -321,7 +195,7 @@ public class SeaweedInputStream extends FSInputStream {
if (closed) { if (closed) {
throw new IOException(FSExceptionMessages.STREAM_IS_CLOSED); throw new IOException(FSExceptionMessages.STREAM_IS_CLOSED);
} }
return fCursor - limit + bCursor;
return position;
} }
/** /**
@ -338,7 +212,6 @@ public class SeaweedInputStream extends FSInputStream {
@Override @Override
public synchronized void close() throws IOException { public synchronized void close() throws IOException {
closed = true; closed = true;
buffer = null; // de-reference the buffer so it can be GC'ed sooner
} }
/** /**

89
other/java/hdfs2/src/main/java/seaweed/hdfs/SeaweedOutputStream.java

@ -7,6 +7,7 @@ import org.apache.hadoop.fs.FSExceptionMessages;
import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.Path;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import seaweedfs.client.ByteBufferPool;
import seaweedfs.client.FilerGrpcClient; import seaweedfs.client.FilerGrpcClient;
import seaweedfs.client.FilerProto; import seaweedfs.client.FilerProto;
import seaweedfs.client.SeaweedWrite; import seaweedfs.client.SeaweedWrite;
@ -14,6 +15,7 @@ import seaweedfs.client.SeaweedWrite;
import java.io.IOException; import java.io.IOException;
import java.io.InterruptedIOException; import java.io.InterruptedIOException;
import java.io.OutputStream; import java.io.OutputStream;
import java.nio.ByteBuffer;
import java.util.concurrent.*; import java.util.concurrent.*;
import static seaweed.hdfs.SeaweedFileSystemStore.getParentDirectory; import static seaweed.hdfs.SeaweedFileSystemStore.getParentDirectory;
@ -28,16 +30,16 @@ public class SeaweedOutputStream extends OutputStream {
private final int maxConcurrentRequestCount; private final int maxConcurrentRequestCount;
private final ThreadPoolExecutor threadExecutor; private final ThreadPoolExecutor threadExecutor;
private final ExecutorCompletionService<Void> completionService; private final ExecutorCompletionService<Void> completionService;
private FilerProto.Entry.Builder entry;
private final FilerProto.Entry.Builder entry;
private final boolean supportFlush = false; // true;
private final ConcurrentLinkedDeque<WriteOperation> writeOperations;
private long position; private long position;
private boolean closed; private boolean closed;
private boolean supportFlush = true;
private volatile IOException lastError; private volatile IOException lastError;
private long lastFlushOffset; private long lastFlushOffset;
private long lastTotalAppendOffset = 0; private long lastTotalAppendOffset = 0;
private byte[] buffer;
private int bufferIndex;
private ConcurrentLinkedDeque<WriteOperation> writeOperations;
private ByteBuffer buffer;
private long outputIndex;
private String replication = "000"; private String replication = "000";
public SeaweedOutputStream(FilerGrpcClient filerGrpcClient, final Path path, FilerProto.Entry.Builder entry, public SeaweedOutputStream(FilerGrpcClient filerGrpcClient, final Path path, FilerProto.Entry.Builder entry,
@ -50,18 +52,18 @@ public class SeaweedOutputStream extends OutputStream {
this.lastError = null; this.lastError = null;
this.lastFlushOffset = 0; this.lastFlushOffset = 0;
this.bufferSize = bufferSize; this.bufferSize = bufferSize;
this.buffer = new byte[bufferSize];
this.bufferIndex = 0;
this.buffer = ByteBufferPool.request(bufferSize);
this.outputIndex = 0;
this.writeOperations = new ConcurrentLinkedDeque<>(); this.writeOperations = new ConcurrentLinkedDeque<>();
this.maxConcurrentRequestCount = 4 * Runtime.getRuntime().availableProcessors(); this.maxConcurrentRequestCount = 4 * Runtime.getRuntime().availableProcessors();
this.threadExecutor this.threadExecutor
= new ThreadPoolExecutor(maxConcurrentRequestCount,
maxConcurrentRequestCount,
10L,
TimeUnit.SECONDS,
new LinkedBlockingQueue<Runnable>());
= new ThreadPoolExecutor(maxConcurrentRequestCount,
maxConcurrentRequestCount,
10L,
TimeUnit.SECONDS,
new LinkedBlockingQueue<Runnable>());
this.completionService = new ExecutorCompletionService<>(this.threadExecutor); this.completionService = new ExecutorCompletionService<>(this.threadExecutor);
this.entry = entry; this.entry = entry;
@ -84,7 +86,7 @@ public class SeaweedOutputStream extends OutputStream {
@Override @Override
public synchronized void write(final byte[] data, final int off, final int length) public synchronized void write(final byte[] data, final int off, final int length)
throws IOException {
throws IOException {
maybeThrowLastError(); maybeThrowLastError();
Preconditions.checkArgument(data != null, "null data"); Preconditions.checkArgument(data != null, "null data");
@ -93,25 +95,29 @@ public class SeaweedOutputStream extends OutputStream {
throw new IndexOutOfBoundsException(); throw new IndexOutOfBoundsException();
} }
// System.out.println(path + " write [" + (outputIndex + off) + "," + ((outputIndex + off) + length) + ")");
int currentOffset = off; int currentOffset = off;
int writableBytes = bufferSize - bufferIndex;
int writableBytes = bufferSize - buffer.position();
int numberOfBytesToWrite = length; int numberOfBytesToWrite = length;
while (numberOfBytesToWrite > 0) { while (numberOfBytesToWrite > 0) {
if (writableBytes <= numberOfBytesToWrite) {
System.arraycopy(data, currentOffset, buffer, bufferIndex, writableBytes);
bufferIndex += writableBytes;
writeCurrentBufferToService();
currentOffset += writableBytes;
numberOfBytesToWrite = numberOfBytesToWrite - writableBytes;
} else {
System.arraycopy(data, currentOffset, buffer, bufferIndex, numberOfBytesToWrite);
bufferIndex += numberOfBytesToWrite;
numberOfBytesToWrite = 0;
if (numberOfBytesToWrite < writableBytes) {
buffer.put(data, currentOffset, numberOfBytesToWrite);
outputIndex += numberOfBytesToWrite;
break;
} }
writableBytes = bufferSize - bufferIndex;
// System.out.println(path + " [" + (outputIndex + currentOffset) + "," + ((outputIndex + currentOffset) + writableBytes) + ")");
buffer.put(data, currentOffset, writableBytes);
outputIndex += writableBytes;
currentOffset += writableBytes;
writeCurrentBufferToService();
numberOfBytesToWrite = numberOfBytesToWrite - writableBytes;
writableBytes = bufferSize - buffer.position();
} }
} }
/** /**
@ -147,8 +153,9 @@ public class SeaweedOutputStream extends OutputStream {
threadExecutor.shutdown(); threadExecutor.shutdown();
} finally { } finally {
lastError = new IOException(FSExceptionMessages.STREAM_IS_CLOSED); lastError = new IOException(FSExceptionMessages.STREAM_IS_CLOSED);
ByteBufferPool.release(buffer);
buffer = null; buffer = null;
bufferIndex = 0;
outputIndex = 0;
closed = true; closed = true;
writeOperations.clear(); writeOperations.clear();
if (!threadExecutor.isShutdown()) { if (!threadExecutor.isShutdown()) {
@ -158,35 +165,17 @@ public class SeaweedOutputStream extends OutputStream {
} }
private synchronized void writeCurrentBufferToService() throws IOException { private synchronized void writeCurrentBufferToService() throws IOException {
if (bufferIndex == 0) {
if (buffer.position() == 0) {
return; return;
} }
final byte[] bytes = buffer;
final int bytesLength = bufferIndex;
buffer = new byte[bufferSize];
bufferIndex = 0;
final long offset = position;
buffer.flip();
int bytesLength = buffer.limit() - buffer.position();
SeaweedWrite.writeData(entry, replication, filerGrpcClient, position, buffer.array(), buffer.position(), buffer.limit());
// System.out.println(path + " saved [" + (position) + "," + ((position) + bytesLength) + ")");
position += bytesLength; position += bytesLength;
buffer.clear();
if (threadExecutor.getQueue().size() >= maxConcurrentRequestCount * 2) {
waitForTaskToComplete();
}
final Future<Void> job = completionService.submit(new Callable<Void>() {
@Override
public Void call() throws Exception {
// originally: client.append(path, offset, bytes, 0, bytesLength);
SeaweedWrite.writeData(entry, replication, filerGrpcClient, offset, bytes, 0, bytesLength);
return null;
}
});
writeOperations.add(new WriteOperation(job, offset, bytesLength));
// Try to shrink the queue
shrinkWriteOperationQueue();
} }
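The write side makes the mirror-image move: the executor/completion-service pipeline is dropped, and flushing becomes a synchronous fill/flip/drain cycle over a pooled ByteBuffer. Below is a standalone sketch of that cycle; the Sink interface is a hypothetical stand-in for SeaweedWrite.writeData(), and allocate() stands in for ByteBufferPool.request():

import java.nio.ByteBuffer;

class BufferCycleSketch {
    interface Sink { void write(byte[] data, int offset, int length); }  // stand-in for SeaweedWrite.writeData

    private final ByteBuffer buffer = ByteBuffer.allocate(8 * 1024);     // ByteBufferPool.request() in the real code

    void append(Sink sink, byte[] data, int off, int len) {
        int cursor = off, remaining = len;
        while (remaining > 0) {
            int writable = buffer.remaining();
            if (remaining < writable) {
                buffer.put(data, cursor, remaining);   // fits entirely: just accumulate
                return;
            }
            buffer.put(data, cursor, writable);        // fill the buffer to capacity...
            cursor += writable;
            remaining -= writable;
            flush(sink);                               // ...then drain it synchronously
        }
    }

    void flush(Sink sink) {
        if (buffer.position() == 0) {
            return;                                    // nothing buffered
        }
        buffer.flip();                                 // position -> 0, limit -> end of data
        sink.write(buffer.array(), buffer.position(), buffer.limit());
        buffer.clear();                                // reset for reuse, no reallocation
    }
}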
private void waitForTaskToComplete() throws IOException { private void waitForTaskToComplete() throws IOException {

2
other/java/hdfs3/dependency-reduced-pom.xml

@ -127,7 +127,7 @@
</snapshotRepository> </snapshotRepository>
</distributionManagement> </distributionManagement>
<properties> <properties>
<seaweedfs.client.version>1.2.9</seaweedfs.client.version>
<seaweedfs.client.version>1.3.6</seaweedfs.client.version>
<hadoop.version>3.1.1</hadoop.version> <hadoop.version>3.1.1</hadoop.version>
</properties> </properties>
</project> </project>

2
other/java/hdfs3/pom.xml

@ -5,7 +5,7 @@
<modelVersion>4.0.0</modelVersion> <modelVersion>4.0.0</modelVersion>
<properties> <properties>
<seaweedfs.client.version>1.2.9</seaweedfs.client.version>
<seaweedfs.client.version>1.3.6</seaweedfs.client.version>
<hadoop.version>3.1.1</hadoop.version> <hadoop.version>3.1.1</hadoop.version>
</properties> </properties>

137
other/java/hdfs3/src/main/java/seaweed/hdfs/ReadBuffer.java

@ -1,137 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package seaweed.hdfs;
import java.util.concurrent.CountDownLatch;
class ReadBuffer {
private SeaweedInputStream stream;
private long offset; // offset within the file for the buffer
private int length; // actual length, set after the buffer is filled
private int requestedLength; // requested length of the read
private byte[] buffer; // the buffer itself
private int bufferindex = -1; // index in the buffers array in Buffer manager
private ReadBufferStatus status; // status of the buffer
private CountDownLatch latch = null; // signaled when the buffer is done reading, so any client
// waiting on this buffer gets unblocked
// fields to help with eviction logic
private long timeStamp = 0; // tick at which buffer became available to read
private boolean isFirstByteConsumed = false;
private boolean isLastByteConsumed = false;
private boolean isAnyByteConsumed = false;
public SeaweedInputStream getStream() {
return stream;
}
public void setStream(SeaweedInputStream stream) {
this.stream = stream;
}
public long getOffset() {
return offset;
}
public void setOffset(long offset) {
this.offset = offset;
}
public int getLength() {
return length;
}
public void setLength(int length) {
this.length = length;
}
public int getRequestedLength() {
return requestedLength;
}
public void setRequestedLength(int requestedLength) {
this.requestedLength = requestedLength;
}
public byte[] getBuffer() {
return buffer;
}
public void setBuffer(byte[] buffer) {
this.buffer = buffer;
}
public int getBufferindex() {
return bufferindex;
}
public void setBufferindex(int bufferindex) {
this.bufferindex = bufferindex;
}
public ReadBufferStatus getStatus() {
return status;
}
public void setStatus(ReadBufferStatus status) {
this.status = status;
}
public CountDownLatch getLatch() {
return latch;
}
public void setLatch(CountDownLatch latch) {
this.latch = latch;
}
public long getTimeStamp() {
return timeStamp;
}
public void setTimeStamp(long timeStamp) {
this.timeStamp = timeStamp;
}
public boolean isFirstByteConsumed() {
return isFirstByteConsumed;
}
public void setFirstByteConsumed(boolean isFirstByteConsumed) {
this.isFirstByteConsumed = isFirstByteConsumed;
}
public boolean isLastByteConsumed() {
return isLastByteConsumed;
}
public void setLastByteConsumed(boolean isLastByteConsumed) {
this.isLastByteConsumed = isLastByteConsumed;
}
public boolean isAnyByteConsumed() {
return isAnyByteConsumed;
}
public void setAnyByteConsumed(boolean isAnyByteConsumed) {
this.isAnyByteConsumed = isAnyByteConsumed;
}
}

394
other/java/hdfs3/src/main/java/seaweed/hdfs/ReadBufferManager.java

@ -1,394 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
* <p>
* http://www.apache.org/licenses/LICENSE-2.0
* <p>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package seaweed.hdfs;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.Collection;
import java.util.LinkedList;
import java.util.Queue;
import java.util.Stack;
import java.util.concurrent.CountDownLatch;
/**
* The Read Buffer Manager for SeaweedFS read-ahead.
*/
final class ReadBufferManager {
private static final Logger LOGGER = LoggerFactory.getLogger(ReadBufferManager.class);
private static final int NUM_BUFFERS = 16;
private static final int BLOCK_SIZE = 4 * 1024 * 1024;
private static final int NUM_THREADS = 8;
private static final int THRESHOLD_AGE_MILLISECONDS = 3000; // have to see if 3 seconds is a good threshold
private Thread[] threads = new Thread[NUM_THREADS];
private byte[][] buffers; // array of byte[] buffers, to hold the data that is read
private Stack<Integer> freeList = new Stack<>(); // indices in buffers[] array that are available
private Queue<ReadBuffer> readAheadQueue = new LinkedList<>(); // queue of requests that are not picked up by any worker thread yet
private LinkedList<ReadBuffer> inProgressList = new LinkedList<>(); // requests being processed by worker threads
private LinkedList<ReadBuffer> completedReadList = new LinkedList<>(); // buffers available for reading
private static final ReadBufferManager BUFFER_MANAGER; // singleton, initialized in static initialization block
static {
BUFFER_MANAGER = new ReadBufferManager();
BUFFER_MANAGER.init();
}
static ReadBufferManager getBufferManager() {
return BUFFER_MANAGER;
}
private void init() {
buffers = new byte[NUM_BUFFERS][];
for (int i = 0; i < NUM_BUFFERS; i++) {
buffers[i] = new byte[BLOCK_SIZE]; // same buffers are reused. The byte array never goes back to GC
freeList.add(i);
}
for (int i = 0; i < NUM_THREADS; i++) {
Thread t = new Thread(new ReadBufferWorker(i));
t.setDaemon(true);
threads[i] = t;
t.setName("SeaweedFS-prefetch-" + i);
t.start();
}
ReadBufferWorker.UNLEASH_WORKERS.countDown();
}
// hide instance constructor
private ReadBufferManager() {
}
/*
*
* SeaweedInputStream-facing methods
*
*/
/**
* {@link SeaweedInputStream} calls this method to queue read-aheads.
*
* @param stream The {@link SeaweedInputStream} for which to do the read-ahead
* @param requestedOffset The offset in the file which should be read
* @param requestedLength The length to read
*/
void queueReadAhead(final SeaweedInputStream stream, final long requestedOffset, final int requestedLength) {
if (LOGGER.isTraceEnabled()) {
LOGGER.trace("Start Queueing readAhead for {} offset {} length {}",
stream.getPath(), requestedOffset, requestedLength);
}
ReadBuffer buffer;
synchronized (this) {
if (isAlreadyQueued(stream, requestedOffset)) {
return; // already queued, do not queue again
}
if (freeList.isEmpty() && !tryEvict()) {
return; // no buffers available, cannot queue anything
}
buffer = new ReadBuffer();
buffer.setStream(stream);
buffer.setOffset(requestedOffset);
buffer.setLength(0);
buffer.setRequestedLength(requestedLength);
buffer.setStatus(ReadBufferStatus.NOT_AVAILABLE);
buffer.setLatch(new CountDownLatch(1));
Integer bufferIndex = freeList.pop(); // will return a value, since we have checked size > 0 already
buffer.setBuffer(buffers[bufferIndex]);
buffer.setBufferindex(bufferIndex);
readAheadQueue.add(buffer);
notifyAll();
}
if (LOGGER.isTraceEnabled()) {
LOGGER.trace("Done q-ing readAhead for file {} offset {} buffer idx {}",
stream.getPath(), requestedOffset, buffer.getBufferindex());
}
}
/**
* {@link SeaweedInputStream} calls this method to read any bytes already available in a buffer (thereby saving a
* remote read). This returns the bytes if the data already exists in the buffer. If there is a buffer that is reading
* the requested offset, then this method blocks until that read completes. If the data is queued in a read-ahead
* but not picked up by a worker thread yet, then it cancels that read-ahead and reports a cache miss. This is because,
* depending on worker thread availability, the read-ahead may take a while - the calling thread can do its own
* read to get the data faster (compared to the read waiting in queue for an indeterminate amount of time).
*
* @param stream the file to read bytes for
* @param position the offset in the file to do a read for
* @param length the length to read
* @param buffer the buffer to read data into. Note that the buffer will be written into from offset 0.
* @return the number of bytes read
*/
int getBlock(final SeaweedInputStream stream, final long position, final int length, final byte[] buffer) {
// not synchronized, so have to be careful with locking
if (LOGGER.isTraceEnabled()) {
LOGGER.trace("getBlock for file {} position {} thread {}",
stream.getPath(), position, Thread.currentThread().getName());
}
waitForProcess(stream, position);
int bytesRead = 0;
synchronized (this) {
bytesRead = getBlockFromCompletedQueue(stream, position, length, buffer);
}
if (bytesRead > 0) {
if (LOGGER.isTraceEnabled()) {
LOGGER.trace("Done read from Cache for {} position {} length {}",
stream.getPath(), position, bytesRead);
}
return bytesRead;
}
// otherwise, just say we got nothing - calling thread can do its own read
return 0;
}
/*
*
* Internal methods
*
*/
private void waitForProcess(final SeaweedInputStream stream, final long position) {
ReadBuffer readBuf;
synchronized (this) {
clearFromReadAheadQueue(stream, position);
readBuf = getFromList(inProgressList, stream, position);
}
if (readBuf != null) { // if in in-progress queue, then block for it
try {
if (LOGGER.isTraceEnabled()) {
LOGGER.trace("got a relevant read buffer for file {} offset {} buffer idx {}",
stream.getPath(), readBuf.getOffset(), readBuf.getBufferindex());
}
readBuf.getLatch().await(); // blocking wait on the caller stream's thread
// Note on correctness: readBuf gets out of inProgressList only in 1 place: after worker thread
// is done processing it (in doneReading). There, the latch is set after removing the buffer from
// inProgressList. So this latch is safe to be outside the synchronized block.
// Putting it in synchronized would result in a deadlock, since this thread would be holding the lock
// while waiting, so no one will be able to change any state. If this becomes more complex in the future,
// then the latch can be removed and replaced with wait/notify whenever inProgressList is touched.
} catch (InterruptedException ex) {
Thread.currentThread().interrupt();
}
if (LOGGER.isTraceEnabled()) {
LOGGER.trace("latch done for file {} buffer idx {} length {}",
stream.getPath(), readBuf.getBufferindex(), readBuf.getLength());
}
}
}
/**
* If any buffer in the completed list can be reclaimed then reclaim it and return the buffer to the free list.
* The objective is to find just one buffer - there is no advantage to evicting more than one.
*
* @return whether the eviction succeeded - i.e., were we able to free up one buffer
*/
private synchronized boolean tryEvict() {
ReadBuffer nodeToEvict = null;
if (completedReadList.size() <= 0) {
return false; // there are no evict-able buffers
}
// first, try buffers where all bytes have been consumed (approximated as first and last bytes consumed)
for (ReadBuffer buf : completedReadList) {
if (buf.isFirstByteConsumed() && buf.isLastByteConsumed()) {
nodeToEvict = buf;
break;
}
}
if (nodeToEvict != null) {
return evict(nodeToEvict);
}
// next, try buffers where any bytes have been consumed (may be a bad idea? have to experiment and see)
for (ReadBuffer buf : completedReadList) {
if (buf.isAnyByteConsumed()) {
nodeToEvict = buf;
break;
}
}
if (nodeToEvict != null) {
return evict(nodeToEvict);
}
// next, try any old nodes that have not been consumed
long earliestBirthday = Long.MAX_VALUE;
for (ReadBuffer buf : completedReadList) {
if (buf.getTimeStamp() < earliestBirthday) {
nodeToEvict = buf;
earliestBirthday = buf.getTimeStamp();
}
}
if ((currentTimeMillis() - earliestBirthday > THRESHOLD_AGE_MILLISECONDS) && (nodeToEvict != null)) {
return evict(nodeToEvict);
}
// nothing can be evicted
return false;
}
private boolean evict(final ReadBuffer buf) {
freeList.push(buf.getBufferindex());
completedReadList.remove(buf);
if (LOGGER.isTraceEnabled()) {
LOGGER.trace("Evicting buffer idx {}; was used for file {} offset {} length {}",
buf.getBufferindex(), buf.getStream().getPath(), buf.getOffset(), buf.getLength());
}
return true;
}
private boolean isAlreadyQueued(final SeaweedInputStream stream, final long requestedOffset) {
// returns true if any part of the buffer is already queued
return (isInList(readAheadQueue, stream, requestedOffset)
|| isInList(inProgressList, stream, requestedOffset)
|| isInList(completedReadList, stream, requestedOffset));
}
private boolean isInList(final Collection<ReadBuffer> list, final SeaweedInputStream stream, final long requestedOffset) {
return (getFromList(list, stream, requestedOffset) != null);
}
private ReadBuffer getFromList(final Collection<ReadBuffer> list, final SeaweedInputStream stream, final long requestedOffset) {
for (ReadBuffer buffer : list) {
if (buffer.getStream() == stream) {
if (buffer.getStatus() == ReadBufferStatus.AVAILABLE
&& requestedOffset >= buffer.getOffset()
&& requestedOffset < buffer.getOffset() + buffer.getLength()) {
return buffer;
} else if (requestedOffset >= buffer.getOffset()
&& requestedOffset < buffer.getOffset() + buffer.getRequestedLength()) {
return buffer;
}
}
}
return null;
}
private void clearFromReadAheadQueue(final SeaweedInputStream stream, final long requestedOffset) {
ReadBuffer buffer = getFromList(readAheadQueue, stream, requestedOffset);
if (buffer != null) {
readAheadQueue.remove(buffer);
notifyAll(); // lock is held in calling method
freeList.push(buffer.getBufferindex());
}
}
private int getBlockFromCompletedQueue(final SeaweedInputStream stream, final long position, final int length,
final byte[] buffer) {
ReadBuffer buf = getFromList(completedReadList, stream, position);
if (buf == null || position >= buf.getOffset() + buf.getLength()) {
return 0;
}
int cursor = (int) (position - buf.getOffset());
int availableLengthInBuffer = buf.getLength() - cursor;
int lengthToCopy = Math.min(length, availableLengthInBuffer);
System.arraycopy(buf.getBuffer(), cursor, buffer, 0, lengthToCopy);
if (cursor == 0) {
buf.setFirstByteConsumed(true);
}
if (cursor + lengthToCopy == buf.getLength()) {
buf.setLastByteConsumed(true);
}
buf.setAnyByteConsumed(true);
return lengthToCopy;
}
/*
*
* ReadBufferWorker-thread-facing methods
*
*/
/**
* ReadBufferWorker thread calls this to get the next buffer that it should work on.
*
* @return {@link ReadBuffer}
* @throws InterruptedException if thread is interrupted
*/
ReadBuffer getNextBlockToRead() throws InterruptedException {
ReadBuffer buffer = null;
synchronized (this) {
//buffer = readAheadQueue.take(); // blocking method
while (readAheadQueue.size() == 0) {
wait();
}
buffer = readAheadQueue.remove();
notifyAll();
if (buffer == null) {
return null; // should never happen
}
buffer.setStatus(ReadBufferStatus.READING_IN_PROGRESS);
inProgressList.add(buffer);
}
if (LOGGER.isTraceEnabled()) {
LOGGER.trace("ReadBufferWorker picked file {} for offset {}",
buffer.getStream().getPath(), buffer.getOffset());
}
return buffer;
}
/**
* ReadBufferWorker thread calls this method to post completion.
*
* @param buffer the buffer whose read was completed
* @param result the {@link ReadBufferStatus} after the read operation in the worker thread
* @param bytesActuallyRead the number of bytes that the worker thread was actually able to read
*/
void doneReading(final ReadBuffer buffer, final ReadBufferStatus result, final int bytesActuallyRead) {
if (LOGGER.isTraceEnabled()) {
LOGGER.trace("ReadBufferWorker completed file {} for offset {} bytes {}",
buffer.getStream().getPath(), buffer.getOffset(), bytesActuallyRead);
}
synchronized (this) {
inProgressList.remove(buffer);
if (result == ReadBufferStatus.AVAILABLE && bytesActuallyRead > 0) {
buffer.setStatus(ReadBufferStatus.AVAILABLE);
buffer.setTimeStamp(currentTimeMillis());
buffer.setLength(bytesActuallyRead);
completedReadList.add(buffer);
} else {
freeList.push(buffer.getBufferindex());
// buffer should go out of scope after the end of the calling method in ReadBufferWorker, and become eligible for GC
}
}
//outside the synchronized, since anyone receiving a wake-up from the latch must see safe-published results
buffer.getLatch().countDown(); // wake up waiting threads (if any)
}
/**
* Similar to System.currentTimeMillis, except implemented with System.nanoTime().
* System.currentTimeMillis can go backwards when system clock is changed (e.g., with NTP time synchronization),
* making it unsuitable for measuring time intervals. nanoTime is strictly monotonically increasing per CPU core.
* Note: it is not monotonic across sockets, and even within a CPU, it's only the
* more recent parts which share a clock across all cores.
*
* @return current time in milliseconds
*/
private long currentTimeMillis() {
return System.nanoTime() / 1000 / 1000;
}
}
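One detail worth noting even though this file is removed: the manager timestamped buffers with nanoTime-derived milliseconds rather than System.currentTimeMillis(), so NTP clock adjustments could not make a buffer look younger or older than it is. A minimal illustration of the same pattern (hypothetical helper, not from the diff):

// measure an interval with the monotonic clock; wall-clock time may jump backwards
static long elapsedMillis(Runnable work) {
    long start = System.nanoTime();
    work.run();
    return (System.nanoTime() - start) / 1_000_000;  // nanos -> millis
}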

70
other/java/hdfs3/src/main/java/seaweed/hdfs/ReadBufferWorker.java

@ -1,70 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package seaweed.hdfs;
import java.util.concurrent.CountDownLatch;
class ReadBufferWorker implements Runnable {
protected static final CountDownLatch UNLEASH_WORKERS = new CountDownLatch(1);
private int id;
ReadBufferWorker(final int id) {
this.id = id;
}
/**
* return the ID of ReadBufferWorker.
*/
public int getId() {
return this.id;
}
/**
* Waits until a buffer becomes available in ReadAheadQueue.
* Once a buffer becomes available, reads the file specified in it and then posts results back to buffer manager.
* Rinse and repeat. Forever.
*/
public void run() {
try {
UNLEASH_WORKERS.await();
} catch (InterruptedException ex) {
Thread.currentThread().interrupt();
}
ReadBufferManager bufferManager = ReadBufferManager.getBufferManager();
ReadBuffer buffer;
while (true) {
try {
buffer = bufferManager.getNextBlockToRead(); // blocks, until a buffer is available for this thread
} catch (InterruptedException ex) {
Thread.currentThread().interrupt();
return;
}
if (buffer != null) {
try {
// do the actual read, from the file.
int bytesRead = buffer.getStream().readRemote(buffer.getOffset(), buffer.getBuffer(), 0, buffer.getRequestedLength());
bufferManager.doneReading(buffer, ReadBufferStatus.AVAILABLE, bytesRead); // post result back to ReadBufferManager
} catch (Exception ex) {
bufferManager.doneReading(buffer, ReadBufferStatus.READ_FAILED, 0);
}
}
}
}
}

22
other/java/hdfs2/src/main/java/seaweed/hdfs/ReadBufferStatus.java → other/java/hdfs3/src/main/java/seaweed/hdfs/SeaweedAbstractFileSystem.java

@ -18,12 +18,18 @@
package seaweed.hdfs; package seaweed.hdfs;
/**
* The ReadBufferStatus for the read-ahead buffers
*/
public enum ReadBufferStatus {
NOT_AVAILABLE, // buffers sitting in the readAheadQueue have this status
READING_IN_PROGRESS, // reading is in progress on this buffer. Buffer should be in inProgressList
AVAILABLE, // data is available in buffer. It should be in completedList
READ_FAILED // read completed, but failed.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.DelegateToFileSystem;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
public class SeaweedAbstractFileSystem extends DelegateToFileSystem {
SeaweedAbstractFileSystem(final URI uri, final Configuration conf)
throws IOException, URISyntaxException {
super(uri, new SeaweedFileSystem(), conf, "seaweedfs", false);
}
} }
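SeaweedAbstractFileSystem is the thin FileContext-side adapter: it delegates everything to SeaweedFileSystem under the "seaweedfs" scheme. Hadoop resolves such classes through its standard fs.AbstractFileSystem.<scheme>.impl key, so a deployment would presumably wire the new class in roughly like this (configuration sketch, not part of the diff):

Configuration conf = new Configuration();
conf.set("fs.AbstractFileSystem.seaweedfs.impl", "seaweed.hdfs.SeaweedAbstractFileSystem");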

26
other/java/hdfs3/src/main/java/seaweed/hdfs/SeaweedFileSystem.java

@ -10,6 +10,7 @@ import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.util.Progressable; import org.apache.hadoop.util.Progressable;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import seaweedfs.client.FilerProto;
import java.io.FileNotFoundException; import java.io.FileNotFoundException;
import java.io.IOException; import java.io.IOException;
@ -144,7 +145,7 @@ public class SeaweedFileSystem extends FileSystem {
} }
@Override @Override
public boolean rename(Path src, Path dst) {
public boolean rename(Path src, Path dst) throws IOException {
LOG.debug("rename path: {} => {}", src, dst); LOG.debug("rename path: {} => {}", src, dst);
@ -155,12 +156,13 @@ public class SeaweedFileSystem extends FileSystem {
if (src.equals(dst)) { if (src.equals(dst)) {
return true; return true;
} }
FileStatus dstFileStatus = getFileStatus(dst);
FilerProto.Entry entry = seaweedFileSystemStore.lookupEntry(dst);
String sourceFileName = src.getName();
Path adjustedDst = dst; Path adjustedDst = dst;
if (dstFileStatus != null) {
if (entry != null) {
FileStatus dstFileStatus = getFileStatus(dst);
String sourceFileName = src.getName();
if (!dstFileStatus.isDirectory()) { if (!dstFileStatus.isDirectory()) {
return false; return false;
} }
@ -175,18 +177,20 @@ public class SeaweedFileSystem extends FileSystem {
} }
@Override @Override
public boolean delete(Path path, boolean recursive) {
public boolean delete(Path path, boolean recursive) throws IOException {
LOG.debug("delete path: {} recursive:{}", path, recursive); LOG.debug("delete path: {} recursive:{}", path, recursive);
path = qualify(path); path = qualify(path);
FileStatus fileStatus = getFileStatus(path);
FilerProto.Entry entry = seaweedFileSystemStore.lookupEntry(path);
if (fileStatus == null) {
if (entry == null) {
return true; return true;
} }
FileStatus fileStatus = getFileStatus(path);
return seaweedFileSystemStore.deleteEntries(path, fileStatus.isDirectory(), recursive); return seaweedFileSystemStore.deleteEntries(path, fileStatus.isDirectory(), recursive);
} }
@ -222,9 +226,9 @@ public class SeaweedFileSystem extends FileSystem {
path = qualify(path); path = qualify(path);
FileStatus fileStatus = getFileStatus(path);
FilerProto.Entry entry = seaweedFileSystemStore.lookupEntry(path);
if (fileStatus == null) {
if (entry == null) {
UserGroupInformation currentUser = UserGroupInformation.getCurrentUser(); UserGroupInformation currentUser = UserGroupInformation.getCurrentUser();
return seaweedFileSystemStore.createDirectory(path, currentUser, return seaweedFileSystemStore.createDirectory(path, currentUser,
@ -233,6 +237,8 @@ public class SeaweedFileSystem extends FileSystem {
} }
FileStatus fileStatus = getFileStatus(path);
if (fileStatus.isDirectory()) { if (fileStatus.isDirectory()) {
return true; return true;
} else { } else {
@ -241,7 +247,7 @@ public class SeaweedFileSystem extends FileSystem {
} }
@Override @Override
public FileStatus getFileStatus(Path path) {
public FileStatus getFileStatus(Path path) throws IOException {
LOG.debug("getFileStatus path: {}", path); LOG.debug("getFileStatus path: {}", path);

12
other/java/hdfs3/src/main/java/seaweed/hdfs/SeaweedFileSystemStore.java

@ -61,7 +61,7 @@ public class SeaweedFileSystemStore {
); );
} }
public FileStatus[] listEntries(final Path path) {
public FileStatus[] listEntries(final Path path) throws IOException {
LOG.debug("listEntries path: {}", path); LOG.debug("listEntries path: {}", path);
FileStatus pathStatus = getFileStatus(path); FileStatus pathStatus = getFileStatus(path);
@ -89,11 +89,11 @@ public class SeaweedFileSystemStore {
} }
public FileStatus getFileStatus(final Path path) {
public FileStatus getFileStatus(final Path path) throws IOException {
FilerProto.Entry entry = lookupEntry(path); FilerProto.Entry entry = lookupEntry(path);
if (entry == null) { if (entry == null) {
return null;
throw new FileNotFoundException("File does not exist: " + path);
} }
LOG.debug("doGetFileStatus path:{} entry:{}", path, entry); LOG.debug("doGetFileStatus path:{} entry:{}", path, entry);
@ -136,7 +136,7 @@ public class SeaweedFileSystemStore {
modification_time, access_time, permission, owner, group, null, path); modification_time, access_time, permission, owner, group, null, path);
} }
private FilerProto.Entry lookupEntry(Path path) {
public FilerProto.Entry lookupEntry(Path path) {
return filerClient.lookupEntry(getParentDirectory(path), path.getName()); return filerClient.lookupEntry(getParentDirectory(path), path.getName());
@ -212,7 +212,6 @@ public class SeaweedFileSystemStore {
LOG.debug("openFileForRead path:{} bufferSize:{}", path, bufferSize); LOG.debug("openFileForRead path:{} bufferSize:{}", path, bufferSize);
int readAheadQueueDepth = 2;
FilerProto.Entry entry = lookupEntry(path); FilerProto.Entry entry = lookupEntry(path);
if (entry == null) { if (entry == null) {
@ -223,8 +222,7 @@ public class SeaweedFileSystemStore {
statistics, statistics,
path.toUri().getPath(), path.toUri().getPath(),
entry, entry,
bufferSize,
readAheadQueueDepth);
bufferSize);
} }
public void setOwner(Path path, String owner, String group) { public void setOwner(Path path, String owner, String group) {
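Behind the rename()/delete()/mkdirs() edits above sits one contract change: getFileStatus() now throws FileNotFoundException for a missing path, as the Hadoop FileSystem API expects, instead of returning null. Existence probes therefore switch to the newly public lookupEntry(), which still returns null for an absent entry. A sketch of the resulting idiom (hypothetical helper):

// probe existence without triggering FileNotFoundException
static boolean exists(SeaweedFileSystemStore store, Path path) {
    return store.lookupEntry(path) != null;
}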

183
other/java/hdfs3/src/main/java/seaweed/hdfs/SeaweedInputStream.java

@ -27,33 +27,23 @@ public class SeaweedInputStream extends FSInputStream {
private final List<SeaweedRead.VisibleInterval> visibleIntervalList; private final List<SeaweedRead.VisibleInterval> visibleIntervalList;
private final long contentLength; private final long contentLength;
private final int bufferSize; // default buffer size private final int bufferSize; // default buffer size
private final int readAheadQueueDepth; // initialized in constructor
private final boolean readAheadEnabled; // whether readAhead is enabled
private byte[] buffer = null; // will be initialized on first use
private long position = 0; // cursor of the file
private long fCursor = 0; // cursor of buffer within file - offset of next byte to read from remote server
private long fCursorAfterLastRead = -1;
private int bCursor = 0; // cursor of read within buffer - offset of next byte to be returned from buffer
private int limit = 0; // offset of next byte to be read into buffer from service (i.e., upper marker+1
// of valid bytes in buffer)
private boolean closed = false; private boolean closed = false;
public SeaweedInputStream( public SeaweedInputStream(
final FilerGrpcClient filerGrpcClient,
final Statistics statistics,
final String path,
final FilerProto.Entry entry,
final int bufferSize,
final int readAheadQueueDepth) {
final FilerGrpcClient filerGrpcClient,
final Statistics statistics,
final String path,
final FilerProto.Entry entry,
final int bufferSize) {
this.filerGrpcClient = filerGrpcClient; this.filerGrpcClient = filerGrpcClient;
this.statistics = statistics; this.statistics = statistics;
this.path = path; this.path = path;
this.entry = entry; this.entry = entry;
this.contentLength = SeaweedRead.totalSize(entry.getChunksList()); this.contentLength = SeaweedRead.totalSize(entry.getChunksList());
this.bufferSize = bufferSize; this.bufferSize = bufferSize;
this.readAheadQueueDepth = (readAheadQueueDepth >= 0) ? readAheadQueueDepth : Runtime.getRuntime().availableProcessors();
this.readAheadEnabled = true;
this.visibleIntervalList = SeaweedRead.nonOverlappingVisibleIntervals(entry.getChunksList()); this.visibleIntervalList = SeaweedRead.nonOverlappingVisibleIntervals(entry.getChunksList());
@ -78,122 +68,7 @@ public class SeaweedInputStream extends FSInputStream {
@Override @Override
public synchronized int read(final byte[] b, final int off, final int len) throws IOException { public synchronized int read(final byte[] b, final int off, final int len) throws IOException {
int currentOff = off;
int currentLen = len;
int lastReadBytes;
int totalReadBytes = 0;
do {
lastReadBytes = readOneBlock(b, currentOff, currentLen);
if (lastReadBytes > 0) {
currentOff += lastReadBytes;
currentLen -= lastReadBytes;
totalReadBytes += lastReadBytes;
}
if (currentLen <= 0 || currentLen > b.length - currentOff) {
break;
}
} while (lastReadBytes > 0);
return totalReadBytes > 0 ? totalReadBytes : lastReadBytes;
}
private int readOneBlock(final byte[] b, final int off, final int len) throws IOException {
if (closed) {
throw new IOException(FSExceptionMessages.STREAM_IS_CLOSED);
}
Preconditions.checkNotNull(b);
if (len == 0) {
return 0;
}
if (this.available() == 0) {
return -1;
}
if (off < 0 || len < 0 || len > b.length - off) {
throw new IndexOutOfBoundsException();
}
//If buffer is empty, then fill the buffer.
if (bCursor == limit) {
//If EOF, then return -1
if (fCursor >= contentLength) {
return -1;
}
long bytesRead = 0;
//reset buffer to initial state - i.e., throw away existing data
bCursor = 0;
limit = 0;
if (buffer == null) {
buffer = new byte[bufferSize];
}
// Enable readAhead when reading sequentially
if (-1 == fCursorAfterLastRead || fCursorAfterLastRead == fCursor || b.length >= bufferSize) {
bytesRead = readInternal(fCursor, buffer, 0, bufferSize, false);
} else {
bytesRead = readInternal(fCursor, buffer, 0, b.length, true);
}
if (bytesRead == -1) {
return -1;
}
limit += bytesRead;
fCursor += bytesRead;
fCursorAfterLastRead = fCursor;
}
//If there is anything in the buffer, then return lesser of (requested bytes) and (bytes in buffer)
//(bytes returned may be less than requested)
int bytesRemaining = limit - bCursor;
int bytesToRead = Math.min(len, bytesRemaining);
System.arraycopy(buffer, bCursor, b, off, bytesToRead);
bCursor += bytesToRead;
if (statistics != null) {
statistics.incrementBytesRead(bytesToRead);
}
return bytesToRead;
}
private int readInternal(final long position, final byte[] b, final int offset, final int length,
final boolean bypassReadAhead) throws IOException {
if (readAheadEnabled && !bypassReadAhead) {
// try reading from read-ahead
if (offset != 0) {
throw new IllegalArgumentException("readahead buffers cannot have non-zero buffer offsets");
}
int receivedBytes;
// queue read-aheads
int numReadAheads = this.readAheadQueueDepth;
long nextSize;
long nextOffset = position;
while (numReadAheads > 0 && nextOffset < contentLength) {
nextSize = Math.min((long) bufferSize, contentLength - nextOffset);
ReadBufferManager.getBufferManager().queueReadAhead(this, nextOffset, (int) nextSize);
nextOffset = nextOffset + nextSize;
numReadAheads--;
}
// try reading from buffers first
receivedBytes = ReadBufferManager.getBufferManager().getBlock(this, position, length, b);
if (receivedBytes > 0) {
return receivedBytes;
}
// got nothing from read-ahead, do our own read now
receivedBytes = readRemote(position, b, offset, length);
return receivedBytes;
} else {
return readRemote(position, b, offset, length);
}
}
int readRemote(long position, byte[] b, int offset, int length) throws IOException {
if (position < 0) { if (position < 0) {
throw new IllegalArgumentException("attempting to read from negative offset"); throw new IllegalArgumentException("attempting to read from negative offset");
} }
@ -203,21 +78,30 @@ public class SeaweedInputStream extends FSInputStream {
if (b == null) { if (b == null) {
throw new IllegalArgumentException("null byte array passed in to read() method"); throw new IllegalArgumentException("null byte array passed in to read() method");
} }
if (offset >= b.length) {
if (off >= b.length) {
throw new IllegalArgumentException("offset greater than length of array"); throw new IllegalArgumentException("offset greater than length of array");
} }
if (length < 0) {
if (len < 0) {
throw new IllegalArgumentException("requested read length is less than zero"); throw new IllegalArgumentException("requested read length is less than zero");
} }
if (length > (b.length - offset)) {
if (len > (b.length - off)) {
throw new IllegalArgumentException("requested read length is more than will fit after requested offset in buffer"); throw new IllegalArgumentException("requested read length is more than will fit after requested offset in buffer");
} }
long bytesRead = SeaweedRead.read(filerGrpcClient, visibleIntervalList, position, b, offset, length);
long bytesRead = SeaweedRead.read(this.filerGrpcClient, this.visibleIntervalList, this.position, b, off, len);
if (bytesRead > Integer.MAX_VALUE) { if (bytesRead > Integer.MAX_VALUE) {
throw new IOException("Unexpected Content-Length"); throw new IOException("Unexpected Content-Length");
} }
return (int) bytesRead;
if (bytesRead > 0) {
this.position += bytesRead;
if (statistics != null) {
statistics.incrementBytesRead(bytesRead);
}
}
return (int)bytesRead;
} }
/** /**
@ -239,17 +123,8 @@ public class SeaweedInputStream extends FSInputStream {
throw new EOFException(FSExceptionMessages.CANNOT_SEEK_PAST_EOF); throw new EOFException(FSExceptionMessages.CANNOT_SEEK_PAST_EOF);
} }
if (n >= fCursor - limit && n <= fCursor) { // within buffer
bCursor = (int) (n - (fCursor - limit));
return;
}
// next read will read from here
fCursor = n;
this.position = n;
//invalidate buffer
limit = 0;
bCursor = 0;
} }
@Override @Override
@ -257,20 +132,19 @@ public class SeaweedInputStream extends FSInputStream {
if (closed) { if (closed) {
throw new IOException(FSExceptionMessages.STREAM_IS_CLOSED); throw new IOException(FSExceptionMessages.STREAM_IS_CLOSED);
} }
long currentPos = getPos();
if (currentPos == contentLength) {
if (this.position == contentLength) {
if (n > 0) { if (n > 0) {
throw new EOFException(FSExceptionMessages.CANNOT_SEEK_PAST_EOF); throw new EOFException(FSExceptionMessages.CANNOT_SEEK_PAST_EOF);
} }
} }
long newPos = currentPos + n;
long newPos = this.position + n;
if (newPos < 0) { if (newPos < 0) {
newPos = 0; newPos = 0;
n = newPos - currentPos;
n = newPos - this.position;
} }
if (newPos > contentLength) { if (newPos > contentLength) {
newPos = contentLength; newPos = contentLength;
n = newPos - currentPos;
n = newPos - this.position;
} }
seek(newPos); seek(newPos);
return n; return n;
@ -289,11 +163,11 @@ public class SeaweedInputStream extends FSInputStream {
public synchronized int available() throws IOException { public synchronized int available() throws IOException {
if (closed) { if (closed) {
throw new IOException( throw new IOException(
FSExceptionMessages.STREAM_IS_CLOSED);
FSExceptionMessages.STREAM_IS_CLOSED);
} }
final long remaining = this.contentLength - this.getPos(); final long remaining = this.contentLength - this.getPos();
return remaining <= Integer.MAX_VALUE return remaining <= Integer.MAX_VALUE
? (int) remaining : Integer.MAX_VALUE;
? (int) remaining : Integer.MAX_VALUE;
} }
/** /**
@ -321,7 +195,7 @@ public class SeaweedInputStream extends FSInputStream {
if (closed) { if (closed) {
throw new IOException(FSExceptionMessages.STREAM_IS_CLOSED); throw new IOException(FSExceptionMessages.STREAM_IS_CLOSED);
} }
return fCursor - limit + bCursor;
return position;
} }
/** /**
@ -338,7 +212,6 @@ public class SeaweedInputStream extends FSInputStream {
@Override @Override
public synchronized void close() throws IOException { public synchronized void close() throws IOException {
closed = true; closed = true;
buffer = null; // de-reference the buffer so it can be GC'ed sooner
} }
/** /**

98
other/java/hdfs3/src/main/java/seaweed/hdfs/SeaweedOutputStream.java

@ -9,6 +9,7 @@ import org.apache.hadoop.fs.StreamCapabilities;
import org.apache.hadoop.fs.Syncable; import org.apache.hadoop.fs.Syncable;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import seaweedfs.client.ByteBufferPool;
import seaweedfs.client.FilerGrpcClient; import seaweedfs.client.FilerGrpcClient;
import seaweedfs.client.FilerProto; import seaweedfs.client.FilerProto;
import seaweedfs.client.SeaweedWrite; import seaweedfs.client.SeaweedWrite;
@ -16,14 +17,10 @@ import seaweedfs.client.SeaweedWrite;
import java.io.IOException; import java.io.IOException;
import java.io.InterruptedIOException; import java.io.InterruptedIOException;
import java.io.OutputStream; import java.io.OutputStream;
import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.Locale; import java.util.Locale;
import java.util.concurrent.Callable;
import java.util.concurrent.ConcurrentLinkedDeque;
import java.util.concurrent.ExecutorCompletionService;
import java.util.concurrent.Future;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.*;
import static seaweed.hdfs.SeaweedFileSystemStore.getParentDirectory; import static seaweed.hdfs.SeaweedFileSystemStore.getParentDirectory;
@ -37,16 +34,16 @@ public class SeaweedOutputStream extends OutputStream implements Syncable, Strea
private final int maxConcurrentRequestCount; private final int maxConcurrentRequestCount;
private final ThreadPoolExecutor threadExecutor; private final ThreadPoolExecutor threadExecutor;
private final ExecutorCompletionService<Void> completionService; private final ExecutorCompletionService<Void> completionService;
private FilerProto.Entry.Builder entry;
private final FilerProto.Entry.Builder entry;
private final boolean supportFlush = false; // true;
private final ConcurrentLinkedDeque<WriteOperation> writeOperations;
private long position; private long position;
private boolean closed; private boolean closed;
private boolean supportFlush = true;
private volatile IOException lastError; private volatile IOException lastError;
private long lastFlushOffset; private long lastFlushOffset;
private long lastTotalAppendOffset = 0; private long lastTotalAppendOffset = 0;
private byte[] buffer;
private int bufferIndex;
private ConcurrentLinkedDeque<WriteOperation> writeOperations;
private ByteBuffer buffer;
private long outputIndex;
private String replication = "000"; private String replication = "000";
 public SeaweedOutputStream(FilerGrpcClient filerGrpcClient, final Path path, FilerProto.Entry.Builder entry,
@@ -59,18 +56,18 @@ public class SeaweedOutputStream extends OutputStream implements Syncable, Strea
         this.lastError = null;
         this.lastFlushOffset = 0;
         this.bufferSize = bufferSize;
-        this.buffer = new byte[bufferSize];
-        this.bufferIndex = 0;
+        this.buffer = ByteBufferPool.request(bufferSize);
+        this.outputIndex = 0;
         this.writeOperations = new ConcurrentLinkedDeque<>();
         this.maxConcurrentRequestCount = 4 * Runtime.getRuntime().availableProcessors();
         this.threadExecutor
-            = new ThreadPoolExecutor(maxConcurrentRequestCount,
-            maxConcurrentRequestCount,
-            10L,
-            TimeUnit.SECONDS,
-            new LinkedBlockingQueue<Runnable>());
+                = new ThreadPoolExecutor(maxConcurrentRequestCount,
+                maxConcurrentRequestCount,
+                10L,
+                TimeUnit.SECONDS,
+                new LinkedBlockingQueue<Runnable>());
         this.completionService = new ExecutorCompletionService<>(this.threadExecutor);
         this.entry = entry;
@@ -93,7 +90,7 @@ public class SeaweedOutputStream extends OutputStream implements Syncable, Strea
     @Override
     public synchronized void write(final byte[] data, final int off, final int length)
-        throws IOException {
+            throws IOException {
         maybeThrowLastError();
         Preconditions.checkArgument(data != null, "null data");
@@ -102,25 +99,29 @@ public class SeaweedOutputStream extends OutputStream implements Syncable, Strea
             throw new IndexOutOfBoundsException();
         }
+        // System.out.println(path + " write [" + (outputIndex + off) + "," + ((outputIndex + off) + length) + ")");
         int currentOffset = off;
-        int writableBytes = bufferSize - bufferIndex;
+        int writableBytes = bufferSize - buffer.position();
         int numberOfBytesToWrite = length;
         while (numberOfBytesToWrite > 0) {
-            if (writableBytes <= numberOfBytesToWrite) {
-                System.arraycopy(data, currentOffset, buffer, bufferIndex, writableBytes);
-                bufferIndex += writableBytes;
-                writeCurrentBufferToService();
-                currentOffset += writableBytes;
-                numberOfBytesToWrite = numberOfBytesToWrite - writableBytes;
-            } else {
-                System.arraycopy(data, currentOffset, buffer, bufferIndex, numberOfBytesToWrite);
-                bufferIndex += numberOfBytesToWrite;
-                numberOfBytesToWrite = 0;
+            if (numberOfBytesToWrite < writableBytes) {
+                buffer.put(data, currentOffset, numberOfBytesToWrite);
+                outputIndex += numberOfBytesToWrite;
+                break;
             }
-            writableBytes = bufferSize - bufferIndex;
+            // System.out.println(path + " [" + (outputIndex + currentOffset) + "," + ((outputIndex + currentOffset) + writableBytes) + ")");
+            buffer.put(data, currentOffset, writableBytes);
+            outputIndex += writableBytes;
+            currentOffset += writableBytes;
+            writeCurrentBufferToService();
+            numberOfBytesToWrite = numberOfBytesToWrite - writableBytes;
+            writableBytes = bufferSize - buffer.position();
         }
     }

     /**
@@ -199,8 +200,9 @@ public class SeaweedOutputStream extends OutputStream implements Syncable, Strea
             threadExecutor.shutdown();
         } finally {
             lastError = new IOException(FSExceptionMessages.STREAM_IS_CLOSED);
+            ByteBufferPool.release(buffer);
             buffer = null;
-            bufferIndex = 0;
+            outputIndex = 0;
             closed = true;
             writeOperations.clear();
             if (!threadExecutor.isShutdown()) {
@@ -210,35 +212,17 @@ public class SeaweedOutputStream extends OutputStream implements Syncable, Strea
     }

     private synchronized void writeCurrentBufferToService() throws IOException {
-        if (bufferIndex == 0) {
+        if (buffer.position() == 0) {
             return;
         }
-        final byte[] bytes = buffer;
-        final int bytesLength = bufferIndex;
-        buffer = new byte[bufferSize];
-        bufferIndex = 0;
-        final long offset = position;
+        buffer.flip();
+        int bytesLength = buffer.limit() - buffer.position();
+        SeaweedWrite.writeData(entry, replication, filerGrpcClient, position, buffer.array(), buffer.position(), buffer.limit());
+        // System.out.println(path + " saved [" + (position) + "," + ((position) + bytesLength) + ")");
         position += bytesLength;
+        buffer.clear();
-        if (threadExecutor.getQueue().size() >= maxConcurrentRequestCount * 2) {
-            waitForTaskToComplete();
-        }
-        final Future<Void> job = completionService.submit(new Callable<Void>() {
-            @Override
-            public Void call() throws Exception {
-                // originally: client.append(path, offset, bytes, 0, bytesLength);
-                SeaweedWrite.writeData(entry, replication, filerGrpcClient, offset, bytes, 0, bytesLength);
-                return null;
-            }
-        });
-        writeOperations.add(new WriteOperation(job, offset, bytesLength));
-        // Try to shrink the queue
-        shrinkWriteOperationQueue();
     }

     private void waitForTaskToComplete() throws IOException {

0
test/sample.idx → test/data/sample.idx

111
test/s3/basic/basic_test.go

@@ -0,0 +1,111 @@
+package basic
+
+import (
+    "fmt"
+    "os"
+    "strings"
+    "testing"
+
+    "github.com/aws/aws-sdk-go/aws"
+    "github.com/aws/aws-sdk-go/aws/awserr"
+    "github.com/aws/aws-sdk-go/aws/session"
+    "github.com/aws/aws-sdk-go/service/s3"
+)
+
+var (
+    svc *s3.S3
+)
+
+func init() {
+    // Initialize a session in us-west-2 that the SDK will use to load
+    // credentials from the shared credentials file ~/.aws/credentials.
+    sess, err := session.NewSession(&aws.Config{
+        Region:     aws.String("us-west-2"),
+        Endpoint:   aws.String("localhost:8333"),
+        DisableSSL: aws.Bool(true),
+    })
+    if err != nil {
+        exitErrorf("create session, %v", err)
+    }
+
+    // Create S3 service client
+    svc = s3.New(sess)
+}
+
+func TestCreateBucket(t *testing.T) {
+
+    input := &s3.CreateBucketInput{
+        Bucket: aws.String("theBucket"),
+    }
+
+    result, err := svc.CreateBucket(input)
+    if err != nil {
+        if aerr, ok := err.(awserr.Error); ok {
+            switch aerr.Code() {
+            case s3.ErrCodeBucketAlreadyExists:
+                fmt.Println(s3.ErrCodeBucketAlreadyExists, aerr.Error())
+            case s3.ErrCodeBucketAlreadyOwnedByYou:
+                fmt.Println(s3.ErrCodeBucketAlreadyOwnedByYou, aerr.Error())
+            default:
+                fmt.Println(aerr.Error())
+            }
+        } else {
+            // Print the error, cast err to awserr.Error to get the Code and
+            // Message from an error.
+            fmt.Println(err.Error())
+        }
+        return
+    }
+
+    fmt.Println(result)
+}
+
+func TestPutObject(t *testing.T) {
+
+    input := &s3.PutObjectInput{
+        ACL:    aws.String("authenticated-read"),
+        Body:   aws.ReadSeekCloser(strings.NewReader("filetoupload")),
+        Bucket: aws.String("theBucket"),
+        Key:    aws.String("exampleobject"),
+    }
+
+    result, err := svc.PutObject(input)
+    if err != nil {
+        if aerr, ok := err.(awserr.Error); ok {
+            switch aerr.Code() {
+            default:
+                fmt.Println(aerr.Error())
+            }
+        } else {
+            // Print the error, cast err to awserr.Error to get the Code and
+            // Message from an error.
+            fmt.Println(err.Error())
+        }
+        return
+    }
+
+    fmt.Println(result)
+}
+
+func TestListBuckets(t *testing.T) {
+
+    result, err := svc.ListBuckets(nil)
+    if err != nil {
+        exitErrorf("Unable to list buckets, %v", err)
+    }
+
+    fmt.Println("Buckets:")
+    for _, b := range result.Buckets {
+        fmt.Printf("* %s created on %s\n",
+            aws.StringValue(b.Name), aws.TimeValue(b.CreationDate))
+    }
+}
+
+func exitErrorf(msg string, args ...interface{}) {
+    fmt.Fprintf(os.Stderr, msg+"\n", args...)
+    os.Exit(1)
+}

4
weed/command/backup.go

@@ -112,7 +112,7 @@ func runBackup(cmd *Command, args []string) bool {
             return true
         }
     }
-    v, err := storage.NewVolume(*s.dir, *s.collection, vid, storage.NeedleMapInMemory, replication, ttl, 0, 0)
+    v, err := storage.NewVolume(util.ResolvePath(*s.dir), *s.collection, vid, storage.NeedleMapInMemory, replication, ttl, 0, 0)
     if err != nil {
         fmt.Printf("Error creating or reading from volume %d: %v\n", vid, err)
         return true
@@ -137,7 +137,7 @@ func runBackup(cmd *Command, args []string) bool {
         // remove the old data
         v.Destroy()
         // recreate an empty volume
-        v, err = storage.NewVolume(*s.dir, *s.collection, vid, storage.NeedleMapInMemory, replication, ttl, 0, 0)
+        v, err = storage.NewVolume(util.ResolvePath(*s.dir), *s.collection, vid, storage.NeedleMapInMemory, replication, ttl, 0, 0)
         if err != nil {
             fmt.Printf("Error creating or reading from volume %d: %v\n", vid, err)
             return true

3
weed/command/compact.go

@@ -4,6 +4,7 @@ import (
     "github.com/chrislusf/seaweedfs/weed/glog"
     "github.com/chrislusf/seaweedfs/weed/storage"
     "github.com/chrislusf/seaweedfs/weed/storage/needle"
+    "github.com/chrislusf/seaweedfs/weed/util"
 )

 func init() {
@@ -40,7 +41,7 @@ func runCompact(cmd *Command, args []string) bool {
     preallocate := *compactVolumePreallocate * (1 << 20)

     vid := needle.VolumeId(*compactVolumeId)
-    v, err := storage.NewVolume(*compactVolumePath, *compactVolumeCollection, vid,
+    v, err := storage.NewVolume(util.ResolvePath(*compactVolumePath), *compactVolumeCollection, vid,
         storage.NeedleMapInMemory, nil, nil, preallocate, 0)
     if err != nil {
         glog.Fatalf("Load Volume [ERROR] %s\n", err)

2
weed/command/download.go

@@ -43,7 +43,7 @@ var cmdDownload = &Command{

 func runDownload(cmd *Command, args []string) bool {
     for _, fid := range args {
-        if e := downloadToFile(*d.server, fid, *d.dir); e != nil {
+        if e := downloadToFile(*d.server, fid, util.ResolvePath(*d.dir)); e != nil {
             fmt.Println("Download Error: ", fid, e)
         }
     }

4
weed/command/export.go

@@ -198,7 +198,7 @@ func runExport(cmd *Command, args []string) bool {
     needleMap := needle_map.NewMemDb()
     defer needleMap.Close()

-    if err := needleMap.LoadFromIdx(path.Join(*export.dir, fileName+".idx")); err != nil {
+    if err := needleMap.LoadFromIdx(path.Join(util.ResolvePath(*export.dir), fileName+".idx")); err != nil {
         glog.Fatalf("cannot load needle map from %s.idx: %s", fileName, err)
     }
@@ -211,7 +211,7 @@ func runExport(cmd *Command, args []string) bool {
         fmt.Printf("key\tname\tsize\tgzip\tmime\tmodified\tttl\tdeleted\n")
     }

-    err = storage.ScanVolumeFile(*export.dir, *export.collection, vid, storage.NeedleMapInMemory, volumeFileScanner)
+    err = storage.ScanVolumeFile(util.ResolvePath(*export.dir), *export.collection, vid, storage.NeedleMapInMemory, volumeFileScanner)
     if err != nil && err != io.EOF {
         glog.Fatalf("Export Volume File [ERROR] %s\n", err)
     }

2
weed/command/filer.go

@@ -100,7 +100,7 @@ func (fo *FilerOptions) startFiler() {

     defaultLevelDbDirectory := "./filerldb2"
     if fo.defaultLevelDbDirectory != nil {
-        defaultLevelDbDirectory = *fo.defaultLevelDbDirectory + "/filerldb2"
+        defaultLevelDbDirectory = util.ResolvePath(*fo.defaultLevelDbDirectory + "/filerldb2")
     }

     var peers []string

5
weed/command/fix.go

@@ -11,6 +11,7 @@ import (
     "github.com/chrislusf/seaweedfs/weed/storage/needle_map"
     "github.com/chrislusf/seaweedfs/weed/storage/super_block"
     "github.com/chrislusf/seaweedfs/weed/storage/types"
+    "github.com/chrislusf/seaweedfs/weed/util"
 )

 func init() {
@@ -67,7 +68,7 @@ func runFix(cmd *Command, args []string) bool {
     if *fixVolumeCollection != "" {
         baseFileName = *fixVolumeCollection + "_" + baseFileName
     }
-    indexFileName := path.Join(*fixVolumePath, baseFileName+".idx")
+    indexFileName := path.Join(util.ResolvePath(*fixVolumePath), baseFileName+".idx")

     nm := needle_map.NewMemDb()
     defer nm.Close()
@@ -77,7 +78,7 @@ func runFix(cmd *Command, args []string) bool {
         nm: nm,
     }

-    if err := storage.ScanVolumeFile(*fixVolumePath, *fixVolumeCollection, vid, storage.NeedleMapInMemory, scanner); err != nil {
+    if err := storage.ScanVolumeFile(util.ResolvePath(*fixVolumePath), *fixVolumeCollection, vid, storage.NeedleMapInMemory, scanner); err != nil {
         glog.Fatalf("scan .dat File: %v", err)
         os.Remove(indexFileName)
     }

31
weed/command/master.go

@@ -8,10 +8,11 @@ import (
     "strings"

     "github.com/chrislusf/raft/protobuf"
-    "github.com/chrislusf/seaweedfs/weed/util/grace"
     "github.com/gorilla/mux"
     "google.golang.org/grpc/reflection"

+    "github.com/chrislusf/seaweedfs/weed/util/grace"
+
     "github.com/chrislusf/seaweedfs/weed/glog"
     "github.com/chrislusf/seaweedfs/weed/pb"
     "github.com/chrislusf/seaweedfs/weed/pb/master_pb"
@@ -26,13 +27,13 @@ var (
 )

 type MasterOptions struct {
-    port              *int
-    ip                *string
-    ipBind            *string
-    metaFolder        *string
-    peers             *string
-    volumeSizeLimitMB *uint
-    volumePreallocate *bool
+    port               *int
+    ip                 *string
+    ipBind             *string
+    metaFolder         *string
+    peers              *string
+    volumeSizeLimitMB  *uint
+    volumePreallocate  *bool
     // pulseSeconds *int
     defaultReplication *string
     garbageThreshold   *float64
@@ -85,7 +86,7 @@ func runMaster(cmd *Command, args []string) bool {
     runtime.GOMAXPROCS(runtime.NumCPU())
     grace.SetupProfiling(*masterCpuProfile, *masterMemProfile)

-    if err := util.TestFolderWritable(*m.metaFolder); err != nil {
+    if err := util.TestFolderWritable(util.ResolvePath(*m.metaFolder)); err != nil {
         glog.Fatalf("Check Meta Folder (-mdir) Writable %s : %s", *m.metaFolder, err)
     }
@@ -118,7 +119,7 @@ func startMaster(masterOption MasterOptions, masterWhiteList []string) {
     }
     // start raftServer
     raftServer := weed_server.NewRaftServer(security.LoadClientTLS(util.GetViper(), "grpc.master"),
-        peers, myMasterAddress, *masterOption.metaFolder, ms.Topo, 5)
+        peers, myMasterAddress, util.ResolvePath(*masterOption.metaFolder), ms.Topo, 5)
     if raftServer == nil {
         glog.Fatalf("please verify %s is writable, see https://github.com/chrislusf/seaweedfs/issues/717", *masterOption.metaFolder)
     }
@@ -173,11 +174,11 @@ func checkPeers(masterIp string, masterPort int, peers string) (masterAddress st

 func (m *MasterOptions) toMasterOption(whiteList []string) *weed_server.MasterOption {
     return &weed_server.MasterOption{
-        Host:              *m.ip,
-        Port:              *m.port,
-        MetaFolder:        *m.metaFolder,
-        VolumeSizeLimitMB: *m.volumeSizeLimitMB,
-        VolumePreallocate: *m.volumePreallocate,
+        Host:               *m.ip,
+        Port:               *m.port,
+        MetaFolder:         *m.metaFolder,
+        VolumeSizeLimitMB:  *m.volumeSizeLimitMB,
+        VolumePreallocate:  *m.volumePreallocate,
         // PulseSeconds: *m.pulseSeconds,
         DefaultReplicaPlacement: *m.defaultReplication,
         GarbageThreshold:        *m.garbageThreshold,

10
weed/command/mount_std.go

@@ -13,6 +13,9 @@ import (
     "strings"
     "time"

+    "github.com/seaweedfs/fuse"
+    "github.com/seaweedfs/fuse/fs"
+
     "github.com/chrislusf/seaweedfs/weed/filesys"
     "github.com/chrislusf/seaweedfs/weed/glog"
     "github.com/chrislusf/seaweedfs/weed/pb"
@@ -20,8 +23,6 @@ import (
     "github.com/chrislusf/seaweedfs/weed/security"
     "github.com/chrislusf/seaweedfs/weed/util"
     "github.com/chrislusf/seaweedfs/weed/util/grace"
-    "github.com/seaweedfs/fuse"
-    "github.com/seaweedfs/fuse/fs"
 )

 func runMount(cmd *Command, args []string) bool {
@@ -68,7 +69,7 @@ func RunMount(option *MountOptions, umask os.FileMode) bool {
     }

     filerMountRootPath := *option.filerMountRootPath
-    dir := *option.dir
+    dir := util.ResolvePath(*option.dir)
     chunkSizeLimitMB := *mountOptions.chunkSizeLimitMB

     util.LoadConfiguration("security", false)
@@ -97,6 +98,9 @@ func RunMount(option *MountOptions, umask os.FileMode) bool {
         mountMode = os.ModeDir | fileInfo.Mode()
         uid, gid = util.GetFileUidGid(fileInfo)
         fmt.Printf("mount point owner uid=%d gid=%d mode=%s\n", uid, gid, fileInfo.Mode())
+    } else {
+        fmt.Printf("can not stat %s\n", dir)
+        return false
     }

     if uid == 0 {
if uid == 0 { if uid == 0 {

2
weed/command/server.go

@@ -176,7 +176,7 @@ func runServer(cmd *Command, args []string) bool {
     if *masterOptions.metaFolder == "" {
         *masterOptions.metaFolder = folders[0]
     }
-    if err := util.TestFolderWritable(*masterOptions.metaFolder); err != nil {
+    if err := util.TestFolderWritable(util.ResolvePath(*masterOptions.metaFolder)); err != nil {
         glog.Fatalf("Check Meta Folder (-mdir=\"%s\") Writable: %s", *masterOptions.metaFolder, err)
     }
     filerOptions.defaultLevelDbDirectory = masterOptions.metaFolder

2
weed/command/upload.go

@@ -69,7 +69,7 @@ func runUpload(cmd *Command, args []string) bool {
         if *upload.dir == "" {
             return false
         }
-        filepath.Walk(*upload.dir, func(path string, info os.FileInfo, err error) error {
+        filepath.Walk(util.ResolvePath(*upload.dir), func(path string, info os.FileInfo, err error) error {
             if err == nil {
                 if !info.IsDir() {
                     if *upload.include != "" {

2
weed/command/volume.go

@@ -117,7 +117,7 @@ func (v VolumeServerOptions) startVolumeServer(volumeFolders, maxVolumeCounts, v
     // Set multiple folders and each folder's max volume count limit'
     v.folders = strings.Split(volumeFolders, ",")
     for _, folder := range v.folders {
-        if err := util.TestFolderWritable(folder); err != nil {
+        if err := util.TestFolderWritable(util.ResolvePath(folder)); err != nil {
             glog.Fatalf("Check Data Folder(-dir) Writable %s : %s", folder, err)
         }
     }
} }

2
weed/command/webdav.go

@@ -110,7 +110,7 @@ func (wo *WebDavOption) startWebDav() bool {
         Uid:         uid,
         Gid:         gid,
         Cipher:      cipher,
-        CacheDir:    *wo.cacheDir,
+        CacheDir:    util.ResolvePath(*wo.cacheDir),
         CacheSizeMB: *wo.cacheSizeMB,
     })
     if webdavServer_err != nil {

43
weed/filer2/filer.go

@@ -26,7 +26,7 @@ var (
 )

 type Filer struct {
-    store               *FilerStoreWrapper
+    Store               *FilerStoreWrapper
     directoryCache      *ccache.Cache
     MasterClient        *wdclient.MasterClient
     fileIdDeletionQueue *util.UnboundedQueue
@@ -38,9 +38,11 @@ type Filer struct {
     LocalMetaLogBuffer *log_buffer.LogBuffer
     metaLogCollection  string
     metaLogReplication string
+    MetaAggregator     *MetaAggregator
 }

-func NewFiler(masters []string, grpcDialOption grpc.DialOption, filerHost string, filerGrpcPort uint32, collection string, replication string, notifyFn func()) *Filer {
+func NewFiler(masters []string, grpcDialOption grpc.DialOption,
+    filerHost string, filerGrpcPort uint32, collection string, replication string, notifyFn func()) *Filer {
     f := &Filer{
         directoryCache: ccache.New(ccache.Configure().MaxSize(1000).ItemsToPrune(100)),
         MasterClient:   wdclient.NewMasterClient(grpcDialOption, "filer", filerHost, filerGrpcPort, masters),
@@ -56,12 +58,23 @@ func NewFiler(masters []string, grpcDialOption grpc.DialOption, filerHost string
     return f
 }

+func (f *Filer) AggregateFromPeers(self string, filers []string) {
+
+    // set peers
+    if len(filers) == 0 {
+        filers = append(filers, self)
+    }
+    f.MetaAggregator = NewMetaAggregator(filers, f.GrpcDialOption)
+    f.MetaAggregator.StartLoopSubscribe(f, self)
+
+}
+
 func (f *Filer) SetStore(store FilerStore) {
-    f.store = NewFilerStoreWrapper(store)
+    f.Store = NewFilerStoreWrapper(store)
 }

 func (f *Filer) GetStore() (store FilerStore) {
-    return f.store
+    return f.Store
 }

 func (f *Filer) DisableDirectoryCache() {
@@ -77,15 +90,15 @@ func (fs *Filer) KeepConnectedToMaster() {
 }

 func (f *Filer) BeginTransaction(ctx context.Context) (context.Context, error) {
-    return f.store.BeginTransaction(ctx)
+    return f.Store.BeginTransaction(ctx)
 }

 func (f *Filer) CommitTransaction(ctx context.Context) error {
-    return f.store.CommitTransaction(ctx)
+    return f.Store.CommitTransaction(ctx)
 }

 func (f *Filer) RollbackTransaction(ctx context.Context) error {
-    return f.store.RollbackTransaction(ctx)
+    return f.Store.RollbackTransaction(ctx)
 }

 func (f *Filer) CreateEntry(ctx context.Context, entry *Entry, o_excl bool, isFromOtherCluster bool) error {
@@ -137,7 +150,7 @@ func (f *Filer) CreateEntry(ctx context.Context, entry *Entry, o_excl bool, isFr
                 }

                 glog.V(2).Infof("create directory: %s %v", dirPath, dirEntry.Mode)
-                mkdirErr := f.store.InsertEntry(ctx, dirEntry)
+                mkdirErr := f.Store.InsertEntry(ctx, dirEntry)
                 if mkdirErr != nil {
                     if _, err := f.FindEntry(ctx, util.FullPath(dirPath)); err == filer_pb.ErrNotFound {
                         glog.V(3).Infof("mkdir %s: %v", dirPath, mkdirErr)
@@ -180,7 +193,7 @@ func (f *Filer) CreateEntry(ctx context.Context, entry *Entry, o_excl bool, isFr
     glog.V(4).Infof("CreateEntry %s: old entry: %v exclusive:%v", entry.FullPath, oldEntry, o_excl)

     if oldEntry == nil {
-        if err := f.store.InsertEntry(ctx, entry); err != nil {
+        if err := f.Store.InsertEntry(ctx, entry); err != nil {
             glog.Errorf("insert entry %s: %v", entry.FullPath, err)
             return fmt.Errorf("insert entry %s: %v", entry.FullPath, err)
         }
@@ -216,7 +229,7 @@ func (f *Filer) UpdateEntry(ctx context.Context, oldEntry, entry *Entry) (err er
             return fmt.Errorf("existing %s is a file", entry.FullPath)
         }
     }
-    return f.store.UpdateEntry(ctx, entry)
+    return f.Store.UpdateEntry(ctx, entry)
 }

 func (f *Filer) FindEntry(ctx context.Context, p util.FullPath) (entry *Entry, err error) {
@@ -235,10 +248,10 @@ func (f *Filer) FindEntry(ctx context.Context, p util.FullPath) (entry *Entry, e
             },
         }, nil
     }
-    entry, err = f.store.FindEntry(ctx, p)
+    entry, err = f.Store.FindEntry(ctx, p)
     if entry != nil && entry.TtlSec > 0 {
         if entry.Crtime.Add(time.Duration(entry.TtlSec) * time.Second).Before(time.Now()) {
-            f.store.DeleteEntry(ctx, p.Child(entry.Name()))
+            f.Store.DeleteEntry(ctx, p.Child(entry.Name()))
             return nil, filer_pb.ErrNotFound
         }
     }
@@ -264,7 +277,7 @@ func (f *Filer) ListDirectoryEntries(ctx context.Context, p util.FullPath, start
 }

 func (f *Filer) doListDirectoryEntries(ctx context.Context, p util.FullPath, startFileName string, inclusive bool, limit int) (entries []*Entry, expiredCount int, lastFileName string, err error) {
-    listedEntries, listErr := f.store.ListDirectoryEntries(ctx, p, startFileName, inclusive, limit)
+    listedEntries, listErr := f.Store.ListDirectoryEntries(ctx, p, startFileName, inclusive, limit)
     if listErr != nil {
         return listedEntries, expiredCount, "", listErr
     }
@@ -272,7 +285,7 @@ func (f *Filer) doListDirectoryEntries(ctx context.Context, p util.FullPath, sta
         lastFileName = entry.Name()
         if entry.TtlSec > 0 {
             if entry.Crtime.Add(time.Duration(entry.TtlSec) * time.Second).Before(time.Now()) {
-                f.store.DeleteEntry(ctx, p.Child(entry.Name()))
+                f.Store.DeleteEntry(ctx, p.Child(entry.Name()))
                 expiredCount++
                 continue
             }
@@ -323,5 +336,5 @@ func (f *Filer) cacheSetDirectory(dirpath string, dirEntry *Entry, level int) {

 func (f *Filer) Shutdown() {
     f.LocalMetaLogBuffer.Shutdown()
-    f.store.Shutdown()
+    f.Store.Shutdown()
 }

11
weed/filer2/filer_delete_entry.go

@@ -74,9 +74,9 @@ func (f *Filer) doBatchDeleteFolderMetaAndData(ctx context.Context, entry *Entry
         if sub.IsDirectory() {
             dirChunks, err = f.doBatchDeleteFolderMetaAndData(ctx, sub, isRecursive, ignoreRecursiveError, shouldDeleteChunks, false)
             f.cacheDelDirectory(string(sub.FullPath))
-            f.NotifyUpdateEvent(ctx, sub, nil, shouldDeleteChunks, isFromOtherCluster)
             chunks = append(chunks, dirChunks...)
         } else {
+            f.NotifyUpdateEvent(ctx, sub, nil, shouldDeleteChunks, isFromOtherCluster)
             chunks = append(chunks, sub.Chunks...)
         }
         if err != nil && !ignoreRecursiveError {
@@ -91,10 +91,12 @@ func (f *Filer) doBatchDeleteFolderMetaAndData(ctx context.Context, entry *Entry

     glog.V(3).Infof("deleting directory %v delete %d chunks: %v", entry.FullPath, len(chunks), shouldDeleteChunks)

-    if storeDeletionErr := f.store.DeleteFolderChildren(ctx, entry.FullPath); storeDeletionErr != nil {
+    if storeDeletionErr := f.Store.DeleteFolderChildren(ctx, entry.FullPath); storeDeletionErr != nil {
         return nil, fmt.Errorf("filer store delete: %v", storeDeletionErr)
     }

+    f.NotifyUpdateEvent(ctx, entry, nil, shouldDeleteChunks, isFromOtherCluster)
+
     return chunks, nil
 }

@@ -102,13 +104,14 @@ func (f *Filer) doDeleteEntryMetaAndData(ctx context.Context, entry *Entry, shou

     glog.V(3).Infof("deleting entry %v, delete chunks: %v", entry.FullPath, shouldDeleteChunks)

-    if storeDeletionErr := f.store.DeleteEntry(ctx, entry.FullPath); storeDeletionErr != nil {
+    if storeDeletionErr := f.Store.DeleteEntry(ctx, entry.FullPath); storeDeletionErr != nil {
         return fmt.Errorf("filer store delete: %v", storeDeletionErr)
     }
     if entry.IsDirectory() {
         f.cacheDelDirectory(string(entry.FullPath))
+    } else {
+        f.NotifyUpdateEvent(ctx, entry, nil, shouldDeleteChunks, isFromOtherCluster)
     }
-    f.NotifyUpdateEvent(ctx, entry, nil, shouldDeleteChunks, isFromOtherCluster)

     return nil
 }

9
weed/filer2/filer_notify.go

@@ -78,8 +78,13 @@ func (f *Filer) logFlushFunc(startTime, stopTime time.Time, buf []byte) {
         // startTime.Second(), startTime.Nanosecond(),
     )

-    if err := f.appendToFile(targetFile, buf); err != nil {
-        glog.V(0).Infof("log write failed %s: %v", targetFile, err)
+    for {
+        if err := f.appendToFile(targetFile, buf); err != nil {
+            glog.V(1).Infof("log write failed %s: %v", targetFile, err)
+            time.Sleep(737 * time.Millisecond)
+        } else {
+            break
+        }
     }
 }

57
weed/filer2/filerstore.go

@@ -29,8 +29,13 @@ type FilerStore interface {
     Shutdown()
 }

+type FilerLocalStore interface {
+    UpdateOffset(filer string, lastTsNs int64) error
+    ReadOffset(filer string) (lastTsNs int64, err error)
+}
+
 type FilerStoreWrapper struct {
-    actualStore FilerStore
+    ActualStore FilerStore
 }

 func NewFilerStoreWrapper(store FilerStore) *FilerStoreWrapper {
@@ -38,48 +43,48 @@ func NewFilerStoreWrapper(store FilerStore) *FilerStoreWrapper {
         return innerStore
     }
     return &FilerStoreWrapper{
-        actualStore: store,
+        ActualStore: store,
     }
 }

 func (fsw *FilerStoreWrapper) GetName() string {
-    return fsw.actualStore.GetName()
+    return fsw.ActualStore.GetName()
 }

 func (fsw *FilerStoreWrapper) Initialize(configuration util.Configuration, prefix string) error {
-    return fsw.actualStore.Initialize(configuration, prefix)
+    return fsw.ActualStore.Initialize(configuration, prefix)
 }

 func (fsw *FilerStoreWrapper) InsertEntry(ctx context.Context, entry *Entry) error {
-    stats.FilerStoreCounter.WithLabelValues(fsw.actualStore.GetName(), "insert").Inc()
+    stats.FilerStoreCounter.WithLabelValues(fsw.ActualStore.GetName(), "insert").Inc()
     start := time.Now()
     defer func() {
-        stats.FilerStoreHistogram.WithLabelValues(fsw.actualStore.GetName(), "insert").Observe(time.Since(start).Seconds())
+        stats.FilerStoreHistogram.WithLabelValues(fsw.ActualStore.GetName(), "insert").Observe(time.Since(start).Seconds())
     }()

     filer_pb.BeforeEntrySerialization(entry.Chunks)
-    return fsw.actualStore.InsertEntry(ctx, entry)
+    return fsw.ActualStore.InsertEntry(ctx, entry)
 }

 func (fsw *FilerStoreWrapper) UpdateEntry(ctx context.Context, entry *Entry) error {
-    stats.FilerStoreCounter.WithLabelValues(fsw.actualStore.GetName(), "update").Inc()
+    stats.FilerStoreCounter.WithLabelValues(fsw.ActualStore.GetName(), "update").Inc()
     start := time.Now()
     defer func() {
-        stats.FilerStoreHistogram.WithLabelValues(fsw.actualStore.GetName(), "update").Observe(time.Since(start).Seconds())
+        stats.FilerStoreHistogram.WithLabelValues(fsw.ActualStore.GetName(), "update").Observe(time.Since(start).Seconds())
     }()

     filer_pb.BeforeEntrySerialization(entry.Chunks)
-    return fsw.actualStore.UpdateEntry(ctx, entry)
+    return fsw.ActualStore.UpdateEntry(ctx, entry)
 }

 func (fsw *FilerStoreWrapper) FindEntry(ctx context.Context, fp util.FullPath) (entry *Entry, err error) {
-    stats.FilerStoreCounter.WithLabelValues(fsw.actualStore.GetName(), "find").Inc()
+    stats.FilerStoreCounter.WithLabelValues(fsw.ActualStore.GetName(), "find").Inc()
     start := time.Now()
     defer func() {
-        stats.FilerStoreHistogram.WithLabelValues(fsw.actualStore.GetName(), "find").Observe(time.Since(start).Seconds())
+        stats.FilerStoreHistogram.WithLabelValues(fsw.ActualStore.GetName(), "find").Observe(time.Since(start).Seconds())
     }()

-    entry, err = fsw.actualStore.FindEntry(ctx, fp)
+    entry, err = fsw.ActualStore.FindEntry(ctx, fp)
     if err != nil {
         return nil, err
     }
@@ -88,33 +93,33 @@ func (fsw *FilerStoreWrapper) FindEntry(ctx context.Context, fp util.FullPath) (
 }

 func (fsw *FilerStoreWrapper) DeleteEntry(ctx context.Context, fp util.FullPath) (err error) {
-    stats.FilerStoreCounter.WithLabelValues(fsw.actualStore.GetName(), "delete").Inc()
+    stats.FilerStoreCounter.WithLabelValues(fsw.ActualStore.GetName(), "delete").Inc()
     start := time.Now()
     defer func() {
-        stats.FilerStoreHistogram.WithLabelValues(fsw.actualStore.GetName(), "delete").Observe(time.Since(start).Seconds())
+        stats.FilerStoreHistogram.WithLabelValues(fsw.ActualStore.GetName(), "delete").Observe(time.Since(start).Seconds())
     }()

-    return fsw.actualStore.DeleteEntry(ctx, fp)
+    return fsw.ActualStore.DeleteEntry(ctx, fp)
 }

 func (fsw *FilerStoreWrapper) DeleteFolderChildren(ctx context.Context, fp util.FullPath) (err error) {
-    stats.FilerStoreCounter.WithLabelValues(fsw.actualStore.GetName(), "deleteFolderChildren").Inc()
+    stats.FilerStoreCounter.WithLabelValues(fsw.ActualStore.GetName(), "deleteFolderChildren").Inc()
     start := time.Now()
     defer func() {
-        stats.FilerStoreHistogram.WithLabelValues(fsw.actualStore.GetName(), "deleteFolderChildren").Observe(time.Since(start).Seconds())
+        stats.FilerStoreHistogram.WithLabelValues(fsw.ActualStore.GetName(), "deleteFolderChildren").Observe(time.Since(start).Seconds())
     }()

-    return fsw.actualStore.DeleteFolderChildren(ctx, fp)
+    return fsw.ActualStore.DeleteFolderChildren(ctx, fp)
 }

 func (fsw *FilerStoreWrapper) ListDirectoryEntries(ctx context.Context, dirPath util.FullPath, startFileName string, includeStartFile bool, limit int) ([]*Entry, error) {
-    stats.FilerStoreCounter.WithLabelValues(fsw.actualStore.GetName(), "list").Inc()
+    stats.FilerStoreCounter.WithLabelValues(fsw.ActualStore.GetName(), "list").Inc()
     start := time.Now()
     defer func() {
-        stats.FilerStoreHistogram.WithLabelValues(fsw.actualStore.GetName(), "list").Observe(time.Since(start).Seconds())
+        stats.FilerStoreHistogram.WithLabelValues(fsw.ActualStore.GetName(), "list").Observe(time.Since(start).Seconds())
     }()

-    entries, err := fsw.actualStore.ListDirectoryEntries(ctx, dirPath, startFileName, includeStartFile, limit)
+    entries, err := fsw.ActualStore.ListDirectoryEntries(ctx, dirPath, startFileName, includeStartFile, limit)
     if err != nil {
         return nil, err
     }
@@ -125,17 +130,17 @@ func (fsw *FilerStoreWrapper) ListDirectoryEntries(ctx context.Context, dirPath

 func (fsw *FilerStoreWrapper) BeginTransaction(ctx context.Context) (context.Context, error) {
-    return fsw.actualStore.BeginTransaction(ctx)
+    return fsw.ActualStore.BeginTransaction(ctx)
 }

 func (fsw *FilerStoreWrapper) CommitTransaction(ctx context.Context) error {
-    return fsw.actualStore.CommitTransaction(ctx)
+    return fsw.ActualStore.CommitTransaction(ctx)
 }

 func (fsw *FilerStoreWrapper) RollbackTransaction(ctx context.Context) error {
-    return fsw.actualStore.RollbackTransaction(ctx)
+    return fsw.ActualStore.RollbackTransaction(ctx)
 }

 func (fsw *FilerStoreWrapper) Shutdown() {
-    fsw.actualStore.Shutdown()
+    fsw.ActualStore.Shutdown()
 }

43
weed/filer2/leveldb2/leveldb2_local_store.go

@@ -0,0 +1,43 @@
+package leveldb
+
+import (
+    "fmt"
+
+    "github.com/chrislusf/seaweedfs/weed/filer2"
+    "github.com/chrislusf/seaweedfs/weed/util"
+)
+
+var (
+    _ = filer2.FilerLocalStore(&LevelDB2Store{})
+)
+
+func (store *LevelDB2Store) UpdateOffset(filer string, lastTsNs int64) error {
+
+    value := make([]byte, 8)
+    util.Uint64toBytes(value, uint64(lastTsNs))
+
+    err := store.dbs[0].Put([]byte("meta"+filer), value, nil)
+    if err != nil {
+        return fmt.Errorf("UpdateOffset %s : %v", filer, err)
+    }
+
+    println("UpdateOffset", filer, "lastTsNs", lastTsNs)
+
+    return nil
+}
+
+func (store *LevelDB2Store) ReadOffset(filer string) (lastTsNs int64, err error) {
+
+    value, err := store.dbs[0].Get([]byte("meta"+filer), nil)
+
+    if err != nil {
+        return 0, fmt.Errorf("ReadOffset %s : %v", filer, err)
+    }
+
+    lastTsNs = int64(util.BytesToUint64(value))
+
+    println("ReadOffset", filer, "lastTsNs", lastTsNs)
+
+    return
+}
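
The two methods above define the durability contract that the meta aggregator relies on: persist the timestamp of the last replicated change per peer, and read it back after a restart so the subscription can resume where it left off. A minimal sketch of that contract against the new FilerLocalStore interface, using a toy in-memory store (memStore is hypothetical, for illustration only; the LevelDB2Store implementation above is the real one):

```go
package main

import "fmt"

// FilerLocalStore mirrors the interface added in weed/filer2/filerstore.go.
type FilerLocalStore interface {
	UpdateOffset(filer string, lastTsNs int64) error
	ReadOffset(filer string) (lastTsNs int64, err error)
}

// memStore is a toy in-memory stand-in used only for this sketch.
type memStore struct{ offsets map[string]int64 }

func (s *memStore) UpdateOffset(filer string, lastTsNs int64) error {
	s.offsets[filer] = lastTsNs
	return nil
}

func (s *memStore) ReadOffset(filer string) (int64, error) {
	ts, ok := s.offsets[filer]
	if !ok {
		return 0, fmt.Errorf("no offset for %s", filer)
	}
	return ts, nil
}

func main() {
	var store FilerLocalStore = &memStore{offsets: map[string]int64{}}
	// After successfully replaying an event, the aggregator persists its timestamp...
	store.UpdateOffset("filer2:8888", 1588888888000000000)
	// ...and on restart it resumes the subscription from that point.
	ts, _ := store.ReadOffset("filer2:8888")
	fmt.Println("resume SinceNs =", ts)
}
```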

46
weed/filer2/meta_aggregator.go

@@ -37,13 +37,48 @@ func NewMetaAggregator(filers []string, grpcDialOption grpc.DialOption) *MetaAgg
     return t
 }

-func (ma *MetaAggregator) StartLoopSubscribe(lastTsNs int64) {
+func (ma *MetaAggregator) StartLoopSubscribe(f *Filer, self string) {
     for _, filer := range ma.filers {
-        go ma.subscribeToOneFiler(filer, lastTsNs)
+        go ma.subscribeToOneFiler(f, self, filer)
     }
 }

-func (ma *MetaAggregator) subscribeToOneFiler(filer string, lastTsNs int64) {
+func (ma *MetaAggregator) subscribeToOneFiler(f *Filer, self string, filer string) {
+
+    var maybeReplicateMetadataChange func(*filer_pb.SubscribeMetadataResponse)
+    lastPersistTime := time.Now()
+    changesSinceLastPersist := 0
+    lastTsNs := int64(0)
+    MaxChangeLimit := 100
+
+    if localStore, ok := f.Store.ActualStore.(FilerLocalStore); ok {
+        if self != filer {
+
+            if prevTsNs, err := localStore.ReadOffset(filer); err == nil {
+                lastTsNs = prevTsNs
+            }
+
+            glog.V(0).Infof("follow filer: %v, last %v (%d)", filer, time.Unix(0, lastTsNs), lastTsNs)
+            maybeReplicateMetadataChange = func(event *filer_pb.SubscribeMetadataResponse) {
+                if err := Replay(f.Store.ActualStore, event); err != nil {
+                    glog.Errorf("failed to replay metadata change from %v: %v", filer, err)
+                    return
+                }
+                changesSinceLastPersist++
+                if changesSinceLastPersist >= MaxChangeLimit || lastPersistTime.Add(time.Minute).Before(time.Now()) {
+                    if err := localStore.UpdateOffset(filer, event.TsNs); err == nil {
+                        lastPersistTime = time.Now()
+                        changesSinceLastPersist = 0
+                    } else {
+                        glog.V(0).Infof("failed to update offset for %v: %v", filer, err)
+                    }
+                }
+            }
+        } else {
+            glog.V(0).Infof("skipping following self: %v", self)
+        }
+    }

     processEventFn := func(event *filer_pb.SubscribeMetadataResponse) error {
         data, err := proto.Marshal(event)
@@ -54,13 +89,16 @@ func (ma *MetaAggregator) subscribeToOneFiler(filer string, lastTsNs int64) {
         dir := event.Directory
         // println("received meta change", dir, "size", len(data))
         ma.MetaLogBuffer.AddToBuffer([]byte(dir), data)
+        if maybeReplicateMetadataChange != nil {
+            maybeReplicateMetadataChange(event)
+        }
         return nil
     }

     for {
         err := pb.WithFilerClient(filer, ma.grpcDialOption, func(client filer_pb.SeaweedFilerClient) error {
             stream, err := client.SubscribeLocalMetadata(context.Background(), &filer_pb.SubscribeMetadataRequest{
-                ClientName: "filer",
+                ClientName: "filer:" + self,
                 PathPrefix: "/",
                 SinceNs:    lastTsNs,
             })

37
weed/filer2/meta_replay.go

@@ -0,0 +1,37 @@
+package filer2
+
+import (
+    "context"
+
+    "github.com/chrislusf/seaweedfs/weed/glog"
+    "github.com/chrislusf/seaweedfs/weed/pb/filer_pb"
+    "github.com/chrislusf/seaweedfs/weed/util"
+)
+
+func Replay(filerStore FilerStore, resp *filer_pb.SubscribeMetadataResponse) error {
+    message := resp.EventNotification
+    var oldPath util.FullPath
+    var newEntry *Entry
+    if message.OldEntry != nil {
+        oldPath = util.NewFullPath(resp.Directory, message.OldEntry.Name)
+        glog.V(4).Infof("deleting %v", oldPath)
+        if err := filerStore.DeleteEntry(context.Background(), oldPath); err != nil {
+            return err
+        }
+    }
+
+    if message.NewEntry != nil {
+        dir := resp.Directory
+        if message.NewParentPath != "" {
+            dir = message.NewParentPath
+        }
+        key := util.NewFullPath(dir, message.NewEntry.Name)
+        glog.V(4).Infof("creating %v", key)
+        newEntry = FromPbEntry(dir, message.NewEntry)
+        if err := filerStore.InsertEntry(context.Background(), newEntry); err != nil {
+            return err
+        }
+    }
+
+    return nil
+}
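
Replay is what turns a metadata subscription into a local copy of a peer's store: delete the old entry if one is named, then insert the new one. For orientation, a hedged sketch of a consumer loop applying events as they arrive; it assumes a stream obtained from client.SubscribeLocalMetadata as in meta_aggregator.go, and the applyEvents helper is illustrative, not part of this commit:

```go
package replaydemo

import (
	"github.com/chrislusf/seaweedfs/weed/filer2"
	"github.com/chrislusf/seaweedfs/weed/glog"
	"github.com/chrislusf/seaweedfs/weed/pb/filer_pb"
)

// applyEvents drains a metadata subscription stream into a local store by
// replaying each event; it returns when the stream errors or ends.
func applyEvents(store filer2.FilerStore, stream filer_pb.SeaweedFiler_SubscribeLocalMetadataClient) error {
	for {
		resp, err := stream.Recv()
		if err != nil {
			return err
		}
		if err := filer2.Replay(store, resp); err != nil {
			// A failed replay is logged but does not stop the stream,
			// mirroring how the aggregator tolerates per-event errors.
			glog.Errorf("replay %v: %v", resp, err)
		}
	}
}
```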

2
weed/filesys/meta_cache/meta_cache.go

@@ -60,7 +60,7 @@ func (mc *MetaCache) AtomicUpdateEntry(ctx context.Context, oldPath util.FullPat
                 return err
             }
         }
-    }else{
+    } else {
         // println("unknown old directory:", oldDir)
     }

12
weed/filesys/meta_cache/meta_cache_init.go

@@ -10,18 +10,6 @@ import (
     "github.com/chrislusf/seaweedfs/weed/util"
 )

-func InitMetaCache(mc *MetaCache, client filer_pb.FilerClient, path string) error {
-    return nil
-    glog.V(0).Infof("synchronizing meta data ...")
-    filer_pb.TraverseBfs(client, util.FullPath(path), func(parentPath util.FullPath, pbEntry *filer_pb.Entry) {
-        entry := filer2.FromPbEntry(string(parentPath), pbEntry)
-        if err := mc.InsertEntry(context.Background(), entry); err != nil {
-            glog.V(0).Infof("read %s: %v", entry.FullPath, err)
-        }
-    })
-    return nil
-}
-
 func EnsureVisited(mc *MetaCache, client filer_pb.FilerClient, dirPath util.FullPath) {
     mc.visitedBoundary.EnsureVisited(dirPath, func(path util.FullPath) (childDirectories []string, err error) {
func EnsureVisited(mc *MetaCache, client filer_pb.FilerClient, dirPath util.FullPath) { func EnsureVisited(mc *MetaCache, client filer_pb.FilerClient, dirPath util.FullPath) {
mc.visitedBoundary.EnsureVisited(dirPath, func(path util.FullPath) (childDirectories []string, err error) { mc.visitedBoundary.EnsureVisited(dirPath, func(path util.FullPath) (childDirectories []string, err error) {

4
weed/filesys/meta_cache/meta_cache_subscribe.go

@@ -56,13 +56,13 @@ func SubscribeMetaEvents(mc *MetaCache, client filer_pb.FilerClient, dir string,
             }

             if err := processEventFn(resp); err != nil {
-                return fmt.Errorf("process %v: %v", resp, err)
+                glog.Fatalf("process %v: %v", resp, err)
             }
             lastTsNs = resp.TsNs
         }
     })
     if err != nil {
-        glog.V(0).Infof("subscribing filer meta change: %v", err)
+        glog.Errorf("subscribing filer meta change: %v", err)
         time.Sleep(time.Second)
     }

2
weed/filesys/unimplemented.go

@@ -10,11 +10,13 @@ import (
 // https://github.com/bazil/fuse/issues/130

 var _ = fs.NodeAccesser(&Dir{})

 func (dir *Dir) Access(ctx context.Context, req *fuse.AccessRequest) error {
+
     return fuse.ENOSYS
 }

 var _ = fs.NodeAccesser(&File{})

 func (file *File) Access(ctx context.Context, req *fuse.AccessRequest) error {
+
     return fuse.ENOSYS
 }

20
weed/filesys/wfs.go

@@ -53,7 +53,7 @@ var _ = fs.FS(&WFS{})
 var _ = fs.FSStatfser(&WFS{})

 type WFS struct {
-    option                    *Option
+    option *Option

     // contains all open handles, protected by handlesLock
     handlesLock sync.Mutex
@@ -76,15 +76,15 @@ type statsCache struct {
 func NewSeaweedFileSystem(option *Option) *WFS {
     wfs := &WFS{
-        option:                    option,
-        handles:                   make(map[uint64]*FileHandle),
+        option:  option,
+        handles: make(map[uint64]*FileHandle),
         bufPool: sync.Pool{
             New: func() interface{} {
                 return make([]byte, option.ChunkSizeLimit)
             },
         },
     }
-    cacheUniqueId := util.Md5([]byte(option.FilerGrpcAddress))[0:4]
+    cacheUniqueId := util.Md5([]byte(option.FilerGrpcAddress + option.FilerMountRootPath + util.Version()))[0:4]
     cacheDir := path.Join(option.CacheDir, cacheUniqueId)
     if option.CacheSizeMB > 0 {
         os.MkdirAll(cacheDir, 0755)
@@ -96,14 +96,10 @@ func NewSeaweedFileSystem(option *Option) *WFS {
     wfs.metaCache = meta_cache.NewMetaCache(path.Join(cacheDir, "meta"))
     startTime := time.Now()
-    if err := meta_cache.InitMetaCache(wfs.metaCache, wfs, wfs.option.FilerMountRootPath); err != nil {
-        glog.V(0).Infof("failed to init meta cache: %v", err)
-    } else {
-        go meta_cache.SubscribeMetaEvents(wfs.metaCache, wfs, wfs.option.FilerMountRootPath, startTime.UnixNano())
-        grace.OnInterrupt(func() {
-            wfs.metaCache.Shutdown()
-        })
-    }
+    go meta_cache.SubscribeMetaEvents(wfs.metaCache, wfs, wfs.option.FilerMountRootPath, startTime.UnixNano())
+    grace.OnInterrupt(func() {
+        wfs.metaCache.Shutdown()
+    })

     wfs.root = &Dir{name: wfs.option.FilerMountRootPath, wfs: wfs}
     wfs.fsNodeCache = newFsCache(wfs.root)
5
weed/operation/needle_parse_test.go

@@ -18,7 +18,7 @@ type MockClient struct {
 }

 func (m *MockClient) Do(req *http.Request) (*http.Response, error) {
-    n, originalSize, err := needle.CreateNeedleFromRequest(req, 1024*1024)
+    n, originalSize, err := needle.CreateNeedleFromRequest(req, false, 1024*1024)
     if m.needleHandling != nil {
         m.needleHandling(n, originalSize, err)
     }
@@ -101,7 +101,6 @@ func TestCreateNeedleFromRequest(t *testing.T) {

 }

-
 var textContent = `Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions are
 met:
@@ -127,4 +126,4 @@ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 `

8
weed/operation/upload_content.go

@@ -205,8 +205,8 @@ func upload_content(uploadUrl string, fillBufferFunction func(w io.Writer) error
     req, postErr := http.NewRequest("POST", uploadUrl, body_buf)
     if postErr != nil {
-        glog.V(0).Infoln("failing to upload to", uploadUrl, postErr.Error())
-        return nil, postErr
+        glog.V(1).Infof("failing to upload to %s: %v", uploadUrl, postErr)
+        return nil, fmt.Errorf("failing to upload to %s: %v", uploadUrl, postErr)
     }
     req.Header.Set("Content-Type", content_type)
     for k, v := range pairMap {
@@ -217,8 +217,8 @@ func upload_content(uploadUrl string, fillBufferFunction func(w io.Writer) error
     }
     resp, post_err := HttpClient.Do(req)
     if post_err != nil {
-        glog.V(0).Infoln("failing to upload to", uploadUrl, post_err.Error())
-        return nil, post_err
+        glog.V(1).Infof("failing to upload to %v: %v", uploadUrl, post_err)
+        return nil, fmt.Errorf("failing to upload to %v: %v", uploadUrl, post_err)
     }
     defer resp.Body.Close()

5
weed/pb/filer.proto

@@ -115,6 +115,11 @@ message FileChunk {
     FileId source_fid = 8;
     bytes cipher_key = 9;
     bool is_compressed = 10;
+    bool is_chunk_manifest = 11; // content is a list of FileChunks
+}
+
+message FileChunkManifest {
+    repeated FileChunk chunks = 1;
 }

 message FileId {
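
Since the regenerated filer.pb.go diff is suppressed below, here is a hedged sketch of how the new message maps to Go under the usual protoc-gen-go naming conventions; the chunk values are made up for illustration:

```go
package main

import (
	"fmt"
	"log"

	"github.com/chrislusf/seaweedfs/weed/pb/filer_pb"
	"github.com/golang/protobuf/proto"
)

func main() {
	// Hypothetical chunk list; real entries come from actual uploads.
	manifest := &filer_pb.FileChunkManifest{
		Chunks: []*filer_pb.FileChunk{
			{FileId: "3,01637037d6", Offset: 0, Size: 1024},
		},
	}
	data, err := proto.Marshal(manifest)
	if err != nil {
		log.Fatal(err)
	}
	// The serialized manifest would itself be stored as chunk content,
	// referenced by a FileChunk whose IsChunkManifest flag is true.
	fmt.Printf("manifest encodes to %d bytes\n", len(data))
}
```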

1162
weed/pb/filer_pb/filer.pb.go
File diff suppressed because it is too large

8
weed/s3api/auth_credentials.go

@@ -91,7 +91,13 @@ func (iam *IdentityAccessManagement) loadS3ApiConfiguration(fileName string) err
     return nil
 }

+func (iam *IdentityAccessManagement) isEnabled() bool {
+
+    return len(iam.identities) > 0
+}
+
 func (iam *IdentityAccessManagement) lookupByAccessKey(accessKey string) (identity *Identity, cred *Credential, found bool) {
     for _, ident := range iam.identities {
         for _, cred := range ident.Credentials {
             if cred.AccessKey == accessKey {
@@ -104,7 +110,7 @@ func (iam *IdentityAccessManagement) lookupByAccessKey(accessKey string) (identi

 func (iam *IdentityAccessManagement) Auth(f http.HandlerFunc, action Action) http.HandlerFunc {

-    if len(iam.identities) == 0 {
+    if !iam.isEnabled() {
         return f
     }

4
weed/server/common.go

@@ -274,7 +274,7 @@ func processRangeRequest(r *http.Request, w http.ResponseWriter, totalSize int64
         ra := ranges[0]
         w.Header().Set("Content-Length", strconv.FormatInt(ra.length, 10))
         w.Header().Set("Content-Range", ra.contentRange(totalSize))
-        w.WriteHeader(http.StatusPartialContent)
+        // w.WriteHeader(http.StatusPartialContent)

         err = writeFn(w, ra.start, ra.length)
         if err != nil {
@@ -315,7 +315,7 @@ func processRangeRequest(r *http.Request, w http.ResponseWriter, totalSize int64
     if w.Header().Get("Content-Encoding") == "" {
         w.Header().Set("Content-Length", strconv.FormatInt(sendSize, 10))
     }
-    w.WriteHeader(http.StatusPartialContent)
+    // w.WriteHeader(http.StatusPartialContent)
     if _, err := io.CopyN(w, sendContent, sendSize); err != nil {
         http.Error(w, "Internal Error", http.StatusInternalServerError)
         return

23
weed/server/filer_grpc_server_sub_meta.go

@@ -37,10 +37,10 @@ func (fs *FilerServer) SubscribeMetadata(req *filer_pb.SubscribeMetadataRequest,
         lastReadTime = time.Unix(0, processedTsNs)
     }

-    err = fs.metaAggregator.MetaLogBuffer.LoopProcessLogData(lastReadTime, func() bool {
-        fs.metaAggregator.ListenersLock.Lock()
-        fs.metaAggregator.ListenersCond.Wait()
-        fs.metaAggregator.ListenersLock.Unlock()
+    err = fs.filer.MetaAggregator.MetaLogBuffer.LoopProcessLogData(lastReadTime, func() bool {
+        fs.filer.MetaAggregator.ListenersLock.Lock()
+        fs.filer.MetaAggregator.ListenersCond.Wait()
+        fs.filer.MetaAggregator.ListenersLock.Unlock()
         return true
     }, eachLogEntryFn)
@@ -63,6 +63,20 @@ func (fs *FilerServer) SubscribeLocalMetadata(req *filer_pb.SubscribeMetadataReq

     eachLogEntryFn := eachLogEntryFn(eachEventNotificationFn)

+    if _, ok := fs.filer.Store.ActualStore.(filer2.FilerLocalStore); ok {
+        // println("reading from persisted logs ...")
+        processedTsNs, err := fs.filer.ReadPersistedLogBuffer(lastReadTime, eachLogEntryFn)
+        if err != nil {
+            return fmt.Errorf("reading from persisted logs: %v", err)
+        }
+
+        if processedTsNs != 0 {
+            lastReadTime = time.Unix(0, processedTsNs)
+        }
+        glog.V(0).Infof("after local log reads, %v local subscribe %s from %+v", clientName, req.PathPrefix, lastReadTime)
+    }
+
+    // println("reading from in memory logs ...")
     err := fs.filer.LocalMetaLogBuffer.LoopProcessLogData(lastReadTime, func() bool {
         fs.listenersLock.Lock()
         fs.listenersCond.Wait()
@@ -117,6 +131,7 @@ func eachEventNotificationFn(req *filer_pb.SubscribeMetadataRequest, stream file
             EventNotification: eventNotification,
             TsNs:              tsNs,
         }
+        // println("sending", dirPath, entryName)
         if err := stream.Send(message); err != nil {
             glog.V(0).Infof("=> client %v: %+v", clientName, err)
             return err

12
weed/server/filer_server.go

@@ -5,7 +5,6 @@ import (
     "fmt"
     "net/http"
     "os"
-    "strings"
     "sync"
     "time"
@@ -60,7 +59,6 @@ type FilerServer struct {
     option         *FilerOption
     secret         security.SigningKey
     filer          *filer2.Filer
-    metaAggregator *filer2.MetaAggregator
     grpcDialOption grpc.DialOption

     // notifying clients
@@ -121,15 +119,7 @@ func NewFilerServer(defaultMux, readonlyMux *http.ServeMux, option *FilerOption)
         readonlyMux.HandleFunc("/", fs.readonlyFilerHandler)
     }

-    // set peers
-    if strings.HasPrefix(fs.filer.GetStore().GetName(), "leveldb") && len(option.Filers) > 0 {
-        glog.Fatalf("filers using separate leveldb stores should not configure %d peers %+v", len(option.Filers), option.Filers)
-    }
-    if len(option.Filers) == 0 {
-        option.Filers = append(option.Filers, fmt.Sprintf("%s:%d", option.Host, option.Port))
-    }
-    fs.metaAggregator = filer2.NewMetaAggregator(option.Filers, fs.grpcDialOption)
-    fs.metaAggregator.StartLoopSubscribe(time.Now().UnixNano())
+    fs.filer.AggregateFromPeers(fmt.Sprintf("%s:%d", option.Host, option.Port), option.Filers)

     fs.filer.LoadBuckets()

12
weed/server/master_server.go

@@ -32,11 +32,11 @@ const (
 )

 type MasterOption struct {
-    Host              string
-    Port              int
-    MetaFolder        string
-    VolumeSizeLimitMB uint
-    VolumePreallocate bool
+    Host               string
+    Port               int
+    MetaFolder         string
+    VolumeSizeLimitMB  uint
+    VolumePreallocate  bool
     // PulseSeconds int
     DefaultReplicaPlacement string
     GarbageThreshold        float64
@@ -66,7 +66,7 @@ type MasterServer struct {

     MasterClient *wdclient.MasterClient

-    adminLocks  *AdminLocks
+    adminLocks *AdminLocks
 }

 func NewMasterServer(r *mux.Router, option *MasterOption, peers []string) *MasterServer {

2
weed/storage/erasure_coding/ec_decoder.go

@@ -119,7 +119,7 @@ func iterateEcxFile(baseFileName string, processNeedleFn func(key types.NeedleId
 }

 func iterateEcjFile(baseFileName string, processNeedleFn func(key types.NeedleId) error) error {
-    if !util.FileExists(baseFileName+".ecj") {
+    if !util.FileExists(baseFileName + ".ecj") {
         return nil
     }
     ecjFile, openErr := os.OpenFile(baseFileName+".ecj", os.O_RDONLY, 0644)

4
weed/storage/erasure_coding/ec_volume_test.go

@@ -24,8 +24,8 @@ func TestPositioning(t *testing.T) {

     tests := []struct {
         needleId string
-        offset int64
-        size int
+        offset   int64
+        size     int
     }{
         {needleId: "0f0edb92", offset: 31300679656, size: 1167},
         {needleId: "0ef7d7f8", offset: 11513014944, size: 66044},

2
weed/storage/needle_map/compact_map_perf_test.go

@@ -32,7 +32,7 @@ func TestMemoryUsage(t *testing.T) {

     startTime := time.Now()
     for i := 0; i < 10; i++ {
-        indexFile, ie := os.OpenFile("../../../test/sample.idx", os.O_RDWR|os.O_RDONLY, 0644)
+        indexFile, ie := os.OpenFile("../../../test/data/sample.idx", os.O_RDWR|os.O_RDONLY, 0644)
         if ie != nil {
             log.Fatalln(ie)
         }

9
weed/storage/store.go

@@ -16,6 +16,7 @@ import (
 	"github.com/chrislusf/seaweedfs/weed/storage/needle"
 	"github.com/chrislusf/seaweedfs/weed/storage/super_block"
 	. "github.com/chrislusf/seaweedfs/weed/storage/types"
+	"github.com/chrislusf/seaweedfs/weed/util"
 )

 const (
@@ -28,13 +29,13 @@ const (
 type Store struct {
 	MasterAddress   string
 	grpcDialOption  grpc.DialOption
-	volumeSizeLimit uint64 //read from the master
+	volumeSizeLimit uint64 // read from the master
 	Ip              string
 	Port            int
 	PublicUrl       string
 	Locations       []*DiskLocation
-	dataCenter      string //optional information, overwriting master setting if exists
-	rack            string //optional information, overwriting master setting if exists
+	dataCenter      string // optional information, overwriting master setting if exists
+	rack            string // optional information, overwriting master setting if exists
 	connected       bool
 	NeedleMapType   NeedleMapType
 	NewVolumesChan  chan master_pb.VolumeShortInformationMessage
@@ -52,7 +53,7 @@ func NewStore(grpcDialOption grpc.DialOption, port int, ip, publicUrl string, di
 	s = &Store{grpcDialOption: grpcDialOption, Port: port, Ip: ip, PublicUrl: publicUrl, NeedleMapType: needleMapKind}
 	s.Locations = make([]*DiskLocation, 0)
 	for i := 0; i < len(dirnames); i++ {
-		location := NewDiskLocation(dirnames[i], maxVolumeCounts[i], minFreeSpacePercents[i])
+		location := NewDiskLocation(util.ResolvePath(dirnames[i]), maxVolumeCounts[i], minFreeSpacePercents[i])
 		location.loadExistingVolumes(needleMapKind)
 		s.Locations = append(s.Locations, location)
 		stats.VolumeServerMaxVolumeCounter.Add(float64(maxVolumeCounts[i]))
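Note on the NewStore hunk: each configured volume directory now passes through util.ResolvePath before NewDiskLocation opens it, so a home-relative value (for example a hypothetical -dir="~/vol" setting) is expanded to an absolute path at startup. The helper itself is introduced in weed/util/file_util.go further down; a runnable sketch of its behavior follows that hunk.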

5
weed/util/bounded_tree/bounded_tree_test.go

@@ -9,9 +9,7 @@ import (
 	"github.com/chrislusf/seaweedfs/weed/util"
 )

 var (
 	visitFn = func(path util.FullPath) (childDirectories []string, err error) {
 		fmt.Printf(" visit %v ...\n", path)
 		switch path {
@@ -37,14 +35,11 @@ var (
 		return nil, nil
 	}

 	printMap = func(m map[string]*Node) {
 		for k := range m {
 			println(" >", k)
 		}
 	}
 )

 func TestBoundedTree(t *testing.T) {

1
weed/util/compression.go

@@ -54,6 +54,7 @@ func ungzipData(input []byte) ([]byte, error) {
 }

 var decoder, _ = zstd.NewReader(nil)
+
 func unzstdData(input []byte) ([]byte, error) {
 	return decoder.DecodeAll(input, nil)
 }
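The package-level decoder here comes from github.com/klauspost/compress/zstd: NewReader(nil) builds a decoder with no input stream attached, which suits the DecodeAll-only use in unzstdData, and DecodeAll is documented as safe for concurrent callers, so one shared decoder can serve the whole package. A minimal self-contained sketch of the same pattern (the round-trip in main is illustrative only):

package main

import (
	"fmt"

	"github.com/klauspost/compress/zstd"
)

// One shared decoder, as in the hunk above: no input stream is attached,
// so it is usable only through DecodeAll.
var decoder, _ = zstd.NewReader(nil)

func unzstdData(input []byte) ([]byte, error) {
	return decoder.DecodeAll(input, nil)
}

func main() {
	enc, _ := zstd.NewWriter(nil)
	compressed := enc.EncodeAll([]byte("hello seaweedfs"), nil)
	plain, err := unzstdData(compressed)
	fmt.Println(string(plain), err) // hello seaweedfs <nil>
}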

2
weed/util/constants.go

@@ -5,7 +5,7 @@ import (
 )

 var (
-	VERSION = fmt.Sprintf("%s %d.%d", sizeLimit, 1, 84)
+	VERSION = fmt.Sprintf("%s %d.%d", sizeLimit, 1, 85)
 	COMMIT  = ""
 )

20
weed/util/file_util.go

@@ -3,6 +3,9 @@ package util
 import (
 	"errors"
 	"os"
+	"os/user"
+	"path/filepath"
+	"strings"
 	"time"

 	"github.com/chrislusf/seaweedfs/weed/glog"
@@ -63,3 +66,20 @@ func CheckFile(filename string) (exists, canRead, canWrite bool, modTime time.Ti
 	fileSize = fi.Size()
 	return
 }
+
+func ResolvePath(path string) string {
+
+	usr, _ := user.Current()
+	dir := usr.HomeDir
+
+	if path == "~" {
+		// In case of "~", which won't be caught by the "else if"
+		path = dir
+	} else if strings.HasPrefix(path, "~/") {
+		// Use strings.HasPrefix so we don't match paths like
+		// "/something/~/something/"
+		path = filepath.Join(dir, path[2:])
+	}
+
+	return path
+}
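ResolvePath only rewrites a leading "~" or "~/"; absolute paths and a "~" embedded mid-path pass through unchanged, which is why it uses strings.HasPrefix rather than strings.Contains. A standalone sketch, with the helper inlined so it runs outside the seaweedfs module (the sample paths are invented):

package main

import (
	"fmt"
	"os/user"
	"path/filepath"
	"strings"
)

// resolvePath mirrors the ResolvePath helper added above.
func resolvePath(path string) string {
	usr, _ := user.Current()
	dir := usr.HomeDir
	if path == "~" {
		path = dir // bare "~" means the home directory itself
	} else if strings.HasPrefix(path, "~/") {
		path = filepath.Join(dir, path[2:]) // expand a leading "~/"
	}
	return path
}

func main() {
	for _, p := range []string{"~", "~/data", "/var/data", "/something/~/x"} {
		fmt.Printf("%-16s -> %s\n", p, resolvePath(p))
	}
}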

13
weed/util/log_buffer/log_buffer.go

@@ -145,12 +145,15 @@ func (m *LogBuffer) loopInterval() {
 func (m *LogBuffer) copyToFlush() *dataToFlush {

-	if m.flushFn != nil && m.pos > 0 {
+	if m.pos > 0 {
 		// fmt.Printf("flush buffer %d pos %d empty space %d\n", len(m.buf), m.pos, len(m.buf)-m.pos)
-		d := &dataToFlush{
-			startTime: m.startTime,
-			stopTime:  m.stopTime,
-			data:      copiedBytes(m.buf[:m.pos]),
+		var d *dataToFlush
+		if m.flushFn != nil {
+			d = &dataToFlush{
+				startTime: m.startTime,
+				stopTime:  m.stopTime,
+				data:      copiedBytes(m.buf[:m.pos]),
+			}
 		}
 		// fmt.Printf("flushing [0,%d) with %d entries\n", m.pos, len(m.idx))
 		m.buf = m.prevBuffers.SealBuffer(m.startTime, m.stopTime, m.buf, m.pos)
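The point of the reshaped condition: previously a LogBuffer with no flushFn skipped this whole block, so data was never sealed into prevBuffers; now sealing runs whenever m.pos > 0, and only the dataToFlush copy is gated on the callback. A simplified, runnable sketch of that control flow (the types here are stand-ins, not the real LogBuffer):

package main

import "fmt"

// Simplified stand-ins for the real log_buffer types.
type dataToFlush struct{ data []byte }

type logBuffer struct {
	buf     []byte
	pos     int
	flushFn func([]byte)
	sealed  [][]byte // plays the role of prevBuffers
}

// copyToFlush mirrors the new control flow: seal whenever there is data,
// but allocate the flush payload only when a flush callback exists.
func (m *logBuffer) copyToFlush() *dataToFlush {
	if m.pos > 0 {
		var d *dataToFlush
		if m.flushFn != nil {
			d = &dataToFlush{data: append([]byte(nil), m.buf[:m.pos]...)}
		}
		m.sealed = append(m.sealed, m.buf[:m.pos]) // sealing no longer depends on flushFn
		m.buf = make([]byte, len(m.buf))
		m.pos = 0
		return d
	}
	return nil
}

func main() {
	m := &logBuffer{buf: make([]byte, 8)}
	m.pos = copy(m.buf, "abc")
	fmt.Println(m.copyToFlush() == nil, len(m.sealed)) // true 1: sealed even without a flushFn
}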
