Browse Source

hdfs: support read write chunk manifest

pull/1397/head
Chris Lu 4 years ago
parent
commit
1d724ab237
  1. 134
      other/java/client/src/main/java/seaweedfs/client/FileChunkManifest.java
  2. 9
      other/java/client/src/main/java/seaweedfs/client/FilerClient.java
  3. 12
      other/java/client/src/main/java/seaweedfs/client/SeaweedRead.java
  4. 36
      other/java/client/src/main/java/seaweedfs/client/SeaweedWrite.java
  5. 5
      other/java/client/src/test/java/seaweedfs/client/SeaweedReadTest.java
  6. 5
      other/java/hdfs2/src/main/java/seaweed/hdfs/SeaweedInputStream.java
  7. 2
      other/java/hdfs2/src/main/java/seaweed/hdfs/SeaweedOutputStream.java
  8. 2
      other/java/hdfs3/src/main/java/seaweed/hdfs/SeaweedInputStream.java

134
other/java/client/src/main/java/seaweedfs/client/FileChunkManifest.java

@ -0,0 +1,134 @@
package seaweedfs.client;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
public class FileChunkManifest {
private static final Logger LOG = LoggerFactory.getLogger(FileChunkManifest.class);
private static final int mergeFactor = 3;
public static boolean hasChunkManifest(List<FilerProto.FileChunk> chunks) {
for (FilerProto.FileChunk chunk : chunks) {
if (chunk.getIsChunkManifest()) {
return true;
}
}
return false;
}
public static List<FilerProto.FileChunk> resolveChunkManifest(
final FilerGrpcClient filerGrpcClient, List<FilerProto.FileChunk> chunks) throws IOException {
List<FilerProto.FileChunk> dataChunks = new ArrayList<>();
for (FilerProto.FileChunk chunk : chunks) {
if (!chunk.getIsChunkManifest()) {
dataChunks.add(chunk);
continue;
}
// IsChunkManifest
LOG.debug("fetching chunk manifest:{}", chunk);
byte[] data = fetchChunk(filerGrpcClient, chunk);
FilerProto.FileChunkManifest m = FilerProto.FileChunkManifest.newBuilder().mergeFrom(data).build();
List<FilerProto.FileChunk> resolvedChunks = new ArrayList<>();
for (FilerProto.FileChunk t : m.getChunksList()) {
// avoid deprecated chunk.getFileId()
resolvedChunks.add(t.toBuilder().setFileId(FilerClient.toFileId(t.getFid())).build());
}
dataChunks.addAll(resolveChunkManifest(filerGrpcClient, resolvedChunks));
}
return dataChunks;
}
private static byte[] fetchChunk(final FilerGrpcClient filerGrpcClient, FilerProto.FileChunk chunk) throws IOException {
FilerProto.LookupVolumeRequest.Builder lookupRequest = FilerProto.LookupVolumeRequest.newBuilder();
String vid = "" + chunk.getFid().getVolumeId();
lookupRequest.addVolumeIds(vid);
FilerProto.LookupVolumeResponse lookupResponse = filerGrpcClient
.getBlockingStub().lookupVolume(lookupRequest.build());
Map<String, FilerProto.Locations> vid2Locations = lookupResponse.getLocationsMapMap();
FilerProto.Locations locations = vid2Locations.get(vid);
SeaweedRead.ChunkView chunkView = new SeaweedRead.ChunkView(
FilerClient.toFileId(chunk.getFid()), // avoid deprecated chunk.getFileId()
0,
-1,
0,
true,
chunk.getCipherKey().toByteArray(),
chunk.getIsCompressed());
byte[] chunkData = SeaweedRead.chunkCache.getChunk(chunkView.fileId);
if (chunkData == null) {
LOG.debug("doFetchFullChunkData:{}", chunkView);
chunkData = SeaweedRead.doFetchFullChunkData(chunkView, locations);
}
LOG.debug("chunk {} size {}", chunkView.fileId, chunkData.length);
SeaweedRead.chunkCache.setChunk(chunkView.fileId, chunkData);
return chunkData;
}
public static List<FilerProto.FileChunk> maybeManifestize(
final FilerGrpcClient filerGrpcClient, List<FilerProto.FileChunk> inputChunks) throws IOException {
// the return variable
List<FilerProto.FileChunk> chunks = new ArrayList<>();
List<FilerProto.FileChunk> dataChunks = new ArrayList<>();
for (FilerProto.FileChunk chunk : inputChunks) {
if (!chunk.getIsChunkManifest()) {
dataChunks.add(chunk);
} else {
chunks.add(chunk);
}
}
int remaining = dataChunks.size();
for (int i = 0; i + mergeFactor < dataChunks.size(); i += mergeFactor) {
FilerProto.FileChunk chunk = mergeIntoManifest(filerGrpcClient, dataChunks.subList(i, i + mergeFactor));
chunks.add(chunk);
remaining -= mergeFactor;
}
// remaining
for (int i = dataChunks.size() - remaining; i < dataChunks.size(); i++) {
chunks.add(dataChunks.get(i));
}
return chunks;
}
private static FilerProto.FileChunk mergeIntoManifest(final FilerGrpcClient filerGrpcClient, List<FilerProto.FileChunk> dataChunks) throws IOException {
// create and serialize the manifest
FilerProto.FileChunkManifest.Builder m = FilerProto.FileChunkManifest.newBuilder().addAllChunks(dataChunks);
byte[] data = m.build().toByteArray();
long minOffset = Long.MAX_VALUE;
long maxOffset = -1;
for (FilerProto.FileChunk chunk : dataChunks) {
minOffset = Math.min(minOffset, chunk.getOffset());
maxOffset = Math.max(maxOffset, chunk.getSize() + chunk.getOffset());
}
FilerProto.FileChunk.Builder manifestChunk = SeaweedWrite.writeChunk(
filerGrpcClient.getReplication(),
filerGrpcClient,
minOffset,
data, 0, data.length);
manifestChunk.setIsChunkManifest(true);
manifestChunk.setSize(maxOffset - minOffset);
return manifestChunk.build();
}
}

9
other/java/client/src/main/java/seaweedfs/client/FilerClient.java

@ -24,6 +24,10 @@ public class FilerClient {
this.filerGrpcClient = filerGrpcClient; this.filerGrpcClient = filerGrpcClient;
} }
/**
 * Renders a structured file id as the string form
 * {@code <volumeId>,<fileKey in hex><cookie as 8 hex digits>}.
 *
 * <p>The file key must be hexadecimal and the cookie zero-padded to eight hex digits;
 * a decimal key or an unpadded cookie produces an id that names a different needle.
 *
 * @param fid structured file id (volume id, file key, cookie)
 * @return the string file id
 */
public static String toFileId(FilerProto.FileId fid) {
    return String.format("%d,%x%08x", fid.getVolumeId(), fid.getFileKey(), fid.getCookie());
}
public boolean mkdirs(String path, int mode) { public boolean mkdirs(String path, int mode) {
String currentUser = System.getProperty("user.name"); String currentUser = System.getProperty("user.name");
return mkdirs(path, mode, 0, 0, currentUser, new String[]{}); return mkdirs(path, mode, 0, 0, currentUser, new String[]{});
@ -209,7 +213,6 @@ public class FilerClient {
} }
} }
public boolean createEntry(String parent, FilerProto.Entry entry) { public boolean createEntry(String parent, FilerProto.Entry entry) {
try { try {
filerGrpcClient.getBlockingStub().createEntry(FilerProto.CreateEntryRequest.newBuilder() filerGrpcClient.getBlockingStub().createEntry(FilerProto.CreateEntryRequest.newBuilder()
@ -279,9 +282,7 @@ public class FilerClient {
entryBuilder.clearChunks(); entryBuilder.clearChunks();
for (FilerProto.FileChunk chunk : entry.getChunksList()) { for (FilerProto.FileChunk chunk : entry.getChunksList()) {
FilerProto.FileChunk.Builder chunkBuilder = chunk.toBuilder(); FilerProto.FileChunk.Builder chunkBuilder = chunk.toBuilder();
FilerProto.FileId fid = chunk.getFid();
fileId = String.format("%d,%d%x", fid.getVolumeId(), fid.getFileKey(), fid.getCookie());
chunkBuilder.setFileId(fileId);
chunkBuilder.setFileId(toFileId(chunk.getFid()));
entryBuilder.addChunks(chunkBuilder); entryBuilder.addChunks(chunkBuilder);
} }
return entryBuilder.build(); return entryBuilder.build();

12
other/java/client/src/main/java/seaweedfs/client/SeaweedRead.java

@ -2,16 +2,12 @@ package seaweedfs.client;
import org.apache.http.HttpEntity; import org.apache.http.HttpEntity;
import org.apache.http.HttpHeaders; import org.apache.http.HttpHeaders;
import org.apache.http.HttpResponse;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.CloseableHttpResponse; import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet; import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.util.EntityUtils; import org.apache.http.util.EntityUtils;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import java.io.Closeable;
import java.io.IOException; import java.io.IOException;
import java.util.*; import java.util.*;
@ -77,7 +73,7 @@ public class SeaweedRead {
return len; return len;
} }
private static byte[] doFetchFullChunkData(ChunkView chunkView, FilerProto.Locations locations) throws IOException {
public static byte[] doFetchFullChunkData(ChunkView chunkView, FilerProto.Locations locations) throws IOException {
HttpGet request = new HttpGet( HttpGet request = new HttpGet(
String.format("http://%s/%s", locations.getLocations(0).getUrl(), chunkView.fileId)); String.format("http://%s/%s", locations.getLocations(0).getUrl(), chunkView.fileId));
@ -138,7 +134,11 @@ public class SeaweedRead {
return views; return views;
} }
public static List<VisibleInterval> nonOverlappingVisibleIntervals(List<FilerProto.FileChunk> chunkList) {
public static List<VisibleInterval> nonOverlappingVisibleIntervals(
final FilerGrpcClient filerGrpcClient, List<FilerProto.FileChunk> chunkList) throws IOException {
chunkList = FileChunkManifest.resolveChunkManifest(filerGrpcClient, chunkList);
FilerProto.FileChunk[] chunks = chunkList.toArray(new FilerProto.FileChunk[0]); FilerProto.FileChunk[] chunks = chunkList.toArray(new FilerProto.FileChunk[0]);
Arrays.sort(chunks, new Comparator<FilerProto.FileChunk>() { Arrays.sort(chunks, new Comparator<FilerProto.FileChunk>() {
@Override @Override

36
other/java/client/src/main/java/seaweedfs/client/SeaweedWrite.java

@ -1,8 +1,6 @@
package seaweedfs.client; package seaweedfs.client;
import com.google.protobuf.ByteString; import com.google.protobuf.ByteString;
import org.apache.http.HttpResponse;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.CloseableHttpResponse; import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpPost; import org.apache.http.client.methods.HttpPost;
import org.apache.http.entity.mime.HttpMultipartMode; import org.apache.http.entity.mime.HttpMultipartMode;
@ -10,10 +8,10 @@ import org.apache.http.entity.mime.MultipartEntityBuilder;
import org.apache.http.util.EntityUtils; import org.apache.http.util.EntityUtils;
import java.io.ByteArrayInputStream; import java.io.ByteArrayInputStream;
import java.io.Closeable;
import java.io.IOException; import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
import java.security.SecureRandom; import java.security.SecureRandom;
import java.util.List;
public class SeaweedWrite { public class SeaweedWrite {
@ -25,6 +23,17 @@ public class SeaweedWrite {
final long offset, final long offset,
final byte[] bytes, final byte[] bytes,
final long bytesOffset, final long bytesLength) throws IOException { final long bytesOffset, final long bytesLength) throws IOException {
synchronized (entry) {
entry.addChunks(writeChunk(replication, filerGrpcClient, offset, bytes, bytesOffset, bytesLength));
}
}
public static FilerProto.FileChunk.Builder writeChunk(final String replication,
final FilerGrpcClient filerGrpcClient,
final long offset,
final byte[] bytes,
final long bytesOffset,
final long bytesLength) throws IOException {
FilerProto.AssignVolumeResponse response = filerGrpcClient.getBlockingStub().assignVolume( FilerProto.AssignVolumeResponse response = filerGrpcClient.getBlockingStub().assignVolume(
FilerProto.AssignVolumeRequest.newBuilder() FilerProto.AssignVolumeRequest.newBuilder()
.setCollection(filerGrpcClient.getCollection()) .setCollection(filerGrpcClient.getCollection())
@ -46,25 +55,28 @@ public class SeaweedWrite {
String etag = multipartUpload(targetUrl, auth, bytes, bytesOffset, bytesLength, cipherKey); String etag = multipartUpload(targetUrl, auth, bytes, bytesOffset, bytesLength, cipherKey);
synchronized (entry) {
entry.addChunks(FilerProto.FileChunk.newBuilder()
// cache fileId ~ bytes
SeaweedRead.chunkCache.setChunk(fileId, bytes);
return FilerProto.FileChunk.newBuilder()
.setFileId(fileId) .setFileId(fileId)
.setOffset(offset) .setOffset(offset)
.setSize(bytesLength) .setSize(bytesLength)
.setMtime(System.currentTimeMillis() / 10000L) .setMtime(System.currentTimeMillis() / 10000L)
.setETag(etag) .setETag(etag)
.setCipherKey(cipherKeyString)
);
.setCipherKey(cipherKeyString);
} }
// cache fileId ~ bytes
SeaweedRead.chunkCache.setChunk(fileId, bytes);
public static void writeMeta(final FilerGrpcClient filerGrpcClient,
final String parentDirectory,
final FilerProto.Entry.Builder entry) throws IOException {
}
int chunkSize = entry.getChunksCount();
List<FilerProto.FileChunk> chunks = FileChunkManifest.maybeManifestize(filerGrpcClient, entry.getChunksList());
public static void writeMeta(final FilerGrpcClient filerGrpcClient,
final String parentDirectory, final FilerProto.Entry.Builder entry) {
synchronized (entry) { synchronized (entry) {
entry.clearChunks();
entry.addAllChunks(chunks);
filerGrpcClient.getBlockingStub().createEntry( filerGrpcClient.getBlockingStub().createEntry(
FilerProto.CreateEntryRequest.newBuilder() FilerProto.CreateEntryRequest.newBuilder()
.setDirectory(parentDirectory) .setDirectory(parentDirectory)

5
other/java/client/src/test/java/seaweedfs/client/SeaweedReadTest.java

@ -3,13 +3,14 @@ package seaweedfs.client;
import org.junit.Assert; import org.junit.Assert;
import org.junit.Test; import org.junit.Test;
import java.io.IOException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
public class SeaweedReadTest { public class SeaweedReadTest {
@Test @Test
public void testNonOverlappingVisibleIntervals() {
public void testNonOverlappingVisibleIntervals() throws IOException {
List<FilerProto.FileChunk> chunks = new ArrayList<>(); List<FilerProto.FileChunk> chunks = new ArrayList<>();
chunks.add(FilerProto.FileChunk.newBuilder() chunks.add(FilerProto.FileChunk.newBuilder()
.setFileId("aaa") .setFileId("aaa")
@ -24,7 +25,7 @@ public class SeaweedReadTest {
.setMtime(2000) .setMtime(2000)
.build()); .build());
List<SeaweedRead.VisibleInterval> visibleIntervals = SeaweedRead.nonOverlappingVisibleIntervals(chunks);
List<SeaweedRead.VisibleInterval> visibleIntervals = SeaweedRead.nonOverlappingVisibleIntervals(null, chunks);
for (SeaweedRead.VisibleInterval visibleInterval : visibleIntervals) { for (SeaweedRead.VisibleInterval visibleInterval : visibleIntervals) {
System.out.println("visible:" + visibleInterval); System.out.println("visible:" + visibleInterval);
} }

5
other/java/hdfs2/src/main/java/seaweed/hdfs/SeaweedInputStream.java

@ -2,7 +2,6 @@ package seaweed.hdfs;
// based on org.apache.hadoop.fs.azurebfs.services.AbfsInputStream // based on org.apache.hadoop.fs.azurebfs.services.AbfsInputStream
import com.google.common.base.Preconditions;
import org.apache.hadoop.fs.FSExceptionMessages; import org.apache.hadoop.fs.FSExceptionMessages;
import org.apache.hadoop.fs.FSInputStream; import org.apache.hadoop.fs.FSInputStream;
import org.apache.hadoop.fs.FileSystem.Statistics; import org.apache.hadoop.fs.FileSystem.Statistics;
@ -37,7 +36,7 @@ public class SeaweedInputStream extends FSInputStream {
final Statistics statistics, final Statistics statistics,
final String path, final String path,
final FilerProto.Entry entry, final FilerProto.Entry entry,
final int bufferSize) {
final int bufferSize) throws IOException {
this.filerGrpcClient = filerGrpcClient; this.filerGrpcClient = filerGrpcClient;
this.statistics = statistics; this.statistics = statistics;
this.path = path; this.path = path;
@ -45,7 +44,7 @@ public class SeaweedInputStream extends FSInputStream {
this.contentLength = SeaweedRead.totalSize(entry.getChunksList()); this.contentLength = SeaweedRead.totalSize(entry.getChunksList());
this.bufferSize = bufferSize; this.bufferSize = bufferSize;
this.visibleIntervalList = SeaweedRead.nonOverlappingVisibleIntervals(entry.getChunksList());
this.visibleIntervalList = SeaweedRead.nonOverlappingVisibleIntervals(filerGrpcClient, entry.getChunksList());
LOG.debug("new path:{} entry:{} visibleIntervalList:{}", path, entry, visibleIntervalList); LOG.debug("new path:{} entry:{} visibleIntervalList:{}", path, entry, visibleIntervalList);

2
other/java/hdfs2/src/main/java/seaweed/hdfs/SeaweedOutputStream.java

@ -109,7 +109,7 @@ public class SeaweedOutputStream extends OutputStream {
break; break;
} }
// System.out.println(path + " [" + (outputIndex + currentOffset) + "," + ((outputIndex + currentOffset) + writableBytes) + ")");
// System.out.println(path + " [" + (outputIndex + currentOffset) + "," + ((outputIndex + currentOffset) + writableBytes) + ") " + buffer.capacity());
buffer.put(data, currentOffset, writableBytes); buffer.put(data, currentOffset, writableBytes);
outputIndex += writableBytes; outputIndex += writableBytes;
currentOffset += writableBytes; currentOffset += writableBytes;

2
other/java/hdfs3/src/main/java/seaweed/hdfs/SeaweedInputStream.java

@ -45,7 +45,7 @@ public class SeaweedInputStream extends FSInputStream {
this.contentLength = SeaweedRead.totalSize(entry.getChunksList()); this.contentLength = SeaweedRead.totalSize(entry.getChunksList());
this.bufferSize = bufferSize; this.bufferSize = bufferSize;
this.visibleIntervalList = SeaweedRead.nonOverlappingVisibleIntervals(entry.getChunksList());
this.visibleIntervalList = SeaweedRead.nonOverlappingVisibleIntervals(filerGrpcClient, entry.getChunksList());
LOG.debug("new path:{} entry:{} visibleIntervalList:{}", path, entry, visibleIntervalList); LOG.debug("new path:{} entry:{} visibleIntervalList:{}", path, entry, visibleIntervalList);

Loading…
Cancel
Save