
Merge pull request #1 from chrislusf/master

Merge upstream
pull/1507/head
ustuzhanin, 4 years ago (committed by GitHub)
commit 3e0a79ef05
Changed files (lines changed per file in parentheses):

  1. .travis.yml (5)
  2. README.md (26)
  3. docker/Dockerfile.go_build_large (35)
  4. go.mod (12)
  5. go.sum (30)
  6. k8s/seaweedfs/Chart.yaml (2)
  7. k8s/seaweedfs/templates/filer-statefulset.yaml (22)
  8. k8s/seaweedfs/templates/master-statefulset.yaml (30)
  9. k8s/seaweedfs/templates/s3-deployment.yaml (2)
  10. k8s/seaweedfs/templates/seaweefs-grafana-dashboard.yaml (4)
  11. k8s/seaweedfs/templates/volume-statefulset.yaml (25)
  12. k8s/seaweedfs/values.yaml (58)
  13. other/java/client/pom.xml (2)
  14. other/java/client/pom.xml.deploy (2)
  15. other/java/client/pom_debug.xml (2)
  16. other/java/client/src/main/java/seaweedfs/client/SeaweedRead.java (55)
  17. other/java/client/src/main/proto/filer.proto (32)
  18. other/java/hdfs2/dependency-reduced-pom.xml (176)
  19. other/java/hdfs2/pom.xml (4)
  20. other/java/hdfs2/src/main/java/seaweed/hdfs/SeaweedFileSystem.java (29)
  21. other/java/hdfs2/src/main/java/seaweed/hdfs/SeaweedFileSystemStore.java (11)
  22. other/java/hdfs2/src/main/java/seaweed/hdfs/SeaweedInputStream.java (4)
  23. other/java/hdfs3/dependency-reduced-pom.xml (184)
  24. other/java/hdfs3/pom.xml (4)
  25. other/java/hdfs3/src/main/java/seaweed/hdfs/SeaweedFileSystem.java (29)
  26. other/java/hdfs3/src/main/java/seaweed/hdfs/SeaweedFileSystemStore.java (11)
  27. other/java/hdfs3/src/main/java/seaweed/hdfs/SeaweedInputStream.java (4)
  28. other/metrics/grafana_seaweedfs.json (1856)
  29. test/random_access/pom.xml (58)
  30. test/random_access/src/main/java/seaweedfs/client/btree/BTreePersistentIndexedCache.java (753)
  31. test/random_access/src/main/java/seaweedfs/client/btree/Block.java (59)
  32. test/random_access/src/main/java/seaweedfs/client/btree/BlockPayload.java (51)
  33. test/random_access/src/main/java/seaweedfs/client/btree/BlockPointer.java (75)
  34. test/random_access/src/main/java/seaweedfs/client/btree/BlockStore.java (68)
  35. test/random_access/src/main/java/seaweedfs/client/btree/BufferCaster.java (30)
  36. test/random_access/src/main/java/seaweedfs/client/btree/ByteInput.java (74)
  37. test/random_access/src/main/java/seaweedfs/client/btree/ByteOutput.java (74)
  38. test/random_access/src/main/java/seaweedfs/client/btree/CachingBlockStore.java (129)
  39. test/random_access/src/main/java/seaweedfs/client/btree/CorruptedCacheException.java (22)
  40. test/random_access/src/main/java/seaweedfs/client/btree/FileBackedBlockStore.java (274)
  41. test/random_access/src/main/java/seaweedfs/client/btree/FreeListBlockStore.java (283)
  42. test/random_access/src/main/java/seaweedfs/client/btree/KeyHasher.java (75)
  43. test/random_access/src/main/java/seaweedfs/client/btree/RandomAccessFileInputStream.java (54)
  44. test/random_access/src/main/java/seaweedfs/client/btree/RandomAccessFileOutputStream.java (48)
  45. test/random_access/src/main/java/seaweedfs/client/btree/StateCheckBlockStore.java (87)
  46. test/random_access/src/main/java/seaweedfs/client/btree/StreamByteBuffer.java (526)
  47. test/random_access/src/main/java/seaweedfs/client/btree/UncheckedException.java (88)
  48. test/random_access/src/main/java/seaweedfs/client/btree/UncheckedIOException.java (36)
  49. test/random_access/src/main/java/seaweedfs/client/btree/serialize/AbstractDecoder.java (133)
  50. test/random_access/src/main/java/seaweedfs/client/btree/serialize/AbstractEncoder.java (101)
  51. test/random_access/src/main/java/seaweedfs/client/btree/serialize/AbstractSerializer.java (40)
  52. test/random_access/src/main/java/seaweedfs/client/btree/serialize/Cast.java (79)
  53. test/random_access/src/main/java/seaweedfs/client/btree/serialize/ClassLoaderObjectInputStream.java (43)
  54. test/random_access/src/main/java/seaweedfs/client/btree/serialize/Decoder.java (140)
  55. test/random_access/src/main/java/seaweedfs/client/btree/serialize/DefaultSerializer.java (73)
  56. test/random_access/src/main/java/seaweedfs/client/btree/serialize/Encoder.java (110)
  57. test/random_access/src/main/java/seaweedfs/client/btree/serialize/FlushableEncoder.java (31)
  58. test/random_access/src/main/java/seaweedfs/client/btree/serialize/ObjectReader.java (28)
  59. test/random_access/src/main/java/seaweedfs/client/btree/serialize/ObjectWriter.java (21)
  60. test/random_access/src/main/java/seaweedfs/client/btree/serialize/Serializer.java (33)
  61. test/random_access/src/main/java/seaweedfs/client/btree/serialize/StatefulSerializer.java (33)
  62. test/random_access/src/main/java/seaweedfs/client/btree/serialize/kryo/KryoBackedDecoder.java (210)
  63. test/random_access/src/main/java/seaweedfs/client/btree/serialize/kryo/KryoBackedEncoder.java (134)
  64. test/random_access/src/main/java/seaweedfs/client/btree/serialize/kryo/StringDeduplicatingKryoBackedDecoder.java (188)
  65. test/random_access/src/main/java/seaweedfs/client/btree/serialize/kryo/StringDeduplicatingKryoBackedEncoder.java (128)
  66. test/random_access/src/main/java/seaweedfs/client/btree/serialize/kryo/TypeSafeSerializer.java (51)
  67. test/random_access/src/test/java/seaweedfs/client/btree/BTreePersistentIndexedCacheTest.java (476)
  68. test/random_access/src/test/java/seaweedfs/file/MmapFileTest.java (143)
  69. test/random_access/src/test/java/seaweedfs/file/RandomeAccessFileTest.java (70)
  70. test/s3/basic/basic_test.go (21)
  71. test/s3/multipart/aws_upload.go (175)
  72. unmaintained/diff_volume_servers/diff_volume_servers.go (10)
  73. unmaintained/fix_dat/fix_dat.go (4)
  74. unmaintained/s3/presigned_put/presigned_put.go (73)
  75. unmaintained/see_dat/see_dat_gzip.go (83)
  76. unmaintained/see_idx/see_idx.go (2)
  77. unmaintained/see_log_entry/see_log_entry.go (4)
  78. weed/Makefile (27)
  79. weed/command/command.go (1)
  80. weed/command/download.go (14)
  81. weed/command/export.go (24)
  82. weed/command/filer.go (31)
  83. weed/command/filer_copy.go (2)
  84. weed/command/filer_sync.go (337)
  85. weed/command/fix.go (2)
  86. weed/command/master.go (2)
  87. weed/command/mount.go (4)
  88. weed/command/mount_std.go (34)
  89. weed/command/s3.go (24)
  90. weed/command/scaffold.go (18)
  91. weed/command/server.go (15)
  92. weed/command/volume.go (73)
  93. weed/command/watch.go (60)
  94. weed/filer/abstract_sql/abstract_sql_store.go (54)
  95. weed/filer/abstract_sql/abstract_sql_store_kv.go (87)
  96. weed/filer/cassandra/README.txt (0)
  97. weed/filer/cassandra/cassandra_store.go (36)
  98. weed/filer/cassandra/cassandra_store_kv.go (61)
  99. weed/filer/configuration.go (2)
  100. weed/filer/elastic/v7/elastic_store.go (338)

.travis.yml (5 lines changed)

@ -1,9 +1,8 @@
sudo: false
language: go
go:
- 1.12.x
- 1.13.x
- 1.14.x
- 1.15.x
before_install:
- export PATH=/home/travis/gopath/bin:$PATH
@ -45,4 +44,4 @@ deploy:
on:
tags: true
repo: chrislusf/seaweedfs
go: 1.14.x
go: 1.15.x

README.md (26 lines changed)

@ -90,7 +90,7 @@ There is only 40 bytes of disk storage overhead for each file's metadata. It is
SeaweedFS started by implementing [Facebook's Haystack design paper](http://www.usenix.org/event/osdi10/tech/full_papers/Beaver.pdf). Also, SeaweedFS implements erasure coding with ideas from [f4: Facebook’s Warm BLOB Storage System](https://www.usenix.org/system/files/conference/osdi14/osdi14-paper-muralidhar.pdf)
On top of the object store, optional [Filer] can support directories and POSIX attributes. Filer is a separate linearly-scalable stateless server with customizable metadata stores, e.g., MySql, Postgres, Mongodb, Redis, Etcd, Cassandra, LevelDB, MemSql, TiDB, TiKV, CockroachDB, etc.
On top of the object store, optional [Filer] can support directories and POSIX attributes. Filer is a separate linearly-scalable stateless server with customizable metadata stores, e.g., MySql, Postgres, Mongodb, Redis, Cassandra, Elastic Search, LevelDB, MemSql, TiDB, Etcd, CockroachDB, etc.
[Back to TOC](#table-of-contents)
@ -112,22 +112,23 @@ On top of the object store, optional [Filer] can support directories and POSIX a
[Back to TOC](#table-of-contents)
## Filer Features ##
* [Filer server][Filer] provide "normal" directories and files via http.
* [Filer server][Filer] provides "normal" directories and files via http.
* [Super Large Files][SuperLargeFiles] stores large or super large files in tens of TB.
* [Mount filer][Mount] to read and write files directly as a local directory via FUSE.
* [Amazon S3 compatible API][AmazonS3API] to access files with S3 tooling.
* [Hadoop Compatible File System][Hadoop] to access files from Hadoop/Spark/Flink/etc jobs.
* [Async Backup To Cloud][BackupToCloud] has extremely fast local access and backups to Amazon S3, Google Cloud Storage, Azure, BackBlaze.
* [WebDAV] access as a mapped drive on Mac and Windows, or from mobile devices.
* [Mount filer][Mount] reads and writes files directly as a local directory via FUSE.
* [Active-Active Replication][ActiveActiveAsyncReplication] enables asynchronous one-way or two-way cross cluster continuous replication.
* [Amazon S3 compatible API][AmazonS3API] accesses files with S3 tooling.
* [Hadoop Compatible File System][Hadoop] accesses files from Hadoop/Spark/Flink/etc or even runs HBase.
* [Async Replication To Cloud][BackupToCloud] has extremely fast local access and backups to Amazon S3, Google Cloud Storage, Azure, BackBlaze.
* [WebDAV] accesses as a mapped drive on Mac and Windows, or from mobile devices.
* [AES256-GCM Encrypted Storage][FilerDataEncryption] safely stores the encrypted data.
* [File TTL][FilerTTL] automatically purge file metadata and actual file data.
* [File TTL][FilerTTL] automatically purges file metadata and actual file data.
* [Kubernetes CSI Driver][SeaweedFsCsiDriver] A Container Storage Interface (CSI) Driver. [![Docker Pulls](https://img.shields.io/docker/pulls/chrislusf/seaweedfs-csi-driver.svg?maxAge=4800)](https://hub.docker.com/r/chrislusf/seaweedfs-csi-driver/)
[Filer]: https://github.com/chrislusf/seaweedfs/wiki/Directories-and-Files
[SuperLargeFiles]: https://github.com/chrislusf/seaweedfs/wiki/Data-Structure-for-Large-Files
[Mount]: https://github.com/chrislusf/seaweedfs/wiki/FUSE-Mount
[AmazonS3API]: https://github.com/chrislusf/seaweedfs/wiki/Amazon-S3-API
[BackupToCloud]: https://github.com/chrislusf/seaweedfs/wiki/Backup-to-Cloud
[BackupToCloud]: https://github.com/chrislusf/seaweedfs/wiki/Async-Replication-to-Cloud
[Hadoop]: https://github.com/chrislusf/seaweedfs/wiki/Hadoop-Compatible-File-System
[WebDAV]: https://github.com/chrislusf/seaweedfs/wiki/WebDAV
[ErasureCoding]: https://github.com/chrislusf/seaweedfs/wiki/Erasure-coding-for-warm-storage
@ -136,6 +137,7 @@ On top of the object store, optional [Filer] can support directories and POSIX a
[FilerTTL]: https://github.com/chrislusf/seaweedfs/wiki/Filer-Stores
[VolumeServerTTL]: https://github.com/chrislusf/seaweedfs/wiki/Store-file-with-a-Time-To-Live
[SeaweedFsCsiDriver]: https://github.com/seaweedfs/seaweedfs-csi-driver
[ActiveActiveAsyncReplication]: https://github.com/chrislusf/seaweedfs/wiki/Filer-Active-Active-cross-cluster-continuous-synchronization
[Back to TOC](#table-of-contents)
@ -365,10 +367,10 @@ The architectures are mostly the same. SeaweedFS aims to store and read files fa
* SeaweedFS optimizes for small files, ensuring O(1) disk seek operation, and can also handle large files.
* SeaweedFS statically assigns a volume id for a file. Locating file content becomes just a lookup of the volume id, which can be easily cached.
* SeaweedFS Filer metadata store can be any well-known and proven data stores, e.g., Cassandra, Mongodb, Redis, Etcd, MySql, Postgres, MemSql, TiDB, CockroachDB, etc, and is easy to customized.
* SeaweedFS Filer metadata store can be any well-known and proven data stores, e.g., Cassandra, Mongodb, Redis, Elastic Search, MySql, Postgres, MemSql, TiDB, CockroachDB, Etcd etc, and is easy to customized.
* SeaweedFS Volume server also communicates directly with clients via HTTP, supporting range queries, direct uploads, etc.
| System | File Meta | File Content Read| POSIX | REST API | Optimized for small files |
| System | File Meta | File Content Read| POSIX | REST API | Optimized for large number of small files |
| ------------- | ------------------------------- | ---------------- | ------ | -------- | ------------------------- |
| SeaweedFS | lookup volume id, cacheable | O(1) disk seek | | Yes | Yes |
| SeaweedFS Filer| Linearly Scalable, Customizable | O(1) disk seek | FUSE | Yes | Yes |
@ -406,7 +408,7 @@ Ceph uses CRUSH hashing to automatically manage the data placement. SeaweedFS pl
SeaweedFS is optimized for small files. Small files are stored as one continuous block of content, with at most 8 unused bytes between files. Small file access is O(1) disk read.
SeaweedFS Filer uses off-the-shelf stores, such as MySql, Postgres, Mongodb, Redis, Etcd, Cassandra, MemSql, TiDB, CockroachCB, to manage file directories. These stores are proven, scalable, and easier to manage.
SeaweedFS Filer uses off-the-shelf stores, such as MySql, Postgres, Mongodb, Redis, Elastic Search, Cassandra, MemSql, TiDB, CockroachCB, Etcd, to manage file directories. These stores are proven, scalable, and easier to manage.
| SeaweedFS | comparable to Ceph | advantage |
| ------------- | ------------- | ---------------- |

docker/Dockerfile.go_build_large (35 lines changed)

@ -0,0 +1,35 @@
FROM frolvlad/alpine-glibc as builder
RUN apk add git go g++
RUN mkdir -p /go/src/github.com/chrislusf/
RUN git clone https://github.com/chrislusf/seaweedfs /go/src/github.com/chrislusf/seaweedfs
RUN cd /go/src/github.com/chrislusf/seaweedfs/weed && go install -tags 5BytesOffset
FROM alpine AS final
LABEL author="Chris Lu"
COPY --from=builder /root/go/bin/weed /usr/bin/
RUN mkdir -p /etc/seaweedfs
COPY --from=builder /go/src/github.com/chrislusf/seaweedfs/docker/filer.toml /etc/seaweedfs/filer.toml
COPY --from=builder /go/src/github.com/chrislusf/seaweedfs/docker/entrypoint.sh /entrypoint.sh
# volume server gprc port
EXPOSE 18080
# volume server http port
EXPOSE 8080
# filer server gprc port
EXPOSE 18888
# filer server http port
EXPOSE 8888
# master server shared gprc port
EXPOSE 19333
# master server shared http port
EXPOSE 9333
# s3 server http port
EXPOSE 8333
RUN mkdir -p /data/filerldb2
VOLUME /data
RUN chmod +x /entrypoint.sh
ENTRYPOINT ["/entrypoint.sh"]

go.mod (12 lines changed)

@ -6,10 +6,9 @@ require (
cloud.google.com/go v0.44.3
github.com/Azure/azure-pipeline-go v0.2.2 // indirect
github.com/Azure/azure-storage-blob-go v0.8.0
github.com/DataDog/zstd v1.4.1 // indirect
github.com/OneOfOne/xxhash v1.2.2
github.com/Shopify/sarama v1.23.1
github.com/aws/aws-sdk-go v1.23.13
github.com/aws/aws-sdk-go v1.33.5
github.com/buraksezer/consistent v0.0.0-20191006190839-693edf70fd72
github.com/cespare/xxhash v1.1.0
github.com/chrislusf/raft v1.0.1
@ -25,10 +24,9 @@ require (
github.com/facebookgo/subset v0.0.0-20200203212716-c811ad88dec4 // indirect
github.com/frankban/quicktest v1.7.2 // indirect
github.com/go-redis/redis v6.15.7+incompatible
github.com/go-sql-driver/mysql v1.4.1
github.com/go-sql-driver/mysql v1.5.0
github.com/gocql/gocql v0.0.0-20190829130954-e163eff7a8c6
github.com/gogo/protobuf v1.2.2-0.20190730201129-28a6bbf47e48 // indirect
github.com/golang/groupcache v0.0.0-20190702054246-869f871628b6 // indirect
github.com/golang/protobuf v1.4.2
github.com/google/btree v1.0.0
github.com/google/uuid v1.1.1
@ -37,6 +35,7 @@ require (
github.com/grpc-ecosystem/grpc-gateway v1.11.0 // indirect
github.com/hashicorp/golang-lru v0.5.3 // indirect
github.com/jcmturner/gofork v1.0.0 // indirect
github.com/json-iterator/go v1.1.10
github.com/karlseguin/ccache v2.0.3+incompatible
github.com/karlseguin/expect v1.0.1 // indirect
github.com/klauspost/compress v1.10.9
@ -49,6 +48,7 @@ require (
github.com/mattn/go-ieproxy v0.0.0-20190805055040-f9202b1cfdeb // indirect
github.com/mattn/go-runewidth v0.0.4 // indirect
github.com/nats-io/nats-server/v2 v2.0.4 // indirect
github.com/olivere/elastic/v7 v7.0.19
github.com/onsi/ginkgo v1.10.1 // indirect
github.com/onsi/gomega v1.7.0 // indirect
github.com/peterh/liner v1.1.0
@ -64,7 +64,7 @@ require (
github.com/spf13/jwalterweatherman v1.1.0 // indirect
github.com/spf13/viper v1.4.0
github.com/streadway/amqp v0.0.0-20190827072141-edfb9018d271 // indirect
github.com/stretchr/testify v1.4.0
github.com/stretchr/testify v1.5.1
github.com/syndtr/goleveldb v1.0.0
github.com/tidwall/gjson v1.3.2
github.com/tidwall/match v1.0.1
@ -77,7 +77,7 @@ require (
gocloud.dev/pubsub/natspubsub v0.16.0
gocloud.dev/pubsub/rabbitpubsub v0.16.0
golang.org/x/image v0.0.0-20200119044424-58c23975cae1 // indirect
golang.org/x/net v0.0.0-20190909003024-a7b16738d86b
golang.org/x/net v0.0.0-20200202094626-16171245cfb2
golang.org/x/sys v0.0.0-20200202164722-d101bd2416d5
golang.org/x/tools v0.0.0-20191029190741-b9c20aec41a5
google.golang.org/api v0.9.0

go.sum (30 lines changed)

@ -32,8 +32,6 @@ github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03
github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo=
github.com/DataDog/zstd v1.3.6-0.20190409195224-796139022798 h1:2T/jmrHeTezcCM58lvEQXs0UpQJCo5SoGAcg+mbSTIg=
github.com/DataDog/zstd v1.3.6-0.20190409195224-796139022798/go.mod h1:1jcaCB/ufaK+sKp1NBhlGmpz41jOoPQ35bpF36t7BBo=
github.com/DataDog/zstd v1.4.1 h1:3oxKN3wbHibqx897utPC2LTQU4J+IHWWJO+glkAkpFM=
github.com/DataDog/zstd v1.4.1/go.mod h1:1jcaCB/ufaK+sKp1NBhlGmpz41jOoPQ35bpF36t7BBo=
github.com/GoogleCloudPlatform/cloudsql-proxy v0.0.0-20190605020000-c4ba1fdf4d36/go.mod h1:aJ4qN3TfrelA6NZ6AXsXRfmEVaYin3EDbSPJrKS8OXo=
github.com/OneOfOne/xxhash v1.2.2 h1:KMrpdQIwFcEqXDklaen+P1axHaj9BSKzvpUUfnHldSE=
github.com/OneOfOne/xxhash v1.2.2/go.mod h1:HSdplMjZKSmBqAxg5vPj2TmRDmfkzw+cTzAElWljhcU=
@ -49,6 +47,8 @@ github.com/aws/aws-sdk-go v1.19.18/go.mod h1:KmX6BPdI08NWTb3/sm4ZGu5ShLoqVDhKgpi
github.com/aws/aws-sdk-go v1.19.45/go.mod h1:KmX6BPdI08NWTb3/sm4ZGu5ShLoqVDhKgpiN924inxo=
github.com/aws/aws-sdk-go v1.23.13 h1:l/NG+mgQFRGG3dsFzEj0jw9JIs/zYdtU6MXhY1WIDmM=
github.com/aws/aws-sdk-go v1.23.13/go.mod h1:KmX6BPdI08NWTb3/sm4ZGu5ShLoqVDhKgpiN924inxo=
github.com/aws/aws-sdk-go v1.33.5 h1:p2fr1ryvNTU6avUWLI+/H7FGv0TBIjzVM5WDgXBBv4U=
github.com/aws/aws-sdk-go v1.33.5/go.mod h1:5zCpMtNQVjRREroY7sYe8lOMRSxkhG6MZveU8YkpAk0=
github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q=
github.com/beorn7/perks v1.0.0 h1:HWo1m869IqiPhD389kmkxeTalrjNbbJTC8LXupb+sl0=
github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8=
@ -140,6 +140,8 @@ github.com/go-redis/redis v6.15.7+incompatible h1:3skhDh95XQMpnqeqNftPkQD9jL9e5e
github.com/go-redis/redis v6.15.7+incompatible/go.mod h1:NAIEuMOZ/fxfXJIrKDQDz8wamY7mA7PouImQ2Jvg6kA=
github.com/go-sql-driver/mysql v1.4.1 h1:g24URVg0OFbNUTx9qqY1IRZ9D9z3iPyi5zKhQZpNwpA=
github.com/go-sql-driver/mysql v1.4.1/go.mod h1:zAC/RDZ24gD3HViQzih4MyKcchzm+sOG5ZlKdlhCg5w=
github.com/go-sql-driver/mysql v1.5.0 h1:ozyZYNQW3x3HtqT1jira07DN2PArx2v7/mN66gGcHOs=
github.com/go-sql-driver/mysql v1.5.0/go.mod h1:DCzpHaOWr8IXmIStZouvnhqoel9Qv2LBy8hT2VhHyBg=
github.com/go-stack/stack v1.8.0 h1:5SgMzNM5HxrEjV0ww2lTmX6E2Izsfxas4+YHWRs3Lsk=
github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY=
github.com/gobuffalo/attrs v0.0.0-20190224210810-a9411de4debd/go.mod h1:4duuawTqi2wkkpB4ePgWMaai6/Kc6WEz83bhFwpHzj0=
@ -212,6 +214,8 @@ github.com/google/go-cmp v0.3.1 h1:Xye71clBPdm5HgqGwUkwhbynsUJZhDbS20FvLhQ2izg=
github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
github.com/google/go-cmp v0.4.0 h1:xsAVV57WRhGj6kEIi8ReJzQlHHqcBYCElAvkovg3B/4=
github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.5.0 h1:/QaMHBdZ26BB3SSst0Iwl10Epc+xhTquomWX0oZEB6w=
github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-replayers/grpcreplay v0.1.0 h1:eNb1y9rZFmY4ax45uEEECSa8fsxGRU+8Bil52ASAwic=
github.com/google/go-replayers/grpcreplay v0.1.0/go.mod h1:8Ig2Idjpr6gifRd6pNVggX6TC1Zw6Jx74AKp7QNH2QE=
github.com/google/go-replayers/httpreplay v0.1.0 h1:AX7FUb4BjrrzNvblr/OlgwrmFiep6soj5K2QSDW7BGk=
@ -234,8 +238,6 @@ github.com/googleapis/gax-go v2.0.2+incompatible/go.mod h1:SFVmujtThgffbyetf+mdk
github.com/googleapis/gax-go/v2 v2.0.4/go.mod h1:0Wqv26UfaUD9n4G6kQubkQ+KchISgw+vpHVxEJEs9eg=
github.com/googleapis/gax-go/v2 v2.0.5 h1:sjZBwGj9Jlw33ImPtvFviGYvseOtDM7hkSKB7+Tv3SM=
github.com/googleapis/gax-go/v2 v2.0.5/go.mod h1:DWXyrwAJ9X0FpwwEdw+IPEYBICEFu5mhpdKc/us6bOk=
github.com/gorilla/mux v1.7.3 h1:gnP5JzjVOuiZD07fKKToCAOjS0yOpj/qPETTXCCS6hw=
github.com/gorilla/mux v1.7.3/go.mod h1:1lud6UwP+6orDFRuTfBEV8e9/aOM/c4fVVCaMa2zaAs=
github.com/gorilla/mux v1.7.4 h1:VuZ8uybHlWmqV03+zRzdwKL4tUnIp1MAQtp1mIFE1bc=
github.com/gorilla/mux v1.7.4/go.mod h1:DVbg23sWSpFRCP0SfiEN6jmj59UnW/n46BH5rLB71So=
github.com/gorilla/websocket v0.0.0-20170926233335-4201258b820c/go.mod h1:E7qHFY5m1UJ88s3WnNqhKjPHQ0heANvMoAMk2YaljkQ=
@ -277,12 +279,16 @@ github.com/jcmturner/gofork v1.0.0/go.mod h1:MK8+TM0La+2rjBD4jE12Kj1pCCxK7d2LK/U
github.com/jmespath/go-jmespath v0.0.0-20160202185014-0b12d6b521d8/go.mod h1:Nht3zPeWKUH0NzdCt2Blrr5ys8VGpn0CEB0cQHVjt7k=
github.com/jmespath/go-jmespath v0.0.0-20180206201540-c2b33e8439af h1:pmfjZENx5imkbgOkpRUYLnmbU7UEFbjtDA2hxJ1ichM=
github.com/jmespath/go-jmespath v0.0.0-20180206201540-c2b33e8439af/go.mod h1:Nht3zPeWKUH0NzdCt2Blrr5ys8VGpn0CEB0cQHVjt7k=
github.com/jmespath/go-jmespath v0.3.0 h1:OS12ieG61fsCg5+qLJ+SsW9NicxNkg3b25OyT2yCeUc=
github.com/jmespath/go-jmespath v0.3.0/go.mod h1:9QtRXoHjLGCJ5IBSaohpXITPlowMeeYCZ7fLUTSywik=
github.com/joho/godotenv v1.3.0/go.mod h1:7hK45KPybAkOC6peb+G5yklZfMxEjkZhHbwpqxOKXbg=
github.com/jonboulle/clockwork v0.1.0 h1:VKV+ZcuP6l3yW9doeqz6ziZGgcynBVQO+obU0+0hcPo=
github.com/jonboulle/clockwork v0.1.0/go.mod h1:Ii8DK3G1RaLaWxj9trq07+26W01tbo22gdxWY5EU2bo=
github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU=
github.com/json-iterator/go v1.1.7 h1:KfgG9LzI+pYjr4xvmz/5H4FXjokeP+rlHLhv3iH62Fo=
github.com/json-iterator/go v1.1.7/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4=
github.com/json-iterator/go v1.1.10 h1:Kz6Cvnvv2wGdaG/V8yMvfkmNiXq9Ya2KUv4rouJJr68=
github.com/json-iterator/go v1.1.10/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4=
github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU=
github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7VTCxuUUipMqKk8s4w=
github.com/karlseguin/ccache v2.0.3+incompatible h1:j68C9tWOROiOLWTS/kCGg9IcJG+ACqn5+0+t8Oh83UU=
@ -323,6 +329,8 @@ github.com/magiconair/properties v1.8.0 h1:LLgXmsheXeRoUOBOjtwPQCWIYqM/LU1ayDtDe
github.com/magiconair/properties v1.8.0/go.mod h1:PppfXfuXeibc/6YijjN8zIbojt8czPbwD3XqdrwzmxQ=
github.com/magiconair/properties v1.8.1 h1:ZC2Vc7/ZFkGmsVC9KvOjumD+G5lXy2RtTKyzRKO2BQ4=
github.com/magiconair/properties v1.8.1/go.mod h1:PppfXfuXeibc/6YijjN8zIbojt8czPbwD3XqdrwzmxQ=
github.com/mailru/easyjson v0.7.1 h1:mdxE1MF9o53iCb2Ghj1VfWvh7ZOwHpnVG/xwXrV90U8=
github.com/mailru/easyjson v0.7.1/go.mod h1:KAzv3t3aY1NaHWoQz1+4F1ccyAH66Jk7yos7ldAVICs=
github.com/markbates/oncer v0.0.0-20181203154359-bf2de49a0be2/go.mod h1:Ld9puTsIW75CHf65OeIOkyKbteujpZVXDpWK6YGZbxE=
github.com/markbates/safe v1.0.1/go.mod h1:nAqgmRi7cY2nqMc92/bSEeQA+R4OheNU2T1kNSCBdG0=
github.com/mattn/go-colorable v0.0.9/go.mod h1:9vuHe8Xs5qXnSaW/c/ABM9alt+Vo+STaOChaDxuIBZU=
@ -368,6 +376,8 @@ github.com/nats-io/nuid v1.0.1 h1:5iA8DT8V7q8WK2EScv2padNa/rTESc1KdnPw4TC2paw=
github.com/nats-io/nuid v1.0.1/go.mod h1:19wcPz3Ph3q0Jbyiqsd0kePYG7A95tJPxeL+1OSON2c=
github.com/oklog/ulid v1.3.1/go.mod h1:CirwcVhetQ6Lv90oh/F+FBtV6XMibvdAFo93nm5qn4U=
github.com/olekukonko/tablewriter v0.0.0-20170122224234-a0225b3f23b5/go.mod h1:vsDQFd/mU46D+Z4whnwzcISnGGzXWMclvtLoiIKAKIo=
github.com/olivere/elastic/v7 v7.0.19 h1:w4F6JpqOISadhYf/n0NR1cNj73xHqh4pzPwD1Gkidts=
github.com/olivere/elastic/v7 v7.0.19/go.mod h1:4Jqt5xvjqpjCqgnTcHwl3j8TLs8mvoOK8NYgo/qEOu4=
github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE=
github.com/onsi/ginkgo v1.7.0 h1:WSHQ+IS43OoUrWtD1/bbclrwK8TTH5hzp+umCiuxHgs=
github.com/onsi/ginkgo v1.7.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE=
@ -377,6 +387,7 @@ github.com/onsi/gomega v1.4.3 h1:RE1xgDvH7imwFD45h+u2SgIfERHlS2yNG4DObb5BSKU=
github.com/onsi/gomega v1.4.3/go.mod h1:ex+gbHU/CVuBBDIJjb2X0qEXbFg53c61hWP/1CpauHY=
github.com/onsi/gomega v1.7.0 h1:XPnZz8VVBHjVsy1vzJmRwIcSwiUO+JFfrv/xGiigmME=
github.com/onsi/gomega v1.7.0/go.mod h1:ex+gbHU/CVuBBDIJjb2X0qEXbFg53c61hWP/1CpauHY=
github.com/opentracing/opentracing-go v1.2.0/go.mod h1:GxEUsuufX4nBwe+T+Wl9TAgYrxe9dPLANfrWvHYVTgc=
github.com/pelletier/go-toml v1.2.0 h1:T5zMGML61Wp+FlcbWjRDT7yAxhJNAiPPLOFECq181zc=
github.com/pelletier/go-toml v1.2.0/go.mod h1:5z9KED0ma1S8pY6P1sdut58dfprrGBbd/94hg7ilaic=
github.com/pelletier/go-toml v1.4.0 h1:u3Z1r+oOXJIkxqw34zVhyPgjBsm6X2wn21NWs/HfSeg=
@ -389,6 +400,8 @@ github.com/pierrec/lz4 v2.2.7+incompatible/go.mod h1:pdkljMzZIN41W+lC3N2tnIh5sFi
github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pkg/errors v0.8.1 h1:iURUrRGxPUNPdy5/HRSm+Yj6okJ6UtLINN0Q9M4+h3I=
github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pkg/profile v1.2.1/go.mod h1:hJw3o1OdXxsrSjjVksARp5W95eeEaEfptyVZyv6JUPA=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
@ -437,6 +450,9 @@ github.com/sirupsen/logrus v1.4.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPx
github.com/sirupsen/logrus v1.4.1/go.mod h1:ni0Sbl8bgC9z8RoU9G6nDWqqs/fq4eDPysMBDgk/93Q=
github.com/sirupsen/logrus v1.4.2 h1:SPIRibHv4MatM3XXNO2BJeFLZwZ2LvZgfQ5+UNI2im4=
github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE=
github.com/smartystreets/assertions v1.1.1/go.mod h1:tcbTF8ujkAEcZ8TElKY+i30BzYlVhC/LOxJk7iOWnoo=
github.com/smartystreets/go-aws-auth v0.0.0-20180515143844-0c1422d1fdb9/go.mod h1:SnhjPscd9TpLiy1LpzGSKh3bXCfxxXuqd9xmQJy3slM=
github.com/smartystreets/gunit v1.3.4/go.mod h1:ZjM1ozSIMJlAz/ay4SG8PeKF00ckUp+zMHZXV9/bvak=
github.com/soheilhy/cmux v0.1.4 h1:0HKaf1o97UwFjHH9o5XsHUOF+tqmdA7KEzXLpiyaw0E=
github.com/soheilhy/cmux v0.1.4/go.mod h1:IM3LyeVVIOuxMH7sFAkER9+bJ4dT7Ms6E4xg4kGIyLM=
github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA=
@ -472,6 +488,8 @@ github.com/stretchr/testify v1.3.0 h1:TivCn/peBQ7UY8ooIcPgZFpTNSz0Q2U6UrFlUfqbe0
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/stretchr/testify v1.4.0 h1:2E4SXV/wtOkTonXsotYi4li6zVWxYlZuYNCXe9XRJyk=
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
github.com/stretchr/testify v1.5.1 h1:nOGnQDM7FYENwehXlg/kFVnos3rEvtKTjRvOWSzb6H4=
github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA=
github.com/syndtr/goleveldb v1.0.0 h1:fBdIW9lB4Iz0n9khmH8w27SJ3QEJ7+IgjPEwGSZiFdE=
github.com/syndtr/goleveldb v1.0.0/go.mod h1:ZVVdQEZoIme9iO1Ch2Jdy24qqXrMMOU6lpPAyBWyWuQ=
github.com/tidwall/gjson v1.3.2 h1:+7p3qQFaH3fOMXAJSrdZwGKcOO/lYdGS0HqGhPqDdTI=
@ -512,6 +530,8 @@ go.opencensus.io v0.15.0/go.mod h1:UffZAU+4sDEINUGP/B7UfBBkq4fqLu9zXAX7ke6CHW0=
go.opencensus.io v0.21.0/go.mod h1:mSImk1erAIZhrmZN+AvHh14ztQfjbGwt4TtuofqLduU=
go.opencensus.io v0.22.0 h1:C9hSCOW830chIVkdja34wa6Ky+IzWllkUinR+BtRZd4=
go.opencensus.io v0.22.0/go.mod h1:+kGneAE2xo2IficOXnaByMWTGM9T73dGwxeWcUqIpI8=
go.opencensus.io v0.22.4 h1:LYy1Hy3MJdrCdMwwzxA/dRok4ejH+RwNGbuoD9fCjto=
go.opencensus.io v0.22.4/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw=
go.uber.org/atomic v1.4.0 h1:cxzIVoETapQEqDhQu3QfnvXAV4AlzcvUCxkVUFw3+EU=
go.uber.org/atomic v1.4.0/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE=
go.uber.org/atomic v1.6.0 h1:Ezj3JGmsOnG1MoRWQkPBsKLe9DwWD9QeXzTRzzldNVk=
@ -579,6 +599,8 @@ golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLL
golang.org/x/net v0.0.0-20190813141303-74dc4d7220e7/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20190909003024-a7b16738d86b h1:XfVGCX+0T4WOStkaOsJRllbsiImhB2jgVBGc9L0lPGc=
golang.org/x/net v0.0.0-20190909003024-a7b16738d86b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20200202094626-16171245cfb2 h1:CCH4IOTTfewWjGOlSp+zGcjutRKlBEZQ6wTn8ozI/nI=
golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
golang.org/x/oauth2 v0.0.0-20190402181905-9f3314589c9a/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=

k8s/seaweedfs/Chart.yaml (2 lines changed)

@ -1,4 +1,4 @@
apiVersion: v1
description: SeaweedFS
name: seaweedfs
version: 1.87
version: 2.02

k8s/seaweedfs/templates/filer-statefulset.yaml (22 lines changed)

@ -99,6 +99,9 @@ spec:
{{- end }}
filer \
-port={{ .Values.filer.port }} \
{{- if .Values.filer.redirectOnRead }}
-redirectOnRead \
{{- end }}
{{- if .Values.filer.disableHttp }}
-disableHttp \
{{- end }}
@ -106,7 +109,24 @@ spec:
-disableDirListing \
{{- end }}
-dirListLimit={{ .Values.filer.dirListLimit }} \
{{- if .Values.global.enableReplication }}
-defaultReplicaPlacement={{ .Values.global.replicationPlacment }} \
{{- else }}
-defaultReplicaPlacement={{ .Values.filer.defaultReplicaPlacement }} \
{{- end }}
{{- if .Values.filer.disableDirListing }}
-disableDirListing \
{{- end }}
{{- if .Values.filer.maxMB }}
-maxMB={{ .Values.filer.maxMB }} \
{{- end }}
{{- if .Values.filer.encryptVolumeData }}
-encryptVolumeData \
{{- end }}
-ip=${POD_IP} \
{{- if gt (.Values.filer.replicas | int) 1 }}
-peers={{ range $index := until (.Values.filer.replicas | int) }}${SEAWEEDFS_FULLNAME}-filer-{{ $index }}.${SEAWEEDFS_FULLNAME}-filer:{{ $.Values.filer.port }}{{ if lt $index (sub ($.Values.filer.replicas | int) 1) }},{{ end }}{{ end }}
{{- end }}
-master={{ range $index := until (.Values.master.replicas | int) }}${SEAWEEDFS_FULLNAME}-master-{{ $index }}.${SEAWEEDFS_FULLNAME}-master:{{ $.Values.master.port }}{{ if lt $index (sub ($.Values.master.replicas | int) 1) }},{{ end }}{{ end }}
{{- if or (.Values.global.enableSecurity) (.Values.filer.extraVolumeMounts) }}
volumeMounts:
@ -149,6 +169,7 @@ spec:
periodSeconds: 15
successThreshold: 1
failureThreshold: 100
timeoutSeconds: 3
livenessProbe:
httpGet:
path: /
@ -158,6 +179,7 @@ spec:
periodSeconds: 30
successThreshold: 1
failureThreshold: 5
timeoutSeconds: 3
{{- if .Values.filer.resources }}
resources:
{{ tpl .Values.filer.resources . | nindent 12 | trim }}

k8s/seaweedfs/templates/master-statefulset.yaml (30 lines changed)

@ -70,6 +70,12 @@ spec:
fieldPath: metadata.namespace
- name: SEAWEEDFS_FULLNAME
value: "{{ template "seaweedfs.name" . }}"
{{- if .Values.master.extraEnvironmentVars }}
{{- range $key, $value := .Values.master.extraEnvironmentVars }}
- name: {{ $key }}
value: {{ $value | quote }}
{{- end }}
{{- end }}
command:
- "/bin/sh"
- "-ec"
@ -84,6 +90,11 @@ spec:
-port={{ .Values.master.port }} \
-mdir=/data \
-ip.bind={{ .Values.master.ipBind }} \
{{- if .Values.global.enableReplication }}
-defaultReplication={{ .Values.global.replicationPlacment }} \
{{- else }}
-defaultReplication={{ .Values.master.defaultReplication }} \
{{- end }}
{{- if .Values.master.volumePreallocate }}
-volumePreallocate \
{{- end }}
@ -94,6 +105,15 @@ spec:
{{- if .Values.master.disableHttp }}
-disableHttp \
{{- end }}
{{- if .Values.master.pulseSeconds }}
-pulseSeconds={{ .Values.master.pulseSeconds }} \
{{- end }}
{{- if .Values.master.garbageThreshold }}
-garbageThreshold={{ .Values.master.garbageThreshold }} \
{{- end }}
{{- if .Values.master.metricsIntervalSec }}
-metrics.intervalSeconds={{ .Values.master.metricsIntervalSec }} \
{{- end }}
-ip=${POD_NAME}.${SEAWEEDFS_FULLNAME}-master \
-peers={{ range $index := until (.Values.master.replicas | int) }}${SEAWEEDFS_FULLNAME}-master-{{ $index }}.${SEAWEEDFS_FULLNAME}-master:{{ $.Values.master.port }}{{ if lt $index (sub ($.Values.master.replicas | int) 1) }},{{ end }}{{ end }}
volumeMounts:
@ -133,19 +153,21 @@ spec:
path: /cluster/status
port: {{ .Values.master.port }}
scheme: HTTP
initialDelaySeconds: 5
periodSeconds: 15
initialDelaySeconds: 10
periodSeconds: 45
successThreshold: 2
failureThreshold: 100
timeoutSeconds: 5
livenessProbe:
httpGet:
path: /cluster/status
port: {{ .Values.master.port }}
scheme: HTTP
initialDelaySeconds: 20
periodSeconds: 10
periodSeconds: 30
successThreshold: 1
failureThreshold: 6
failureThreshold: 4
timeoutSeconds: 5
{{- if .Values.master.resources }}
resources:
{{ tpl .Values.master.resources . | nindent 12 | trim }}

k8s/seaweedfs/templates/s3-deployment.yaml (2 lines changed)

@ -116,6 +116,7 @@ spec:
periodSeconds: 15
successThreshold: 1
failureThreshold: 100
timeoutSeconds: 3
livenessProbe:
httpGet:
path: /
@ -125,6 +126,7 @@ spec:
periodSeconds: 60
successThreshold: 1
failureThreshold: 20
timeoutSeconds: 3
{{- if .Values.s3.resources }}
resources:
{{ tpl .Values.s3.resources . | nindent 12 | trim }}

k8s/seaweedfs/templates/seaweefs-grafana-dashboard.yaml (4 lines changed)

@ -91,7 +91,7 @@ data:
"thresholds": [],
"timeFrom": null,
"timeShift": null,
"title": "Filer Request Duration 95th percentile",
"title": "Filer Request Duration 80th percentile",
"tooltip": {
"msResolution": true,
"shared": true,
@ -1349,4 +1349,4 @@ data:
"title": "SeaweedFS",
"version": 3
}
{{- end }}
{{- end }}

k8s/seaweedfs/templates/volume-statefulset.yaml (25 lines changed)

@ -12,6 +12,7 @@ metadata:
spec:
serviceName: {{ template "seaweedfs.name" . }}-volume
replicas: {{ .Values.volume.replicas }}
podManagementPolicy: Parallel
selector:
matchLabels:
app: {{ template "seaweedfs.name" . }}
@ -33,7 +34,7 @@ spec:
restartPolicy: {{ default .Values.global.restartPolicy .Values.volume.restartPolicy }}
{{- if .Values.volume.tolerations }}
tolerations:
{{ tpl .Values.volume.tolerations . | nindent 8 | trim }}
{{ tpl .Values.volume.tolerations . | nindent 8 | trim }}
{{- end }}
{{- if .Values.global.imagePullSecrets }}
imagePullSecrets:
@ -62,7 +63,7 @@ spec:
fieldRef:
fieldPath: status.hostIP
- name: SEAWEEDFS_FULLNAME
value: "{{ template "seaweedfs.name" . }}"
value: "{{ template "seaweedfs.name" . }}"
command:
- "/bin/sh"
- "-ec"
@ -91,6 +92,16 @@ spec:
{{- if .Values.volume.imagesFixOrientation }}
-images.fix.orientation \
{{- end }}
{{- if .Values.volume.pulseSeconds }}
-pulseSeconds={{ .Values.volume.pulseSeconds }} \
{{- end }}
{{- if .Values.volume.index }}
-index={{ .Values.volume.index }} \
{{- end }}
{{- if .Values.volume.fileSizeLimitMB }}
-fileSizeLimitMB={{ .Values.volume.fileSizeLimitMB }} \
{{- end }}
-minFreeSpacePercent={{ .Values.volume.minFreeSpacePercent }} \
-ip=${POD_NAME}.${SEAWEEDFS_FULLNAME}-volume \
-compactionMBps={{ .Values.volume.compactionMBps }} \
-mserver={{ range $index := until (.Values.master.replicas | int) }}${SEAWEEDFS_FULLNAME}-master-{{ $index }}.${SEAWEEDFS_FULLNAME}-master:{{ $.Values.master.port }}{{ if lt $index (sub ($.Values.master.replicas | int) 1) }},{{ end }}{{ end }}
@ -131,19 +142,21 @@ spec:
path: /status
port: {{ .Values.volume.port }}
scheme: HTTP
initialDelaySeconds: 5
periodSeconds: 15
initialDelaySeconds: 15
periodSeconds: 90
successThreshold: 1
failureThreshold: 100
timeoutSeconds: 5
livenessProbe:
httpGet:
path: /status
port: {{ .Values.volume.port }}
scheme: HTTP
initialDelaySeconds: 20
periodSeconds: 30
periodSeconds: 90
successThreshold: 1
failureThreshold: 10
failureThreshold: 4
timeoutSeconds: 5
{{- if .Values.volume.resources }}
resources:
{{ tpl .Values.volume.resources . | nindent 12 | trim }}

k8s/seaweedfs/values.yaml (58 lines changed)

@ -4,7 +4,7 @@ global:
registry: ""
repository: ""
imageName: chrislusf/seaweedfs
imageTag: "1.87"
imageTag: "2.02"
imagePullPolicy: IfNotPresent
imagePullSecrets: imagepullsecret
restartPolicy: Always
@ -14,6 +14,13 @@ global:
enabled: false
gatewayHost: null
gatewayPort: null
# if enabled will use global.replicationPlacment and override master & filer defaultReplicaPlacement config
enableReplication: false
# replication type is XYZ:
# X number of replica in other data centers
# Y number of replica in other racks in the same data center
# Z number of replica in other servers in the same rack
replicationPlacment: "001"
image:
registry: ""
@ -31,8 +38,20 @@ master:
grpcPort: 19333
ipBind: "0.0.0.0"
volumePreallocate: false
#Master stops directing writes to oversized volumes
volumeSizeLimitMB: 30000
loggingOverrideLevel: null
#number of seconds between heartbeats, default 5
pulseSeconds: null
#threshold to vacuum and reclaim spaces, default 0.3 (30%)
garbageThreshold: null
#Prometheus push interval in seconds, default 15
metricsIntervalSec: 15
# replication type is XYZ:
# X number of replica in other data centers
# Y number of replica in other racks in the same data center
# Z number of replica in other servers in the same rack
defaultReplication: "000"
# Disable http request, only gRpc operations are allowed
disableHttp: false
@ -87,6 +106,11 @@ master:
# ref: https://kubernetes.io/docs/concepts/configuration/pod-priority-preemption/
priorityClassName: ""
extraEnvironmentVars:
WEED_MASTER_VOLUME_GROWTH_COPY_1: 7
WEED_MASTER_VOLUME_GROWTH_COPY_2: 6
WEED_MASTER_VOLUME_GROWTH_COPY_3: 3
WEED_MASTER_VOLUME_GROWTH_COPY_OTHER: 1
volume:
enabled: true
@ -100,9 +124,18 @@ volume:
ipBind: "0.0.0.0"
replicas: 1
loggingOverrideLevel: null
# number of seconds between heartbeats, must be smaller than or equal to the master's setting
pulseSeconds: null
# Choose [memory|leveldb|leveldbMedium|leveldbLarge] mode for memory~performance balance., default memory
index: null
# limit file size to avoid out of memory, default 256mb
fileSizeLimitMB: null
# minimum free disk space(in percents). If free disk space lower this value - all volumes marks as ReadOnly
minFreeSpacePercent: 1
# limit background compaction or copying speed in mega bytes per second
compactionMBps: "40"
compactionMBps: "50"
# Directories to store data files. dir[,dir]... (default "/tmp")
dir: "/data"
@ -177,6 +210,20 @@ filer:
port: 8888
grpcPort: 18888
loggingOverrideLevel: null
# replication type is XYZ:
# X number of replica in other data centers
# Y number of replica in other racks in the same data center
# Z number of replica in other servers in the same rack
defaultReplicaPlacement: "000"
# turn off directory listing
disableDirListing: false
# split files larger than the limit, default 32
maxMB: null
# encrypt data on volume servers
encryptVolumeData: false
# Whether proxy or redirect to volume server during file GET request
redirectOnRead: false
# Limit sub dir listing size (default 100000)
dirListLimit: 100000
@ -237,11 +284,6 @@ filer:
# ref: https://kubernetes.io/docs/concepts/configuration/pod-priority-preemption/
priorityClassName: ""
dbSchema:
imageName: db-schema
imageTag: "development"
imageOverride: ""
# extraEnvVars is a list of extra enviroment variables to set with the stateful set.
extraEnvironmentVars:
WEED_MYSQL_ENABLED: "true"
@ -260,6 +302,8 @@ filer:
WEED_FILER_BUCKETS_FOLDER: "/buckets"
# directories under this folder will be store message queue data
WEED_FILER_QUEUES_FOLDER: "/queues"
# WEED_FILER_OPTIONS_BUCKETS_FSYNC a list of buckets names with all write requests fsync=true
WEED_FILER_OPTIONS_BUCKETS_FSYNC: []
s3:
enabled: true

other/java/client/pom.xml (2 lines changed)

@ -5,7 +5,7 @@
<groupId>com.github.chrislusf</groupId>
<artifactId>seaweedfs-client</artifactId>
<version>1.4.4</version>
<version>1.4.8</version>
<parent>
<groupId>org.sonatype.oss</groupId>

other/java/client/pom.xml.deploy (2 lines changed)

@ -5,7 +5,7 @@
<groupId>com.github.chrislusf</groupId>
<artifactId>seaweedfs-client</artifactId>
<version>1.4.4</version>
<version>1.4.8</version>
<parent>
<groupId>org.sonatype.oss</groupId>

other/java/client/pom_debug.xml (2 lines changed)

@ -5,7 +5,7 @@
<groupId>com.github.chrislusf</groupId>
<artifactId>seaweedfs-client</artifactId>
<version>1.4.4</version>
<version>1.4.8</version>
<parent>
<groupId>org.sonatype.oss</groupId>

other/java/client/src/main/java/seaweedfs/client/SeaweedRead.java (55 lines changed)

@ -23,7 +23,7 @@ public class SeaweedRead {
// returns bytesRead
public static long read(FilerGrpcClient filerGrpcClient, List<VisibleInterval> visibleIntervals,
final long position, final byte[] buffer, final int bufferOffset,
final int bufferLength) throws IOException {
final int bufferLength, final long fileSize) throws IOException {
List<ChunkView> chunkViews = viewFromVisibles(visibleIntervals, position, bufferLength);
@ -42,6 +42,14 @@ public class SeaweedRead {
long readCount = 0;
int startOffset = bufferOffset;
for (ChunkView chunkView : chunkViews) {
if (startOffset < chunkView.logicOffset) {
long gap = chunkView.logicOffset - startOffset;
LOG.debug("zero [{},{})", startOffset, startOffset + gap);
readCount += gap;
startOffset += gap;
}
FilerProto.Locations locations = vid2Locations.get(parseVolumeId(chunkView.fileId));
if (locations == null || locations.getLocationsCount() == 0) {
LOG.error("failed to locate {}", chunkView.fileId);
@ -51,11 +59,22 @@ public class SeaweedRead {
int len = readChunkView(position, buffer, startOffset, chunkView, locations);
LOG.debug("read [{},{}) {} size {}", startOffset, startOffset + len, chunkView.fileId, chunkView.size);
readCount += len;
startOffset += len;
}
long limit = Math.min(bufferLength, fileSize);
if (startOffset < limit) {
long gap = limit - startOffset;
LOG.debug("zero2 [{},{})", startOffset, startOffset + gap);
readCount += gap;
startOffset += gap;
}
return readCount;
}
@ -71,7 +90,7 @@ public class SeaweedRead {
int len = (int) chunkView.size;
LOG.debug("readChunkView fid:{} chunkData.length:{} chunkView.offset:{} buffer.length:{} startOffset:{} len:{}",
chunkView.fileId, chunkData.length, chunkView.offset, buffer.length, startOffset, len);
System.arraycopy(chunkData, (int) chunkView.offset, buffer, startOffset, len);
System.arraycopy(chunkData, startOffset - (int) (chunkView.logicOffset - chunkView.offset), buffer, startOffset, len);
return len;
}
@ -93,7 +112,7 @@ public class SeaweedRead {
Header contentEncodingHeader = entity.getContentEncoding();
if (contentEncodingHeader != null) {
HeaderElement[] encodings =contentEncodingHeader.getElements();
HeaderElement[] encodings = contentEncodingHeader.getElements();
for (int i = 0; i < encodings.length; i++) {
if (encodings[i].getName().equalsIgnoreCase("gzip")) {
entity = new GzipDecompressingEntity(entity);
@ -134,18 +153,19 @@ public class SeaweedRead {
long stop = offset + size;
for (VisibleInterval chunk : visibleIntervals) {
if (chunk.start <= offset && offset < chunk.stop && offset < stop) {
long chunkStart = Math.max(offset, chunk.start);
long chunkStop = Math.min(stop, chunk.stop);
if (chunkStart < chunkStop) {
boolean isFullChunk = chunk.isFullChunk && chunk.start == offset && chunk.stop <= stop;
views.add(new ChunkView(
chunk.fileId,
offset - chunk.start,
Math.min(chunk.stop, stop) - offset,
offset,
chunkStart - chunk.start + chunk.chunkOffset,
chunkStop - chunkStart,
chunkStart,
isFullChunk,
chunk.cipherKey,
chunk.isCompressed
));
offset = Math.min(chunk.stop, stop);
}
}
return views;
@ -160,7 +180,13 @@ public class SeaweedRead {
Arrays.sort(chunks, new Comparator<FilerProto.FileChunk>() {
@Override
public int compare(FilerProto.FileChunk a, FilerProto.FileChunk b) {
return (int) (a.getMtime() - b.getMtime());
// if just a.getMtime() - b.getMtime(), it will overflow!
if (a.getMtime() < b.getMtime()) {
return -1;
} else if (a.getMtime() > b.getMtime()) {
return 1;
}
return 0;
}
});
@ -181,6 +207,7 @@ public class SeaweedRead {
chunk.getOffset() + chunk.getSize(),
chunk.getFileId(),
chunk.getMtime(),
0,
true,
chunk.getCipherKey().toByteArray(),
chunk.getIsCompressed()
@ -203,6 +230,7 @@ public class SeaweedRead {
chunk.getOffset(),
v.fileId,
v.modifiedTime,
v.chunkOffset,
false,
v.cipherKey,
v.isCompressed
@ -215,6 +243,7 @@ public class SeaweedRead {
v.stop,
v.fileId,
v.modifiedTime,
v.chunkOffset + (chunkStop - v.start),
false,
v.cipherKey,
v.isCompressed
@ -247,6 +276,10 @@ public class SeaweedRead {
return fileId;
}
public static long fileSize(FilerProto.Entry entry) {
return Math.max(totalSize(entry.getChunksList()), entry.getAttributes().getFileSize());
}
public static long totalSize(List<FilerProto.FileChunk> chunksList) {
long size = 0;
for (FilerProto.FileChunk chunk : chunksList) {
@ -263,15 +296,17 @@ public class SeaweedRead {
public final long stop;
public final long modifiedTime;
public final String fileId;
public final long chunkOffset;
public final boolean isFullChunk;
public final byte[] cipherKey;
public final boolean isCompressed;
public VisibleInterval(long start, long stop, String fileId, long modifiedTime, boolean isFullChunk, byte[] cipherKey, boolean isCompressed) {
public VisibleInterval(long start, long stop, String fileId, long modifiedTime, long chunkOffset, boolean isFullChunk, byte[] cipherKey, boolean isCompressed) {
this.start = start;
this.stop = stop;
this.modifiedTime = modifiedTime;
this.fileId = fileId;
this.chunkOffset = chunkOffset;
this.isFullChunk = isFullChunk;
this.cipherKey = cipherKey;
this.isCompressed = isCompressed;
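The comparator rewrite above fixes a real overflow bug: chunk mtimes are large 64-bit values, and casting their long difference to int can flip the sign. A minimal standalone illustration (the timestamp values are invented for the demo and assume nanosecond-scale mtimes):

import java.lang.Long;

public class MtimeCompareDemo {
    public static void main(String[] args) {
        long newer = 1_600_000_003_000_000_000L; // later mtime
        long older = 1_600_000_000_000_000_000L; // earlier mtime, 3 seconds before
        // Old approach: the long difference (3_000_000_000) exceeds
        // Integer.MAX_VALUE, so the int cast wraps around and turns negative.
        System.out.println((int) (newer - older));      // -1294967296, wrong sign
        // Safe comparison, equivalent to the explicit if/else in the patch.
        System.out.println(Long.compare(newer, older)); // 1, correct
    }
}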

other/java/client/src/main/proto/filer.proto (32 lines changed)

@ -58,6 +58,12 @@ service SeaweedFiler {
rpc LocateBroker (LocateBrokerRequest) returns (LocateBrokerResponse) {
}
rpc KvGet (KvGetRequest) returns (KvGetResponse) {
}
rpc KvPut (KvPutRequest) returns (KvPutResponse) {
}
}
//////////////////////////////////////////////////
@ -89,6 +95,8 @@ message Entry {
repeated FileChunk chunks = 3;
FuseAttributes attributes = 4;
map<string, bytes> extended = 5;
bytes hard_link_id = 7;
int32 hard_link_counter = 8; // only exists in hard link meta data
}
message FullEntry {
@ -102,6 +110,7 @@ message EventNotification {
bool delete_chunks = 3;
string new_parent_path = 4;
bool is_from_other_cluster = 5;
repeated int32 signatures = 6;
}
message FileChunk {
@ -150,6 +159,7 @@ message CreateEntryRequest {
Entry entry = 2;
bool o_excl = 3;
bool is_from_other_cluster = 4;
repeated int32 signatures = 5;
}
message CreateEntryResponse {
@ -160,6 +170,7 @@ message UpdateEntryRequest {
string directory = 1;
Entry entry = 2;
bool is_from_other_cluster = 3;
repeated int32 signatures = 4;
}
message UpdateEntryResponse {
}
@ -180,6 +191,7 @@ message DeleteEntryRequest {
bool is_recursive = 5;
bool ignore_recursive_error = 6;
bool is_from_other_cluster = 7;
repeated int32 signatures = 8;
}
message DeleteEntryResponse {
@ -262,12 +274,16 @@ message GetFilerConfigurationResponse {
uint32 max_mb = 4;
string dir_buckets = 5;
bool cipher = 7;
int32 signature = 8;
string metrics_address = 9;
int32 metrics_interval_sec = 10;
}
message SubscribeMetadataRequest {
string client_name = 1;
string path_prefix = 2;
int64 since_ns = 3;
int32 signature = 4;
}
message SubscribeMetadataResponse {
string directory = 1;
@ -302,3 +318,19 @@ message LocateBrokerResponse {
}
repeated Resource resources = 2;
}
// Key-Value operations
message KvGetRequest {
bytes key = 1;
}
message KvGetResponse {
bytes value = 1;
string error = 2;
}
message KvPutRequest {
bytes key = 1;
bytes value = 2;
}
message KvPutResponse {
string error = 1;
}
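The new KvGet/KvPut RPCs and their request/response messages above expose a small key-value store on the filer. Below is a rough Java sketch of calling them through the generated protobuf classes; the getBlockingStub() accessor on FilerGrpcClient is an assumption for this sketch and may differ from the actual client API:

import com.google.protobuf.ByteString;
import seaweedfs.client.FilerGrpcClient;
import seaweedfs.client.FilerProto;

public class FilerKvSketch {
    // Stores a value under a key, then reads it back via the filer's KV RPCs.
    public static byte[] putThenGet(FilerGrpcClient client, byte[] key, byte[] value) {
        FilerProto.KvPutResponse putResponse = client.getBlockingStub().kvPut(
                FilerProto.KvPutRequest.newBuilder()
                        .setKey(ByteString.copyFrom(key))
                        .setValue(ByteString.copyFrom(value))
                        .build());
        if (!putResponse.getError().isEmpty()) {
            throw new RuntimeException("KvPut failed: " + putResponse.getError());
        }
        FilerProto.KvGetResponse getResponse = client.getBlockingStub().kvGet(
                FilerProto.KvGetRequest.newBuilder()
                        .setKey(ByteString.copyFrom(key))
                        .build());
        return getResponse.getValue().toByteArray();
    }
}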

other/java/hdfs2/dependency-reduced-pom.xml (176 lines changed)

@ -120,6 +120,180 @@
</plugin>
</plugins>
</build>
<dependencies>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>2.9.2</version>
<scope>provided</scope>
<exclusions>
<exclusion>
<artifactId>hadoop-hdfs-client</artifactId>
<groupId>org.apache.hadoop</groupId>
</exclusion>
<exclusion>
<artifactId>hadoop-mapreduce-client-app</artifactId>
<groupId>org.apache.hadoop</groupId>
</exclusion>
<exclusion>
<artifactId>hadoop-yarn-api</artifactId>
<groupId>org.apache.hadoop</groupId>
</exclusion>
<exclusion>
<artifactId>hadoop-mapreduce-client-core</artifactId>
<groupId>org.apache.hadoop</groupId>
</exclusion>
<exclusion>
<artifactId>hadoop-mapreduce-client-jobclient</artifactId>
<groupId>org.apache.hadoop</groupId>
</exclusion>
<exclusion>
<artifactId>hadoop-annotations</artifactId>
<groupId>org.apache.hadoop</groupId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>2.9.2</version>
<scope>provided</scope>
<exclusions>
<exclusion>
<artifactId>commons-cli</artifactId>
<groupId>commons-cli</groupId>
</exclusion>
<exclusion>
<artifactId>commons-math3</artifactId>
<groupId>org.apache.commons</groupId>
</exclusion>
<exclusion>
<artifactId>xmlenc</artifactId>
<groupId>xmlenc</groupId>
</exclusion>
<exclusion>
<artifactId>commons-io</artifactId>
<groupId>commons-io</groupId>
</exclusion>
<exclusion>
<artifactId>commons-net</artifactId>
<groupId>commons-net</groupId>
</exclusion>
<exclusion>
<artifactId>commons-collections</artifactId>
<groupId>commons-collections</groupId>
</exclusion>
<exclusion>
<artifactId>servlet-api</artifactId>
<groupId>javax.servlet</groupId>
</exclusion>
<exclusion>
<artifactId>jetty</artifactId>
<groupId>org.mortbay.jetty</groupId>
</exclusion>
<exclusion>
<artifactId>jetty-util</artifactId>
<groupId>org.mortbay.jetty</groupId>
</exclusion>
<exclusion>
<artifactId>jetty-sslengine</artifactId>
<groupId>org.mortbay.jetty</groupId>
</exclusion>
<exclusion>
<artifactId>jsp-api</artifactId>
<groupId>javax.servlet.jsp</groupId>
</exclusion>
<exclusion>
<artifactId>jersey-core</artifactId>
<groupId>com.sun.jersey</groupId>
</exclusion>
<exclusion>
<artifactId>jersey-json</artifactId>
<groupId>com.sun.jersey</groupId>
</exclusion>
<exclusion>
<artifactId>jersey-server</artifactId>
<groupId>com.sun.jersey</groupId>
</exclusion>
<exclusion>
<artifactId>log4j</artifactId>
<groupId>log4j</groupId>
</exclusion>
<exclusion>
<artifactId>jets3t</artifactId>
<groupId>net.java.dev.jets3t</groupId>
</exclusion>
<exclusion>
<artifactId>commons-lang</artifactId>
<groupId>commons-lang</groupId>
</exclusion>
<exclusion>
<artifactId>commons-configuration</artifactId>
<groupId>commons-configuration</groupId>
</exclusion>
<exclusion>
<artifactId>commons-lang3</artifactId>
<groupId>org.apache.commons</groupId>
</exclusion>
<exclusion>
<artifactId>slf4j-log4j12</artifactId>
<groupId>org.slf4j</groupId>
</exclusion>
<exclusion>
<artifactId>jackson-core-asl</artifactId>
<groupId>org.codehaus.jackson</groupId>
</exclusion>
<exclusion>
<artifactId>jackson-mapper-asl</artifactId>
<groupId>org.codehaus.jackson</groupId>
</exclusion>
<exclusion>
<artifactId>avro</artifactId>
<groupId>org.apache.avro</groupId>
</exclusion>
<exclusion>
<artifactId>hadoop-auth</artifactId>
<groupId>org.apache.hadoop</groupId>
</exclusion>
<exclusion>
<artifactId>jsch</artifactId>
<groupId>com.jcraft</groupId>
</exclusion>
<exclusion>
<artifactId>curator-client</artifactId>
<groupId>org.apache.curator</groupId>
</exclusion>
<exclusion>
<artifactId>curator-recipes</artifactId>
<groupId>org.apache.curator</groupId>
</exclusion>
<exclusion>
<artifactId>htrace-core4</artifactId>
<groupId>org.apache.htrace</groupId>
</exclusion>
<exclusion>
<artifactId>zookeeper</artifactId>
<groupId>org.apache.zookeeper</groupId>
</exclusion>
<exclusion>
<artifactId>commons-compress</artifactId>
<groupId>org.apache.commons</groupId>
</exclusion>
<exclusion>
<artifactId>stax2-api</artifactId>
<groupId>org.codehaus.woodstox</groupId>
</exclusion>
<exclusion>
<artifactId>woodstox-core</artifactId>
<groupId>com.fasterxml.woodstox</groupId>
</exclusion>
<exclusion>
<artifactId>hadoop-annotations</artifactId>
<groupId>org.apache.hadoop</groupId>
</exclusion>
</exclusions>
</dependency>
</dependencies>
<distributionManagement>
<snapshotRepository>
<id>ossrh</id>
@ -127,7 +301,7 @@
</snapshotRepository>
</distributionManagement>
<properties>
<seaweedfs.client.version>1.4.4</seaweedfs.client.version>
<seaweedfs.client.version>1.4.8</seaweedfs.client.version>
<hadoop.version>2.9.2</hadoop.version>
</properties>
</project>

other/java/hdfs2/pom.xml (4 lines changed)

@ -5,7 +5,7 @@
<modelVersion>4.0.0</modelVersion>
<properties>
<seaweedfs.client.version>1.4.4</seaweedfs.client.version>
<seaweedfs.client.version>1.4.8</seaweedfs.client.version>
<hadoop.version>2.9.2</hadoop.version>
</properties>
@ -147,6 +147,7 @@
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>${hadoop.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>com.github.chrislusf</groupId>
@ -157,6 +158,7 @@
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>${hadoop.version}</version>
<scope>provided</scope>
</dependency>
</dependencies>

other/java/hdfs2/src/main/java/seaweed/hdfs/SeaweedFileSystem.java (29 lines changed)

@ -5,7 +5,6 @@ import org.apache.hadoop.fs.*;
import org.apache.hadoop.fs.permission.AclEntry;
import org.apache.hadoop.fs.permission.AclStatus;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.security.AccessControlException;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.util.Progressable;
import org.slf4j.Logger;
@ -14,20 +13,19 @@ import seaweedfs.client.FilerProto;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.URI;
import java.util.EnumSet;
import java.util.List;
import java.util.Map;
import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.IO_FILE_BUFFER_SIZE_KEY;
public class SeaweedFileSystem extends FileSystem {
public static final int FS_SEAWEED_DEFAULT_PORT = 8888;
public static final String FS_SEAWEED_FILER_HOST = "fs.seaweed.filer.host";
public static final String FS_SEAWEED_FILER_PORT = "fs.seaweed.filer.port";
public static final int FS_SEAWEED_DEFAULT_PORT = 8888;
public static final String FS_SEAWEED_BUFFER_SIZE = "fs.seaweed.buffer.size";
public static final int FS_SEAWEED_DEFAULT_BUFFER_SIZE = 4 * 1024 * 1024;
private static final Logger LOG = LoggerFactory.getLogger(SeaweedFileSystem.class);
@ -75,8 +73,9 @@ public class SeaweedFileSystem extends FileSystem {
path = qualify(path);
try {
FSInputStream inputStream = seaweedFileSystemStore.openFileForRead(path, statistics, bufferSize);
return new FSDataInputStream(new BufferedFSInputStream(inputStream, 16 * 1024 * 1024));
int seaweedBufferSize = this.getConf().getInt(FS_SEAWEED_BUFFER_SIZE, FS_SEAWEED_DEFAULT_BUFFER_SIZE);
FSInputStream inputStream = seaweedFileSystemStore.openFileForRead(path, statistics, seaweedBufferSize);
return new FSDataInputStream(new BufferedFSInputStream(inputStream, 4 * seaweedBufferSize));
} catch (Exception ex) {
LOG.warn("open path: {} bufferSize:{}", path, bufferSize, ex);
return null;
@ -93,7 +92,8 @@ public class SeaweedFileSystem extends FileSystem {
try {
String replicaPlacement = String.format("%03d", replication - 1);
OutputStream outputStream = seaweedFileSystemStore.createFile(path, overwrite, permission, bufferSize, replicaPlacement);
int seaweedBufferSize = this.getConf().getInt(FS_SEAWEED_BUFFER_SIZE, FS_SEAWEED_DEFAULT_BUFFER_SIZE);
OutputStream outputStream = seaweedFileSystemStore.createFile(path, overwrite, permission, seaweedBufferSize, replicaPlacement);
return new FSDataOutputStream(outputStream, statistics);
} catch (Exception ex) {
LOG.warn("create path: {} bufferSize:{} blockSize:{}", path, bufferSize, blockSize, ex);
@ -103,8 +103,9 @@ public class SeaweedFileSystem extends FileSystem {
/**
* {@inheritDoc}
*
* @throws FileNotFoundException if the parent directory is not present -or
* is not a directory.
* is not a directory.
*/
@Override
public FSDataOutputStream createNonRecursive(Path path,
@ -121,9 +122,10 @@ public class SeaweedFileSystem extends FileSystem {
throw new FileAlreadyExistsException("Not a directory: " + parent);
}
}
int seaweedBufferSize = this.getConf().getInt(FS_SEAWEED_BUFFER_SIZE, FS_SEAWEED_DEFAULT_BUFFER_SIZE);
return create(path, permission,
flags.contains(CreateFlag.OVERWRITE), bufferSize,
replication, blockSize, progress);
replication, seaweedBufferSize, progress);
}
@Override
@ -133,7 +135,8 @@ public class SeaweedFileSystem extends FileSystem {
path = qualify(path);
try {
OutputStream outputStream = seaweedFileSystemStore.createFile(path, false, null, bufferSize, "");
int seaweedBufferSize = this.getConf().getInt(FS_SEAWEED_BUFFER_SIZE, FS_SEAWEED_DEFAULT_BUFFER_SIZE);
OutputStream outputStream = seaweedFileSystemStore.createFile(path, false, null, seaweedBufferSize, "");
return new FSDataOutputStream(outputStream, statistics);
} catch (Exception ex) {
LOG.warn("append path: {} bufferSize:{}", path, bufferSize, ex);
@ -338,9 +341,7 @@ public class SeaweedFileSystem extends FileSystem {
@Override
public void createSymlink(final Path target, final Path link,
final boolean createParent) throws AccessControlException,
FileAlreadyExistsException, FileNotFoundException,
ParentNotDirectoryException, UnsupportedFileSystemException,
final boolean createParent) throws
IOException {
// Supporting filesystems should override this method
throw new UnsupportedOperationException(
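
Note: the hunk above replaces the hard-coded 16 MB BufferedFSInputStream buffer and the caller-supplied bufferSize with a single fs.seaweed.buffer.size setting (default 4 * 1024 * 1024), also applied in create() and append(). A minimal sketch of how a client could override it; only the property names and the default come from this diff, while the seaweedfs:// scheme, the fs.seaweedfs.impl wiring and the host/port values are illustrative assumptions.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import java.net.URI;

public class SeaweedBufferSizeExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Assumed wiring for the SeaweedFS Hadoop client; not part of this diff.
        conf.set("fs.seaweedfs.impl", "seaweed.hdfs.SeaweedFileSystem");
        conf.set("fs.seaweed.filer.host", "localhost");
        conf.setInt("fs.seaweed.filer.port", 8888);
        // Property name and default taken from the diff; 8 MB is just an example override.
        conf.setInt("fs.seaweed.buffer.size", 8 * 1024 * 1024);
        FileSystem fs = FileSystem.get(URI.create("seaweedfs://localhost:8888/"), conf);
        try (FSDataInputStream in = fs.open(new Path("/example.txt"))) {
            byte[] buf = new byte[4096];
            int n = in.read(buf, 0, buf.length);
            System.out.println("read " + n + " bytes");
        }
    }
}

Since open() wraps the stream in a BufferedFSInputStream of 4 * seaweedBufferSize, the one setting now controls both the read-ahead buffer and the write buffer.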

11
other/java/hdfs2/src/main/java/seaweed/hdfs/SeaweedFileSystemStore.java

@ -8,14 +8,10 @@ import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.security.UserGroupInformation;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import seaweedfs.client.FilerClient;
import seaweedfs.client.FilerGrpcClient;
import seaweedfs.client.FilerProto;
import seaweedfs.client.SeaweedRead;
import seaweedfs.client.*;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.Arrays;
@ -124,7 +120,7 @@ public class SeaweedFileSystemStore {
private FileStatus doGetFileStatus(Path path, FilerProto.Entry entry) {
FilerProto.FuseAttributes attributes = entry.getAttributes();
long length = SeaweedRead.totalSize(entry.getChunksList());
long length = SeaweedRead.fileSize(entry);
boolean isDir = entry.getIsDirectory();
int block_replication = 1;
int blocksize = 512;
@ -185,7 +181,7 @@ public class SeaweedFileSystemStore {
entry.mergeFrom(existingEntry);
entry.getAttributesBuilder().setMtime(now);
LOG.debug("createFile merged entry path:{} entry:{} from:{}", path, entry, existingEntry);
writePosition = SeaweedRead.totalSize(existingEntry.getChunksList());
writePosition = SeaweedRead.fileSize(existingEntry);
replication = existingEntry.getAttributes().getReplication();
}
}
@ -202,6 +198,7 @@ public class SeaweedFileSystemStore {
.clearGroupName()
.addAllGroupName(Arrays.asList(userGroupInformation.getGroupNames()))
);
SeaweedWrite.writeMeta(filerGrpcClient, getParentDirectory(path), entry);
}
return new SeaweedOutputStream(filerGrpcClient, path, entry, writePosition, bufferSize, replication);

4
other/java/hdfs2/src/main/java/seaweed/hdfs/SeaweedInputStream.java

@ -41,7 +41,7 @@ public class SeaweedInputStream extends FSInputStream {
this.statistics = statistics;
this.path = path;
this.entry = entry;
this.contentLength = SeaweedRead.totalSize(entry.getChunksList());
this.contentLength = SeaweedRead.fileSize(entry);
this.bufferSize = bufferSize;
this.visibleIntervalList = SeaweedRead.nonOverlappingVisibleIntervals(filerGrpcClient, entry.getChunksList());
@ -87,7 +87,7 @@ public class SeaweedInputStream extends FSInputStream {
throw new IllegalArgumentException("requested read length is more than will fit after requested offset in buffer");
}
long bytesRead = SeaweedRead.read(this.filerGrpcClient, this.visibleIntervalList, this.position, b, off, len);
long bytesRead = SeaweedRead.read(this.filerGrpcClient, this.visibleIntervalList, this.position, b, off, len, SeaweedRead.fileSize(entry));
if (bytesRead > Integer.MAX_VALUE) {
throw new IOException("Unexpected Content-Length");
}

184
other/java/hdfs3/dependency-reduced-pom.xml

@ -120,6 +120,188 @@
</plugin>
</plugins>
</build>
<dependencies>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>3.1.1</version>
<scope>provided</scope>
<exclusions>
<exclusion>
<artifactId>hadoop-hdfs-client</artifactId>
<groupId>org.apache.hadoop</groupId>
</exclusion>
<exclusion>
<artifactId>hadoop-yarn-api</artifactId>
<groupId>org.apache.hadoop</groupId>
</exclusion>
<exclusion>
<artifactId>hadoop-yarn-client</artifactId>
<groupId>org.apache.hadoop</groupId>
</exclusion>
<exclusion>
<artifactId>hadoop-mapreduce-client-core</artifactId>
<groupId>org.apache.hadoop</groupId>
</exclusion>
<exclusion>
<artifactId>hadoop-mapreduce-client-jobclient</artifactId>
<groupId>org.apache.hadoop</groupId>
</exclusion>
<exclusion>
<artifactId>hadoop-annotations</artifactId>
<groupId>org.apache.hadoop</groupId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>3.1.1</version>
<scope>provided</scope>
<exclusions>
<exclusion>
<artifactId>commons-cli</artifactId>
<groupId>commons-cli</groupId>
</exclusion>
<exclusion>
<artifactId>commons-math3</artifactId>
<groupId>org.apache.commons</groupId>
</exclusion>
<exclusion>
<artifactId>commons-io</artifactId>
<groupId>commons-io</groupId>
</exclusion>
<exclusion>
<artifactId>commons-net</artifactId>
<groupId>commons-net</groupId>
</exclusion>
<exclusion>
<artifactId>commons-collections</artifactId>
<groupId>commons-collections</groupId>
</exclusion>
<exclusion>
<artifactId>javax.servlet-api</artifactId>
<groupId>javax.servlet</groupId>
</exclusion>
<exclusion>
<artifactId>jetty-server</artifactId>
<groupId>org.eclipse.jetty</groupId>
</exclusion>
<exclusion>
<artifactId>jetty-util</artifactId>
<groupId>org.eclipse.jetty</groupId>
</exclusion>
<exclusion>
<artifactId>jetty-servlet</artifactId>
<groupId>org.eclipse.jetty</groupId>
</exclusion>
<exclusion>
<artifactId>jetty-webapp</artifactId>
<groupId>org.eclipse.jetty</groupId>
</exclusion>
<exclusion>
<artifactId>jsp-api</artifactId>
<groupId>javax.servlet.jsp</groupId>
</exclusion>
<exclusion>
<artifactId>jersey-core</artifactId>
<groupId>com.sun.jersey</groupId>
</exclusion>
<exclusion>
<artifactId>jersey-servlet</artifactId>
<groupId>com.sun.jersey</groupId>
</exclusion>
<exclusion>
<artifactId>jersey-json</artifactId>
<groupId>com.sun.jersey</groupId>
</exclusion>
<exclusion>
<artifactId>jersey-server</artifactId>
<groupId>com.sun.jersey</groupId>
</exclusion>
<exclusion>
<artifactId>log4j</artifactId>
<groupId>log4j</groupId>
</exclusion>
<exclusion>
<artifactId>commons-lang</artifactId>
<groupId>commons-lang</groupId>
</exclusion>
<exclusion>
<artifactId>commons-beanutils</artifactId>
<groupId>commons-beanutils</groupId>
</exclusion>
<exclusion>
<artifactId>commons-configuration2</artifactId>
<groupId>org.apache.commons</groupId>
</exclusion>
<exclusion>
<artifactId>commons-lang3</artifactId>
<groupId>org.apache.commons</groupId>
</exclusion>
<exclusion>
<artifactId>slf4j-log4j12</artifactId>
<groupId>org.slf4j</groupId>
</exclusion>
<exclusion>
<artifactId>avro</artifactId>
<groupId>org.apache.avro</groupId>
</exclusion>
<exclusion>
<artifactId>re2j</artifactId>
<groupId>com.google.re2j</groupId>
</exclusion>
<exclusion>
<artifactId>hadoop-auth</artifactId>
<groupId>org.apache.hadoop</groupId>
</exclusion>
<exclusion>
<artifactId>jsch</artifactId>
<groupId>com.jcraft</groupId>
</exclusion>
<exclusion>
<artifactId>curator-client</artifactId>
<groupId>org.apache.curator</groupId>
</exclusion>
<exclusion>
<artifactId>curator-recipes</artifactId>
<groupId>org.apache.curator</groupId>
</exclusion>
<exclusion>
<artifactId>htrace-core4</artifactId>
<groupId>org.apache.htrace</groupId>
</exclusion>
<exclusion>
<artifactId>zookeeper</artifactId>
<groupId>org.apache.zookeeper</groupId>
</exclusion>
<exclusion>
<artifactId>commons-compress</artifactId>
<groupId>org.apache.commons</groupId>
</exclusion>
<exclusion>
<artifactId>kerb-simplekdc</artifactId>
<groupId>org.apache.kerby</groupId>
</exclusion>
<exclusion>
<artifactId>jackson-databind</artifactId>
<groupId>com.fasterxml.jackson.core</groupId>
</exclusion>
<exclusion>
<artifactId>stax2-api</artifactId>
<groupId>org.codehaus.woodstox</groupId>
</exclusion>
<exclusion>
<artifactId>woodstox-core</artifactId>
<groupId>com.fasterxml.woodstox</groupId>
</exclusion>
<exclusion>
<artifactId>hadoop-annotations</artifactId>
<groupId>org.apache.hadoop</groupId>
</exclusion>
</exclusions>
</dependency>
</dependencies>
<distributionManagement>
<snapshotRepository>
<id>ossrh</id>
@ -127,7 +309,7 @@
</snapshotRepository>
</distributionManagement>
<properties>
<seaweedfs.client.version>1.4.4</seaweedfs.client.version>
<seaweedfs.client.version>1.4.8</seaweedfs.client.version>
<hadoop.version>3.1.1</hadoop.version>
</properties>
</project>

4
other/java/hdfs3/pom.xml

@ -5,7 +5,7 @@
<modelVersion>4.0.0</modelVersion>
<properties>
<seaweedfs.client.version>1.4.4</seaweedfs.client.version>
<seaweedfs.client.version>1.4.8</seaweedfs.client.version>
<hadoop.version>3.1.1</hadoop.version>
</properties>
@ -147,6 +147,7 @@
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>${hadoop.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>com.github.chrislusf</groupId>
@ -157,6 +158,7 @@
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>${hadoop.version}</version>
<scope>provided</scope>
</dependency>
</dependencies>

29
other/java/hdfs3/src/main/java/seaweed/hdfs/SeaweedFileSystem.java

@ -5,7 +5,6 @@ import org.apache.hadoop.fs.*;
import org.apache.hadoop.fs.permission.AclEntry;
import org.apache.hadoop.fs.permission.AclStatus;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.security.AccessControlException;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.util.Progressable;
import org.slf4j.Logger;
@ -14,20 +13,19 @@ import seaweedfs.client.FilerProto;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.URI;
import java.util.EnumSet;
import java.util.List;
import java.util.Map;
import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.IO_FILE_BUFFER_SIZE_KEY;
public class SeaweedFileSystem extends FileSystem {
public static final int FS_SEAWEED_DEFAULT_PORT = 8888;
public static final String FS_SEAWEED_FILER_HOST = "fs.seaweed.filer.host";
public static final String FS_SEAWEED_FILER_PORT = "fs.seaweed.filer.port";
public static final int FS_SEAWEED_DEFAULT_PORT = 8888;
public static final String FS_SEAWEED_BUFFER_SIZE = "fs.seaweed.buffer.size";
public static final int FS_SEAWEED_DEFAULT_BUFFER_SIZE = 4 * 1024 * 1024;
private static final Logger LOG = LoggerFactory.getLogger(SeaweedFileSystem.class);
@ -75,8 +73,9 @@ public class SeaweedFileSystem extends FileSystem {
path = qualify(path);
try {
FSInputStream inputStream = seaweedFileSystemStore.openFileForRead(path, statistics, bufferSize);
return new FSDataInputStream(new BufferedFSInputStream(inputStream, 16 * 1024 * 1024));
int seaweedBufferSize = this.getConf().getInt(FS_SEAWEED_BUFFER_SIZE, FS_SEAWEED_DEFAULT_BUFFER_SIZE);
FSInputStream inputStream = seaweedFileSystemStore.openFileForRead(path, statistics, seaweedBufferSize);
return new FSDataInputStream(new BufferedFSInputStream(inputStream, 4 * seaweedBufferSize));
} catch (Exception ex) {
LOG.warn("open path: {} bufferSize:{}", path, bufferSize, ex);
return null;
@ -93,7 +92,8 @@ public class SeaweedFileSystem extends FileSystem {
try {
String replicaPlacement = String.format("%03d", replication - 1);
OutputStream outputStream = seaweedFileSystemStore.createFile(path, overwrite, permission, bufferSize, replicaPlacement);
int seaweedBufferSize = this.getConf().getInt(FS_SEAWEED_BUFFER_SIZE, FS_SEAWEED_DEFAULT_BUFFER_SIZE);
OutputStream outputStream = seaweedFileSystemStore.createFile(path, overwrite, permission, seaweedBufferSize, replicaPlacement);
return new FSDataOutputStream(outputStream, statistics);
} catch (Exception ex) {
LOG.warn("create path: {} bufferSize:{} blockSize:{}", path, bufferSize, blockSize, ex);
@ -103,8 +103,9 @@ public class SeaweedFileSystem extends FileSystem {
/**
* {@inheritDoc}
*
* @throws FileNotFoundException if the parent directory is not present -or
* is not a directory.
* is not a directory.
*/
@Override
public FSDataOutputStream createNonRecursive(Path path,
@ -121,9 +122,10 @@ public class SeaweedFileSystem extends FileSystem {
throw new FileAlreadyExistsException("Not a directory: " + parent);
}
}
int seaweedBufferSize = this.getConf().getInt(FS_SEAWEED_BUFFER_SIZE, FS_SEAWEED_DEFAULT_BUFFER_SIZE);
return create(path, permission,
flags.contains(CreateFlag.OVERWRITE), bufferSize,
replication, blockSize, progress);
replication, seaweedBufferSize, progress);
}
@Override
@ -133,7 +135,8 @@ public class SeaweedFileSystem extends FileSystem {
path = qualify(path);
try {
OutputStream outputStream = seaweedFileSystemStore.createFile(path, false, null, bufferSize, "");
int seaweedBufferSize = this.getConf().getInt(FS_SEAWEED_BUFFER_SIZE, FS_SEAWEED_DEFAULT_BUFFER_SIZE);
OutputStream outputStream = seaweedFileSystemStore.createFile(path, false, null, seaweedBufferSize, "");
return new FSDataOutputStream(outputStream, statistics);
} catch (Exception ex) {
LOG.warn("append path: {} bufferSize:{}", path, bufferSize, ex);
@ -338,9 +341,7 @@ public class SeaweedFileSystem extends FileSystem {
@Override
public void createSymlink(final Path target, final Path link,
final boolean createParent) throws AccessControlException,
FileAlreadyExistsException, FileNotFoundException,
ParentNotDirectoryException, UnsupportedFileSystemException,
final boolean createParent) throws
IOException {
// Supporting filesystems should override this method
throw new UnsupportedOperationException(

11
other/java/hdfs3/src/main/java/seaweed/hdfs/SeaweedFileSystemStore.java

@ -8,14 +8,10 @@ import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.security.UserGroupInformation;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import seaweedfs.client.FilerClient;
import seaweedfs.client.FilerGrpcClient;
import seaweedfs.client.FilerProto;
import seaweedfs.client.SeaweedRead;
import seaweedfs.client.*;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.Arrays;
@ -124,7 +120,7 @@ public class SeaweedFileSystemStore {
private FileStatus doGetFileStatus(Path path, FilerProto.Entry entry) {
FilerProto.FuseAttributes attributes = entry.getAttributes();
long length = SeaweedRead.totalSize(entry.getChunksList());
long length = SeaweedRead.fileSize(entry);
boolean isDir = entry.getIsDirectory();
int block_replication = 1;
int blocksize = 512;
@ -185,7 +181,7 @@ public class SeaweedFileSystemStore {
entry.mergeFrom(existingEntry);
entry.getAttributesBuilder().setMtime(now);
LOG.debug("createFile merged entry path:{} entry:{} from:{}", path, entry, existingEntry);
writePosition = SeaweedRead.totalSize(existingEntry.getChunksList());
writePosition = SeaweedRead.fileSize(existingEntry);
replication = existingEntry.getAttributes().getReplication();
}
}
@ -202,6 +198,7 @@ public class SeaweedFileSystemStore {
.clearGroupName()
.addAllGroupName(Arrays.asList(userGroupInformation.getGroupNames()))
);
SeaweedWrite.writeMeta(filerGrpcClient, getParentDirectory(path), entry);
}
return new SeaweedOutputStream(filerGrpcClient, path, entry, writePosition, bufferSize, replication);

4
other/java/hdfs3/src/main/java/seaweed/hdfs/SeaweedInputStream.java

@ -41,7 +41,7 @@ public class SeaweedInputStream extends FSInputStream {
this.statistics = statistics;
this.path = path;
this.entry = entry;
this.contentLength = SeaweedRead.totalSize(entry.getChunksList());
this.contentLength = SeaweedRead.fileSize(entry);
this.bufferSize = bufferSize;
this.visibleIntervalList = SeaweedRead.nonOverlappingVisibleIntervals(filerGrpcClient, entry.getChunksList());
@ -87,7 +87,7 @@ public class SeaweedInputStream extends FSInputStream {
throw new IllegalArgumentException("requested read length is more than will fit after requested offset in buffer");
}
long bytesRead = SeaweedRead.read(this.filerGrpcClient, this.visibleIntervalList, this.position, b, off, len);
long bytesRead = SeaweedRead.read(this.filerGrpcClient, this.visibleIntervalList, this.position, b, off, len, SeaweedRead.fileSize(entry));
if (bytesRead > Integer.MAX_VALUE) {
throw new IOException("Unexpected Content-Length");
}
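
In both hdfs2 and hdfs3, SeaweedRead.read now also receives SeaweedRead.fileSize(entry) alongside the visible-interval list, and getFileStatus() reports that same file size. A hedged sketch of the positioned-read pattern that exercises this path through the standard Hadoop API; the path, offset and scheme wiring are placeholders, not part of the diff.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import java.net.URI;

public class SeaweedPositionedReadExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.set("fs.seaweedfs.impl", "seaweed.hdfs.SeaweedFileSystem"); // assumed wiring
        FileSystem fs = FileSystem.get(URI.create("seaweedfs://localhost:8888/"), conf);
        Path path = new Path("/data/large.bin"); // placeholder path
        // The reported length is derived from SeaweedRead.fileSize(entry), see SeaweedFileSystemStore above.
        long fileLen = fs.getFileStatus(path).getLen();
        byte[] tail = new byte[1024];
        try (FSDataInputStream in = fs.open(path)) {
            // Positioned read near the end of the file, served by SeaweedInputStream.read(...).
            in.readFully(Math.max(0, fileLen - tail.length), tail);
        }
    }
}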

1856
other/metrics/grafana_seaweedfs.json
File diff suppressed because it is too large

58
test/random_access/pom.xml

@ -0,0 +1,58 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>com.seaweedfs.test</groupId>
<artifactId>random_access</artifactId>
<packaging>jar</packaging>
<version>1.0-SNAPSHOT</version>
<properties>
<guava.version>28.0-jre</guava.version>
</properties>
<dependencies>
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
<version>${guava.version}</version>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
<version>1.7.25</version>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.12</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>com.esotericsoftware.kryo</groupId>
<artifactId>kryo</artifactId>
<version>2.24.0</version>
</dependency>
</dependencies>
<build>
<extensions>
<extension>
<groupId>kr.motd.maven</groupId>
<artifactId>os-maven-plugin</artifactId>
<version>1.6.2</version>
</extension>
</extensions>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<configuration>
<source>8</source>
<target>8</target>
</configuration>
</plugin>
</plugins>
</build>
</project>
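
This new test module pins Guava 28.0-jre, SLF4J 1.7.25, JUnit 4.12 and Kryo 2.24.0 and compiles for Java 8. Assuming the standard Maven layout, it can be built and its JUnit tests (if any are present under src/test/java) run on their own with: mvn -f test/random_access/pom.xml clean test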

753
test/random_access/src/main/java/seaweedfs/client/btree/BTreePersistentIndexedCache.java

@ -0,0 +1,753 @@
/*
* Copyright 2010 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package seaweedfs.client.btree;
import com.google.common.collect.ImmutableSet;
import seaweedfs.client.btree.serialize.Serializer;
import seaweedfs.client.btree.serialize.kryo.KryoBackedDecoder;
import seaweedfs.client.btree.serialize.kryo.KryoBackedEncoder;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
// todo - stream serialised value to file
// todo - handle hash collisions (properly, this time)
// todo - don't store null links to child blocks in leaf index blocks
// todo - align block boundaries
// todo - thread safety control
// todo - merge small values into a single data block
// todo - discard when file corrupt
// todo - include data directly in index entry when serializer can guarantee small fixed sized data
// todo - free list leaks disk space
// todo - merge adjacent free blocks
// todo - use more efficient lookup for free block with nearest size
@SuppressWarnings("unchecked")
public class BTreePersistentIndexedCache<K, V> {
private static final Logger LOGGER = LoggerFactory.getLogger(BTreePersistentIndexedCache.class);
private final File cacheFile;
private final KeyHasher<K> keyHasher;
private final Serializer<V> serializer;
private final short maxChildIndexEntries;
private final int minIndexChildNodes;
private final StateCheckBlockStore store;
private HeaderBlock header;
public BTreePersistentIndexedCache(File cacheFile, Serializer<K> keySerializer, Serializer<V> valueSerializer) {
this(cacheFile, keySerializer, valueSerializer, (short) 512, 512);
}
public BTreePersistentIndexedCache(File cacheFile, Serializer<K> keySerializer, Serializer<V> valueSerializer,
short maxChildIndexEntries, int maxFreeListEntries) {
this.cacheFile = cacheFile;
this.keyHasher = new KeyHasher<K>(keySerializer);
this.serializer = valueSerializer;
this.maxChildIndexEntries = maxChildIndexEntries;
this.minIndexChildNodes = maxChildIndexEntries / 2;
BlockStore cachingStore = new CachingBlockStore(new FileBackedBlockStore(cacheFile), ImmutableSet.of(IndexBlock.class, FreeListBlockStore.FreeListBlock.class));
this.store = new StateCheckBlockStore(new FreeListBlockStore(cachingStore, maxFreeListEntries));
try {
open();
} catch (Exception e) {
throw new UncheckedIOException(String.format("Could not open %s.", this), e);
}
}
@Override
public String toString() {
return "cache " + cacheFile.getName() + " (" + cacheFile + ")";
}
private void open() throws Exception {
LOGGER.debug("Opening {}", this);
try {
doOpen();
} catch (CorruptedCacheException e) {
rebuild();
}
}
private void doOpen() throws Exception {
BlockStore.Factory factory = new BlockStore.Factory() {
@Override
public Object create(Class<? extends BlockPayload> type) {
if (type == HeaderBlock.class) {
return new HeaderBlock();
}
if (type == IndexBlock.class) {
return new IndexBlock();
}
if (type == DataBlock.class) {
return new DataBlock();
}
throw new UnsupportedOperationException();
}
};
Runnable initAction = new Runnable() {
@Override
public void run() {
header = new HeaderBlock();
store.write(header);
header.index.newRoot();
store.flush();
}
};
store.open(initAction, factory);
header = store.readFirst(HeaderBlock.class);
}
public V get(K key) {
try {
try {
DataBlock block = header.getRoot().get(key);
if (block != null) {
return block.getValue();
}
return null;
} catch (CorruptedCacheException e) {
rebuild();
return null;
}
} catch (Exception e) {
throw new UncheckedIOException(String.format("Could not read entry '%s' from %s.", key, this), e);
}
}
public void put(K key, V value) {
try {
long hashCode = keyHasher.getHashCode(key);
Lookup lookup = header.getRoot().find(hashCode);
DataBlock newBlock = null;
if (lookup.entry != null) {
DataBlock block = store.read(lookup.entry.dataBlock, DataBlock.class);
DataBlockUpdateResult updateResult = block.useNewValue(value);
if (updateResult.isFailed()) {
store.remove(block);
newBlock = new DataBlock(value, updateResult.getSerializedValue());
}
} else {
newBlock = new DataBlock(value);
}
if (newBlock != null) {
store.write(newBlock);
lookup.indexBlock.put(hashCode, newBlock.getPos());
}
store.flush();
} catch (Exception e) {
throw new UncheckedIOException(String.format("Could not add entry '%s' to %s.", key, this), e);
}
}
public void remove(K key) {
try {
Lookup lookup = header.getRoot().find(key);
if (lookup.entry == null) {
return;
}
lookup.indexBlock.remove(lookup.entry);
DataBlock block = store.read(lookup.entry.dataBlock, DataBlock.class);
store.remove(block);
store.flush();
} catch (Exception e) {
throw new UncheckedIOException(String.format("Could not remove entry '%s' from %s.", key, this), e);
}
}
private IndexBlock load(BlockPointer pos, IndexRoot root, IndexBlock parent, int index) {
IndexBlock block = store.read(pos, IndexBlock.class);
block.root = root;
block.parent = parent;
block.parentEntryIndex = index;
return block;
}
public void reset() {
close();
try {
open();
} catch (Exception e) {
throw new UncheckedIOException(e);
}
}
public void close() {
LOGGER.debug("Closing {}", this);
try {
store.close();
} catch (Exception e) {
throw new UncheckedIOException(e);
}
}
public boolean isOpen() {
return store.isOpen();
}
private void rebuild() {
LOGGER.warn("{} is corrupt. Discarding.", this);
try {
clear();
} catch (Exception e) {
LOGGER.warn("{} couldn't be rebuilt. Closing.", this);
close();
}
}
public void verify() {
try {
doVerify();
} catch (Exception e) {
throw new UncheckedIOException(String.format("Some problems were found when checking the integrity of %s.",
this), e);
}
}
private void doVerify() throws Exception {
List<BlockPayload> blocks = new ArrayList<BlockPayload>();
HeaderBlock header = store.readFirst(HeaderBlock.class);
blocks.add(header);
verifyTree(header.getRoot(), "", blocks, Long.MAX_VALUE, true);
Collections.sort(blocks, new Comparator<BlockPayload>() {
@Override
public int compare(BlockPayload block, BlockPayload block1) {
return block.getPos().compareTo(block1.getPos());
}
});
for (int i = 0; i < blocks.size() - 1; i++) {
Block b1 = blocks.get(i).getBlock();
Block b2 = blocks.get(i + 1).getBlock();
if (b1.getPos().getPos() + b1.getSize() > b2.getPos().getPos()) {
throw new IOException(String.format("%s overlaps with %s", b1, b2));
}
}
}
private void verifyTree(IndexBlock current, String prefix, Collection<BlockPayload> blocks, long maxValue,
boolean loadData) throws Exception {
blocks.add(current);
if (!prefix.equals("") && current.entries.size() < maxChildIndexEntries / 2) {
throw new IOException(String.format("Too few entries found in %s", current));
}
if (current.entries.size() > maxChildIndexEntries) {
throw new IOException(String.format("Too many entries found in %s", current));
}
boolean isLeaf = current.entries.size() == 0 || current.entries.get(0).childIndexBlock.isNull();
if (isLeaf ^ current.tailPos.isNull()) {
throw new IOException(String.format("Mismatched leaf/tail-node in %s", current));
}
long min = Long.MIN_VALUE;
for (IndexEntry entry : current.entries) {
if (isLeaf ^ entry.childIndexBlock.isNull()) {
throw new IOException(String.format("Mismatched leaf/non-leaf entry in %s", current));
}
if (entry.hashCode >= maxValue || entry.hashCode <= min) {
throw new IOException(String.format("Out-of-order key in %s", current));
}
min = entry.hashCode;
if (!entry.childIndexBlock.isNull()) {
IndexBlock child = store.read(entry.childIndexBlock, IndexBlock.class);
verifyTree(child, " " + prefix, blocks, entry.hashCode, loadData);
}
if (loadData) {
DataBlock block = store.read(entry.dataBlock, DataBlock.class);
blocks.add(block);
}
}
if (!current.tailPos.isNull()) {
IndexBlock tail = store.read(current.tailPos, IndexBlock.class);
verifyTree(tail, " " + prefix, blocks, maxValue, loadData);
}
}
public void clear() {
store.clear();
close();
try {
doOpen();
} catch (Exception e) {
throw new UncheckedIOException(e);
}
}
private class IndexRoot {
private BlockPointer rootPos = BlockPointer.start();
private HeaderBlock owner;
private IndexRoot(HeaderBlock owner) {
this.owner = owner;
}
public void setRootPos(BlockPointer rootPos) {
this.rootPos = rootPos;
store.write(owner);
}
public IndexBlock getRoot() {
return load(rootPos, this, null, 0);
}
public IndexBlock newRoot() {
IndexBlock block = new IndexBlock();
store.write(block);
setRootPos(block.getPos());
return block;
}
}
private class HeaderBlock extends BlockPayload {
private IndexRoot index;
private HeaderBlock() {
index = new IndexRoot(this);
}
@Override
protected byte getType() {
return 0x55;
}
@Override
protected int getSize() {
return Block.LONG_SIZE + Block.SHORT_SIZE;
}
@Override
protected void read(DataInputStream instr) throws Exception {
index.rootPos = BlockPointer.pos(instr.readLong());
short actualChildIndexEntries = instr.readShort();
if (actualChildIndexEntries != maxChildIndexEntries) {
throw blockCorruptedException();
}
}
@Override
protected void write(DataOutputStream outstr) throws Exception {
outstr.writeLong(index.rootPos.getPos());
outstr.writeShort(maxChildIndexEntries);
}
public IndexBlock getRoot() throws Exception {
return index.getRoot();
}
}
private class IndexBlock extends BlockPayload {
private final List<IndexEntry> entries = new ArrayList<IndexEntry>();
private BlockPointer tailPos = BlockPointer.start();
// Transient fields
private IndexBlock parent;
private int parentEntryIndex;
private IndexRoot root;
@Override
protected byte getType() {
return 0x77;
}
@Override
protected int getSize() {
return Block.INT_SIZE + Block.LONG_SIZE + (3 * Block.LONG_SIZE) * maxChildIndexEntries;
}
@Override
public void read(DataInputStream instr) throws IOException {
int count = instr.readInt();
entries.clear();
for (int i = 0; i < count; i++) {
IndexEntry entry = new IndexEntry();
entry.hashCode = instr.readLong();
entry.dataBlock = BlockPointer.pos(instr.readLong());
entry.childIndexBlock = BlockPointer.pos(instr.readLong());
entries.add(entry);
}
tailPos = BlockPointer.pos(instr.readLong());
}
@Override
public void write(DataOutputStream outstr) throws IOException {
outstr.writeInt(entries.size());
for (IndexEntry entry : entries) {
outstr.writeLong(entry.hashCode);
outstr.writeLong(entry.dataBlock.getPos());
outstr.writeLong(entry.childIndexBlock.getPos());
}
outstr.writeLong(tailPos.getPos());
}
public void put(long hashCode, BlockPointer pos) throws Exception {
int index = Collections.binarySearch(entries, new IndexEntry(hashCode));
IndexEntry entry;
if (index >= 0) {
entry = entries.get(index);
} else {
assert tailPos.isNull();
entry = new IndexEntry();
entry.hashCode = hashCode;
entry.childIndexBlock = BlockPointer.start();
index = -index - 1;
entries.add(index, entry);
}
entry.dataBlock = pos;
store.write(this);
maybeSplit();
}
private void maybeSplit() throws Exception {
if (entries.size() > maxChildIndexEntries) {
int splitPos = entries.size() / 2;
IndexEntry splitEntry = entries.remove(splitPos);
if (parent == null) {
parent = root.newRoot();
}
IndexBlock sibling = new IndexBlock();
store.write(sibling);
List<IndexEntry> siblingEntries = entries.subList(splitPos, entries.size());
sibling.entries.addAll(siblingEntries);
siblingEntries.clear();
sibling.tailPos = tailPos;
tailPos = splitEntry.childIndexBlock;
splitEntry.childIndexBlock = BlockPointer.start();
parent.add(this, splitEntry, sibling);
}
}
private void add(IndexBlock left, IndexEntry entry, IndexBlock right) throws Exception {
int index = left.parentEntryIndex;
if (index < entries.size()) {
IndexEntry parentEntry = entries.get(index);
assert parentEntry.childIndexBlock.equals(left.getPos());
parentEntry.childIndexBlock = right.getPos();
} else {
assert index == entries.size() && (tailPos.isNull() || tailPos.equals(left.getPos()));
tailPos = right.getPos();
}
entries.add(index, entry);
entry.childIndexBlock = left.getPos();
store.write(this);
maybeSplit();
}
public DataBlock get(K key) throws Exception {
Lookup lookup = find(key);
if (lookup.entry == null) {
return null;
}
return store.read(lookup.entry.dataBlock, DataBlock.class);
}
public Lookup find(K key) throws Exception {
long checksum = keyHasher.getHashCode(key);
return find(checksum);
}
private Lookup find(long hashCode) throws Exception {
int index = Collections.binarySearch(entries, new IndexEntry(hashCode));
if (index >= 0) {
return new Lookup(this, entries.get(index));
}
index = -index - 1;
BlockPointer childBlockPos;
if (index == entries.size()) {
childBlockPos = tailPos;
} else {
childBlockPos = entries.get(index).childIndexBlock;
}
if (childBlockPos.isNull()) {
return new Lookup(this, null);
}
IndexBlock childBlock = load(childBlockPos, root, this, index);
return childBlock.find(hashCode);
}
public void remove(IndexEntry entry) throws Exception {
int index = entries.indexOf(entry);
assert index >= 0;
entries.remove(index);
store.write(this);
if (entry.childIndexBlock.isNull()) {
maybeMerge();
} else {
// Not a leaf node. Move up an entry from a leaf node, then possibly merge the leaf node
IndexBlock leafBlock = load(entry.childIndexBlock, root, this, index);
leafBlock = leafBlock.findHighestLeaf();
IndexEntry highestEntry = leafBlock.entries.remove(leafBlock.entries.size() - 1);
highestEntry.childIndexBlock = entry.childIndexBlock;
entries.add(index, highestEntry);
store.write(leafBlock);
leafBlock.maybeMerge();
}
}
private void maybeMerge() throws Exception {
if (parent == null) {
// This is the root block. Can have any number of children <= maxChildIndexEntries
if (entries.size() == 0 && !tailPos.isNull()) {
// This is an empty root block, discard it
header.index.setRootPos(tailPos);
store.remove(this);
}
return;
}
// This is not the root block. Must have children >= minIndexChildNodes
if (entries.size() >= minIndexChildNodes) {
return;
}
// Attempt to merge with the left sibling
IndexBlock left = parent.getPrevious(this);
if (left != null) {
assert entries.size() + left.entries.size() <= maxChildIndexEntries * 2;
if (left.entries.size() > minIndexChildNodes) {
// There are enough entries in this block and the left sibling to make up 2 blocks, so redistribute
// the entries evenly between them
left.mergeFrom(this);
left.maybeSplit();
return;
} else {
// There are only enough entries to make up 1 block, so move the entries of the left sibling into
// this block and discard the left sibling. Might also need to merge the parent
left.mergeFrom(this);
parent.maybeMerge();
return;
}
}
// Attempt to merge with the right sibling
IndexBlock right = parent.getNext(this);
if (right != null) {
assert entries.size() + right.entries.size() <= maxChildIndexEntries * 2;
if (right.entries.size() > minIndexChildNodes) {
// There are enough entries in this block and the right sibling to make up 2 blocks, so redistribute
// the entries evenly between them
mergeFrom(right);
maybeSplit();
return;
} else {
// There are only enough entries to make up 1 block, so move the entries of the right sibling into
// this block and discard this block. Might also need to merge the parent
mergeFrom(right);
parent.maybeMerge();
return;
}
}
// Should not happen
throw new IllegalStateException(String.format("%s does not have any siblings.", getBlock()));
}
private void mergeFrom(IndexBlock right) throws Exception {
IndexEntry newChildEntry = parent.entries.remove(parentEntryIndex);
if (right.getPos().equals(parent.tailPos)) {
parent.tailPos = getPos();
} else {
IndexEntry newParentEntry = parent.entries.get(parentEntryIndex);
assert newParentEntry.childIndexBlock.equals(right.getPos());
newParentEntry.childIndexBlock = getPos();
}
entries.add(newChildEntry);
entries.addAll(right.entries);
newChildEntry.childIndexBlock = tailPos;
tailPos = right.tailPos;
store.write(parent);
store.write(this);
store.remove(right);
}
private IndexBlock getNext(IndexBlock indexBlock) throws Exception {
int index = indexBlock.parentEntryIndex + 1;
if (index > entries.size()) {
return null;
}
if (index == entries.size()) {
return load(tailPos, root, this, index);
}
return load(entries.get(index).childIndexBlock, root, this, index);
}
private IndexBlock getPrevious(IndexBlock indexBlock) throws Exception {
int index = indexBlock.parentEntryIndex - 1;
if (index < 0) {
return null;
}
return load(entries.get(index).childIndexBlock, root, this, index);
}
private IndexBlock findHighestLeaf() throws Exception {
if (tailPos.isNull()) {
return this;
}
return load(tailPos, root, this, entries.size()).findHighestLeaf();
}
}
private static class IndexEntry implements Comparable<IndexEntry> {
long hashCode;
BlockPointer dataBlock;
BlockPointer childIndexBlock;
private IndexEntry() {
}
private IndexEntry(long hashCode) {
this.hashCode = hashCode;
}
@Override
public int compareTo(IndexEntry indexEntry) {
if (hashCode > indexEntry.hashCode) {
return 1;
}
if (hashCode < indexEntry.hashCode) {
return -1;
}
return 0;
}
}
private class Lookup {
final IndexBlock indexBlock;
final IndexEntry entry;
private Lookup(IndexBlock indexBlock, IndexEntry entry) {
this.indexBlock = indexBlock;
this.entry = entry;
}
}
private class DataBlock extends BlockPayload {
private int size;
private StreamByteBuffer buffer;
private V value;
private DataBlock() {
}
public DataBlock(V value) throws Exception {
this.value = value;
setValue(value);
size = buffer.totalBytesUnread();
}
public DataBlock(V value, StreamByteBuffer buffer) throws Exception {
this.value = value;
this.buffer = buffer;
size = buffer.totalBytesUnread();
}
public void setValue(V value) throws Exception {
buffer = StreamByteBuffer.createWithChunkSizeInDefaultRange(size);
KryoBackedEncoder encoder = new KryoBackedEncoder(buffer.getOutputStream());
serializer.write(encoder, value);
encoder.flush();
}
public V getValue() throws Exception {
if (value == null) {
value = serializer.read(new KryoBackedDecoder(buffer.getInputStream()));
buffer = null;
}
return value;
}
@Override
protected byte getType() {
return 0x33;
}
@Override
protected int getSize() {
return 2 * Block.INT_SIZE + size;
}
@Override
public void read(DataInputStream instr) throws Exception {
size = instr.readInt();
int bytes = instr.readInt();
buffer = StreamByteBuffer.of(instr, bytes);
}
@Override
public void write(DataOutputStream outstr) throws Exception {
outstr.writeInt(size);
outstr.writeInt(buffer.totalBytesUnread());
buffer.writeTo(outstr);
buffer = null;
}
public DataBlockUpdateResult useNewValue(V value) throws Exception {
setValue(value);
boolean ok = buffer.totalBytesUnread() <= size;
if (ok) {
this.value = value;
store.write(this);
return DataBlockUpdateResult.success();
} else {
return DataBlockUpdateResult.failed(buffer);
}
}
}
private static class DataBlockUpdateResult {
private static final DataBlockUpdateResult SUCCESS = new DataBlockUpdateResult(true, null);
private final boolean success;
private final StreamByteBuffer serializedValue;
private DataBlockUpdateResult(boolean success, StreamByteBuffer serializedValue) {
this.success = success;
this.serializedValue = serializedValue;
}
static DataBlockUpdateResult success() {
return SUCCESS;
}
static DataBlockUpdateResult failed(StreamByteBuffer serializedValue) {
return new DataBlockUpdateResult(false, serializedValue);
}
public boolean isFailed() {
return !success;
}
public StreamByteBuffer getSerializedValue() {
return serializedValue;
}
}
}
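
BTreePersistentIndexedCache exposes a small map-like API (put/get/remove/verify/clear/close) on top of the block-store chain built in its constructor (FileBackedBlockStore wrapped by CachingBlockStore, FreeListBlockStore and StateCheckBlockStore). A minimal usage sketch; the Serializer implementations are not part of this diff, so they are taken here as parameters rather than invented.

import seaweedfs.client.btree.BTreePersistentIndexedCache;
import seaweedfs.client.btree.serialize.Serializer;
import java.io.File;

public class BTreeCacheExample {
    // Caller supplies key/value serializers, e.g. Kryo-backed ones from seaweedfs.client.btree.serialize.
    static void roundTrip(Serializer<String> keySerializer, Serializer<String> valueSerializer) {
        File cacheFile = new File(System.getProperty("java.io.tmpdir"), "btree-example.bin");
        BTreePersistentIndexedCache<String, String> cache =
                new BTreePersistentIndexedCache<>(cacheFile, keySerializer, valueSerializer);
        try {
            cache.put("someKey", "someValue");        // writes a DataBlock and updates the index
            System.out.println(cache.get("someKey")); // reads back through the B-tree index
            cache.remove("someKey");                  // deletes the entry, merging blocks if under-full
            cache.verify();                           // walks the tree and checks block layout invariants
        } finally {
            cache.close();
        }
    }
}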

59
test/random_access/src/main/java/seaweedfs/client/btree/Block.java

@ -0,0 +1,59 @@
/*
* Copyright 2009 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package seaweedfs.client.btree;
public abstract class Block {
static final int LONG_SIZE = 8;
static final int INT_SIZE = 4;
static final int SHORT_SIZE = 2;
private BlockPayload payload;
protected Block(BlockPayload payload) {
this.payload = payload;
payload.setBlock(this);
}
public BlockPayload getPayload() {
return payload;
}
protected void detach() {
payload.setBlock(null);
payload = null;
}
public abstract BlockPointer getPos();
public abstract int getSize();
public abstract RuntimeException blockCorruptedException();
@Override
public String toString() {
return payload.getClass().getSimpleName() + " " + getPos();
}
public BlockPointer getNextPos() {
return BlockPointer.pos(getPos().getPos() + getSize());
}
public abstract boolean hasPos();
public abstract void setPos(BlockPointer pos);
public abstract void setSize(int size);
}

51
test/random_access/src/main/java/seaweedfs/client/btree/BlockPayload.java

@ -0,0 +1,51 @@
/*
* Copyright 2009 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package seaweedfs.client.btree;
import java.io.DataInputStream;
import java.io.DataOutputStream;
public abstract class BlockPayload {
private Block block;
public Block getBlock() {
return block;
}
public void setBlock(Block block) {
this.block = block;
}
public BlockPointer getPos() {
return getBlock().getPos();
}
public BlockPointer getNextPos() {
return getBlock().getNextPos();
}
protected abstract int getSize();
protected abstract byte getType();
protected abstract void read(DataInputStream inputStream) throws Exception;
protected abstract void write(DataOutputStream outputStream) throws Exception;
protected RuntimeException blockCorruptedException() {
return getBlock().blockCorruptedException();
}
}

75
test/random_access/src/main/java/seaweedfs/client/btree/BlockPointer.java

@ -0,0 +1,75 @@
/*
* Copyright 2009 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package seaweedfs.client.btree;
import com.google.common.primitives.Longs;
public class BlockPointer implements Comparable<BlockPointer> {
private static final BlockPointer NULL = new BlockPointer(-1);
public static BlockPointer start() {
return NULL;
}
public static BlockPointer pos(long pos) {
if (pos < -1) {
throw new CorruptedCacheException("block pointer must be >= -1, but was " + pos);
}
if (pos == -1) {
return NULL;
}
return new BlockPointer(pos);
}
private final long pos;
private BlockPointer(long pos) {
this.pos = pos;
}
public boolean isNull() {
return pos < 0;
}
public long getPos() {
return pos;
}
@Override
public String toString() {
return String.valueOf(pos);
}
@Override
public boolean equals(Object obj) {
if (obj == null || obj.getClass() != getClass()) {
return false;
}
BlockPointer other = (BlockPointer) obj;
return pos == other.pos;
}
@Override
public int hashCode() {
return Longs.hashCode(pos);
}
@Override
public int compareTo(BlockPointer o) {
return Longs.compare(pos, o.pos);
}
}

68
test/random_access/src/main/java/seaweedfs/client/btree/BlockStore.java

@ -0,0 +1,68 @@
/*
* Copyright 2009 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package seaweedfs.client.btree;
public interface BlockStore {
/**
* Opens this store, calling the given action if the store is empty.
*/
void open(Runnable initAction, Factory factory);
/**
* Closes this store.
*/
void close();
/**
* Discards all blocks from this store.
*/
void clear();
/**
* Removes the given block from this store.
*/
void remove(BlockPayload block);
/**
* Reads the first block from this store.
*/
<T extends BlockPayload> T readFirst(Class<T> payloadType);
/**
* Reads a block from this store.
*/
<T extends BlockPayload> T read(BlockPointer pos, Class<T> payloadType);
/**
* Writes a block to this store, adding the block if required.
*/
void write(BlockPayload block);
/**
* Adds a new block to this store. Allocates space for the block, but does not write the contents of the block
* until {@link #write(BlockPayload)} is called.
*/
void attach(BlockPayload block);
/**
* Flushes any pending updates for this store.
*/
void flush();
interface Factory {
Object create(Class<? extends BlockPayload> type);
}
}

30
test/random_access/src/main/java/seaweedfs/client/btree/BufferCaster.java

@ -0,0 +1,30 @@
/*
* Copyright 2018 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package seaweedfs.client.btree;
import java.nio.Buffer;
public class BufferCaster {
/**
* Without this cast, when the code compiled by Java 9+ is executed on Java 8, it will throw
* java.lang.NoSuchMethodError: Method flip()Ljava/nio/ByteBuffer; does not exist in class java.nio.ByteBuffer
*/
@SuppressWarnings("RedundantCast")
public static <T extends Buffer> Buffer cast(T byteBuffer) {
return (Buffer) byteBuffer;
}
}
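
The cast is needed because Java 9 gave ByteBuffer (and the other Buffer subclasses) covariant overrides of flip(), clear(), position() and friends, so bytecode compiled against those signatures fails with NoSuchMethodError on Java 8 unless the call is dispatched through the java.nio.Buffer supertype. A tiny usage sketch:

import java.nio.ByteBuffer;
import seaweedfs.client.btree.BufferCaster;

public class BufferCasterExample {
    public static void main(String[] args) {
        ByteBuffer buf = ByteBuffer.allocate(64);
        buf.put(new byte[]{1, 2, 3});
        // Dispatch flip() via java.nio.Buffer so Java-9-compiled bytecode still links on Java 8.
        BufferCaster.cast(buf).flip();
        System.out.println("readable bytes: " + buf.remaining()); // prints 3
    }
}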

74
test/random_access/src/main/java/seaweedfs/client/btree/ByteInput.java

@ -0,0 +1,74 @@
/*
* Copyright 2014 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package seaweedfs.client.btree;
import com.google.common.io.CountingInputStream;
import java.io.BufferedInputStream;
import java.io.DataInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.RandomAccessFile;
/**
* Allows a stream of bytes to be read from a particular location of some backing byte stream.
*/
class ByteInput {
private final RandomAccessFile file;
private final ResettableBufferedInputStream bufferedInputStream;
private CountingInputStream countingInputStream;
public ByteInput(RandomAccessFile file) {
this.file = file;
bufferedInputStream = new ResettableBufferedInputStream(new RandomAccessFileInputStream(file));
}
/**
* Starts reading from the given offset.
*/
public DataInputStream start(long offset) throws IOException {
file.seek(offset);
bufferedInputStream.clear();
countingInputStream = new CountingInputStream(bufferedInputStream);
return new DataInputStream(countingInputStream);
}
/**
* Returns the number of bytes read since {@link #start(long)} was called.
*/
public long getBytesRead() {
return countingInputStream.getCount();
}
/**
* Finishes reading, resetting any buffered state.
*/
public void done() {
countingInputStream = null;
}
private static class ResettableBufferedInputStream extends BufferedInputStream {
ResettableBufferedInputStream(InputStream input) {
super(input);
}
void clear() {
count = 0;
pos = 0;
}
}
}

74
test/random_access/src/main/java/seaweedfs/client/btree/ByteOutput.java

@ -0,0 +1,74 @@
/*
* Copyright 2014 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package seaweedfs.client.btree;
import com.google.common.io.CountingOutputStream;
import java.io.BufferedOutputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.io.RandomAccessFile;
/**
* Allows a stream of bytes to be written to a particular location of some backing byte stream.
*/
class ByteOutput {
private final RandomAccessFile file;
private final ResettableBufferedOutputStream bufferedOutputStream;
private CountingOutputStream countingOutputStream;
public ByteOutput(RandomAccessFile file) {
this.file = file;
bufferedOutputStream = new ResettableBufferedOutputStream(new RandomAccessFileOutputStream(file));
}
/**
* Starts writing to the given offset. Can be beyond the current length of the file.
*/
public DataOutputStream start(long offset) throws IOException {
file.seek(offset);
bufferedOutputStream.clear();
countingOutputStream = new CountingOutputStream(bufferedOutputStream);
return new DataOutputStream(countingOutputStream);
}
/**
* Returns the number of bytes written since {@link #start(long)} was called.
*/
public long getBytesWritten() {
return countingOutputStream.getCount();
}
/**
* Finishes writing, flushing and resetting any buffered state.
*/
public void done() throws IOException {
countingOutputStream.flush();
countingOutputStream = null;
}
private static class ResettableBufferedOutputStream extends BufferedOutputStream {
ResettableBufferedOutputStream(OutputStream output) {
super(output);
}
void clear() {
count = 0;
}
}
}
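
ByteInput and ByteOutput are package-private wrappers around one shared RandomAccessFile, combining seek-to-offset, resettable buffering and byte counting. A small round-trip sketch, assuming it lives in the seaweedfs.client.btree package so the package-private classes are visible; the offset and the values written are arbitrary.

package seaweedfs.client.btree;

import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.File;
import java.io.RandomAccessFile;

public class ByteIoExample {
    public static void main(String[] args) throws Exception {
        File f = File.createTempFile("byteio", ".bin");
        try (RandomAccessFile raf = new RandomAccessFile(f, "rw")) {
            ByteOutput output = new ByteOutput(raf);
            DataOutputStream out = output.start(128);           // seek and start writing at offset 128
            out.writeLong(42L);
            out.writeInt(7);
            System.out.println("wrote " + output.getBytesWritten() + " bytes"); // 12
            output.done();                                       // flush buffered bytes to the file

            ByteInput input = new ByteInput(raf);
            DataInputStream in = input.start(128);               // seek back and read from the same offset
            System.out.println(in.readLong() + " / " + in.readInt());
            System.out.println("read " + input.getBytesRead() + " bytes");      // 12
            input.done();
        }
    }
}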

129
test/random_access/src/main/java/seaweedfs/client/btree/CachingBlockStore.java

@ -0,0 +1,129 @@
/*
* Copyright 2009 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package seaweedfs.client.btree;
import com.google.common.cache.Cache;
import com.google.common.cache.CacheBuilder;
import com.google.common.collect.ImmutableSet;
import javax.annotation.Nullable;
import java.util.Collection;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.Map;
public class CachingBlockStore implements BlockStore {
private final BlockStore store;
private final Map<BlockPointer, BlockPayload> dirty = new LinkedHashMap<BlockPointer, BlockPayload>();
private final Cache<BlockPointer, BlockPayload> indexBlockCache = CacheBuilder.newBuilder().maximumSize(100).concurrencyLevel(1).build();
private final ImmutableSet<Class<? extends BlockPayload>> cacheableBlockTypes;
public CachingBlockStore(BlockStore store, Collection<Class<? extends BlockPayload>> cacheableBlockTypes) {
this.store = store;
this.cacheableBlockTypes = ImmutableSet.copyOf(cacheableBlockTypes);
}
@Override
public void open(Runnable initAction, Factory factory) {
store.open(initAction, factory);
}
@Override
public void close() {
flush();
indexBlockCache.invalidateAll();
store.close();
}
@Override
public void clear() {
dirty.clear();
indexBlockCache.invalidateAll();
store.clear();
}
@Override
public void flush() {
Iterator<BlockPayload> iterator = dirty.values().iterator();
while (iterator.hasNext()) {
BlockPayload block = iterator.next();
iterator.remove();
store.write(block);
}
store.flush();
}
@Override
public void attach(BlockPayload block) {
store.attach(block);
}
@Override
public void remove(BlockPayload block) {
dirty.remove(block.getPos());
if (isCacheable(block)) {
indexBlockCache.invalidate(block.getPos());
}
store.remove(block);
}
@Override
public <T extends BlockPayload> T readFirst(Class<T> payloadType) {
T block = store.readFirst(payloadType);
maybeCache(block);
return block;
}
@Override
public <T extends BlockPayload> T read(BlockPointer pos, Class<T> payloadType) {
T block = payloadType.cast(dirty.get(pos));
if (block != null) {
return block;
}
block = maybeGetFromCache(pos, payloadType);
if (block != null) {
return block;
}
block = store.read(pos, payloadType);
maybeCache(block);
return block;
}
@Nullable
private <T extends BlockPayload> T maybeGetFromCache(BlockPointer pos, Class<T> payloadType) {
if (cacheableBlockTypes.contains(payloadType)) {
return payloadType.cast(indexBlockCache.getIfPresent(pos));
}
return null;
}
@Override
public void write(BlockPayload block) {
store.attach(block);
maybeCache(block);
dirty.put(block.getPos(), block);
}
private <T extends BlockPayload> void maybeCache(T block) {
if (isCacheable(block)) {
indexBlockCache.put(block.getPos(), block);
}
}
private <T extends BlockPayload> boolean isCacheable(T block) {
return cacheableBlockTypes.contains(block.getClass());
}
}

22
test/random_access/src/main/java/seaweedfs/client/btree/CorruptedCacheException.java

@ -0,0 +1,22 @@
/*
* Copyright 2009 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package seaweedfs.client.btree;
class CorruptedCacheException extends RuntimeException {
CorruptedCacheException(String message) {
super(message);
}
}

274
test/random_access/src/main/java/seaweedfs/client/btree/FileBackedBlockStore.java

@ -0,0 +1,274 @@
/*
* Copyright 2009 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package seaweedfs.client.btree;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.RandomAccessFile;
public class FileBackedBlockStore implements BlockStore {
private final File cacheFile;
private RandomAccessFile file;
private ByteOutput output;
private ByteInput input;
private long nextBlock;
private Factory factory;
private long currentFileSize;
public FileBackedBlockStore(File cacheFile) {
this.cacheFile = cacheFile;
}
@Override
public String toString() {
return "cache '" + cacheFile + "'";
}
@Override
public void open(Runnable runnable, Factory factory) {
this.factory = factory;
try {
cacheFile.getParentFile().mkdirs();
file = openRandomAccessFile();
output = new ByteOutput(file);
input = new ByteInput(file);
currentFileSize = file.length();
nextBlock = currentFileSize;
if (currentFileSize == 0) {
runnable.run();
}
} catch (IOException e) {
throw new UncheckedIOException(e);
}
}
private RandomAccessFile openRandomAccessFile() throws FileNotFoundException {
try {
return randomAccessFile("rw");
} catch (FileNotFoundException e) {
return randomAccessFile("r");
}
}
private RandomAccessFile randomAccessFile(String mode) throws FileNotFoundException {
return new RandomAccessFile(cacheFile, mode);
}
@Override
public void close() {
try {
file.close();
} catch (IOException e) {
throw new UncheckedIOException(e);
}
}
@Override
public void clear() {
try {
file.setLength(0);
currentFileSize = 0;
} catch (IOException e) {
throw new UncheckedIOException(e);
}
nextBlock = 0;
}
@Override
public void attach(BlockPayload block) {
if (block.getBlock() == null) {
block.setBlock(new BlockImpl(block));
}
}
@Override
public void remove(BlockPayload block) {
BlockImpl blockImpl = (BlockImpl) block.getBlock();
blockImpl.detach();
}
@Override
public void flush() {
}
@Override
public <T extends BlockPayload> T readFirst(Class<T> payloadType) {
return read(BlockPointer.pos(0), payloadType);
}
@Override
public <T extends BlockPayload> T read(BlockPointer pos, Class<T> payloadType) {
assert !pos.isNull();
try {
T payload = payloadType.cast(factory.create(payloadType));
BlockImpl block = new BlockImpl(payload, pos);
block.read();
return payload;
} catch (CorruptedCacheException e) {
throw e;
} catch (Exception e) {
throw new UncheckedIOException(e);
}
}
@Override
public void write(BlockPayload block) {
BlockImpl blockImpl = (BlockImpl) block.getBlock();
try {
blockImpl.write();
} catch (CorruptedCacheException e) {
throw e;
} catch (Exception e) {
throw new UncheckedIOException(e);
}
}
private long alloc(long length) {
long pos = nextBlock;
nextBlock += length;
return pos;
}
private final class BlockImpl extends Block {
private static final int HEADER_SIZE = 1 + INT_SIZE; // type, payload size
private static final int TAIL_SIZE = INT_SIZE;
private BlockPointer pos;
private int payloadSize;
private BlockImpl(BlockPayload payload, BlockPointer pos) {
this(payload);
setPos(pos);
}
public BlockImpl(BlockPayload payload) {
super(payload);
pos = null;
payloadSize = -1;
}
@Override
public boolean hasPos() {
return pos != null;
}
@Override
public BlockPointer getPos() {
if (pos == null) {
pos = BlockPointer.pos(alloc(getSize()));
}
return pos;
}
@Override
public void setPos(BlockPointer pos) {
assert this.pos == null && !pos.isNull();
this.pos = pos;
}
@Override
public int getSize() {
if (payloadSize < 0) {
payloadSize = getPayload().getSize();
}
return payloadSize + HEADER_SIZE + TAIL_SIZE;
}
@Override
public void setSize(int size) {
int newPayloadSize = size - HEADER_SIZE - TAIL_SIZE;
assert newPayloadSize >= payloadSize;
payloadSize = newPayloadSize;
}
public void write() throws Exception {
long pos = getPos().getPos();
DataOutputStream outputStream = output.start(pos);
BlockPayload payload = getPayload();
// Write header
outputStream.writeByte(payload.getType());
outputStream.writeInt(payloadSize);
long finalSize = pos + HEADER_SIZE + TAIL_SIZE + payloadSize;
// Write body
payload.write(outputStream);
// Write count
long bytesWritten = output.getBytesWritten();
if (bytesWritten > Integer.MAX_VALUE) {
throw new IllegalArgumentException("Block payload exceeds maximum size");
}
outputStream.writeInt((int) bytesWritten);
output.done();
// System.out.println(String.format("wrote [%d,%d)", pos, pos + bytesWritten + 4));
// Pad
if (currentFileSize < finalSize) {
// System.out.println(String.format("pad length %d => %d", currentFileSize, finalSize));
file.setLength(finalSize);
currentFileSize = finalSize;
}
}
public void read() throws Exception {
long pos = getPos().getPos();
assert pos >= 0;
if (pos + HEADER_SIZE >= currentFileSize) {
throw blockCorruptedException();
}
DataInputStream inputStream = input.start(pos);
BlockPayload payload = getPayload();
// Read header
byte type = inputStream.readByte();
if (type != payload.getType()) {
throw blockCorruptedException();
}
// Read body
payloadSize = inputStream.readInt();
if (pos + HEADER_SIZE + TAIL_SIZE + payloadSize > currentFileSize) {
throw blockCorruptedException();
}
payload.read(inputStream);
// Read and verify count
long actualCount = input.getBytesRead();
long count = inputStream.readInt();
if (actualCount != count) {
System.out.println(String.format("read expected %d actual %d, pos %d payloadSize %d currentFileSize %d", count, actualCount, pos, payloadSize, currentFileSize));
throw blockCorruptedException();
}
input.done();
}
@Override
public RuntimeException blockCorruptedException() {
return new CorruptedCacheException(String.format("Corrupted %s found in %s.", this,
FileBackedBlockStore.this));
}
}
}
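For orientation, a block written by BlockImpl above reserves HEADER_SIZE + payloadSize + TAIL_SIZE bytes on disk: a one-byte type tag and a four-byte payload-size int, then the payload, then a four-byte count of the bytes actually written, which read() re-checks to detect truncation or corruption. A minimal arithmetic sketch with an illustrative payload size:
int payloadSize = 100;                     // illustrative value for payload.getSize()
int reserved = 1 + 4 + payloadSize + 4;    // HEADER_SIZE + payloadSize + TAIL_SIZE == BlockImpl.getSize()
// On disk: [type:1][payloadSize:4][payload bytes ...][bytesWritten:4]
// The trailing count sits immediately after whatever the payload actually emitted;
// write() then extends the file length so the full reserved range exists on disk.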

283
test/random_access/src/main/java/seaweedfs/client/btree/FreeListBlockStore.java

@ -0,0 +1,283 @@
/*
* Copyright 2009 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package seaweedfs.client.btree;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
public class FreeListBlockStore implements BlockStore {
private final BlockStore store;
private final BlockStore freeListStore;
private final int maxBlockEntries;
private FreeListBlock freeListBlock;
public FreeListBlockStore(BlockStore store, int maxBlockEntries) {
this.store = store;
freeListStore = this;
this.maxBlockEntries = maxBlockEntries;
}
@Override
public void open(final Runnable initAction, final Factory factory) {
Runnable freeListInitAction = new Runnable() {
@Override
public void run() {
freeListBlock = new FreeListBlock();
store.write(freeListBlock);
store.flush();
initAction.run();
}
};
Factory freeListFactory = new Factory() {
@Override
public Object create(Class<? extends BlockPayload> type) {
if (type == FreeListBlock.class) {
return new FreeListBlock();
}
return factory.create(type);
}
};
store.open(freeListInitAction, freeListFactory);
freeListBlock = store.readFirst(FreeListBlock.class);
}
@Override
public void close() {
freeListBlock = null;
store.close();
}
@Override
public void clear() {
store.clear();
}
@Override
public void remove(BlockPayload block) {
Block container = block.getBlock();
store.remove(block);
freeListBlock.add(container.getPos(), container.getSize());
}
@Override
public <T extends BlockPayload> T readFirst(Class<T> payloadType) {
return store.read(freeListBlock.getNextPos(), payloadType);
}
@Override
public <T extends BlockPayload> T read(BlockPointer pos, Class<T> payloadType) {
return store.read(pos, payloadType);
}
@Override
public void write(BlockPayload block) {
attach(block);
store.write(block);
}
@Override
public void attach(BlockPayload block) {
store.attach(block);
freeListBlock.alloc(block.getBlock());
}
@Override
public void flush() {
store.flush();
}
private void verify() {
FreeListBlock block = store.readFirst(FreeListBlock.class);
verify(block, Integer.MAX_VALUE);
}
private void verify(FreeListBlock block, int maxValue) {
if (block.largestInNextBlock > maxValue) {
throw new RuntimeException("corrupt free list");
}
int current = 0;
for (FreeListEntry entry : block.entries) {
if (entry.size > maxValue) {
throw new RuntimeException("corrupt free list");
}
if (entry.size < block.largestInNextBlock) {
throw new RuntimeException("corrupt free list");
}
if (entry.size < current) {
throw new RuntimeException("corrupt free list");
}
current = entry.size;
}
if (!block.nextBlock.isNull()) {
verify(store.read(block.nextBlock, FreeListBlock.class), block.largestInNextBlock);
}
}
public class FreeListBlock extends BlockPayload {
private List<FreeListEntry> entries = new ArrayList<FreeListEntry>();
private int largestInNextBlock;
private BlockPointer nextBlock = BlockPointer.start();
// Transient fields
private FreeListBlock prev;
private FreeListBlock next;
@Override
protected int getSize() {
return Block.LONG_SIZE + Block.INT_SIZE + Block.INT_SIZE + maxBlockEntries * (Block.LONG_SIZE
+ Block.INT_SIZE);
}
@Override
protected byte getType() {
return 0x44;
}
@Override
protected void read(DataInputStream inputStream) throws Exception {
nextBlock = BlockPointer.pos(inputStream.readLong());
largestInNextBlock = inputStream.readInt();
int count = inputStream.readInt();
for (int i = 0; i < count; i++) {
BlockPointer pos = BlockPointer.pos(inputStream.readLong());
int size = inputStream.readInt();
entries.add(new FreeListEntry(pos, size));
}
}
@Override
protected void write(DataOutputStream outputStream) throws Exception {
outputStream.writeLong(nextBlock.getPos());
outputStream.writeInt(largestInNextBlock);
outputStream.writeInt(entries.size());
for (FreeListEntry entry : entries) {
outputStream.writeLong(entry.pos.getPos());
outputStream.writeInt(entry.size);
}
}
public void add(BlockPointer pos, int size) {
assert !pos.isNull() && size >= 0;
if (size == 0) {
return;
}
if (size < largestInNextBlock) {
FreeListBlock next = getNextBlock();
next.add(pos, size);
return;
}
FreeListEntry entry = new FreeListEntry(pos, size);
int index = Collections.binarySearch(entries, entry);
if (index < 0) {
index = -index - 1;
}
entries.add(index, entry);
if (entries.size() > maxBlockEntries) {
FreeListBlock newBlock = new FreeListBlock();
newBlock.largestInNextBlock = largestInNextBlock;
newBlock.nextBlock = nextBlock;
newBlock.prev = this;
newBlock.next = next;
next = newBlock;
List<FreeListEntry> newBlockEntries = entries.subList(0, entries.size() / 2);
newBlock.entries.addAll(newBlockEntries);
newBlockEntries.clear();
largestInNextBlock = newBlock.entries.get(newBlock.entries.size() - 1).size;
freeListStore.write(newBlock);
nextBlock = newBlock.getPos();
}
freeListStore.write(this);
}
private FreeListBlock getNextBlock() {
if (next == null) {
next = freeListStore.read(nextBlock, FreeListBlock.class);
next.prev = this;
}
return next;
}
public void alloc(Block block) {
if (block.hasPos()) {
return;
}
int requiredSize = block.getSize();
if (entries.isEmpty() || requiredSize <= largestInNextBlock) {
if (nextBlock.isNull()) {
return;
}
getNextBlock().alloc(block);
return;
}
int index = Collections.binarySearch(entries, new FreeListEntry(null, requiredSize));
if (index < 0) {
index = -index - 1;
}
if (index == entries.size()) {
// Largest free block is too small
return;
}
FreeListEntry entry = entries.remove(index);
block.setPos(entry.pos);
block.setSize(entry.size);
freeListStore.write(this);
if (entries.size() == 0 && prev != null) {
prev.nextBlock = nextBlock;
prev.largestInNextBlock = largestInNextBlock;
prev.next = next;
if (next != null) {
next.prev = prev;
}
freeListStore.write(prev);
freeListStore.remove(this);
}
}
}
private static class FreeListEntry implements Comparable<FreeListEntry> {
final BlockPointer pos;
final int size;
private FreeListEntry(BlockPointer pos, int size) {
this.pos = pos;
this.size = size;
}
@Override
public int compareTo(FreeListEntry o) {
if (size > o.size) {
return 1;
}
if (size < o.size) {
return -1;
}
return 0;
}
}
}
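For illustration, FreeListBlockStore is a plain decorator over another BlockStore. A minimal sketch of opening one over a FileBackedBlockStore, assuming Factory is the nested BlockStore.Factory interface and DataBlock is a hypothetical BlockPayload subclass (the real wiring lives in BTreePersistentIndexedCache):
static BlockStore openStore(java.io.File cacheFile) {
    final BlockStore store = new FreeListBlockStore(
            new FileBackedBlockStore(cacheFile),
            100);                                   // max free-list entries per FreeListBlock
    store.open(
            new Runnable() {                        // init action, runs only when the file is empty
                @Override
                public void run() { /* write any initial header blocks here */ }
            },
            new BlockStore.Factory() {              // builds payload instances when blocks are read back
                @Override
                public Object create(Class<? extends BlockPayload> type) {
                    return new DataBlock();         // hypothetical payload type
                }
            });
    return store;
}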

75
test/random_access/src/main/java/seaweedfs/client/btree/KeyHasher.java

@ -0,0 +1,75 @@
/*
* Copyright 2014 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package seaweedfs.client.btree;
import seaweedfs.client.btree.serialize.Serializer;
import seaweedfs.client.btree.serialize.kryo.KryoBackedEncoder;
import java.io.IOException;
import java.io.OutputStream;
import java.math.BigInteger;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
class KeyHasher<K> {
private final Serializer<K> serializer;
private final MessageDigestStream digestStream = new MessageDigestStream();
private final KryoBackedEncoder encoder = new KryoBackedEncoder(digestStream);
public KeyHasher(Serializer<K> serializer) {
this.serializer = serializer;
}
long getHashCode(K key) throws Exception {
serializer.write(encoder, key);
encoder.flush();
return digestStream.getChecksum();
}
private static class MessageDigestStream extends OutputStream {
MessageDigest messageDigest;
private MessageDigestStream() {
try {
messageDigest = MessageDigest.getInstance("MD5");
} catch (NoSuchAlgorithmException e) {
throw UncheckedException.throwAsUncheckedException(e);
}
}
@Override
public void write(int b) throws IOException {
messageDigest.update((byte) b);
}
@Override
public void write(byte[] b) throws IOException {
messageDigest.update(b);
}
@Override
public void write(byte[] b, int off, int len) throws IOException {
messageDigest.update(b, off, len);
}
long getChecksum() {
byte[] digest = messageDigest.digest();
assert digest.length == 16;
return new BigInteger(digest).longValue();
}
}
}
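KeyHasher serializes a key through the KryoBackedEncoder into an MD5 digest and collapses the 16-byte digest to a signed 64-bit value. A minimal sketch of hashing a String key, assuming same-package access (KeyHasher is package-private) and the DefaultSerializer added further down in this change:
static long hashKey(String key) throws Exception {
    KeyHasher<String> hasher = new KeyHasher<String>(
            new seaweedfs.client.btree.serialize.DefaultSerializer<String>());
    return hasher.getHashCode(key);   // low-order 64 bits of the MD5 digest of the serialized key
}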

54
test/random_access/src/main/java/seaweedfs/client/btree/RandomAccessFileInputStream.java

@ -0,0 +1,54 @@
/*
* Copyright 2013 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package seaweedfs.client.btree;
import java.io.IOException;
import java.io.InputStream;
import java.io.RandomAccessFile;
/**
* Reads from a {@link RandomAccessFile}. Each operation reads from and advances the current position of the file.
*
* <p>Closing this stream does not close the underlying file.
*/
public class RandomAccessFileInputStream extends InputStream {
private final RandomAccessFile file;
public RandomAccessFileInputStream(RandomAccessFile file) {
this.file = file;
}
@Override
public long skip(long n) throws IOException {
file.seek(file.getFilePointer() + n);
return n;
}
@Override
public int read(byte[] bytes) throws IOException {
return file.read(bytes);
}
@Override
public int read() throws IOException {
return file.read();
}
@Override
public int read(byte[] bytes, int offset, int length) throws IOException {
return file.read(bytes, offset, length);
}
}
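As the Javadoc above notes, each read advances the underlying file pointer and closing the stream leaves the file open. A minimal usage sketch (file and offset are illustrative; assumes import java.io.*):
static int readIntAt(File f, long offset) throws IOException {
    RandomAccessFile raf = new RandomAccessFile(f, "r");
    try {
        raf.seek(offset);                                              // position the file first
        DataInputStream in = new DataInputStream(new RandomAccessFileInputStream(raf));
        int value = in.readInt();                                      // reads at offset, advances the pointer
        in.close();                                                    // does NOT close raf
        return value;
    } finally {
        raf.close();
    }
}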

48
test/random_access/src/main/java/seaweedfs/client/btree/RandomAccessFileOutputStream.java

@ -0,0 +1,48 @@
/*
* Copyright 2013 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package seaweedfs.client.btree;
import java.io.IOException;
import java.io.OutputStream;
import java.io.RandomAccessFile;
/**
* Writes to a {@link RandomAccessFile}. Each operation writes to and advances the current position of the file.
*
* <p>Closing this stream does not close the underlying file. Flushing this stream does nothing.
*/
public class RandomAccessFileOutputStream extends OutputStream {
private final RandomAccessFile file;
public RandomAccessFileOutputStream(RandomAccessFile file) {
this.file = file;
}
@Override
public void write(int i) throws IOException {
file.write(i);
}
@Override
public void write(byte[] bytes) throws IOException {
file.write(bytes);
}
@Override
public void write(byte[] bytes, int offset, int length) throws IOException {
file.write(bytes, offset, length);
}
}
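The output side mirrors the input side: writes land at the current file position, flushing does nothing, and closing the stream leaves the file open. A short sketch appending a length-prefixed record (assumes import java.io.*):
static void appendRecord(RandomAccessFile raf, byte[] record) throws IOException {
    raf.seek(raf.length());                                            // write at the end of the file
    DataOutputStream out = new DataOutputStream(new RandomAccessFileOutputStream(raf));
    out.writeInt(record.length);
    out.write(record);
    out.close();                                                       // does NOT close raf
}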

87
test/random_access/src/main/java/seaweedfs/client/btree/StateCheckBlockStore.java

@ -0,0 +1,87 @@
/*
* Copyright 2009 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package seaweedfs.client.btree;
public class StateCheckBlockStore implements BlockStore {
private final BlockStore blockStore;
private boolean open;
public StateCheckBlockStore(BlockStore blockStore) {
this.blockStore = blockStore;
}
@Override
public void open(Runnable initAction, Factory factory) {
assert !open;
open = true;
blockStore.open(initAction, factory);
}
public boolean isOpen() {
return open;
}
@Override
public void close() {
if (!open) {
return;
}
open = false;
blockStore.close();
}
@Override
public void clear() {
assert open;
blockStore.clear();
}
@Override
public void remove(BlockPayload block) {
assert open;
blockStore.remove(block);
}
@Override
public <T extends BlockPayload> T readFirst(Class<T> payloadType) {
assert open;
return blockStore.readFirst(payloadType);
}
@Override
public <T extends BlockPayload> T read(BlockPointer pos, Class<T> payloadType) {
assert open;
return blockStore.read(pos, payloadType);
}
@Override
public void write(BlockPayload block) {
assert open;
blockStore.write(block);
}
@Override
public void attach(BlockPayload block) {
assert open;
blockStore.attach(block);
}
@Override
public void flush() {
assert open;
blockStore.flush();
}
}

526
test/random_access/src/main/java/seaweedfs/client/btree/StreamByteBuffer.java

@ -0,0 +1,526 @@
/*
* Copyright 2016 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package seaweedfs.client.btree;
import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CoderResult;
import java.nio.charset.CodingErrorAction;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
/**
* An in-memory buffer that provides OutputStream and InputStream interfaces.
*
* This is more efficient than using ByteArrayOutputStream/ByteArrayInputStream.
*
* Reading the buffer will clear the buffer.
* This is not thread-safe; it is intended to be used by a single Thread.
*/
public class StreamByteBuffer {
private static final int DEFAULT_CHUNK_SIZE = 4096;
private static final int MAX_CHUNK_SIZE = 1024 * 1024;
private LinkedList<StreamByteBufferChunk> chunks = new LinkedList<StreamByteBufferChunk>();
private StreamByteBufferChunk currentWriteChunk;
private StreamByteBufferChunk currentReadChunk;
private int chunkSize;
private int nextChunkSize;
private int maxChunkSize;
private StreamByteBufferOutputStream output;
private StreamByteBufferInputStream input;
private int totalBytesUnreadInList;
public StreamByteBuffer() {
this(DEFAULT_CHUNK_SIZE);
}
public StreamByteBuffer(int chunkSize) {
this.chunkSize = chunkSize;
this.nextChunkSize = chunkSize;
this.maxChunkSize = Math.max(chunkSize, MAX_CHUNK_SIZE);
currentWriteChunk = new StreamByteBufferChunk(nextChunkSize);
output = new StreamByteBufferOutputStream();
input = new StreamByteBufferInputStream();
}
public static StreamByteBuffer of(InputStream inputStream) throws IOException {
StreamByteBuffer buffer = new StreamByteBuffer(chunkSizeInDefaultRange(inputStream.available()));
buffer.readFully(inputStream);
return buffer;
}
public static StreamByteBuffer of(InputStream inputStream, int len) throws IOException {
StreamByteBuffer buffer = new StreamByteBuffer(chunkSizeInDefaultRange(len));
buffer.readFrom(inputStream, len);
return buffer;
}
public static StreamByteBuffer createWithChunkSizeInDefaultRange(int value) {
return new StreamByteBuffer(chunkSizeInDefaultRange(value));
}
static int chunkSizeInDefaultRange(int value) {
return valueInRange(value, DEFAULT_CHUNK_SIZE, MAX_CHUNK_SIZE);
}
private static int valueInRange(int value, int min, int max) {
return Math.min(Math.max(value, min), max);
}
public OutputStream getOutputStream() {
return output;
}
public InputStream getInputStream() {
return input;
}
public void writeTo(OutputStream target) throws IOException {
while (prepareRead() != -1) {
currentReadChunk.writeTo(target);
}
}
public void readFrom(InputStream inputStream, int len) throws IOException {
int bytesLeft = len;
while (bytesLeft > 0) {
int spaceLeft = allocateSpace();
int limit = Math.min(spaceLeft, bytesLeft);
int readBytes = currentWriteChunk.readFrom(inputStream, limit);
if (readBytes == -1) {
throw new EOFException("Unexpected EOF");
}
bytesLeft -= readBytes;
}
}
public void readFully(InputStream inputStream) throws IOException {
while (true) {
int len = allocateSpace();
int readBytes = currentWriteChunk.readFrom(inputStream, len);
if (readBytes == -1) {
break;
}
}
}
public byte[] readAsByteArray() {
byte[] buf = new byte[totalBytesUnread()];
input.readImpl(buf, 0, buf.length);
return buf;
}
public List<byte[]> readAsListOfByteArrays() {
List<byte[]> listOfByteArrays = new ArrayList<byte[]>(chunks.size() + 1);
byte[] buf;
while ((buf = input.readNextBuffer()) != null) {
if (buf.length > 0) {
listOfByteArrays.add(buf);
}
}
return listOfByteArrays;
}
public String readAsString(String encoding) {
Charset charset = Charset.forName(encoding);
return readAsString(charset);
}
public String readAsString() {
return readAsString(Charset.defaultCharset());
}
public String readAsString(Charset charset) {
try {
return doReadAsString(charset);
} catch (CharacterCodingException e) {
throw new UncheckedIOException(e);
}
}
private String doReadAsString(Charset charset) throws CharacterCodingException {
int unreadSize = totalBytesUnread();
if (unreadSize > 0) {
return readAsCharBuffer(charset).toString();
}
return "";
}
private CharBuffer readAsCharBuffer(Charset charset) throws CharacterCodingException {
CharsetDecoder decoder = charset.newDecoder().onMalformedInput(
CodingErrorAction.REPLACE).onUnmappableCharacter(
CodingErrorAction.REPLACE);
CharBuffer charbuffer = CharBuffer.allocate(totalBytesUnread());
ByteBuffer buf = null;
boolean wasUnderflow = false;
ByteBuffer nextBuf = null;
boolean needsFlush = false;
while (hasRemaining(nextBuf) || hasRemaining(buf) || prepareRead() != -1) {
if (hasRemaining(buf)) {
// handle decoding underflow, multi-byte unicode character at buffer chunk boundary
if (!wasUnderflow) {
throw new IllegalStateException("Unexpected state. Buffer has remaining bytes without underflow in decoding.");
}
if (!hasRemaining(nextBuf) && prepareRead() != -1) {
nextBuf = currentReadChunk.readToNioBuffer();
}
// copy one by one until the underflow has been resolved
buf = ByteBuffer.allocate(buf.remaining() + 1).put(buf);
buf.put(nextBuf.get());
BufferCaster.cast(buf).flip();
} else {
if (hasRemaining(nextBuf)) {
buf = nextBuf;
} else if (prepareRead() != -1) {
buf = currentReadChunk.readToNioBuffer();
if (!hasRemaining(buf)) {
throw new IllegalStateException("Unexpected state. Buffer is empty.");
}
}
nextBuf = null;
}
boolean endOfInput = !hasRemaining(nextBuf) && prepareRead() == -1;
int bufRemainingBefore = buf.remaining();
CoderResult result = decoder.decode(buf, charbuffer, false);
if (bufRemainingBefore > buf.remaining()) {
needsFlush = true;
}
if (endOfInput) {
result = decoder.decode(ByteBuffer.allocate(0), charbuffer, true);
if (!result.isUnderflow()) {
result.throwException();
}
break;
}
wasUnderflow = result.isUnderflow();
}
if (needsFlush) {
CoderResult result = decoder.flush(charbuffer);
if (!result.isUnderflow()) {
result.throwException();
}
}
clear();
// push back remaining bytes of multi-byte unicode character
while (hasRemaining(buf)) {
byte b = buf.get();
try {
getOutputStream().write(b);
} catch (IOException e) {
throw new UncheckedIOException(e);
}
}
BufferCaster.cast(charbuffer).flip();
return charbuffer;
}
private boolean hasRemaining(ByteBuffer nextBuf) {
return nextBuf != null && nextBuf.hasRemaining();
}
public int totalBytesUnread() {
int total = totalBytesUnreadInList;
if (currentReadChunk != null) {
total += currentReadChunk.bytesUnread();
}
if (currentWriteChunk != currentReadChunk && currentWriteChunk != null) {
total += currentWriteChunk.bytesUnread();
}
return total;
}
protected int allocateSpace() {
int spaceLeft = currentWriteChunk.spaceLeft();
if (spaceLeft == 0) {
addChunk(currentWriteChunk);
currentWriteChunk = new StreamByteBufferChunk(nextChunkSize);
if (nextChunkSize < maxChunkSize) {
nextChunkSize = Math.min(nextChunkSize * 2, maxChunkSize);
}
spaceLeft = currentWriteChunk.spaceLeft();
}
return spaceLeft;
}
protected int prepareRead() {
int bytesUnread = (currentReadChunk != null) ? currentReadChunk.bytesUnread() : 0;
if (bytesUnread == 0) {
if (!chunks.isEmpty()) {
currentReadChunk = chunks.removeFirst();
bytesUnread = currentReadChunk.bytesUnread();
totalBytesUnreadInList -= bytesUnread;
} else if (currentReadChunk != currentWriteChunk) {
currentReadChunk = currentWriteChunk;
bytesUnread = currentReadChunk.bytesUnread();
} else {
bytesUnread = -1;
}
}
return bytesUnread;
}
public static StreamByteBuffer of(List<byte[]> listOfByteArrays) {
StreamByteBuffer buffer = new StreamByteBuffer();
buffer.addChunks(listOfByteArrays);
return buffer;
}
private void addChunks(List<byte[]> listOfByteArrays) {
for (byte[] buf : listOfByteArrays) {
addChunk(new StreamByteBufferChunk(buf));
}
}
private void addChunk(StreamByteBufferChunk chunk) {
chunks.add(chunk);
totalBytesUnreadInList += chunk.bytesUnread();
}
static class StreamByteBufferChunk {
private int pointer;
private byte[] buffer;
private int size;
private int used;
public StreamByteBufferChunk(int size) {
this.size = size;
buffer = new byte[size];
}
public StreamByteBufferChunk(byte[] buf) {
this.size = buf.length;
this.buffer = buf;
this.used = buf.length;
}
public ByteBuffer readToNioBuffer() {
if (pointer < used) {
ByteBuffer result;
if (pointer > 0 || used < size) {
result = ByteBuffer.wrap(buffer, pointer, used - pointer);
} else {
result = ByteBuffer.wrap(buffer);
}
pointer = used;
return result;
}
return null;
}
public boolean write(byte b) {
if (used < size) {
buffer[used++] = b;
return true;
}
return false;
}
public void write(byte[] b, int off, int len) {
System.arraycopy(b, off, buffer, used, len);
used = used + len;
}
public void read(byte[] b, int off, int len) {
System.arraycopy(buffer, pointer, b, off, len);
pointer = pointer + len;
}
public void writeTo(OutputStream target) throws IOException {
if (pointer < used) {
target.write(buffer, pointer, used - pointer);
pointer = used;
}
}
public void reset() {
pointer = 0;
}
public int bytesUsed() {
return used;
}
public int bytesUnread() {
return used - pointer;
}
public int read() {
if (pointer < used) {
return buffer[pointer++] & 0xff;
}
return -1;
}
public int spaceLeft() {
return size - used;
}
public int readFrom(InputStream inputStream, int len) throws IOException {
int readBytes = inputStream.read(buffer, used, len);
if(readBytes > 0) {
used += readBytes;
}
return readBytes;
}
public void clear() {
used = pointer = 0;
}
public byte[] readBuffer() {
if (used == buffer.length && pointer == 0) {
pointer = used;
return buffer;
} else if (pointer < used) {
byte[] buf = new byte[used - pointer];
read(buf, 0, used - pointer);
return buf;
} else {
return new byte[0];
}
}
}
class StreamByteBufferOutputStream extends OutputStream {
private boolean closed;
@Override
public void write(byte[] b, int off, int len) throws IOException {
if (b == null) {
throw new NullPointerException();
}
if ((off < 0) || (off > b.length) || (len < 0)
|| ((off + len) > b.length) || ((off + len) < 0)) {
throw new IndexOutOfBoundsException();
}
if (len == 0) {
return;
}
int bytesLeft = len;
int currentOffset = off;
while (bytesLeft > 0) {
int spaceLeft = allocateSpace();
int writeBytes = Math.min(spaceLeft, bytesLeft);
currentWriteChunk.write(b, currentOffset, writeBytes);
bytesLeft -= writeBytes;
currentOffset += writeBytes;
}
}
@Override
public void close() throws IOException {
closed = true;
}
public boolean isClosed() {
return closed;
}
@Override
public void write(int b) throws IOException {
allocateSpace();
currentWriteChunk.write((byte) b);
}
public StreamByteBuffer getBuffer() {
return StreamByteBuffer.this;
}
}
class StreamByteBufferInputStream extends InputStream {
@Override
public int read() throws IOException {
prepareRead();
return currentReadChunk.read();
}
@Override
public int read(byte[] b, int off, int len) throws IOException {
return readImpl(b, off, len);
}
int readImpl(byte[] b, int off, int len) {
if (b == null) {
throw new NullPointerException();
}
if ((off < 0) || (off > b.length) || (len < 0)
|| ((off + len) > b.length) || ((off + len) < 0)) {
throw new IndexOutOfBoundsException();
}
if (len == 0) {
return 0;
}
int bytesLeft = len;
int currentOffset = off;
int bytesUnread = prepareRead();
int totalBytesRead = 0;
while (bytesLeft > 0 && bytesUnread != -1) {
int readBytes = Math.min(bytesUnread, bytesLeft);
currentReadChunk.read(b, currentOffset, readBytes);
bytesLeft -= readBytes;
currentOffset += readBytes;
totalBytesRead += readBytes;
bytesUnread = prepareRead();
}
if (totalBytesRead > 0) {
return totalBytesRead;
}
return -1;
}
@Override
public int available() throws IOException {
return totalBytesUnread();
}
public StreamByteBuffer getBuffer() {
return StreamByteBuffer.this;
}
public byte[] readNextBuffer() {
if (prepareRead() != -1) {
return currentReadChunk.readBuffer();
}
return null;
}
}
public void clear() {
chunks.clear();
currentReadChunk = null;
totalBytesUnreadInList = 0;
currentWriteChunk.clear();
}
}
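A minimal round trip through the buffer, showing the "reading clears the buffer" behaviour described in the class Javadoc (string and charset are illustrative):
static void demo() throws java.io.IOException {
    StreamByteBuffer buffer = new StreamByteBuffer();
    buffer.getOutputStream().write("hello".getBytes("UTF-8"));
    String text = buffer.readAsString("UTF-8");        // "hello"
    int unread = buffer.totalBytesUnread();             // 0 - reading drained the chunks
    System.out.println(text + " / unread=" + unread);
}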

88
test/random_access/src/main/java/seaweedfs/client/btree/UncheckedException.java

@ -0,0 +1,88 @@
/*
* Copyright 2010 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package seaweedfs.client.btree;
import java.io.IOException;
import java.lang.reflect.InvocationTargetException;
import java.util.concurrent.Callable;
/**
* Wraps a checked exception. Carries no other context.
*/
public final class UncheckedException extends RuntimeException {
private UncheckedException(Throwable cause) {
super(cause);
}
private UncheckedException(String message, Throwable cause) {
super(message, cause);
}
/**
* Note: always throws the failure in some form. The return value is to keep the compiler happy.
*/
public static RuntimeException throwAsUncheckedException(Throwable t) {
return throwAsUncheckedException(t, false);
}
/**
* Note: always throws the failure in some form. The return value is to keep the compiler happy.
*/
public static RuntimeException throwAsUncheckedException(Throwable t, boolean preserveMessage) {
if (t instanceof InterruptedException) {
Thread.currentThread().interrupt();
}
if (t instanceof RuntimeException) {
throw (RuntimeException) t;
}
if (t instanceof Error) {
throw (Error) t;
}
if (t instanceof IOException) {
if (preserveMessage) {
throw new UncheckedIOException(t.getMessage(), t);
} else {
throw new UncheckedIOException(t);
}
}
if (preserveMessage) {
throw new UncheckedException(t.getMessage(), t);
} else {
throw new UncheckedException(t);
}
}
public static <T> T callUnchecked(Callable<T> callable) {
try {
return callable.call();
} catch (Exception e) {
throw throwAsUncheckedException(e);
}
}
/**
* Unwraps the passed InvocationTargetException, making the stack of exceptions cleaner without losing information.
*
* Note: always throws the failure in some form. The return value is to keep the compiler happy.
*
* @param e to be unwrapped
* @return an instance of RuntimeException based on the target exception of the parameter.
*/
public static RuntimeException unwrapAndRethrow(InvocationTargetException e) {
return UncheckedException.throwAsUncheckedException(e.getTargetException());
}
}
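The typical use is wrapping a Callable so that checked exceptions surface as runtime exceptions, as in this minimal sketch (the path parameter is illustrative):
static byte[] readOrRethrow(final java.nio.file.Path path) {
    // An IOException thrown from call() is rethrown as UncheckedIOException; other checked
    // exceptions are rethrown wrapped in UncheckedException.
    return UncheckedException.callUnchecked(new java.util.concurrent.Callable<byte[]>() {
        @Override
        public byte[] call() throws Exception {
            return java.nio.file.Files.readAllBytes(path);
        }
    });
}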

36
test/random_access/src/main/java/seaweedfs/client/btree/UncheckedIOException.java

@ -0,0 +1,36 @@
/*
* Copyright 2012 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package seaweedfs.client.btree;
/**
* <code>UncheckedIOException</code> is used to wrap an {@link java.io.IOException} into an unchecked exception.
*/
public class UncheckedIOException extends RuntimeException {
public UncheckedIOException() {
}
public UncheckedIOException(String message) {
super(message);
}
public UncheckedIOException(String message, Throwable cause) {
super(message, cause);
}
public UncheckedIOException(Throwable cause) {
super(cause);
}
}

133
test/random_access/src/main/java/seaweedfs/client/btree/serialize/AbstractDecoder.java

@ -0,0 +1,133 @@
/*
* Copyright 2013 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package seaweedfs.client.btree.serialize;
import javax.annotation.Nullable;
import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;
public abstract class AbstractDecoder implements Decoder {
private DecoderStream stream;
@Override
public InputStream getInputStream() {
if (stream == null) {
stream = new DecoderStream();
}
return stream;
}
@Override
public void readBytes(byte[] buffer) throws IOException {
readBytes(buffer, 0, buffer.length);
}
@Override
public byte[] readBinary() throws EOFException, IOException {
int size = readSmallInt();
byte[] result = new byte[size];
readBytes(result);
return result;
}
@Override
public int readSmallInt() throws EOFException, IOException {
return readInt();
}
@Override
public long readSmallLong() throws EOFException, IOException {
return readLong();
}
@Nullable
@Override
public Integer readNullableSmallInt() throws IOException {
if (readBoolean()) {
return readSmallInt();
} else {
return null;
}
}
@Override
public String readNullableString() throws EOFException, IOException {
if (readBoolean()) {
return readString();
} else {
return null;
}
}
@Override
public void skipBytes(long count) throws EOFException, IOException {
long remaining = count;
while (remaining > 0) {
long skipped = maybeSkip(remaining);
if (skipped <= 0) {
break;
}
remaining -= skipped;
}
if (remaining > 0) {
throw new EOFException();
}
}
@Override
public <T> T decodeChunked(DecodeAction<Decoder, T> decodeAction) throws EOFException, Exception {
throw new UnsupportedOperationException();
}
@Override
public void skipChunked() throws EOFException, IOException {
throw new UnsupportedOperationException();
}
protected abstract int maybeReadBytes(byte[] buffer, int offset, int count) throws IOException;
protected abstract long maybeSkip(long count) throws IOException;
private class DecoderStream extends InputStream {
byte[] buffer = new byte[1];
@Override
public long skip(long n) throws IOException {
return maybeSkip(n);
}
@Override
public int read() throws IOException {
int read = maybeReadBytes(buffer, 0, 1);
if (read <= 0) {
return read;
}
return buffer[0] & 0xff;
}
@Override
public int read(byte[] buffer) throws IOException {
return maybeReadBytes(buffer, 0, buffer.length);
}
@Override
public int read(byte[] buffer, int offset, int count) throws IOException {
return maybeReadBytes(buffer, offset, count);
}
}
}

101
test/random_access/src/main/java/seaweedfs/client/btree/serialize/AbstractEncoder.java

@ -0,0 +1,101 @@
/*
* Copyright 2013 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package seaweedfs.client.btree.serialize;
import javax.annotation.Nullable;
import java.io.IOException;
import java.io.OutputStream;
public abstract class AbstractEncoder implements Encoder {
private EncoderStream stream;
@Override
public OutputStream getOutputStream() {
if (stream == null) {
stream = new EncoderStream();
}
return stream;
}
@Override
public void writeBytes(byte[] bytes) throws IOException {
writeBytes(bytes, 0, bytes.length);
}
@Override
public void writeBinary(byte[] bytes) throws IOException {
writeBinary(bytes, 0, bytes.length);
}
@Override
public void writeBinary(byte[] bytes, int offset, int count) throws IOException {
writeSmallInt(count);
writeBytes(bytes, offset, count);
}
@Override
public void encodeChunked(EncodeAction<Encoder> writeAction) throws Exception {
throw new UnsupportedOperationException();
}
@Override
public void writeSmallInt(int value) throws IOException {
writeInt(value);
}
@Override
public void writeSmallLong(long value) throws IOException {
writeLong(value);
}
@Override
public void writeNullableSmallInt(@Nullable Integer value) throws IOException {
if (value == null) {
writeBoolean(false);
} else {
writeBoolean(true);
writeSmallInt(value);
}
}
@Override
public void writeNullableString(@Nullable CharSequence value) throws IOException {
if (value == null) {
writeBoolean(false);
} else {
writeBoolean(true);
writeString(value.toString());
}
}
private class EncoderStream extends OutputStream {
@Override
public void write(byte[] buffer) throws IOException {
writeBytes(buffer);
}
@Override
public void write(byte[] buffer, int offset, int length) throws IOException {
writeBytes(buffer, offset, length);
}
@Override
public void write(int b) throws IOException {
writeByte((byte) b);
}
}
}

40
test/random_access/src/main/java/seaweedfs/client/btree/serialize/AbstractSerializer.java

@ -0,0 +1,40 @@
/*
* Copyright 2016 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package seaweedfs.client.btree.serialize;
import com.google.common.base.Objects;
/**
* This abstract class provides a sensible default implementation for {@code Serializer} equality. This equality
* implementation is required to enable cache instance reuse within the same Gradle runtime. Serializers are used
* as a cache parameter, which needs to be compared to determine cache compatibility.
*/
public abstract class AbstractSerializer<T> implements Serializer<T> {
@Override
public boolean equals(Object obj) {
if (obj == null) {
return false;
}
return Objects.equal(obj.getClass(), getClass());
}
@Override
public int hashCode() {
return Objects.hashCode(getClass());
}
}

79
test/random_access/src/main/java/seaweedfs/client/btree/serialize/Cast.java

@ -0,0 +1,79 @@
/*
* Copyright 2012 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package seaweedfs.client.btree.serialize;
import javax.annotation.Nullable;
public abstract class Cast {
/**
* Casts the given object to the given type, providing a better error message than the default.
*
* The standard {@link Class#cast(Object)} method produces unsatisfactory error messages on some platforms
* when it fails. All this method does is provide a better, consistent, error message.
*
* This should be used whenever there is a chance the cast could fail. If in doubt, use this.
*
* @param outputType The type to cast the input to
* @param object The object to be cast (must not be {@code null})
* @param <O> The type to be cast to
* @param <I> The type of the object to be cast
* @return The input object, cast to the output type
*/
public static <O, I> O cast(Class<O> outputType, I object) {
try {
return outputType.cast(object);
} catch (ClassCastException e) {
throw new ClassCastException(String.format(
"Failed to cast object %s of type %s to target type %s", object, object.getClass().getName(), outputType.getName()
));
}
}
/**
* Casts the given object to the given type, providing a better error message than the default.
*
* The standard {@link Class#cast(Object)} method produces unsatisfactory error messages on some platforms
* when it fails. All this method does is provide a better, consistent, error message.
*
* This should be used whenever there is a chance the cast could fail. If in doubt, use this.
*
* @param outputType The type to cast the input to
* @param object The object to be cast
* @param <O> The type to be cast to
* @param <I> The type of the object to be cast
* @return The input object, cast to the output type
*/
@Nullable
public static <O, I> O castNullable(Class<O> outputType, @Nullable I object) {
if (object == null) {
return null;
}
return cast(outputType, object);
}
@SuppressWarnings("unchecked")
@Nullable
public static <T> T uncheckedCast(@Nullable Object object) {
return (T) object;
}
@SuppressWarnings("unchecked")
public static <T> T uncheckedNonnullCast(Object object) {
return (T) object;
}
}
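A short sketch of the difference between cast and uncheckedCast (types are illustrative):
static void demo(Object value) {
    // Fails immediately with a descriptive ClassCastException if value is not a CharSequence.
    CharSequence text = Cast.cast(CharSequence.class, value);
    // Compiles for any target type; an incompatible value only fails later, where it is actually used.
    java.util.List<String> list = Cast.uncheckedCast(value);
}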

43
test/random_access/src/main/java/seaweedfs/client/btree/serialize/ClassLoaderObjectInputStream.java

@ -0,0 +1,43 @@
/*
* Copyright 2010 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package seaweedfs.client.btree.serialize;
import java.io.IOException;
import java.io.InputStream;
import java.io.ObjectInputStream;
import java.io.ObjectStreamClass;
public class ClassLoaderObjectInputStream extends ObjectInputStream {
private final ClassLoader loader;
public ClassLoaderObjectInputStream(InputStream in, ClassLoader loader) throws IOException {
super(in);
this.loader = loader;
}
public ClassLoader getClassLoader() {
return loader;
}
@Override
protected Class<?> resolveClass(ObjectStreamClass desc) throws IOException, ClassNotFoundException {
try {
return Class.forName(desc.getName(), false, loader);
} catch (ClassNotFoundException e) {
return super.resolveClass(desc);
}
}
}

140
test/random_access/src/main/java/seaweedfs/client/btree/serialize/Decoder.java

@ -0,0 +1,140 @@
/*
* Copyright 2013 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package seaweedfs.client.btree.serialize;
import javax.annotation.Nullable;
import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;
/**
* Provides a way to decode structured data from a backing byte stream. Implementations may buffer incoming bytes read
* from the backing stream prior to decoding.
*/
public interface Decoder {
/**
* Returns an InputStream which can be used to read raw bytes.
*/
InputStream getInputStream();
/**
* Reads a signed 64 bit long value. Can read any value that was written using {@link Encoder#writeLong(long)}.
*
* @throws EOFException when the end of the byte stream is reached before the long value can be fully read.
*/
long readLong() throws EOFException, IOException;
/**
* Reads a signed 64 bit long value. Can read any value that was written using {@link Encoder#writeSmallLong(long)}.
*
* @throws EOFException when the end of the byte stream is reached before the long value can be fully read.
*/
long readSmallLong() throws EOFException, IOException;
/**
* Reads a signed 32 bit int value. Can read any value that was written using {@link Encoder#writeInt(int)}.
*
* @throws EOFException when the end of the byte stream is reached before the int value can be fully read.
*/
int readInt() throws EOFException, IOException;
/**
* Reads a signed 32 bit int value. Can read any value that was written using {@link Encoder#writeSmallInt(int)}.
*
* @throws EOFException when the end of the byte stream is reached before the int value can be fully read.
*/
int readSmallInt() throws EOFException, IOException;
/**
* Reads a nullable signed 32 bit int value.
*
* @see #readSmallInt()
*/
@Nullable
Integer readNullableSmallInt() throws EOFException, IOException;
/**
* Reads a boolean value. Can read any value that was written using {@link Encoder#writeBoolean(boolean)}.
*
* @throws EOFException when the end of the byte stream is reached before the boolean value can be fully read.
*/
boolean readBoolean() throws EOFException, IOException;
/**
* Reads a non-null string value. Can read any value that was written using {@link Encoder#writeString(CharSequence)}.
*
* @throws EOFException when the end of the byte stream is reached before the string can be fully read.
*/
String readString() throws EOFException, IOException;
/**
* Reads a nullable string value. Can read any value that was written using {@link Encoder#writeNullableString(CharSequence)}.
*
* @throws EOFException when the end of the byte stream is reached before the string can be fully read.
*/
@Nullable
String readNullableString() throws EOFException, IOException;
/**
* Reads a byte value. Can read any byte value that was written using one of the raw byte methods on {@link Encoder}, such as {@link Encoder#writeByte(byte)} or {@link Encoder#getOutputStream()}
*
* @throws EOFException when the end of the byte stream is reached.
*/
byte readByte() throws EOFException, IOException;
/**
* Reads bytes into the given buffer, filling the buffer. Can read any byte values that were written using one of the raw byte methods on {@link Encoder}, such as {@link
* Encoder#writeBytes(byte[])} or {@link Encoder#getOutputStream()}
*
* @throws EOFException when the end of the byte stream is reached before the buffer is full.
*/
void readBytes(byte[] buffer) throws EOFException, IOException;
/**
* Reads the specified number of bytes into the given buffer. Can read any byte values that were written using one of the raw byte methods on {@link Encoder}, such as {@link
* Encoder#writeBytes(byte[])} or {@link Encoder#getOutputStream()}
*
* @throws EOFException when the end of the byte stream is reached before the specified number of bytes were read.
*/
void readBytes(byte[] buffer, int offset, int count) throws EOFException, IOException;
/**
* Reads a byte array. Can read any byte array written using {@link Encoder#writeBinary(byte[])} or {@link Encoder#writeBinary(byte[], int, int)}.
*
* @throws EOFException when the end of the byte stream is reached before the byte array was fully read.
*/
byte[] readBinary() throws EOFException, IOException;
/**
* Skips the given number of bytes. Can skip over any byte values that were written using one of the raw byte methods on {@link Encoder}.
*/
void skipBytes(long count) throws EOFException, IOException;
/**
* Reads a byte stream written using {@link Encoder#encodeChunked(Encoder.EncodeAction)}.
*/
<T> T decodeChunked(DecodeAction<Decoder, T> decodeAction) throws EOFException, Exception;
/**
* Skips over a byte stream written using {@link Encoder#encodeChunked(Encoder.EncodeAction)}, discarding its content.
*/
void skipChunked() throws EOFException, IOException;
interface DecodeAction<IN, OUT> {
OUT read(IN source) throws Exception;
}
}

73
test/random_access/src/main/java/seaweedfs/client/btree/serialize/DefaultSerializer.java

@ -0,0 +1,73 @@
/*
* Copyright 2009 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package seaweedfs.client.btree.serialize;
import com.google.common.base.Objects;
import java.io.IOException;
import java.io.ObjectOutputStream;
import java.io.StreamCorruptedException;
public class DefaultSerializer<T> extends AbstractSerializer<T> {
private ClassLoader classLoader;
public DefaultSerializer() {
classLoader = getClass().getClassLoader();
}
public DefaultSerializer(ClassLoader classLoader) {
this.classLoader = classLoader != null ? classLoader : getClass().getClassLoader();
}
public ClassLoader getClassLoader() {
return classLoader;
}
public void setClassLoader(ClassLoader classLoader) {
this.classLoader = classLoader;
}
@Override
public T read(Decoder decoder) throws Exception {
try {
return Cast.uncheckedNonnullCast(new ClassLoaderObjectInputStream(decoder.getInputStream(), classLoader).readObject());
} catch (StreamCorruptedException e) {
return null;
}
}
@Override
public void write(Encoder encoder, T value) throws IOException {
ObjectOutputStream objectStr = new ObjectOutputStream(encoder.getOutputStream());
objectStr.writeObject(value);
objectStr.flush();
}
@Override
public boolean equals(Object obj) {
if (!super.equals(obj)) {
return false;
}
DefaultSerializer<?> rhs = (DefaultSerializer<?>) obj;
return Objects.equal(classLoader, rhs.classLoader);
}
@Override
public int hashCode() {
return Objects.hashCode(super.hashCode(), classLoader);
}
}
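DefaultSerializer rides on plain Java serialization over the Encoder/Decoder streams. A minimal write-side sketch, assuming the KryoBackedEncoder already referenced by KeyHasher above (the value type is illustrative and must be Serializable):
static void writeValue(java.io.OutputStream target, java.util.ArrayList<String> value) throws java.io.IOException {
    seaweedfs.client.btree.serialize.kryo.KryoBackedEncoder encoder =
            new seaweedfs.client.btree.serialize.kryo.KryoBackedEncoder(target);
    DefaultSerializer<java.util.ArrayList<String>> serializer = new DefaultSerializer<java.util.ArrayList<String>>();
    serializer.write(encoder, value);   // ObjectOutputStream layered over encoder.getOutputStream()
    encoder.flush();                    // push buffered bytes through to the target stream
}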

110
test/random_access/src/main/java/seaweedfs/client/btree/serialize/Encoder.java

@ -0,0 +1,110 @@
/*
* Copyright 2013 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package seaweedfs.client.btree.serialize;
import javax.annotation.Nullable;
import java.io.IOException;
import java.io.OutputStream;
/**
* Provides a way to encode structured data to a backing byte stream. Implementations may buffer outgoing encoded bytes prior
* to writing to the backing byte stream.
*/
public interface Encoder {
/**
* Returns an {@link OutputStream} that can be used to write raw bytes to the stream.
*/
OutputStream getOutputStream();
/**
* Writes a raw byte value to the stream.
*/
void writeByte(byte value) throws IOException;
/**
* Writes the given raw bytes to the stream. Does not encode any length information.
*/
void writeBytes(byte[] bytes) throws IOException;
/**
* Writes the given raw bytes to the stream. Does not encode any length information.
*/
void writeBytes(byte[] bytes, int offset, int count) throws IOException;
/**
* Writes the given byte array to the stream. Encodes the bytes and length information.
*/
void writeBinary(byte[] bytes) throws IOException;
/**
* Writes the given byte array to the stream. Encodes the bytes and length information.
*/
void writeBinary(byte[] bytes, int offset, int count) throws IOException;
/**
* Appends an encoded stream to this stream. Encodes the stream as a series of chunks with length information.
*/
void encodeChunked(EncodeAction<Encoder> writeAction) throws Exception;
/**
* Writes a signed 64 bit long value. The implementation may encode the value as a variable number of bytes, not necessarily as 8 bytes.
*/
void writeLong(long value) throws IOException;
/**
* Writes a signed 64 bit long value whose value is likely to be small and positive but may not be. The implementation may encode the value in a way that is more efficient for small positive
* values.
*/
void writeSmallLong(long value) throws IOException;
/**
* Writes a signed 32 bit int value. The implementation may encode the value as a variable number of bytes, not necessarily as 4 bytes.
*/
void writeInt(int value) throws IOException;
/**
* Writes a signed 32 bit int value whose value is likely to be small and positive but may not be. The implementation may encode the value in a way that
* is more efficient for small positive values.
*/
void writeSmallInt(int value) throws IOException;
/**
* Writes a nullable signed 32 bit int value whose value is likely to be small and positive but may not be.
*
* @see #writeSmallInt(int)
*/
void writeNullableSmallInt(@Nullable Integer value) throws IOException;
/**
* Writes a boolean value.
*/
void writeBoolean(boolean value) throws IOException;
/**
* Writes a non-null string value.
*/
void writeString(CharSequence value) throws IOException;
/**
* Writes a nullable string value.
*/
void writeNullableString(@Nullable CharSequence value) throws IOException;
interface EncodeAction<T> {
void write(T target) throws Exception;
}
}
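Encoder and Decoder are designed as mirror images: every read method names the write method whose output it can consume. A hedged round-trip sketch, assuming the KryoBackedEncoder seen in KeyHasher and a matching KryoBackedDecoder in the same kryo package (the decoder class name and constructor are assumptions, not confirmed by this change):
static void roundTrip() throws Exception {
    java.io.ByteArrayOutputStream bytes = new java.io.ByteArrayOutputStream();
    seaweedfs.client.btree.serialize.kryo.KryoBackedEncoder encoder =
            new seaweedfs.client.btree.serialize.kryo.KryoBackedEncoder(bytes);
    encoder.writeString("key");
    encoder.writeSmallInt(42);
    encoder.writeNullableString(null);
    encoder.flush();
    seaweedfs.client.btree.serialize.kryo.KryoBackedDecoder decoder =        // assumed counterpart class
            new seaweedfs.client.btree.serialize.kryo.KryoBackedDecoder(
                    new java.io.ByteArrayInputStream(bytes.toByteArray()));
    System.out.println(decoder.readString());             // "key"
    System.out.println(decoder.readSmallInt());           // 42
    System.out.println(decoder.readNullableString());     // null
}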

31
test/random_access/src/main/java/seaweedfs/client/btree/serialize/FlushableEncoder.java

@ -0,0 +1,31 @@
/*
* Copyright 2013 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package seaweedfs.client.btree.serialize;
import java.io.Flushable;
import java.io.IOException;
/**
* Represents an {@link Encoder} that buffers encoded data prior to writing to the backing stream.
*/
public interface FlushableEncoder extends Encoder, Flushable {
/**
* Ensures that all buffered data has been written to the backing stream. Does not flush the backing stream.
*/
@Override
void flush() throws IOException;
}

28
test/random_access/src/main/java/seaweedfs/client/btree/serialize/ObjectReader.java

@ -0,0 +1,28 @@
/*
* Copyright 2012 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package seaweedfs.client.btree.serialize;
import java.io.EOFException;
public interface ObjectReader<T> {
/**
* Reads the next object from the stream.
*
* @throws EOFException When the next object cannot be fully read due to reaching the end of stream.
*/
T read() throws EOFException, Exception;
}

21
test/random_access/src/main/java/seaweedfs/client/btree/serialize/ObjectWriter.java

@ -0,0 +1,21 @@
/*
* Copyright 2012 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package seaweedfs.client.btree.serialize;
public interface ObjectWriter<T> {
void write(T value) throws Exception;
}

33
test/random_access/src/main/java/seaweedfs/client/btree/serialize/Serializer.java

@ -0,0 +1,33 @@
/*
* Copyright 2009 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package seaweedfs.client.btree.serialize;
import java.io.EOFException;
public interface Serializer<T> {
/**
* Reads the next object from the given stream. The implementation must not perform any buffering, so that it reads only those bytes from the input stream that are
* required to deserialize the next object.
*
* @throws EOFException When the next object cannot be fully read due to reaching the end of stream.
*/
T read(Decoder decoder) throws EOFException, Exception;
/**
* Writes the given object to the given stream. The implementation must not perform any buffering.
*/
void write(Encoder encoder, T value) throws Exception;
}
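Because Serializer is the contract most callers implement, a minimal conforming implementation may help; the class name NullableStringSerializer is hypothetical, and it assumes the nullable-string primitives on Decoder/Encoder shown elsewhere in this change. It delegates directly and performs no buffering of its own.

import seaweedfs.client.btree.serialize.Decoder;
import seaweedfs.client.btree.serialize.Encoder;
import seaweedfs.client.btree.serialize.Serializer;

import java.io.EOFException;

public class NullableStringSerializer implements Serializer<String> {
    @Override
    public String read(Decoder decoder) throws EOFException, Exception {
        // Reads exactly the bytes needed for one nullable string, nothing more.
        return decoder.readNullableString();
    }

    @Override
    public void write(Encoder encoder, String value) throws Exception {
        encoder.writeNullableString(value);
    }
}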

33
test/random_access/src/main/java/seaweedfs/client/btree/serialize/StatefulSerializer.java

@ -0,0 +1,33 @@
/*
* Copyright 2012 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package seaweedfs.client.btree.serialize;
/**
* Implementations must allow concurrent reading and writing, so that a thread can read and a thread can write at the same time.
* Implementations do not need to support multiple read threads or multiple write threads.
*/
public interface StatefulSerializer<T> {
/**
* Should not perform any buffering
*/
ObjectReader<T> newReader(Decoder decoder);
/**
* Should not perform any buffering
*/
ObjectWriter<T> newWriter(Encoder encoder);
}

210
test/random_access/src/main/java/seaweedfs/client/btree/serialize/kryo/KryoBackedDecoder.java

@ -0,0 +1,210 @@
/*
* Copyright 2013 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package seaweedfs.client.btree.serialize.kryo;
import com.esotericsoftware.kryo.KryoException;
import com.esotericsoftware.kryo.io.Input;
import seaweedfs.client.btree.serialize.AbstractDecoder;
import seaweedfs.client.btree.serialize.Decoder;
import java.io.Closeable;
import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;
/**
* Note that this decoder uses buffering, so will attempt to read beyond the end of the encoded data. This means you should use this type only when this decoder will be used to decode the entire
* stream.
*/
public class KryoBackedDecoder extends AbstractDecoder implements Decoder, Closeable {
private final Input input;
private final InputStream inputStream;
private long extraSkipped;
private KryoBackedDecoder nested;
public KryoBackedDecoder(InputStream inputStream) {
this(inputStream, 4096);
}
public KryoBackedDecoder(InputStream inputStream, int bufferSize) {
this.inputStream = inputStream;
input = new Input(this.inputStream, bufferSize);
}
@Override
protected int maybeReadBytes(byte[] buffer, int offset, int count) {
return input.read(buffer, offset, count);
}
@Override
protected long maybeSkip(long count) throws IOException {
// Work around some bugs in Input.skip()
int remaining = input.limit() - input.position();
if (remaining == 0) {
long skipped = inputStream.skip(count);
if (skipped > 0) {
extraSkipped += skipped;
}
return skipped;
} else if (count <= remaining) {
input.setPosition(input.position() + (int) count);
return count;
} else {
input.setPosition(input.limit());
return remaining;
}
}
private RuntimeException maybeEndOfStream(KryoException e) throws EOFException {
if (e.getMessage().equals("Buffer underflow.")) {
throw (EOFException) (new EOFException().initCause(e));
}
throw e;
}
@Override
public byte readByte() throws EOFException {
try {
return input.readByte();
} catch (KryoException e) {
throw maybeEndOfStream(e);
}
}
@Override
public void readBytes(byte[] buffer, int offset, int count) throws EOFException {
try {
input.readBytes(buffer, offset, count);
} catch (KryoException e) {
throw maybeEndOfStream(e);
}
}
@Override
public long readLong() throws EOFException {
try {
return input.readLong();
} catch (KryoException e) {
throw maybeEndOfStream(e);
}
}
@Override
public long readSmallLong() throws EOFException, IOException {
try {
return input.readLong(true);
} catch (KryoException e) {
throw maybeEndOfStream(e);
}
}
@Override
public int readInt() throws EOFException {
try {
return input.readInt();
} catch (KryoException e) {
throw maybeEndOfStream(e);
}
}
@Override
public int readSmallInt() throws EOFException {
try {
return input.readInt(true);
} catch (KryoException e) {
throw maybeEndOfStream(e);
}
}
@Override
public boolean readBoolean() throws EOFException {
try {
return input.readBoolean();
} catch (KryoException e) {
throw maybeEndOfStream(e);
}
}
@Override
public String readString() throws EOFException {
return readNullableString();
}
@Override
public String readNullableString() throws EOFException {
try {
return input.readString();
} catch (KryoException e) {
throw maybeEndOfStream(e);
}
}
@Override
public void skipChunked() throws EOFException, IOException {
while (true) {
int count = readSmallInt();
if (count == 0) {
break;
}
skipBytes(count);
}
}
@Override
public <T> T decodeChunked(DecodeAction<Decoder, T> decodeAction) throws EOFException, Exception {
if (nested == null) {
nested = new KryoBackedDecoder(new InputStream() {
@Override
public int read() throws IOException {
throw new UnsupportedOperationException();
}
@Override
public int read(byte[] buffer, int offset, int length) throws IOException {
int count = readSmallInt();
if (count == 0) {
// End of stream has been reached
return -1;
}
if (count > length) {
// For now, assume same size buffers used to read and write
throw new UnsupportedOperationException();
}
readBytes(buffer, offset, count);
return count;
}
});
}
T value = decodeAction.read(nested);
if (readSmallInt() != 0) {
throw new IllegalStateException("Expecting the end of nested stream.");
}
return value;
}
/**
* Returns the total number of bytes consumed by this decoder. Some additional bytes may also be buffered by this decoder but have not been consumed.
*/
public long getReadPosition() {
return input.total() + extraSkipped;
}
@Override
public void close() throws IOException {
input.close();
}
}

134
test/random_access/src/main/java/seaweedfs/client/btree/serialize/kryo/KryoBackedEncoder.java

@ -0,0 +1,134 @@
/*
* Copyright 2013 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package seaweedfs.client.btree.serialize.kryo;
import com.esotericsoftware.kryo.io.Output;
import seaweedfs.client.btree.serialize.AbstractEncoder;
import seaweedfs.client.btree.serialize.Encoder;
import seaweedfs.client.btree.serialize.FlushableEncoder;
import javax.annotation.Nullable;
import java.io.Closeable;
import java.io.IOException;
import java.io.OutputStream;
public class KryoBackedEncoder extends AbstractEncoder implements FlushableEncoder, Closeable {
private final Output output;
private KryoBackedEncoder nested;
public KryoBackedEncoder(OutputStream outputStream) {
this(outputStream, 4096);
}
public KryoBackedEncoder(OutputStream outputStream, int bufferSize) {
output = new Output(outputStream, bufferSize);
}
@Override
public void writeByte(byte value) {
output.writeByte(value);
}
@Override
public void writeBytes(byte[] bytes, int offset, int count) {
output.writeBytes(bytes, offset, count);
}
@Override
public void writeLong(long value) {
output.writeLong(value);
}
@Override
public void writeSmallLong(long value) {
output.writeLong(value, true);
}
@Override
public void writeInt(int value) {
output.writeInt(value);
}
@Override
public void writeSmallInt(int value) {
output.writeInt(value, true);
}
@Override
public void writeBoolean(boolean value) {
output.writeBoolean(value);
}
@Override
public void writeString(CharSequence value) {
if (value == null) {
throw new IllegalArgumentException("Cannot encode a null string.");
}
output.writeString(value);
}
@Override
public void writeNullableString(@Nullable CharSequence value) {
output.writeString(value);
}
@Override
public void encodeChunked(EncodeAction<Encoder> writeAction) throws Exception {
if (nested == null) {
nested = new KryoBackedEncoder(new OutputStream() {
@Override
public void write(byte[] buffer, int offset, int length) {
if (length == 0) {
return;
}
writeSmallInt(length);
writeBytes(buffer, offset, length);
}
@Override
public void write(byte[] buffer) throws IOException {
write(buffer, 0, buffer.length);
}
@Override
public void write(int b) {
throw new UnsupportedOperationException();
}
});
}
writeAction.write(nested);
nested.flush();
writeSmallInt(0);
}
/**
* Returns the total number of bytes written by this encoder, some of which may still be buffered.
*/
public long getWritePosition() {
return output.total();
}
@Override
public void flush() {
output.flush();
}
@Override
public void close() {
output.close();
}
}
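Taken together with the KryoBackedDecoder above, a write/read round trip looks roughly like this sketch (the class name KryoRoundTrip is hypothetical, and in-memory streams stand in for a real backing file):

import seaweedfs.client.btree.serialize.kryo.KryoBackedDecoder;
import seaweedfs.client.btree.serialize.kryo.KryoBackedEncoder;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;

public class KryoRoundTrip {
    public static void main(String[] args) throws Exception {
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        KryoBackedEncoder encoder = new KryoBackedEncoder(bytes);
        encoder.writeSmallLong(12345L);   // variable-length encoding, cheap for small values
        encoder.writeString("hello");
        encoder.flush();                  // the encoder buffers, so flush before reading back

        KryoBackedDecoder decoder = new KryoBackedDecoder(new ByteArrayInputStream(bytes.toByteArray()));
        System.out.println(decoder.readSmallLong()); // 12345
        System.out.println(decoder.readString());    // hello
    }
}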

188
test/random_access/src/main/java/seaweedfs/client/btree/serialize/kryo/StringDeduplicatingKryoBackedDecoder.java

@ -0,0 +1,188 @@
/*
* Copyright 2018 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package seaweedfs.client.btree.serialize.kryo;
import com.esotericsoftware.kryo.KryoException;
import com.esotericsoftware.kryo.io.Input;
import seaweedfs.client.btree.serialize.AbstractDecoder;
import seaweedfs.client.btree.serialize.Decoder;
import java.io.Closeable;
import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;
/**
* Note that this decoder uses buffering, so will attempt to read beyond the end of the encoded data. This means you should use this type only when this decoder will be used to decode the entire
* stream.
*/
public class StringDeduplicatingKryoBackedDecoder extends AbstractDecoder implements Decoder, Closeable {
public static final int INITIAL_CAPACITY = 32;
private final Input input;
private final InputStream inputStream;
private String[] strings;
private long extraSkipped;
public StringDeduplicatingKryoBackedDecoder(InputStream inputStream) {
this(inputStream, 4096);
}
public StringDeduplicatingKryoBackedDecoder(InputStream inputStream, int bufferSize) {
this.inputStream = inputStream;
input = new Input(this.inputStream, bufferSize);
}
@Override
protected int maybeReadBytes(byte[] buffer, int offset, int count) {
return input.read(buffer, offset, count);
}
@Override
protected long maybeSkip(long count) throws IOException {
// Work around some bugs in Input.skip()
int remaining = input.limit() - input.position();
if (remaining == 0) {
long skipped = inputStream.skip(count);
if (skipped > 0) {
extraSkipped += skipped;
}
return skipped;
} else if (count <= remaining) {
input.setPosition(input.position() + (int) count);
return count;
} else {
input.setPosition(input.limit());
return remaining;
}
}
private RuntimeException maybeEndOfStream(KryoException e) throws EOFException {
if (e.getMessage().equals("Buffer underflow.")) {
throw (EOFException) (new EOFException().initCause(e));
}
throw e;
}
@Override
public byte readByte() throws EOFException {
try {
return input.readByte();
} catch (KryoException e) {
throw maybeEndOfStream(e);
}
}
@Override
public void readBytes(byte[] buffer, int offset, int count) throws EOFException {
try {
input.readBytes(buffer, offset, count);
} catch (KryoException e) {
throw maybeEndOfStream(e);
}
}
@Override
public long readLong() throws EOFException {
try {
return input.readLong();
} catch (KryoException e) {
throw maybeEndOfStream(e);
}
}
@Override
public long readSmallLong() throws EOFException, IOException {
try {
return input.readLong(true);
} catch (KryoException e) {
throw maybeEndOfStream(e);
}
}
@Override
public int readInt() throws EOFException {
try {
return input.readInt();
} catch (KryoException e) {
throw maybeEndOfStream(e);
}
}
@Override
public int readSmallInt() throws EOFException {
try {
return input.readInt(true);
} catch (KryoException e) {
throw maybeEndOfStream(e);
}
}
@Override
public boolean readBoolean() throws EOFException {
try {
return input.readBoolean();
} catch (KryoException e) {
throw maybeEndOfStream(e);
}
}
@Override
public String readString() throws EOFException {
return readNullableString();
}
@Override
public String readNullableString() throws EOFException {
try {
int idx = readInt();
if (idx == -1) {
return null;
}
if (strings == null) {
strings = new String[INITIAL_CAPACITY];
}
String string = null;
if (idx >= strings.length) {
String[] grow = new String[strings.length * 3 / 2];
System.arraycopy(strings, 0, grow, 0, strings.length);
strings = grow;
} else {
string = strings[idx];
}
if (string == null) {
string = input.readString();
strings[idx] = string;
}
return string;
} catch (KryoException e) {
throw maybeEndOfStream(e);
}
}
/**
* Returns the total number of bytes consumed by this decoder. Some additional bytes may also be buffered by this decoder but have not been consumed.
*/
public long getReadPosition() {
return input.total() + extraSkipped;
}
@Override
public void close() throws IOException {
strings = null;
input.close();
}
}

128
test/random_access/src/main/java/seaweedfs/client/btree/serialize/kryo/StringDeduplicatingKryoBackedEncoder.java

@ -0,0 +1,128 @@
/*
* Copyright 2018 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package seaweedfs.client.btree.serialize.kryo;
import com.esotericsoftware.kryo.io.Output;
import com.google.common.collect.Maps;
import seaweedfs.client.btree.serialize.AbstractEncoder;
import seaweedfs.client.btree.serialize.FlushableEncoder;
import javax.annotation.Nullable;
import java.io.Closeable;
import java.io.OutputStream;
import java.util.Map;
public class StringDeduplicatingKryoBackedEncoder extends AbstractEncoder implements FlushableEncoder, Closeable {
private Map<String, Integer> strings;
private final Output output;
public StringDeduplicatingKryoBackedEncoder(OutputStream outputStream) {
this(outputStream, 4096);
}
public StringDeduplicatingKryoBackedEncoder(OutputStream outputStream, int bufferSize) {
output = new Output(outputStream, bufferSize);
}
@Override
public void writeByte(byte value) {
output.writeByte(value);
}
@Override
public void writeBytes(byte[] bytes, int offset, int count) {
output.writeBytes(bytes, offset, count);
}
@Override
public void writeLong(long value) {
output.writeLong(value);
}
@Override
public void writeSmallLong(long value) {
output.writeLong(value, true);
}
@Override
public void writeInt(int value) {
output.writeInt(value);
}
@Override
public void writeSmallInt(int value) {
output.writeInt(value, true);
}
@Override
public void writeBoolean(boolean value) {
output.writeBoolean(value);
}
@Override
public void writeString(CharSequence value) {
if (value == null) {
throw new IllegalArgumentException("Cannot encode a null string.");
}
writeNullableString(value);
}
@Override
public void writeNullableString(@Nullable CharSequence value) {
if (value == null) {
output.writeInt(-1);
return;
} else {
if (strings == null) {
strings = Maps.newHashMapWithExpectedSize(1024);
}
}
String key = value.toString();
Integer index = strings.get(key);
if (index == null) {
index = strings.size();
output.writeInt(index);
strings.put(key, index);
output.writeString(key);
} else {
output.writeInt(index);
}
}
/**
* Returns the total number of bytes written by this encoder, some of which may still be buffered.
*/
public long getWritePosition() {
return output.total();
}
@Override
public void flush() {
output.flush();
}
@Override
public void close() {
output.close();
}
public void done() {
strings = null;
}
}
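A rough sketch of how the deduplicating pair behaves (the class name DedupRoundTrip is hypothetical): the second write of the same string is encoded only as an index into the string table, and the matching decoder rebuilds that table while reading.

import seaweedfs.client.btree.serialize.kryo.StringDeduplicatingKryoBackedDecoder;
import seaweedfs.client.btree.serialize.kryo.StringDeduplicatingKryoBackedEncoder;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;

public class DedupRoundTrip {
    public static void main(String[] args) throws Exception {
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        StringDeduplicatingKryoBackedEncoder encoder = new StringDeduplicatingKryoBackedEncoder(bytes);
        encoder.writeString("repeated-key");  // index 0 plus the string bytes
        encoder.writeString("repeated-key");  // index 0 only, no string bytes repeated
        encoder.flush();

        StringDeduplicatingKryoBackedDecoder decoder =
                new StringDeduplicatingKryoBackedDecoder(new ByteArrayInputStream(bytes.toByteArray()));
        System.out.println(decoder.readString()); // repeated-key
        System.out.println(decoder.readString()); // repeated-key, served from the string table
    }
}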

51
test/random_access/src/main/java/seaweedfs/client/btree/serialize/kryo/TypeSafeSerializer.java

@ -0,0 +1,51 @@
/*
* Copyright 2012 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package seaweedfs.client.btree.serialize.kryo;
import seaweedfs.client.btree.serialize.*;
public class TypeSafeSerializer<T> implements StatefulSerializer<Object> {
private final Class<T> type;
private final StatefulSerializer<T> serializer;
public TypeSafeSerializer(Class<T> type, StatefulSerializer<T> serializer) {
this.type = type;
this.serializer = serializer;
}
@Override
public ObjectReader<Object> newReader(Decoder decoder) {
final ObjectReader<T> reader = serializer.newReader(decoder);
return new ObjectReader<Object>() {
@Override
public Object read() throws Exception {
return reader.read();
}
};
}
@Override
public ObjectWriter<Object> newWriter(Encoder encoder) {
final ObjectWriter<T> writer = serializer.newWriter(encoder);
return new ObjectWriter<Object>() {
@Override
public void write(Object value) throws Exception {
writer.write(type.cast(value));
}
};
}
}
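A brief sketch of wiring a typed serializer through TypeSafeSerializer (the anonymous String serializer and the class name below are hypothetical); the wrapper erases the type for readers and checks it with type.cast(value) on writes.

import seaweedfs.client.btree.serialize.Decoder;
import seaweedfs.client.btree.serialize.Encoder;
import seaweedfs.client.btree.serialize.ObjectReader;
import seaweedfs.client.btree.serialize.ObjectWriter;
import seaweedfs.client.btree.serialize.StatefulSerializer;
import seaweedfs.client.btree.serialize.kryo.TypeSafeSerializer;

public class TypeSafeSerializerExample {
    public static void main(String[] args) {
        StatefulSerializer<String> strings = new StatefulSerializer<String>() {
            @Override
            public ObjectReader<String> newReader(final Decoder decoder) {
                return new ObjectReader<String>() {
                    @Override
                    public String read() throws Exception {
                        return decoder.readString();
                    }
                };
            }

            @Override
            public ObjectWriter<String> newWriter(final Encoder encoder) {
                return new ObjectWriter<String>() {
                    @Override
                    public void write(String value) throws Exception {
                        encoder.writeString(value);
                    }
                };
            }
        };
        // Usable wherever a StatefulSerializer<Object> is expected.
        StatefulSerializer<Object> erased = new TypeSafeSerializer<String>(String.class, strings);
        System.out.println(erased.getClass().getSimpleName());
    }
}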

476
test/random_access/src/test/java/seaweedfs/client/btree/BTreePersistentIndexedCacheTest.java

@ -0,0 +1,476 @@
/*
* Copyright 2010 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package seaweedfs.client.btree;
import seaweedfs.client.btree.serialize.DefaultSerializer;
import seaweedfs.client.btree.serialize.Serializer;
import org.junit.Before;
import org.junit.Rule;
import org.junit.Test;
import java.io.File;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import static org.hamcrest.CoreMatchers.*;
import static org.junit.Assert.assertNull;
import static org.hamcrest.MatcherAssert.assertThat;
import static org.junit.Assert.assertTrue;
public class BTreePersistentIndexedCacheTest {
private final Serializer<String> stringSerializer = new DefaultSerializer<String>();
private final Serializer<Integer> integerSerializer = new DefaultSerializer<Integer>();
private BTreePersistentIndexedCache<String, Integer> cache;
private File cacheFile;
@Before
public void setup() {
cacheFile = tmpDirFile("cache.bin");
}
public File tmpDirFile(String filename) {
File f = new File("/Users/chris/tmp/mm/dev/btree_test");
// File f = new File("/tmp/btree_test");
f.mkdirs();
return new File(f, filename);
}
private void createCache() {
cache = new BTreePersistentIndexedCache<String, Integer>(cacheFile, stringSerializer, integerSerializer, (short) 4, 100);
}
private void verifyAndCloseCache() {
cache.verify();
cache.close();
}
@Test
public void getReturnsNullWhenEntryDoesNotExist() {
createCache();
assertNull(cache.get("unknown"));
verifyAndCloseCache();
}
@Test
public void persistsAddedEntries() {
createCache();
checkAdds(1, 2, 3, 4, 5);
verifyAndCloseCache();
}
@Test
public void persistsAddedEntriesInReverseOrder() {
createCache();
checkAdds(5, 4, 3, 2, 1);
verifyAndCloseCache();
}
@Test
public void persistsAddedEntriesOverMultipleIndexBlocks() {
createCache();
checkAdds(3, 2, 11, 5, 7, 1, 10, 8, 9, 4, 6, 0);
verifyAndCloseCache();
}
@Test
public void persistsUpdates() {
createCache();
checkUpdates(3, 2, 11, 5, 7, 1, 10, 8, 9, 4, 6, 0);
verifyAndCloseCache();
}
@Test
public void handlesUpdatesWhenBlockSizeDecreases() {
BTreePersistentIndexedCache<String, List<Integer>> cache =
new BTreePersistentIndexedCache<String, List<Integer>>(
tmpDirFile("listcache.bin"), stringSerializer,
new DefaultSerializer<List<Integer>>(), (short) 4, 100);
List<Integer> values = Arrays.asList(3, 2, 11, 5, 7, 1, 10, 8, 9, 4, 6, 0);
Map<Integer, List<Integer>> updated = new LinkedHashMap<Integer, List<Integer>>();
for (int i = 10; i > 0; i--) {
for (Integer value : values) {
String key = String.format("key_%d", value);
List<Integer> newValue = new ArrayList<Integer>(i);
for (int j = 0; j < i * 2; j++) {
newValue.add(j);
}
cache.put(key, newValue);
updated.put(value, newValue);
}
checkListEntries(cache, updated);
}
cache.reset();
checkListEntries(cache, updated);
cache.verify();
cache.close();
}
private void checkListEntries(BTreePersistentIndexedCache<String, List<Integer>> cache, Map<Integer, List<Integer>> updated) {
for (Map.Entry<Integer, List<Integer>> entry : updated.entrySet()) {
String key = String.format("key_%d", entry.getKey());
assertThat(cache.get(key), equalTo(entry.getValue()));
}
}
@Test
public void handlesUpdatesWhenBlockSizeIncreases() {
BTreePersistentIndexedCache<String, List<Integer>> cache =
new BTreePersistentIndexedCache<String, List<Integer>>(
tmpDirFile("listcache.bin"), stringSerializer,
new DefaultSerializer<List<Integer>>(), (short) 4, 100);
List<Integer> values = Arrays.asList(3, 2, 11, 5, 7, 1, 10, 8, 9, 4, 6, 0);
Map<Integer, List<Integer>> updated = new LinkedHashMap<Integer, List<Integer>>();
for (int i = 1; i < 10; i++) {
for (Integer value : values) {
String key = String.format("key_%d", value);
List<Integer> newValue = new ArrayList<Integer>(i);
for (int j = 0; j < i * 2; j++) {
newValue.add(j);
}
cache.put(key, newValue);
updated.put(value, newValue);
}
checkListEntries(cache, updated);
}
cache.reset();
checkListEntries(cache, updated);
cache.verify();
cache.close();
}
@Test
public void persistsAddedEntriesAfterReopen() {
createCache();
checkAdds(1, 2, 3, 4);
cache.reset();
checkAdds(5, 6, 7, 8);
verifyAndCloseCache();
}
@Test
public void persistsReplacedEntries() {
createCache();
cache.put("key_1", 1);
cache.put("key_2", 2);
cache.put("key_3", 3);
cache.put("key_4", 4);
cache.put("key_5", 5);
cache.put("key_1", 1);
cache.put("key_4", 12);
assertThat(cache.get("key_1"), equalTo(1));
assertThat(cache.get("key_2"), equalTo(2));
assertThat(cache.get("key_3"), equalTo(3));
assertThat(cache.get("key_4"), equalTo(12));
assertThat(cache.get("key_5"), equalTo(5));
cache.reset();
assertThat(cache.get("key_1"), equalTo(1));
assertThat(cache.get("key_2"), equalTo(2));
assertThat(cache.get("key_3"), equalTo(3));
assertThat(cache.get("key_4"), equalTo(12));
assertThat(cache.get("key_5"), equalTo(5));
verifyAndCloseCache();
}
@Test
public void reusesEmptySpaceWhenPuttingEntries() {
BTreePersistentIndexedCache<String, String> cache = new BTreePersistentIndexedCache<String, String>(cacheFile, stringSerializer, stringSerializer, (short) 4, 100);
long beforeLen = cacheFile.length();
if (beforeLen>0){
System.out.println(String.format("cache %s: %s", "key_new", cache.get("key_new")));
}
cache.put("key_1", "abcd");
cache.put("key_2", "abcd");
cache.put("key_3", "abcd");
cache.put("key_4", "abcd");
cache.put("key_5", "abcd");
long len = cacheFile.length();
assertTrue(len > 0L);
System.out.println(String.format("cache file size %d => %d", beforeLen, len));
cache.put("key_1", "1234");
assertThat(cacheFile.length(), equalTo(len));
cache.remove("key_1");
cache.put("key_new", "a1b2");
assertThat(cacheFile.length(), equalTo(len));
cache.put("key_new", "longer value assertThat(cacheFile.length(), equalTo(len))");
System.out.println(String.format("cache file size %d beforeLen %d", cacheFile.length(), len));
// assertTrue(cacheFile.length() > len);
len = cacheFile.length();
cache.put("key_1", "1234");
assertThat(cacheFile.length(), equalTo(len));
cache.close();
}
@Test
public void canHandleLargeNumberOfEntries() {
createCache();
int count = 2000;
List<Integer> values = new ArrayList<Integer>();
for (int i = 0; i < count; i++) {
values.add(i);
}
checkAddsAndRemoves(null, values);
long len = cacheFile.length();
checkAddsAndRemoves(Collections.reverseOrder(), values);
// need to make this better
assertTrue(cacheFile.length() < (long)(1.4 * len));
checkAdds(values);
// need to make this better
assertTrue(cacheFile.length() < (long) (1.4 * 1.4 * len));
cache.close();
}
@Test
public void persistsRemovalOfEntries() {
createCache();
checkAddsAndRemoves(1, 2, 3, 4, 5);
verifyAndCloseCache();
}
@Test
public void persistsRemovalOfEntriesInReverse() {
createCache();
checkAddsAndRemoves(Collections.<Integer>reverseOrder(), 1, 2, 3, 4, 5);
verifyAndCloseCache();
}
@Test
public void persistsRemovalOfEntriesOverMultipleIndexBlocks() {
createCache();
checkAddsAndRemoves(4, 12, 9, 1, 3, 10, 11, 7, 8, 2, 5, 6);
verifyAndCloseCache();
}
@Test
public void removalRedistributesRemainingEntriesWithLeftSibling() {
createCache();
// Ends up with: 1 2 3 -> 4 <- 5 6
checkAdds(1, 2, 5, 6, 4, 3);
cache.verify();
cache.remove("key_5");
verifyAndCloseCache();
}
@Test
public void removalMergesRemainingEntriesIntoLeftSibling() {
createCache();
// Ends up with: 1 2 -> 3 <- 4 5
checkAdds(1, 2, 4, 5, 3);
cache.verify();
cache.remove("key_4");
verifyAndCloseCache();
}
@Test
public void removalRedistributesRemainingEntriesWithRightSibling() {
createCache();
// Ends up with: 1 2 -> 3 <- 4 5 6
checkAdds(1, 2, 4, 5, 3, 6);
cache.verify();
cache.remove("key_2");
verifyAndCloseCache();
}
@Test
public void removalMergesRemainingEntriesIntoRightSibling() {
createCache();
// Ends up with: 1 2 -> 3 <- 4 5
checkAdds(1, 2, 4, 5, 3);
cache.verify();
cache.remove("key_2");
verifyAndCloseCache();
}
@Test
public void handlesOpeningATruncatedCacheFile() throws IOException {
BTreePersistentIndexedCache<String, Integer> cache = new BTreePersistentIndexedCache<String, Integer>(cacheFile, stringSerializer, integerSerializer);
assertNull(cache.get("key_1"));
cache.put("key_1", 99);
RandomAccessFile file = new RandomAccessFile(cacheFile, "rw");
file.setLength(file.length() - 10);
file.close();
cache.reset();
assertNull(cache.get("key_1"));
cache.verify();
cache.close();
}
@Test
public void canUseFileAsKey() {
BTreePersistentIndexedCache<File, Integer> cache = new BTreePersistentIndexedCache<File, Integer>(cacheFile, new DefaultSerializer<File>(), integerSerializer);
cache.put(new File("file"), 1);
cache.put(new File("dir/file"), 2);
cache.put(new File("File"), 3);
assertThat(cache.get(new File("file")), equalTo(1));
assertThat(cache.get(new File("dir/file")), equalTo(2));
assertThat(cache.get(new File("File")), equalTo(3));
cache.close();
}
@Test
public void handlesKeysWithSameHashCode() {
createCache();
String key1 = new String(new byte[]{2, 31});
String key2 = new String(new byte[]{1, 62});
cache.put(key1, 1);
cache.put(key2, 2);
assertThat(cache.get(key1), equalTo(1));
assertThat(cache.get(key2), equalTo(2));
cache.close();
}
private void checkAdds(Integer... values) {
checkAdds(Arrays.asList(values));
}
private Map<String, Integer> checkAdds(Iterable<Integer> values) {
Map<String, Integer> added = new LinkedHashMap<String, Integer>();
for (Integer value : values) {
String key = String.format("key_%d", value);
cache.put(key, value);
added.put(String.format("key_%d", value), value);
}
for (Map.Entry<String, Integer> entry : added.entrySet()) {
assertThat(cache.get(entry.getKey()), equalTo(entry.getValue()));
}
cache.reset();
for (Map.Entry<String, Integer> entry : added.entrySet()) {
assertThat(cache.get(entry.getKey()), equalTo(entry.getValue()));
}
return added;
}
private void checkUpdates(Integer... values) {
checkUpdates(Arrays.asList(values));
}
private Map<Integer, Integer> checkUpdates(Iterable<Integer> values) {
Map<Integer, Integer> updated = new LinkedHashMap<Integer, Integer>();
for (int i = 0; i < 10; i++) {
for (Integer value : values) {
String key = String.format("key_%d", value);
int newValue = value + (i * 100);
cache.put(key, newValue);
updated.put(value, newValue);
}
for (Map.Entry<Integer, Integer> entry : updated.entrySet()) {
String key = String.format("key_%d", entry.getKey());
assertThat(cache.get(key), equalTo(entry.getValue()));
}
}
cache.reset();
for (Map.Entry<Integer, Integer> entry : updated.entrySet()) {
String key = String.format("key_%d", entry.getKey());
assertThat(cache.get(key), equalTo(entry.getValue()));
}
return updated;
}
private void checkAddsAndRemoves(Integer... values) {
checkAddsAndRemoves(null, values);
}
private void checkAddsAndRemoves(Comparator<Integer> comparator, Integer... values) {
checkAddsAndRemoves(comparator, Arrays.asList(values));
}
private void checkAddsAndRemoves(Comparator<Integer> comparator, Collection<Integer> values) {
checkAdds(values);
List<Integer> deleteValues = new ArrayList<Integer>(values);
Collections.sort(deleteValues, comparator);
for (Integer value : deleteValues) {
String key = String.format("key_%d", value);
assertThat(cache.get(key), notNullValue());
cache.remove(key);
assertThat(cache.get(key), nullValue());
}
cache.reset();
cache.verify();
for (Integer value : deleteValues) {
String key = String.format("key_%d", value);
assertThat(cache.get(key), nullValue());
}
}
}

143
test/random_access/src/test/java/seaweedfs/file/MmapFileTest.java

@ -0,0 +1,143 @@
package seaweedfs.file;
import org.junit.Test;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.MappedByteBuffer;
import java.nio.channels.FileChannel;
public class MmapFileTest {
static File dir = new File("/Users/chris/tmp/mm/dev");
@Test
public void testMmap() {
try {
System.out.println("starting ...");
File f = new File(dir, "mmap_file.txt");
RandomAccessFile raf = new RandomAccessFile(f, "rw");
FileChannel fc = raf.getChannel();
MappedByteBuffer mbf = fc.map(FileChannel.MapMode.READ_ONLY, 0, fc.size());
fc.close();
raf.close();
FileOutputStream fos = new FileOutputStream(f);
fos.write("abcdefg".getBytes());
fos.close();
System.out.println("completed!");
} catch (Exception e) {
e.printStackTrace();
}
}
@Test
public void testBigMmap() throws IOException {
/*
// new file
I0817 09:48:02 25175 dir.go:147] create /dev/mmap_big.txt: OpenReadWrite+OpenCreate
I0817 09:48:02 25175 wfs.go:116] AcquireHandle /dev/mmap_big.txt uid=502 gid=20
I0817 09:48:02 25175 file.go:62] file Attr /dev/mmap_big.txt, open:1, size: 0
I0817 09:48:02 25175 meta_cache_subscribe.go:32] creating /dev/mmap_big.txt
//get channel
I0817 09:48:26 25175 file.go:62] file Attr /dev/mmap_big.txt, open:1, size: 0
I0817 09:48:32 25175 file.go:62] file Attr /dev/mmap_big.txt, open:1, size: 0
I0817 09:48:32 25175 wfs.go:116] AcquireHandle /dev/mmap_big.txt uid=0 gid=0
I0817 09:48:32 25175 filehandle.go:160] Release /dev/mmap_big.txt fh 14968871991130164560
//fileChannel.map
I0817 09:49:18 25175 file.go:62] file Attr /dev/mmap_big.txt, open:1, size: 0
I0817 09:49:18 25175 file.go:112] /dev/mmap_big.txt file setattr set size=262144 chunks=0
I0817 09:49:18 25175 file.go:62] file Attr /dev/mmap_big.txt, open:1, size: 262144
I0817 09:49:18 25175 file.go:62] file Attr /dev/mmap_big.txt, open:1, size: 262144
I0817 09:49:18 25175 file.go:62] file Attr /dev/mmap_big.txt, open:1, size: 262144
// buffer.put
I0817 09:49:49 25175 filehandle.go:57] /dev/mmap_big.txt read fh 14968871991130164560: [0,32768) size 32768 resp.Data len=0 cap=32768
I0817 09:49:49 25175 reader_at.go:113] zero2 [0,32768)
I0817 09:49:50 25175 file.go:62] file Attr /dev/mmap_big.txt, open:1, size: 262144
I0817 09:49:53 25175 file.go:233] /dev/mmap_big.txt fsync file Fsync [ID=0x4 Node=0xe Uid=0 Gid=0 Pid=0] Handle 0x2 Flags 1
//close
I0817 09:50:14 25175 file.go:62] file Attr /dev/mmap_big.txt, open:1, size: 262144
I0817 09:50:14 25175 dirty_page.go:130] saveToStorage /dev/mmap_big.txt 1,315b69812039e5 [0,4096) of 262144 bytes
I0817 09:50:14 25175 file.go:274] /dev/mmap_big.txt existing 0 chunks adds 1 more
I0817 09:50:14 25175 filehandle.go:218] /dev/mmap_big.txt set chunks: 1
I0817 09:50:14 25175 filehandle.go:220] /dev/mmap_big.txt chunks 0: 1,315b69812039e5 [0,4096)
I0817 09:50:14 25175 meta_cache_subscribe.go:23] deleting /dev/mmap_big.txt
I0817 09:50:14 25175 meta_cache_subscribe.go:32] creating /dev/mmap_big.txt
// end of test
I0817 09:50:41 25175 file.go:62] file Attr /dev/mmap_big.txt, open:1, size: 262144
I0817 09:50:41 25175 filehandle.go:160] Release /dev/mmap_big.txt fh 14968871991130164560
*/
// Create file object
File file = new File(dir, "mmap_big.txt");
try (RandomAccessFile randomAccessFile = new RandomAccessFile(file, "rw")) {
// Get file channel in read-write mode
FileChannel fileChannel = randomAccessFile.getChannel();
// Get direct byte buffer access using channel.map() operation
MappedByteBuffer buffer = fileChannel.map(FileChannel.MapMode.READ_WRITE, 0, 4096 * 8 * 8);
//Write the content using put methods
buffer.put("howtodoinjava.com".getBytes());
}
/*
> meta.cat /dev/mmap_big.txt
{
"name": "mmap_big.txt",
"isDirectory": false,
"chunks": [
{
"fileId": "1,315b69812039e5",
"offset": "0",
"size": "4096",
"mtime": "1597683014026365000",
"eTag": "985ab0ac",
"sourceFileId": "",
"fid": {
"volumeId": 1,
"fileKey": "3234665",
"cookie": 2166372837
},
"sourceFid": null,
"cipherKey": null,
"isCompressed": true,
"isChunkManifest": false
}
],
"attributes": {
"fileSize": "262144",
"mtime": "1597683014",
"fileMode": 420,
"uid": 502,
"gid": 20,
"crtime": "1597682882",
"mime": "application/octet-stream",
"replication": "",
"collection": "",
"ttlSec": 0,
"userName": "",
"groupName": [
],
"symlinkTarget": "",
"md5": null
},
"extended": {
}
}
*/
}
}

70
test/random_access/src/test/java/seaweedfs/file/RandomeAccessFileTest.java

@ -0,0 +1,70 @@
package seaweedfs.file;
import org.junit.Assert;
import org.junit.Test;
import java.io.File;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.ByteBuffer;
import java.util.Random;
public class RandomeAccessFileTest {
@Test
public void testRandomWriteAndRead() throws IOException {
File f = new File(MmapFileTest.dir, "mmap_file.txt");
RandomAccessFile af = new RandomAccessFile(f, "rw");
af.setLength(0);
af.close();
Random r = new Random();
int maxLength = 5000;
byte[] data = new byte[maxLength];
byte[] readData = new byte[maxLength];
for (int i = 4096; i < maxLength; i++) {
RandomAccessFile raf = new RandomAccessFile(f, "rw");
long fileSize = raf.length();
raf.readFully(readData, 0, (int)fileSize);
for (int x=0;x<fileSize;x++){
Assert.assertEquals(data[x], readData[x]);
}
int start = r.nextInt(i);
int stop = r.nextInt(i);
if (start > stop) {
int t = stop;
stop = start;
start = t;
}
if (stop > fileSize) {
fileSize = stop;
raf.setLength(fileSize);
}
randomize(r, data, start, stop);
raf.seek(start);
raf.write(data, start, stop-start);
raf.close();
}
}
private static void randomize(Random r, byte[] bytes, int start, int stop) {
for (int i = start; i < stop; i++) {
int rnd = r.nextInt();
bytes[i] = (byte) rnd;
}
}
}

21
test/s3/basic/basic_test.go

@ -61,7 +61,7 @@ func TestCreateBucket(t *testing.T) {
}
func TestListBuckets(t *testing.T) {
func TestPutObject(t *testing.T) {
input := &s3.PutObjectInput{
ACL: aws.String("authenticated-read"),
@ -89,7 +89,7 @@ func TestListBuckets(t *testing.T) {
}
func TestPutObject(t *testing.T) {
func TestListBucket(t *testing.T) {
result, err := svc.ListBuckets(nil)
if err != nil {
@ -105,6 +105,23 @@ func TestPutObject(t *testing.T) {
}
func TestListObjectV2(t *testing.T) {
listObj, err := svc.ListObjectsV2(&s3.ListObjectsV2Input{
Bucket: aws.String(Bucket),
Prefix: aws.String("foo"),
Delimiter: aws.String("/"),
})
if err != nil {
exitErrorf("Unable to list objects, %v", err)
}
for _, content := range listObj.Contents {
fmt.Println(aws.StringValue(content.Key))
}
fmt.Printf("list: %s\n", listObj)
}
func exitErrorf(msg string, args ...interface{}) {
fmt.Fprintf(os.Stderr, msg+"\n", args...)
os.Exit(1)

175
test/s3/multipart/aws_upload.go

@ -0,0 +1,175 @@
package main
// copied from https://github.com/apoorvam/aws-s3-multipart-upload
import (
"bytes"
"flag"
"fmt"
"net/http"
"os"
"github.com/aws/aws-sdk-go/aws"
"github.com/aws/aws-sdk-go/aws/awserr"
"github.com/aws/aws-sdk-go/aws/credentials"
"github.com/aws/aws-sdk-go/aws/session"
"github.com/aws/aws-sdk-go/service/s3"
)
const (
maxPartSize = int64(5 * 1024 * 1024)
maxRetries = 3
awsAccessKeyID = "Your access key"
awsSecretAccessKey = "Your secret key"
awsBucketRegion = "S3 bucket region"
awsBucketName = "newBucket"
)
var (
filename = flag.String("f", "", "the file name")
)
func main() {
flag.Parse()
creds := credentials.NewStaticCredentials(awsAccessKeyID, awsSecretAccessKey, "")
_, err := creds.Get()
if err != nil {
fmt.Printf("bad credentials: %s", err)
}
cfg := aws.NewConfig().WithRegion(awsBucketRegion).WithCredentials(creds).WithDisableSSL(true).WithEndpoint("localhost:8333")
svc := s3.New(session.New(), cfg)
file, err := os.Open(*filename)
if err != nil {
fmt.Printf("err opening file: %s", err)
return
}
defer file.Close()
fileInfo, _ := file.Stat()
size := fileInfo.Size()
buffer := make([]byte, size)
file.Read(buffer)
fileType := http.DetectContentType(buffer)
path := "/media/" + file.Name()
input := &s3.CreateMultipartUploadInput{
Bucket: aws.String(awsBucketName),
Key: aws.String(path),
ContentType: aws.String(fileType),
}
resp, err := svc.CreateMultipartUpload(input)
if err != nil {
fmt.Println(err.Error())
return
}
fmt.Println("Created multipart upload request")
var curr, partLength int64
var remaining = size
var completedParts []*s3.CompletedPart
partNumber := 1
for curr = 0; remaining != 0; curr += partLength {
if remaining < maxPartSize {
partLength = remaining
} else {
partLength = maxPartSize
}
completedPart, err := uploadPart(svc, resp, buffer[curr:curr+partLength], partNumber)
if err != nil {
fmt.Println(err.Error())
err := abortMultipartUpload(svc, resp)
if err != nil {
fmt.Println(err.Error())
}
return
}
remaining -= partLength
partNumber++
completedParts = append(completedParts, completedPart)
}
// list parts
parts, err := svc.ListParts(&s3.ListPartsInput{
Bucket: input.Bucket,
Key: input.Key,
MaxParts: nil,
PartNumberMarker: nil,
RequestPayer: nil,
UploadId: resp.UploadId,
})
if err != nil {
fmt.Println(err.Error())
return
}
fmt.Printf("list parts: %d\n", len(parts.Parts))
for i, part := range parts.Parts {
fmt.Printf("part %d: %v\n", i, part)
}
completeResponse, err := completeMultipartUpload(svc, resp, completedParts)
if err != nil {
fmt.Println(err.Error())
return
}
fmt.Printf("Successfully uploaded file: %s\n", completeResponse.String())
}
func completeMultipartUpload(svc *s3.S3, resp *s3.CreateMultipartUploadOutput, completedParts []*s3.CompletedPart) (*s3.CompleteMultipartUploadOutput, error) {
completeInput := &s3.CompleteMultipartUploadInput{
Bucket: resp.Bucket,
Key: resp.Key,
UploadId: resp.UploadId,
MultipartUpload: &s3.CompletedMultipartUpload{
Parts: completedParts,
},
}
return svc.CompleteMultipartUpload(completeInput)
}
func uploadPart(svc *s3.S3, resp *s3.CreateMultipartUploadOutput, fileBytes []byte, partNumber int) (*s3.CompletedPart, error) {
tryNum := 1
partInput := &s3.UploadPartInput{
Body: bytes.NewReader(fileBytes),
Bucket: resp.Bucket,
Key: resp.Key,
PartNumber: aws.Int64(int64(partNumber)),
UploadId: resp.UploadId,
ContentLength: aws.Int64(int64(len(fileBytes))),
}
for tryNum <= maxRetries {
uploadResult, err := svc.UploadPart(partInput)
if err != nil {
if tryNum == maxRetries {
if aerr, ok := err.(awserr.Error); ok {
return nil, aerr
}
return nil, err
}
fmt.Printf("Retrying to upload part #%v\n", partNumber)
tryNum++
} else {
fmt.Printf("Uploaded part #%v\n", partNumber)
return &s3.CompletedPart{
ETag: uploadResult.ETag,
PartNumber: aws.Int64(int64(partNumber)),
}, nil
}
}
return nil, nil
}
func abortMultipartUpload(svc *s3.S3, resp *s3.CreateMultipartUploadOutput) error {
fmt.Println("Aborting multipart upload for UploadId#" + *resp.UploadId)
abortInput := &s3.AbortMultipartUploadInput{
Bucket: resp.Bucket,
Key: resp.Key,
UploadId: resp.UploadId,
}
_, err := svc.AbortMultipartUpload(abortInput)
return err
}

10
unmaintained/diff_volume_servers/diff_volume_servers.go

@ -118,13 +118,15 @@ const (
type needleState struct {
state uint8
size uint32
size types.Size
}
func getVolumeFiles(v uint32, addr string) (map[types.NeedleId]needleState, int64, error) {
var idxFile *bytes.Reader
err := operation.WithVolumeServerClient(addr, grpcDialOption, func(vs volume_server_pb.VolumeServerClient) error {
copyFileClient, err := vs.CopyFile(context.Background(), &volume_server_pb.CopyFileRequest{
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
copyFileClient, err := vs.CopyFile(ctx, &volume_server_pb.CopyFileRequest{
VolumeId: v,
Ext: ".idx",
CompactionRevision: math.MaxUint32,
@ -154,8 +156,8 @@ func getVolumeFiles(v uint32, addr string) (map[types.NeedleId]needleState, int6
var maxOffset int64
files := map[types.NeedleId]needleState{}
err = idx.WalkIndexFile(idxFile, func(key types.NeedleId, offset types.Offset, size uint32) error {
if offset.IsZero() || size == types.TombstoneFileSize {
err = idx.WalkIndexFile(idxFile, func(key types.NeedleId, offset types.Offset, size types.Size) error {
if offset.IsZero() || size.IsDeleted() {
files[key] = needleState{
state: stateDeleted,
size: size,

4
unmaintained/fix_dat/fix_dat.go

@ -98,7 +98,7 @@ func iterateEntries(datBackend backend.BackendStorageFile, idxFile *os.File, vis
// parse index file entry
key := util.BytesToUint64(bytes[0:8])
offsetFromIndex := util.BytesToUint32(bytes[8:12])
sizeFromIndex := util.BytesToUint32(bytes[12:16])
sizeFromIndex := types.BytesToSize(bytes[12:16])
count, _ = idxFile.ReadAt(bytes, readerOffset)
readerOffset += int64(count)
@ -123,7 +123,7 @@ func iterateEntries(datBackend backend.BackendStorageFile, idxFile *os.File, vis
}
}()
if n.Size <= n.DataSize {
if n.Size <= types.Size(n.DataSize) {
continue
}
visitNeedle(n, offset)

73
unmaintained/s3/presigned_put/presigned_put.go

@ -0,0 +1,73 @@
package main
import (
"github.com/aws/aws-sdk-go/aws"
"github.com/aws/aws-sdk-go/aws/session"
"github.com/aws/aws-sdk-go/service/s3"
"encoding/base64"
"fmt"
"crypto/md5"
"strings"
"time"
"net/http"
)
// Generates a pre-signed PUT URL for an S3 bucket on a SeaweedFS S3 endpoint
// and uploads a payload with it.
//
// Usage:
// go run presigned_put.go
// For this example to work, the domainName is needed
// weed s3 -domainName=localhost
func main() {
h := md5.New()
content := strings.NewReader(stringContent)
content.WriteTo(h)
// Initialize a session in us-west-2 that the SDK will use to load
// credentials from the shared credentials file ~/.aws/credentials.
sess, err := session.NewSession(&aws.Config{
Region: aws.String("us-east-1"),
Endpoint: aws.String("http://localhost:8333"),
})
// Create S3 service client
svc := s3.New(sess)
putRequest, output := svc.PutObjectRequest(&s3.PutObjectInput{
Bucket: aws.String("dev"),
Key: aws.String("testKey"),
})
fmt.Printf("output: %+v\n", output)
md5s := base64.StdEncoding.EncodeToString(h.Sum(nil))
putRequest.HTTPRequest.Header.Set("Content-MD5", md5s)
url, err := putRequest.Presign(15 * time.Minute)
if err != nil {
fmt.Println("error presigning request", err)
return
}
fmt.Println(url)
req, err := http.NewRequest("PUT", url, strings.NewReader(stringContent))
req.Header.Set("Content-MD5", md5s)
if err != nil {
fmt.Println("error creating request", url)
return
}
resp, err := http.DefaultClient.Do(req)
if err != nil {
fmt.Printf("error put request: %v\n", err)
return
}
fmt.Printf("response: %+v\n", resp)
}
var stringContent = `Generate a Pre-Signed URL for an Amazon S3 PUT Operation with a Specific Payload
You can generate a pre-signed URL for a PUT operation that checks whether users upload the correct content. When the SDK pre-signs a request, it computes the checksum of the request body and generates an MD5 checksum that is included in the pre-signed URL. Users must upload the same content that produces the same MD5 checksum generated by the SDK; otherwise, the operation fails. This is not the Content-MD5, but the signature. To enforce Content-MD5, simply add the header to the request.
The following example adds a Body field to generate a pre-signed PUT operation that requires a specific payload to be uploaded by users.
`

83
unmaintained/see_dat/see_dat_gzip.go

@ -1,83 +0,0 @@
package main
import (
"bytes"
"compress/gzip"
"crypto/md5"
"flag"
"io"
"io/ioutil"
"net/http"
"time"
"github.com/chrislusf/seaweedfs/weed/glog"
"github.com/chrislusf/seaweedfs/weed/storage"
"github.com/chrislusf/seaweedfs/weed/storage/needle"
"github.com/chrislusf/seaweedfs/weed/storage/super_block"
"github.com/chrislusf/seaweedfs/weed/util"
)
type VolumeFileScanner4SeeDat struct {
version needle.Version
}
func (scanner *VolumeFileScanner4SeeDat) VisitSuperBlock(superBlock super_block.SuperBlock) error {
scanner.version = superBlock.Version
return nil
}
func (scanner *VolumeFileScanner4SeeDat) ReadNeedleBody() bool {
return true
}
var (
files = int64(0)
filebytes = int64(0)
diffbytes = int64(0)
)
func Compresssion(data []byte) float64 {
if len(data) <= 128 {
return 100.0
}
compressed, _ := util.GzipData(data[0:128])
return float64(len(compressed)*10) / 1280.0
}
func (scanner *VolumeFileScanner4SeeDat) VisitNeedle(n *needle.Needle, offset int64, needleHeader, needleBody []byte) error {
t := time.Unix(int64(n.AppendAtNs)/int64(time.Second), int64(n.AppendAtNs)%int64(time.Second))
glog.V(0).Info("----------------------------------------------------------------------------------")
glog.V(0).Infof("%d,%s%x offset %d size %d(%s) cookie %x appendedAt %v hasmime[%t] mime[%s] (len: %d)",
*volumeId, n.Id, n.Cookie, offset, n.Size, util.BytesToHumanReadable(uint64(n.Size)), n.Cookie, t, n.HasMime(), string(n.Mime), len(n.Mime))
r, err := gzip.NewReader(bytes.NewReader(n.Data))
if err == nil {
buf := bytes.Buffer{}
h := md5.New()
c, _ := io.Copy(&buf, r)
d := buf.Bytes()
io.Copy(h, bytes.NewReader(d))
diff := (int64(n.DataSize) - int64(c))
diffbytes += diff
glog.V(0).Infof("was gzip! stored_size: %d orig_size: %d diff: %d(%d) mime:%s compression-of-128: %.2f md5: %x", n.DataSize, c, diff, diffbytes, http.DetectContentType(d), Compresssion(d), h.Sum(nil))
} else {
glog.V(0).Infof("no gzip!")
}
return nil
}
var (
_ = ioutil.ReadAll
volumePath = flag.String("dir", "/tmp", "data directory to store files")
volumeCollection = flag.String("collection", "", "the volume collection name")
volumeId = flag.Int("volumeId", -1, "a volume id. The volume should already exist in the dir. The volume index file should not exist.")
)
func main() {
flag.Parse()
vid := needle.VolumeId(*volumeId)
glog.V(0).Info("Starting")
scanner := &VolumeFileScanner4SeeDat{}
err := storage.ScanVolumeFile(*volumePath, *volumeCollection, vid, storage.NeedleMapInMemory, scanner)
if err != nil {
glog.Fatalf("Reading Volume File [ERROR] %s\n", err)
}
}

2
unmaintained/see_idx/see_idx.go

@ -36,7 +36,7 @@ func main() {
}
defer indexFile.Close()
idx.WalkIndexFile(indexFile, func(key types.NeedleId, offset types.Offset, size uint32) error {
idx.WalkIndexFile(indexFile, func(key types.NeedleId, offset types.Offset, size types.Size) error {
fmt.Printf("key:%v offset:%v size:%v(%v)\n", key, offset, size, util.BytesToHumanReadable(uint64(size)))
return nil
})

4
unmaintained/see_log_entry/see_log_entry.go

@ -9,13 +9,13 @@ import (
"github.com/golang/protobuf/proto"
"github.com/chrislusf/seaweedfs/weed/filer2"
"github.com/chrislusf/seaweedfs/weed/filer"
"github.com/chrislusf/seaweedfs/weed/pb/filer_pb"
"github.com/chrislusf/seaweedfs/weed/util"
)
var (
logdataFile = flag.String("logdata", "", "log data file saved under "+ filer2.SystemLogDir)
logdataFile = flag.String("logdata", "", "log data file saved under "+ filer.SystemLogDir)
)
func main() {

27
weed/Makefile

@ -0,0 +1,27 @@
BINARY = weed
SOURCE_DIR = .
all: debug_mount
.PHONY : clean debug_mount
clean:
go clean $(SOURCE_DIR)
rm -f $(BINARY)
debug_shell:
go build -gcflags="all=-N -l"
dlv --listen=:2345 --headless=true --api-version=2 --accept-multiclient exec weed -- shell
debug_mount:
go build -gcflags="all=-N -l"
dlv --listen=:2345 --headless=true --api-version=2 --accept-multiclient exec weed -- mount -dir=~/tmp/mm
debug_server:
go build -gcflags="all=-N -l"
dlv --listen=:2345 --headless=true --api-version=2 --accept-multiclient exec weed -- server -dir=/Volumes/mobile_disk/99 -filer -volume.port=8343 -s3 -volume.max=0
debug_volume:
go build -gcflags="all=-N -l"
dlv --listen=:2345 --headless=true --api-version=2 --accept-multiclient exec weed -- volume -dir=/Volumes/mobile_disk/100 -port 8564 -max=30 -preStopSeconds=2

1
weed/command/command.go

@ -16,6 +16,7 @@ var Commands = []*Command{
cmdExport,
cmdFiler,
cmdFilerReplicate,
cmdFilerSynchronize,
cmdFix,
cmdMaster,
cmdMount,

14
weed/command/download.go

@ -4,6 +4,7 @@ import (
"fmt"
"io"
"io/ioutil"
"net/http"
"os"
"path"
"strings"
@ -59,7 +60,7 @@ func downloadToFile(server, fileId, saveDir string) error {
if err != nil {
return err
}
defer rc.Close()
defer util.CloseResponse(rc)
if filename == "" {
filename = fileId
}
@ -71,12 +72,11 @@ func downloadToFile(server, fileId, saveDir string) error {
}
f, err := os.OpenFile(path.Join(saveDir, filename), os.O_WRONLY|os.O_CREATE|os.O_TRUNC, os.ModePerm)
if err != nil {
io.Copy(ioutil.Discard, rc)
return err
}
defer f.Close()
if isFileList {
content, err := ioutil.ReadAll(rc)
content, err := ioutil.ReadAll(rc.Body)
if err != nil {
return err
}
@ -95,7 +95,7 @@ func downloadToFile(server, fileId, saveDir string) error {
}
}
} else {
if _, err = io.Copy(f, rc); err != nil {
if _, err = io.Copy(f, rc.Body); err != nil {
return err
}
@ -108,12 +108,12 @@ func fetchContent(server string, fileId string) (filename string, content []byte
if lookupError != nil {
return "", nil, lookupError
}
var rc io.ReadCloser
var rc *http.Response
if filename, _, rc, e = util.DownloadFile(fileUrl); e != nil {
return "", nil, e
}
content, e = ioutil.ReadAll(rc)
rc.Close()
defer util.CloseResponse(rc)
content, e = ioutil.ReadAll(rc.Body)
return
}

24
weed/command/export.go

@ -23,7 +23,7 @@ import (
)
const (
defaultFnFormat = `{{.Mime}}/{{.Id}}:{{.Name}}`
defaultFnFormat = `{{.Id}}_{{.Name}}{{.Ext}}`
timeFormat = "2006-01-02T15:04:05"
)
@ -56,7 +56,7 @@ func init() {
var (
output = cmdExport.Flag.String("o", "", "output tar file name, must ends with .tar, or just a \"-\" for stdout")
format = cmdExport.Flag.String("fileNameFormat", defaultFnFormat, "filename formatted with {{.Mime}} {{.Id}} {{.Name}} {{.Ext}}")
format = cmdExport.Flag.String("fileNameFormat", defaultFnFormat, "filename formatted with {{.Id}} {{.Name}} {{.Ext}}")
newer = cmdExport.Flag.String("newer", "", "export only files newer than this time, default is all files. Must be specified in RFC3339 without timezone, e.g. 2006-01-02T15:04:05")
showDeleted = cmdExport.Flag.Bool("deleted", false, "export deleted files. only applies if -o is not specified")
limit = cmdExport.Flag.Int("limit", 0, "only show first n entries if specified")
@ -70,13 +70,13 @@ var (
localLocation, _ = time.LoadLocation("Local")
)
func printNeedle(vid needle.VolumeId, n *needle.Needle, version needle.Version, deleted bool) {
func printNeedle(vid needle.VolumeId, n *needle.Needle, version needle.Version, deleted bool, offset int64, onDiskSize int64) {
key := needle.NewFileIdFromNeedle(vid, n).String()
size := n.DataSize
size := int32(n.DataSize)
if version == needle.Version1 {
size = n.Size
size = int32(n.Size)
}
fmt.Printf("%s\t%s\t%d\t%t\t%s\t%s\t%s\t%t\n",
fmt.Printf("%s\t%s\t%d\t%t\t%s\t%s\t%s\t%t\t%d\t%d\n",
key,
n.Name,
size,
@ -85,6 +85,8 @@ func printNeedle(vid needle.VolumeId, n *needle.Needle, version needle.Version,
n.LastModifiedString(),
n.Ttl.String(),
deleted,
offset,
offset+onDiskSize,
)
}
@ -111,7 +113,7 @@ func (scanner *VolumeFileScanner4Export) VisitNeedle(n *needle.Needle, offset in
nv, ok := needleMap.Get(n.Id)
glog.V(3).Infof("key %d offset %d size %d disk_size %d compressed %v ok %v nv %+v",
n.Id, offset, n.Size, n.DiskSize(scanner.version), n.IsCompressed(), ok, nv)
if ok && nv.Size > 0 && nv.Size != types.TombstoneFileSize && nv.Offset.ToAcutalOffset() == offset {
if *showDeleted && n.Size > 0 || ok && nv.Size.IsValid() && nv.Offset.ToAcutalOffset() == offset {
if newerThanUnix >= 0 && n.HasLastModifiedDate() && n.LastModified < uint64(newerThanUnix) {
glog.V(3).Infof("Skipping this file, as it's old enough: LastModified %d vs %d",
n.LastModified, newerThanUnix)
@ -124,17 +126,17 @@ func (scanner *VolumeFileScanner4Export) VisitNeedle(n *needle.Needle, offset in
if tarOutputFile != nil {
return writeFile(vid, n)
} else {
printNeedle(vid, n, scanner.version, false)
printNeedle(vid, n, scanner.version, false, offset, n.DiskSize(scanner.version))
return nil
}
}
if !ok {
if *showDeleted && tarOutputFile == nil {
if n.DataSize > 0 {
printNeedle(vid, n, scanner.version, true)
printNeedle(vid, n, scanner.version, true, offset, n.DiskSize(scanner.version))
} else {
n.Name = []byte("*tombstone")
printNeedle(vid, n, scanner.version, true)
printNeedle(vid, n, scanner.version, true, offset, n.DiskSize(scanner.version))
}
}
glog.V(2).Infof("This seems deleted %d size %d", n.Id, n.Size)
@ -208,7 +210,7 @@ func runExport(cmd *Command, args []string) bool {
}
if tarOutputFile == nil {
fmt.Printf("key\tname\tsize\tgzip\tmime\tmodified\tttl\tdeleted\n")
fmt.Printf("key\tname\tsize\tgzip\tmime\tmodified\tttl\tdeleted\tstart\tstop\n")
}
err = storage.ScanVolumeFile(util.ResolvePath(*export.dir), *export.collection, vid, storage.NeedleMapInMemory, volumeFileScanner)

31
weed/command/filer.go

@ -1,6 +1,7 @@
package command
import (
"fmt"
"net/http"
"strconv"
"strings"
@ -13,11 +14,14 @@ import (
"github.com/chrislusf/seaweedfs/weed/pb/filer_pb"
"github.com/chrislusf/seaweedfs/weed/security"
"github.com/chrislusf/seaweedfs/weed/server"
stats_collect "github.com/chrislusf/seaweedfs/weed/stats"
"github.com/chrislusf/seaweedfs/weed/util"
)
var (
f FilerOptions
f FilerOptions
filerStartS3 *bool
filerS3Options S3Options
)
type FilerOptions struct {
@ -36,6 +40,7 @@ type FilerOptions struct {
disableHttp *bool
cipher *bool
peers *string
metricsHttpPort *int
// default leveldb directory, used in "weed server" mode
defaultLevelDbDirectory *string
@ -49,7 +54,7 @@ func init() {
f.bindIp = cmdFiler.Flag.String("ip.bind", "0.0.0.0", "ip address to bind to")
f.port = cmdFiler.Flag.Int("port", 8888, "filer server http listen port")
f.publicPort = cmdFiler.Flag.Int("port.readonly", 0, "readonly port opened to public")
f.defaultReplicaPlacement = cmdFiler.Flag.String("defaultReplicaPlacement", "000", "default replication type if not specified")
f.defaultReplicaPlacement = cmdFiler.Flag.String("defaultReplicaPlacement", "", "default replication type. If not specified, use master setting.")
f.disableDirListing = cmdFiler.Flag.Bool("disableDirListing", false, "turn off directory listing")
f.maxMB = cmdFiler.Flag.Int("maxMB", 32, "split files larger than the limit")
f.dirListingLimit = cmdFiler.Flag.Int("dirListLimit", 100000, "limit sub dir listing size")
@ -57,6 +62,15 @@ func init() {
f.disableHttp = cmdFiler.Flag.Bool("disableHttp", false, "disable http requests, only gRPC operations are allowed")
f.cipher = cmdFiler.Flag.Bool("encryptVolumeData", false, "encrypt data on volume servers")
f.peers = cmdFiler.Flag.String("peers", "", "all filers sharing the same filer store in comma separated ip:port list")
f.metricsHttpPort = cmdFiler.Flag.Int("metricsPort", 0, "Prometheus metrics listen port")
// start s3 on filer
filerStartS3 = cmdFiler.Flag.Bool("s3", false, "whether to start S3 gateway")
filerS3Options.port = cmdFiler.Flag.Int("s3.port", 8333, "s3 server http listen port")
filerS3Options.domainName = cmdFiler.Flag.String("s3.domainName", "", "suffix of the host name, {bucket}.{domainName}")
filerS3Options.tlsPrivateKey = cmdFiler.Flag.String("s3.key.file", "", "path to the TLS private key file")
filerS3Options.tlsCertificate = cmdFiler.Flag.String("s3.cert.file", "", "path to the TLS certificate file")
filerS3Options.config = cmdFiler.Flag.String("s3.config", "", "path to the config file")
}
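For illustration only, the embedded S3 gateway can then be enabled on the filer with the flags registered above (8333 is the default shown for -s3.port):

weed filer -s3 -s3.port=8333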
var cmdFiler = &Command{
@ -84,6 +98,17 @@ func runFiler(cmd *Command, args []string) bool {
util.LoadConfiguration("security", false)
go stats_collect.StartMetricsServer(*f.metricsHttpPort)
if *filerStartS3 {
filerAddress := fmt.Sprintf("%s:%d", *f.ip, *f.port)
filerS3Options.filer = &filerAddress
go func() {
time.Sleep(2 * time.Second)
filerS3Options.startS3Server()
}()
}
f.startFiler()
return true
@ -152,7 +177,7 @@ func (fo *FilerOptions) startFiler() {
// starting grpc server
grpcPort := *fo.port + 10000
grpcL, err := util.NewListener(":"+strconv.Itoa(grpcPort), 0)
grpcL, err := util.NewListener(*fo.bindIp+":"+strconv.Itoa(grpcPort), 0)
if err != nil {
glog.Fatalf("failed to listen on grpc port %d: %v", grpcPort, err)
}

2
weed/command/filer_copy.go

@ -72,7 +72,7 @@ var cmdCopy = &Command{
If "maxMB" is set to a positive number, files larger than it would be split into chunks.
`,
`,
}
func runCopy(cmd *Command, args []string) bool {

337
weed/command/filer_sync.go

@ -0,0 +1,337 @@
package command
import (
"context"
"errors"
"fmt"
"github.com/chrislusf/seaweedfs/weed/glog"
"github.com/chrislusf/seaweedfs/weed/pb"
"github.com/chrislusf/seaweedfs/weed/pb/filer_pb"
"github.com/chrislusf/seaweedfs/weed/replication"
"github.com/chrislusf/seaweedfs/weed/replication/sink/filersink"
"github.com/chrislusf/seaweedfs/weed/replication/source"
"github.com/chrislusf/seaweedfs/weed/security"
"github.com/chrislusf/seaweedfs/weed/util"
"github.com/chrislusf/seaweedfs/weed/util/grace"
"google.golang.org/grpc"
"io"
"strings"
"time"
)
type SyncOptions struct {
isActivePassive *bool
filerA *string
filerB *string
aPath *string
bPath *string
aReplication *string
bReplication *string
aCollection *string
bCollection *string
aTtlSec *int
bTtlSec *int
aDebug *bool
bDebug *bool
}
var (
syncOptions SyncOptions
syncCpuProfile *string
syncMemProfile *string
)
func init() {
cmdFilerSynchronize.Run = runFilerSynchronize // break init cycle
syncOptions.isActivePassive = cmdFilerSynchronize.Flag.Bool("isActivePassive", false, "one directional follow if true")
syncOptions.filerA = cmdFilerSynchronize.Flag.String("a", "", "filer A in one SeaweedFS cluster")
syncOptions.filerB = cmdFilerSynchronize.Flag.String("b", "", "filer B in the other SeaweedFS cluster")
syncOptions.aPath = cmdFilerSynchronize.Flag.String("a.path", "/", "directory to sync on filer A")
syncOptions.bPath = cmdFilerSynchronize.Flag.String("b.path", "/", "directory to sync on filer B")
syncOptions.aReplication = cmdFilerSynchronize.Flag.String("a.replication", "", "replication on filer A")
syncOptions.bReplication = cmdFilerSynchronize.Flag.String("b.replication", "", "replication on filer B")
syncOptions.aCollection = cmdFilerSynchronize.Flag.String("a.collection", "", "collection on filer A")
syncOptions.bCollection = cmdFilerSynchronize.Flag.String("b.collection", "", "collection on filer B")
syncOptions.aTtlSec = cmdFilerSynchronize.Flag.Int("a.ttlSec", 0, "ttl in seconds on filer A")
syncOptions.bTtlSec = cmdFilerSynchronize.Flag.Int("b.ttlSec", 0, "ttl in seconds on filer B")
syncOptions.aDebug = cmdFilerSynchronize.Flag.Bool("a.debug", false, "debug mode to print out filer A received files")
syncOptions.bDebug = cmdFilerSynchronize.Flag.Bool("b.debug", false, "debug mode to print out filer B received files")
syncCpuProfile = cmdFilerSynchronize.Flag.String("cpuprofile", "", "cpu profile output file")
syncMemProfile = cmdFilerSynchronize.Flag.String("memprofile", "", "memory profile output file")
}
var cmdFilerSynchronize = &Command{
UsageLine: "filer.sync -a=<oneFilerHost>:<oneFilerPort> -b=<otherFilerHost>:<otherFilerPort>",
Short: "continuously synchronize between two active-active or active-passive SeaweedFS clusters",
Long: `continuously synchronize file changes between two active-active or active-passive filers
filer.sync listens for filer notifications. If any file is updated, it fetches the updated content
and writes it to the other destination. It differs from filer.replicate in that:
* filer.sync only works between two filers.
* filer.sync does not need any special message queue setup.
* filer.sync supports both active-active and active-passive modes.
If restarted, the synchronization will resume from the previous checkpoints, persisted every minute.
A fresh sync will start from the earliest metadata logs.
`,
}
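As a usage sketch based on the UsageLine and flags above (host names are placeholders), an active-active sync between two clusters would be started with:

weed filer.sync -a=filer1.example.com:8888 -b=filer2.example.com:8888

Adding -isActivePassive restricts it to one-directional following from A to B.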
func runFilerSynchronize(cmd *Command, args []string) bool {
grpcDialOption := security.LoadClientTLS(util.GetViper(), "grpc.client")
grace.SetupProfiling(*syncCpuProfile, *syncMemProfile)
go func() {
for {
err := doSubscribeFilerMetaChanges(grpcDialOption, *syncOptions.filerA, *syncOptions.aPath, *syncOptions.filerB,
*syncOptions.bPath, *syncOptions.bReplication, *syncOptions.bCollection, *syncOptions.bTtlSec, *syncOptions.bDebug)
if err != nil {
glog.Errorf("sync from %s to %s: %v", *syncOptions.filerA, *syncOptions.filerB, err)
time.Sleep(1747 * time.Millisecond)
}
}
}()
if !*syncOptions.isActivePassive {
go func() {
for {
err := doSubscribeFilerMetaChanges(grpcDialOption, *syncOptions.filerB, *syncOptions.bPath, *syncOptions.filerA,
*syncOptions.aPath, *syncOptions.aReplication, *syncOptions.aCollection, *syncOptions.aTtlSec, *syncOptions.aDebug)
if err != nil {
glog.Errorf("sync from %s to %s: %v", *syncOptions.filerB, *syncOptions.filerA, err)
time.Sleep(2147 * time.Millisecond)
}
}
}()
}
select {}
return true
}
func doSubscribeFilerMetaChanges(grpcDialOption grpc.DialOption, sourceFiler, sourcePath, targetFiler, targetPath string,
replicationStr, collection string, ttlSec int, debug bool) error {
// read source filer signature
sourceFilerSignature, sourceErr := replication.ReadFilerSignature(grpcDialOption, sourceFiler)
if sourceErr != nil {
return sourceErr
}
// read target filer signature
targetFilerSignature, targetErr := replication.ReadFilerSignature(grpcDialOption, targetFiler)
if targetErr != nil {
return targetErr
}
// if first time, start from now
// if has previously synced, resume from that point of time
sourceFilerOffsetTsNs, err := readSyncOffset(grpcDialOption, targetFiler, sourceFilerSignature)
if err != nil {
return err
}
glog.V(0).Infof("start sync %s(%d) => %s(%d) from %v(%d)", sourceFiler, sourceFilerSignature, targetFiler, targetFilerSignature, time.Unix(0, sourceFilerOffsetTsNs), sourceFilerOffsetTsNs)
// create filer sink
filerSource := &source.FilerSource{}
filerSource.DoInitialize(pb.ServerToGrpcAddress(sourceFiler), sourcePath)
filerSink := &filersink.FilerSink{}
filerSink.DoInitialize(pb.ServerToGrpcAddress(targetFiler), targetPath, replicationStr, collection, ttlSec, grpcDialOption)
filerSink.SetSourceFiler(filerSource)
processEventFn := func(resp *filer_pb.SubscribeMetadataResponse) error {
message := resp.EventNotification
var sourceOldKey, sourceNewKey util.FullPath
if message.OldEntry != nil {
sourceOldKey = util.FullPath(resp.Directory).Child(message.OldEntry.Name)
}
if message.NewEntry != nil {
sourceNewKey = util.FullPath(message.NewParentPath).Child(message.NewEntry.Name)
}
for _, sig := range message.Signatures {
if sig == targetFilerSignature && targetFilerSignature != 0 {
fmt.Printf("%s skipping %s change to %v\n", targetFiler, sourceFiler, message)
return nil
}
}
if debug {
fmt.Printf("%s check %s change %s,%s sig %v, target sig: %v\n", targetFiler, sourceFiler, sourceOldKey, sourceNewKey, message.Signatures, targetFilerSignature)
}
if !strings.HasPrefix(resp.Directory, sourcePath) {
return nil
}
// handle deletions
if message.OldEntry != nil && message.NewEntry == nil {
if !strings.HasPrefix(string(sourceOldKey), sourcePath) {
return nil
}
key := util.Join(targetPath, string(sourceOldKey)[len(sourcePath):])
return filerSink.DeleteEntry(key, message.OldEntry.IsDirectory, message.DeleteChunks, message.Signatures)
}
// handle new entries
if message.OldEntry == nil && message.NewEntry != nil {
if !strings.HasPrefix(string(sourceNewKey), sourcePath) {
return nil
}
key := util.Join(targetPath, string(sourceNewKey)[len(sourcePath):])
return filerSink.CreateEntry(key, message.NewEntry, message.Signatures)
}
// this is something special?
if message.OldEntry == nil && message.NewEntry == nil {
return nil
}
// handle updates
if strings.HasPrefix(string(sourceOldKey), sourcePath) {
// old key is in the watched directory
if strings.HasPrefix(string(sourceNewKey), sourcePath) {
// new key is also in the watched directory
oldKey := util.Join(targetPath, string(sourceOldKey)[len(sourcePath):])
message.NewParentPath = util.Join(targetPath, message.NewParentPath[len(sourcePath):])
foundExisting, err := filerSink.UpdateEntry(string(oldKey), message.OldEntry, message.NewParentPath, message.NewEntry, message.DeleteChunks, message.Signatures)
if foundExisting {
return err
}
// not able to find old entry
if err = filerSink.DeleteEntry(string(oldKey), message.OldEntry.IsDirectory, false, message.Signatures); err != nil {
return fmt.Errorf("delete old entry %v: %v", oldKey, err)
}
// create the new entry
newKey := util.Join(targetPath, string(sourceNewKey)[len(sourcePath):])
return filerSink.CreateEntry(newKey, message.NewEntry, message.Signatures)
} else {
// new key is outside of the watched directory
key := util.Join(targetPath, string(sourceOldKey)[len(sourcePath):])
return filerSink.DeleteEntry(key, message.OldEntry.IsDirectory, message.DeleteChunks, message.Signatures)
}
} else {
// old key is outside of the watched directory
if strings.HasPrefix(string(sourceNewKey), sourcePath) {
// new key is in the watched directory
key := util.Join(targetPath, string(sourceNewKey)[len(sourcePath):])
return filerSink.CreateEntry(key, message.NewEntry, message.Signatures)
} else {
// new key is also outside of the watched directory
// skip
}
}
return nil
}
return pb.WithFilerClient(sourceFiler, grpcDialOption, func(client filer_pb.SeaweedFilerClient) error {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
stream, err := client.SubscribeMetadata(ctx, &filer_pb.SubscribeMetadataRequest{
ClientName: "syncTo_" + targetFiler,
PathPrefix: sourcePath,
SinceNs: sourceFilerOffsetTsNs,
Signature: targetFilerSignature,
})
if err != nil {
return fmt.Errorf("listen: %v", err)
}
var counter int64
var lastWriteTime time.Time
for {
resp, listenErr := stream.Recv()
if listenErr == io.EOF {
return nil
}
if listenErr != nil {
return listenErr
}
if err := processEventFn(resp); err != nil {
return err
}
counter++
if lastWriteTime.Add(3 * time.Second).Before(time.Now()) {
glog.V(0).Infof("sync %s => %s progressed to %v %0.2f/sec", sourceFiler, targetFiler, time.Unix(0, resp.TsNs), float64(counter)/float64(3))
counter = 0
lastWriteTime = time.Now()
if err := writeSyncOffset(grpcDialOption, targetFiler, sourceFilerSignature, resp.TsNs); err != nil {
return err
}
}
}
})
}
const (
SyncKeyPrefix = "sync."
)
func readSyncOffset(grpcDialOption grpc.DialOption, filer string, filerSignature int32) (lastOffsetTsNs int64, readErr error) {
readErr = pb.WithFilerClient(filer, grpcDialOption, func(client filer_pb.SeaweedFilerClient) error {
syncKey := []byte(SyncKeyPrefix + "____")
util.Uint32toBytes(syncKey[len(SyncKeyPrefix):len(SyncKeyPrefix)+4], uint32(filerSignature))
resp, err := client.KvGet(context.Background(), &filer_pb.KvGetRequest{Key: syncKey})
if err != nil {
return err
}
if len(resp.Error) != 0 {
return errors.New(resp.Error)
}
if len(resp.Value) < 8 {
return nil
}
lastOffsetTsNs = int64(util.BytesToUint64(resp.Value))
return nil
})
return
}
func writeSyncOffset(grpcDialOption grpc.DialOption, filer string, filerSignature int32, offsetTsNs int64) error {
return pb.WithFilerClient(filer, grpcDialOption, func(client filer_pb.SeaweedFilerClient) error {
syncKey := []byte(SyncKeyPrefix + "____")
util.Uint32toBytes(syncKey[len(SyncKeyPrefix):len(SyncKeyPrefix)+4], uint32(filerSignature))
valueBuf := make([]byte, 8)
util.Uint64toBytes(valueBuf, uint64(offsetTsNs))
resp, err := client.KvPut(context.Background(), &filer_pb.KvPutRequest{
Key: syncKey,
Value: valueBuf,
})
if err != nil {
return err
}
if len(resp.Error) != 0 {
return errors.New(resp.Error)
}
return nil
})
}

2
weed/command/fix.go

@ -48,7 +48,7 @@ func (scanner *VolumeFileScanner4Fix) ReadNeedleBody() bool {
func (scanner *VolumeFileScanner4Fix) VisitNeedle(n *needle.Needle, offset int64, needleHeader, needleBody []byte) error {
glog.V(2).Infof("key %d offset %d size %d disk_size %d compressed %v", n.Id, offset, n.Size, n.DiskSize(scanner.version), n.IsCompressed())
if n.Size > 0 && n.Size != types.TombstoneFileSize {
if n.Size.IsValid() {
pe := scanner.nm.Set(n.Id, types.ToOffset(offset), n.Size)
glog.V(2).Infof("saved %d with error %v", n.Size, pe)
} else {

2
weed/command/master.go

@ -57,7 +57,7 @@ func init() {
m.garbageThreshold = cmdMaster.Flag.Float64("garbageThreshold", 0.3, "threshold to vacuum and reclaim spaces")
m.whiteList = cmdMaster.Flag.String("whiteList", "", "comma-separated IP addresses having write permission. No limit if empty.")
m.disableHttp = cmdMaster.Flag.Bool("disableHttp", false, "disable http requests, only gRPC operations are allowed.")
m.metricsAddress = cmdMaster.Flag.String("metrics.address", "", "Prometheus gateway address")
m.metricsAddress = cmdMaster.Flag.String("metrics.address", "", "Prometheus gateway address <host>:<port>")
m.metricsIntervalSec = cmdMaster.Flag.Int("metrics.intervalSeconds", 15, "Prometheus push interval in seconds")
}

4
weed/command/mount.go

@ -20,6 +20,8 @@ type MountOptions struct {
umaskString *string
nonempty *bool
outsideContainerClusterMode *bool
uidMap *string
gidMap *string
}
var (
@ -47,6 +49,8 @@ func init() {
mountCpuProfile = cmdMount.Flag.String("cpuprofile", "", "cpu profile output file")
mountMemProfile = cmdMount.Flag.String("memprofile", "", "memory profile output file")
mountOptions.outsideContainerClusterMode = cmdMount.Flag.Bool("outsideContainerClusterMode", false, "allows other users to access the file system")
mountOptions.uidMap = cmdMount.Flag.String("map.uid", "", "map local uid to uid on filer, comma-separated <local_uid>:<filer_uid>")
mountOptions.gidMap = cmdMount.Flag.String("map.gid", "", "map local gid to gid on filer, comma-separated <local_gid>:<filer_gid>")
}
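A hedged example of the new id-mapping flags, with a placeholder mount path and ids, following the <local_uid>:<filer_uid> format from the help text above:

weed mount -dir=/mnt/weed -map.uid=1000:1001 -map.gid=1000:1001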
var cmdMount = &Command{

34
weed/command/mount_std.go

@ -5,8 +5,8 @@ package command
import (
"context"
"fmt"
"github.com/chrislusf/seaweedfs/weed/filesys/meta_cache"
"os"
"os/user"
"path"
"runtime"
"strconv"
@ -86,33 +86,17 @@ func RunMount(option *MountOptions, umask os.FileMode) bool {
fuse.Unmount(dir)
uid, gid := uint32(0), uint32(0)
// detect mount folder mode
if *option.dirAutoCreate {
os.MkdirAll(dir, 0755)
os.MkdirAll(dir, os.FileMode(0777)&^umask)
}
mountMode := os.ModeDir | 0755
fileInfo, err := os.Stat(dir)
if err == nil {
mountMode = os.ModeDir | fileInfo.Mode()
uid, gid = util.GetFileUidGid(fileInfo)
fmt.Printf("mount point owner uid=%d gid=%d mode=%s\n", uid, gid, fileInfo.Mode())
} else {
fmt.Printf("can not stat %s\n", dir)
return false
}
if uid == 0 {
if u, err := user.Current(); err == nil {
if parsedId, pe := strconv.ParseUint(u.Uid, 10, 32); pe == nil {
uid = uint32(parsedId)
}
if parsedId, pe := strconv.ParseUint(u.Gid, 10, 32); pe == nil {
gid = uint32(parsedId)
}
fmt.Printf("current uid=%d gid=%d\n", uid, gid)
}
// mapping uid, gid
uidGidMapper, err := meta_cache.NewUidGidMapper(*option.uidMap, *option.gidMap)
if err != nil {
fmt.Printf("failed to parse %s %s: %v\n", *option.uidMap, *option.gidMap, err)
return false
}
// Ensure target mount point availability
@ -166,14 +150,12 @@ func RunMount(option *MountOptions, umask os.FileMode) bool {
CacheSizeMB: *option.cacheSizeMB,
DataCenter: *option.dataCenter,
EntryCacheTtl: 3 * time.Second,
MountUid: uid,
MountGid: gid,
MountMode: mountMode,
MountCtime: fileInfo.ModTime(),
MountMtime: time.Now(),
Umask: umask,
OutsideContainerClusterMode: *mountOptions.outsideContainerClusterMode,
Cipher: cipher,
UidGidMapper: uidGidMapper,
})
// mount

24
weed/command/s3.go

@ -14,6 +14,7 @@ import (
"github.com/chrislusf/seaweedfs/weed/glog"
"github.com/chrislusf/seaweedfs/weed/s3api"
stats_collect "github.com/chrislusf/seaweedfs/weed/stats"
"github.com/chrislusf/seaweedfs/weed/util"
)
@ -22,12 +23,13 @@ var (
)
type S3Options struct {
filer *string
port *int
config *string
domainName *string
tlsPrivateKey *string
tlsCertificate *string
filer *string
port *int
config *string
domainName *string
tlsPrivateKey *string
tlsCertificate *string
metricsHttpPort *int
}
func init() {
@ -38,6 +40,7 @@ func init() {
s3StandaloneOptions.config = cmdS3.Flag.String("config", "", "path to the config file")
s3StandaloneOptions.tlsPrivateKey = cmdS3.Flag.String("key.file", "", "path to the TLS private key file")
s3StandaloneOptions.tlsCertificate = cmdS3.Flag.String("cert.file", "", "path to the TLS certificate file")
s3StandaloneOptions.metricsHttpPort = cmdS3.Flag.Int("metricsPort", 0, "Prometheus metrics listen port")
}
var cmdS3 = &Command{
@ -112,6 +115,8 @@ func runS3(cmd *Command, args []string) bool {
util.LoadConfiguration("security", false)
go stats_collect.StartMetricsServer(*s3StandaloneOptions.metricsHttpPort)
return s3StandaloneOptions.startS3Server()
}
@ -128,6 +133,10 @@ func (s3opt *S3Options) startS3Server() bool {
grpcDialOption := security.LoadClientTLS(util.GetViper(), "grpc.client")
// metrics read from the filer
var metricsAddress string
var metricsIntervalSec int
for {
err = pb.WithGrpcFilerClient(filerGrpcAddress, grpcDialOption, func(client filer_pb.SeaweedFilerClient) error {
resp, err := client.GetFilerConfiguration(context.Background(), &filer_pb.GetFilerConfigurationRequest{})
@ -135,6 +144,7 @@ func (s3opt *S3Options) startS3Server() bool {
return fmt.Errorf("get filer %s configuration: %v", filerGrpcAddress, err)
}
filerBucketsPath = resp.DirBuckets
metricsAddress, metricsIntervalSec = resp.MetricsAddress, int(resp.MetricsIntervalSec)
glog.V(0).Infof("S3 read filer buckets dir: %s", filerBucketsPath)
return nil
})
@ -147,6 +157,8 @@ func (s3opt *S3Options) startS3Server() bool {
}
}
go stats_collect.LoopPushingMetric("s3", stats_collect.SourceName(uint32(*s3opt.port)), metricsAddress, metricsIntervalSec)
router := mux.NewRouter().SkipClean(true)
_, s3ApiServer_err := s3api.NewS3ApiServer(router, &s3api.S3ApiServerOption{

18
weed/command/scaffold.go

@ -140,6 +140,8 @@ keyspace="seaweedfs"
hosts=[
"localhost:9042",
]
username=""
password=""
[redis2]
enabled = false
@ -173,6 +175,20 @@ enabled = false
uri = "mongodb://localhost:27017"
option_pool_size = 0
database = "seaweedfs"
[elastic7]
enabled = false
servers = [
"http://localhost1:9200",
"http://localhost2:9200",
"http://localhost3:9200",
]
username = ""
password = ""
sniff_enabled = false
healthcheck_enabled = false
# increasing this value is recommended; be sure the value configured in Elasticsearch is greater than or equal to this one
index.max_result_window = 10000
`
NOTIFICATION_TOML_EXAMPLE = `
@ -377,7 +393,7 @@ default = "localhost:8888" # used by maintenance scripts if the scripts needs
[master.sequencer]
type = "memory" # Choose [memory|etcd] type for storing the file id sequence
type = "raft" # Choose [raft|etcd] type for storing the file id sequence
# when sequencer.type = etcd, set listen client urls of etcd cluster that store file id sequence
# example : http://127.0.0.1:2379,http://127.0.0.1:2389
sequencer_etcd_urls = "http://127.0.0.1:2379"

15
weed/command/server.go

@ -2,6 +2,7 @@ package command
import (
"fmt"
stats_collect "github.com/chrislusf/seaweedfs/weed/stats"
"os"
"runtime"
"runtime/pprof"
@ -56,6 +57,7 @@ var (
volumeDataFolders = cmdServer.Flag.String("dir", os.TempDir(), "directories to store data files. dir[,dir]...")
volumeMaxDataVolumeCounts = cmdServer.Flag.String("volume.max", "8", "maximum numbers of volumes, count[,count]... If set to zero, the limit will be auto configured.")
volumeMinFreeSpacePercent = cmdServer.Flag.String("volume.minFreeSpacePercent", "1", "minimum free disk space (default to 1%). Low disk space will mark all volumes as ReadOnly.")
serverMetricsHttpPort = cmdServer.Flag.Int("metricsPort", 0, "Prometheus metrics listen port")
// pulseSeconds = cmdServer.Flag.Int("pulseSeconds", 5, "number of seconds between heartbeats")
isStartingFiler = cmdServer.Flag.Bool("filer", false, "whether to start filer")
@ -83,7 +85,7 @@ func init() {
filerOptions.collection = cmdServer.Flag.String("filer.collection", "", "all data will be stored in this collection")
filerOptions.port = cmdServer.Flag.Int("filer.port", 8888, "filer server http listen port")
filerOptions.publicPort = cmdServer.Flag.Int("filer.port.public", 0, "filer server public http listen port")
filerOptions.defaultReplicaPlacement = cmdServer.Flag.String("filer.defaultReplicaPlacement", "", "Default replication type if not specified during runtime.")
filerOptions.defaultReplicaPlacement = cmdServer.Flag.String("filer.defaultReplicaPlacement", "", "default replication type. If not specified, use master setting.")
filerOptions.disableDirListing = cmdServer.Flag.Bool("filer.disableDirListing", false, "turn off directory listing")
filerOptions.maxMB = cmdServer.Flag.Int("filer.maxMB", 32, "split files larger than the limit")
filerOptions.dirListingLimit = cmdServer.Flag.Int("filer.dirListLimit", 1000, "limit sub dir listing size")
@ -96,9 +98,10 @@ func init() {
serverOptions.v.fixJpgOrientation = cmdServer.Flag.Bool("volume.images.fix.orientation", false, "Adjust jpg orientation when uploading.")
serverOptions.v.readRedirect = cmdServer.Flag.Bool("volume.read.redirect", true, "Redirect moved or non-local volumes.")
serverOptions.v.compactionMBPerSecond = cmdServer.Flag.Int("volume.compactionMBps", 0, "limit compaction speed in mega bytes per second")
serverOptions.v.fileSizeLimitMB = cmdServer.Flag.Int("volume.fileSizeLimitMB", 256, "limit file size to avoid out of memory")
serverOptions.v.fileSizeLimitMB = cmdServer.Flag.Int("volume.fileSizeLimitMB", 1024, "limit file size to avoid out of memory")
serverOptions.v.publicUrl = cmdServer.Flag.String("volume.publicUrl", "", "publicly accessible address")
serverOptions.v.pprof = &False
serverOptions.v.preStopSeconds = cmdServer.Flag.Int("volume.preStopSeconds", 10, "number of seconds to wait after stopping heartbeats before stopping the volume server")
serverOptions.v.pprof = cmdServer.Flag.Bool("volume.pprof", false, "enable pprof http handlers. precludes --memprofile and --cpuprofile")
s3Options.port = cmdServer.Flag.Int("s3.port", 8333, "s3 server http listen port")
s3Options.domainName = cmdServer.Flag.String("s3.domainName", "", "suffix of the host name, {bucket}.{domainName}")
@ -135,6 +138,7 @@ func runServer(cmd *Command, args []string) bool {
peers := strings.Join(peerList, ",")
masterOptions.peers = &peers
// ip address
masterOptions.ip = serverIp
masterOptions.ipBind = serverBindIp
filerOptions.masters = &peers
@ -161,11 +165,8 @@ func runServer(cmd *Command, args []string) bool {
s3Options.filer = &filerAddress
msgBrokerOptions.filer = &filerAddress
if *filerOptions.defaultReplicaPlacement == "" {
*filerOptions.defaultReplicaPlacement = *masterOptions.defaultReplication
}
runtime.GOMAXPROCS(runtime.NumCPU())
go stats_collect.StartMetricsServer(*serverMetricsHttpPort)
folders := strings.Split(*volumeDataFolders, ",")

73
weed/command/volume.go

@ -25,6 +25,7 @@ import (
"github.com/chrislusf/seaweedfs/weed/glog"
"github.com/chrislusf/seaweedfs/weed/pb/volume_server_pb"
"github.com/chrislusf/seaweedfs/weed/server"
stats_collect "github.com/chrislusf/seaweedfs/weed/stats"
"github.com/chrislusf/seaweedfs/weed/storage"
"github.com/chrislusf/seaweedfs/weed/util"
)
@ -55,6 +56,8 @@ type VolumeServerOptions struct {
fileSizeLimitMB *int
minFreeSpacePercents []float32
pprof *bool
preStopSeconds *int
metricsHttpPort *int
// pulseSeconds *int
}
@ -66,6 +69,7 @@ func init() {
v.publicUrl = cmdVolume.Flag.String("publicUrl", "", "Publicly accessible address")
v.bindIp = cmdVolume.Flag.String("ip.bind", "0.0.0.0", "ip address to bind to")
v.masters = cmdVolume.Flag.String("mserver", "localhost:9333", "comma-separated master servers")
v.preStopSeconds = cmdVolume.Flag.Int("preStopSeconds", 10, "number of seconds to wait after stopping heartbeats before stopping the volume server")
// v.pulseSeconds = cmdVolume.Flag.Int("pulseSeconds", 5, "number of seconds between heartbeats, must be smaller than or equal to the master's setting")
v.idleConnectionTimeout = cmdVolume.Flag.Int("idleTimeout", 30, "connection idle seconds")
v.dataCenter = cmdVolume.Flag.String("dataCenter", "", "current volume server's data center name")
@ -76,8 +80,9 @@ func init() {
v.cpuProfile = cmdVolume.Flag.String("cpuprofile", "", "cpu profile output file")
v.memProfile = cmdVolume.Flag.String("memprofile", "", "memory profile output file")
v.compactionMBPerSecond = cmdVolume.Flag.Int("compactionMBps", 0, "limit background compaction or copying speed in mega bytes per second")
v.fileSizeLimitMB = cmdVolume.Flag.Int("fileSizeLimitMB", 256, "limit file size to avoid out of memory")
v.fileSizeLimitMB = cmdVolume.Flag.Int("fileSizeLimitMB", 1024, "limit file size to avoid out of memory")
v.pprof = cmdVolume.Flag.Bool("pprof", false, "enable pprof http handlers. precludes --memprofile and --cpuprofile")
v.metricsHttpPort = cmdVolume.Flag.Int("metricsPort", 0, "Prometheus metrics listen port")
}
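As a sketch, exposing the new Prometheus endpoint on a volume server only requires the flag added above (the port shown is a placeholder; 0 keeps it disabled):

weed volume -mserver=localhost:9333 -metricsPort=9325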
var cmdVolume = &Command{
@ -107,6 +112,8 @@ func runVolume(cmd *Command, args []string) bool {
grace.SetupProfiling(*v.cpuProfile, *v.memProfile)
}
go stats_collect.StartMetricsServer(*v.metricsHttpPort)
v.startVolumeServer(*volumeFolders, *maxVolumeCounts, *volumeWhiteListOption, *minFreeSpacePercent)
return true
@ -206,7 +213,6 @@ func (v VolumeServerOptions) startVolumeServer(volumeFolders, maxVolumeCounts, v
*v.compactionMBPerSecond,
*v.fileSizeLimitMB,
)
// starting grpc server
grpcS := v.startGrpcService(volumeServer)
@ -222,47 +228,48 @@ func (v VolumeServerOptions) startVolumeServer(volumeFolders, maxVolumeCounts, v
// starting the cluster http server
clusterHttpServer := v.startClusterHttpService(volumeMux)
stopChain := make(chan struct{})
stopChan := make(chan bool)
grace.OnInterrupt(func() {
fmt.Println("volume server has be killed")
var startTime time.Time
// firstly, stop the public http service to prevent from receiving new user request
if nil != publicHttpDown {
startTime = time.Now()
if err := publicHttpDown.Stop(); err != nil {
glog.Warningf("stop the public http server failed, %v", err)
}
delta := time.Now().Sub(startTime).Nanoseconds() / 1e6
glog.V(0).Infof("stop public http server, elapsed %dms", delta)
}
startTime = time.Now()
if err := clusterHttpServer.Stop(); err != nil {
glog.Warningf("stop the cluster http server failed, %v", err)
// Stop heartbeats
if !volumeServer.StopHeartbeat() {
glog.V(0).Infof("stop send heartbeat and wait %d seconds until shutdown ...", *v.preStopSeconds)
time.Sleep(time.Duration(*v.preStopSeconds) * time.Second)
}
delta := time.Now().Sub(startTime).Nanoseconds() / 1e6
glog.V(0).Infof("graceful stop cluster http server, elapsed [%d]", delta)
startTime = time.Now()
grpcS.GracefulStop()
delta = time.Now().Sub(startTime).Nanoseconds() / 1e6
glog.V(0).Infof("graceful stop gRPC, elapsed [%d]", delta)
shutdown(publicHttpDown, clusterHttpServer, grpcS, volumeServer)
stopChan <- true
})
startTime = time.Now()
volumeServer.Shutdown()
delta = time.Now().Sub(startTime).Nanoseconds() / 1e6
glog.V(0).Infof("stop volume server, elapsed [%d]", delta)
select {
case <-stopChan:
}
pprof.StopCPUProfile()
}
close(stopChain) // notify exit
})
func shutdown(publicHttpDown httpdown.Server, clusterHttpServer httpdown.Server, grpcS *grpc.Server, volumeServer *weed_server.VolumeServer) {
select {
case <-stopChain:
// firstly, stop the public http service to prevent from receiving new user request
if nil != publicHttpDown {
glog.V(0).Infof("stop public http server ... ")
if err := publicHttpDown.Stop(); err != nil {
glog.Warningf("stop the public http server failed, %v", err)
}
}
glog.V(0).Infof("graceful stop cluster http server ... ")
if err := clusterHttpServer.Stop(); err != nil {
glog.Warningf("stop the cluster http server failed, %v", err)
}
glog.Warningf("the volume server exit.")
glog.V(0).Infof("graceful stop gRPC ...")
grpcS.GracefulStop()
volumeServer.Shutdown()
pprof.StopCPUProfile()
}
// check whether configure the public port

60
weed/command/watch.go

@ -4,6 +4,8 @@ import (
"context"
"fmt"
"io"
"path/filepath"
"strings"
"time"
"github.com/chrislusf/seaweedfs/weed/pb"
@ -17,7 +19,7 @@ func init() {
}
var cmdWatch = &Command{
UsageLine: "watch <wip> [-filer=localhost:8888] [-target=/]",
UsageLine: "watch [-filer=localhost:8888] [-target=/]",
Short: "see recent changes on a filer",
Long: `See recent changes on a filer.
@ -25,18 +27,61 @@ var cmdWatch = &Command{
}
var (
watchFiler = cmdWatch.Flag.String("filer", "localhost:8888", "filer hostname:port")
watchTarget = cmdWatch.Flag.String("pathPrefix", "/", "path to a folder or file, or common prefix for the folders or files on filer")
watchStart = cmdWatch.Flag.Duration("timeAgo", 0, "start time before now. \"300ms\", \"1.5h\" or \"2h45m\". Valid time units are \"ns\", \"us\" (or \"µs\"), \"ms\", \"s\", \"m\", \"h\"")
watchFiler = cmdWatch.Flag.String("filer", "localhost:8888", "filer hostname:port")
watchTarget = cmdWatch.Flag.String("pathPrefix", "/", "path to a folder or file, or common prefix for the folders or files on filer")
watchStart = cmdWatch.Flag.Duration("timeAgo", 0, "start time before now. \"300ms\", \"1.5h\" or \"2h45m\". Valid time units are \"ns\", \"us\" (or \"µs\"), \"ms\", \"s\", \"m\", \"h\"")
watchPattern = cmdWatch.Flag.String("pattern", "", "full path or just filename pattern, ex: \"/home/?opher\", \"*.pdf\", see https://golang.org/pkg/path/filepath/#Match ")
)
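Based on the flag help above, a hedged example of watching only PDF files anywhere under the root:

weed watch -filer=localhost:8888 -pathPrefix=/ -pattern="*.pdf"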
func runWatch(cmd *Command, args []string) bool {
grpcDialOption := security.LoadClientTLS(util.GetViper(), "grpc.client")
var filterFunc func(dir, fname string) bool
if *watchPattern != "" {
if strings.Contains(*watchPattern, "/") {
println("watch path pattern", *watchPattern)
filterFunc = func(dir, fname string) bool {
matched, err := filepath.Match(*watchPattern, dir+"/"+fname)
if err != nil {
fmt.Printf("error: %v", err)
}
return matched
}
} else {
println("watch file pattern", *watchPattern)
filterFunc = func(dir, fname string) bool {
matched, err := filepath.Match(*watchPattern, fname)
if err != nil {
fmt.Printf("error: %v", err)
}
return matched
}
}
}
shouldPrint := func(resp *filer_pb.SubscribeMetadataResponse) bool {
if filterFunc == nil {
return true
}
if resp.EventNotification.OldEntry == nil && resp.EventNotification.NewEntry == nil {
return false
}
if resp.EventNotification.OldEntry != nil && filterFunc(resp.Directory, resp.EventNotification.OldEntry.Name) {
return true
}
if resp.EventNotification.NewEntry != nil && filterFunc(resp.EventNotification.NewParentPath, resp.EventNotification.NewEntry.Name) {
return true
}
return false
}
watchErr := pb.WithFilerClient(*watchFiler, grpcDialOption, func(client filer_pb.SeaweedFilerClient) error {
stream, err := client.SubscribeMetadata(context.Background(), &filer_pb.SubscribeMetadataRequest{
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
stream, err := client.SubscribeMetadata(ctx, &filer_pb.SubscribeMetadataRequest{
ClientName: "watch",
PathPrefix: *watchTarget,
SinceNs: time.Now().Add(-*watchStart).UnixNano(),
@ -53,7 +98,10 @@ func runWatch(cmd *Command, args []string) bool {
if listenErr != nil {
return listenErr
}
fmt.Printf("events: %+v\n", resp.EventNotification)
if !shouldPrint(resp) {
continue
}
fmt.Printf("dir:%s %+v\n", resp.Directory, resp.EventNotification)
}
})

54
weed/filer2/abstract_sql/abstract_sql_store.go → weed/filer/abstract_sql/abstract_sql_store.go

@ -4,11 +4,11 @@ import (
"context"
"database/sql"
"fmt"
"github.com/chrislusf/seaweedfs/weed/filer2"
"github.com/chrislusf/seaweedfs/weed/filer"
"github.com/chrislusf/seaweedfs/weed/glog"
"github.com/chrislusf/seaweedfs/weed/pb/filer_pb"
"github.com/chrislusf/seaweedfs/weed/util"
"strings"
)
type AbstractSqlStore struct {
@ -59,7 +59,7 @@ func (store *AbstractSqlStore) getTxOrDB(ctx context.Context) TxOrDB {
return store.DB
}
func (store *AbstractSqlStore) InsertEntry(ctx context.Context, entry *filer2.Entry) (err error) {
func (store *AbstractSqlStore) InsertEntry(ctx context.Context, entry *filer.Entry) (err error) {
dir, name := entry.FullPath.DirAndName()
meta, err := entry.EncodeAttributesAndChunks()
@ -67,19 +67,36 @@ func (store *AbstractSqlStore) InsertEntry(ctx context.Context, entry *filer2.En
return fmt.Errorf("encode %s: %s", entry.FullPath, err)
}
if len(entry.Chunks) > 50 {
meta = util.MaybeGzipData(meta)
}
res, err := store.getTxOrDB(ctx).ExecContext(ctx, store.SqlInsert, util.HashStringToLong(dir), name, dir, meta)
if err == nil {
return
}
if !strings.Contains(strings.ToLower(err.Error()), "duplicate") {
return fmt.Errorf("kv insert: %s", err)
}
// now the insert failed possibly due to duplication constraints
glog.V(1).Infof("insert %s falls back to update: %v", entry.FullPath, err)
res, err = store.getTxOrDB(ctx).ExecContext(ctx, store.SqlUpdate, meta, util.HashStringToLong(dir), name, dir)
if err != nil {
return fmt.Errorf("insert %s: %s", entry.FullPath, err)
return fmt.Errorf("upsert %s: %s", entry.FullPath, err)
}
_, err = res.RowsAffected()
if err != nil {
return fmt.Errorf("insert %s but no rows affected: %s", entry.FullPath, err)
return fmt.Errorf("upsert %s but no rows affected: %s", entry.FullPath, err)
}
return nil
}
func (store *AbstractSqlStore) UpdateEntry(ctx context.Context, entry *filer2.Entry) (err error) {
func (store *AbstractSqlStore) UpdateEntry(ctx context.Context, entry *filer.Entry) (err error) {
dir, name := entry.FullPath.DirAndName()
meta, err := entry.EncodeAttributesAndChunks()
@ -99,19 +116,23 @@ func (store *AbstractSqlStore) UpdateEntry(ctx context.Context, entry *filer2.En
return nil
}
func (store *AbstractSqlStore) FindEntry(ctx context.Context, fullpath util.FullPath) (*filer2.Entry, error) {
func (store *AbstractSqlStore) FindEntry(ctx context.Context, fullpath util.FullPath) (*filer.Entry, error) {
dir, name := fullpath.DirAndName()
row := store.getTxOrDB(ctx).QueryRowContext(ctx, store.SqlFind, util.HashStringToLong(dir), name, dir)
var data []byte
if err := row.Scan(&data); err != nil {
return nil, filer_pb.ErrNotFound
if err == sql.ErrNoRows {
return nil, filer_pb.ErrNotFound
}
return nil, fmt.Errorf("find %s: %v", fullpath, err)
}
entry := &filer2.Entry{
entry := &filer.Entry{
FullPath: fullpath,
}
if err := entry.DecodeAttributesAndChunks(data); err != nil {
if err := entry.DecodeAttributesAndChunks(util.MaybeDecompressData(data)); err != nil {
return entry, fmt.Errorf("decode %s : %v", entry.FullPath, err)
}
@ -150,14 +171,13 @@ func (store *AbstractSqlStore) DeleteFolderChildren(ctx context.Context, fullpat
return nil
}
func (store *AbstractSqlStore) ListDirectoryEntries(ctx context.Context, fullpath util.FullPath, startFileName string, inclusive bool, limit int) (entries []*filer2.Entry, err error) {
func (store *AbstractSqlStore) ListDirectoryPrefixedEntries(ctx context.Context, fullpath util.FullPath, startFileName string, inclusive bool, limit int, prefix string) (entries []*filer.Entry, err error) {
sqlText := store.SqlListExclusive
if inclusive {
sqlText = store.SqlListInclusive
}
rows, err := store.getTxOrDB(ctx).QueryContext(ctx, sqlText, util.HashStringToLong(string(fullpath)), startFileName, string(fullpath), limit)
rows, err := store.getTxOrDB(ctx).QueryContext(ctx, sqlText, util.HashStringToLong(string(fullpath)), startFileName, string(fullpath), prefix+"%", limit)
if err != nil {
return nil, fmt.Errorf("list %s : %v", fullpath, err)
}
@ -171,10 +191,10 @@ func (store *AbstractSqlStore) ListDirectoryEntries(ctx context.Context, fullpat
return nil, fmt.Errorf("scan %s: %v", fullpath, err)
}
entry := &filer2.Entry{
entry := &filer.Entry{
FullPath: util.NewFullPath(string(fullpath), name),
}
if err = entry.DecodeAttributesAndChunks(data); err != nil {
if err = entry.DecodeAttributesAndChunks(util.MaybeDecompressData(data)); err != nil {
glog.V(0).Infof("scan decode %s : %v", entry.FullPath, err)
return nil, fmt.Errorf("scan decode %s : %v", entry.FullPath, err)
}
@ -185,6 +205,10 @@ func (store *AbstractSqlStore) ListDirectoryEntries(ctx context.Context, fullpat
return entries, nil
}
func (store *AbstractSqlStore) ListDirectoryEntries(ctx context.Context, fullpath util.FullPath, startFileName string, inclusive bool, limit int) (entries []*filer.Entry, err error) {
return store.ListDirectoryPrefixedEntries(ctx, fullpath, startFileName, inclusive, limit, "")
}
func (store *AbstractSqlStore) Shutdown() {
store.DB.Close()
}

87
weed/filer/abstract_sql/abstract_sql_store_kv.go

@ -0,0 +1,87 @@
package abstract_sql
import (
"context"
"database/sql"
"fmt"
"strings"
"github.com/chrislusf/seaweedfs/weed/filer"
"github.com/chrislusf/seaweedfs/weed/glog"
"github.com/chrislusf/seaweedfs/weed/util"
)
func (store *AbstractSqlStore) KvPut(ctx context.Context, key []byte, value []byte) (err error) {
dirStr, dirHash, name := genDirAndName(key)
res, err := store.getTxOrDB(ctx).ExecContext(ctx, store.SqlInsert, dirHash, name, dirStr, value)
if err != nil {
if !strings.Contains(strings.ToLower(err.Error()), "duplicate") {
return fmt.Errorf("kv insert: %s", err)
}
}
// now the insert failed possibly due to duplication constraints
glog.V(1).Infof("kv insert falls back to update: %s", err)
res, err = store.getTxOrDB(ctx).ExecContext(ctx, store.SqlUpdate, value, dirHash, name, dirStr)
if err != nil {
return fmt.Errorf("kv upsert: %s", err)
}
_, err = res.RowsAffected()
if err != nil {
return fmt.Errorf("kv upsert no rows affected: %s", err)
}
return nil
}
func (store *AbstractSqlStore) KvGet(ctx context.Context, key []byte) (value []byte, err error) {
dirStr, dirHash, name := genDirAndName(key)
row := store.getTxOrDB(ctx).QueryRowContext(ctx, store.SqlFind, dirHash, name, dirStr)
err = row.Scan(&value)
if err == sql.ErrNoRows {
return nil, filer.ErrKvNotFound
}
if err != nil {
return nil, fmt.Errorf("kv get: %v", err)
}
return
}
func (store *AbstractSqlStore) KvDelete(ctx context.Context, key []byte) (err error) {
dirStr, dirHash, name := genDirAndName(key)
res, err := store.getTxOrDB(ctx).ExecContext(ctx, store.SqlDelete, dirHash, name, dirStr)
if err != nil {
return fmt.Errorf("kv delete: %s", err)
}
_, err = res.RowsAffected()
if err != nil {
return fmt.Errorf("kv delete no rows affected: %s", err)
}
return nil
}
func genDirAndName(key []byte) (dirStr string, dirHash int64, name string) {
for len(key) < 8 {
key = append(key, 0)
}
dirHash = int64(util.BytesToUint64(key[:8]))
dirStr = string(key[:8])
name = string(key[8:])
return
}

0
weed/filer2/cassandra/README.txt → weed/filer/cassandra/README.txt

36
weed/filer2/cassandra/cassandra_store.go → weed/filer/cassandra/cassandra_store.go

@ -3,17 +3,16 @@ package cassandra
import (
"context"
"fmt"
"github.com/gocql/gocql"
"github.com/chrislusf/seaweedfs/weed/filer2"
"github.com/chrislusf/seaweedfs/weed/filer"
"github.com/chrislusf/seaweedfs/weed/glog"
"github.com/chrislusf/seaweedfs/weed/pb/filer_pb"
"github.com/chrislusf/seaweedfs/weed/util"
)
func init() {
filer2.Stores = append(filer2.Stores, &CassandraStore{})
filer.Stores = append(filer.Stores, &CassandraStore{})
}
type CassandraStore struct {
@ -29,11 +28,16 @@ func (store *CassandraStore) Initialize(configuration util.Configuration, prefix
return store.initialize(
configuration.GetString(prefix+"keyspace"),
configuration.GetStringSlice(prefix+"hosts"),
configuration.GetString(prefix+"username"),
configuration.GetString(prefix+"password"),
)
}
func (store *CassandraStore) initialize(keyspace string, hosts []string) (err error) {
func (store *CassandraStore) initialize(keyspace string, hosts []string, username string, password string) (err error) {
store.cluster = gocql.NewCluster(hosts...)
if username != "" && password != "" {
store.cluster.Authenticator = gocql.PasswordAuthenticator{Username: username, Password: password}
}
store.cluster.Keyspace = keyspace
store.cluster.Consistency = gocql.LocalQuorum
store.session, err = store.cluster.CreateSession()
@ -53,7 +57,7 @@ func (store *CassandraStore) RollbackTransaction(ctx context.Context) error {
return nil
}
func (store *CassandraStore) InsertEntry(ctx context.Context, entry *filer2.Entry) (err error) {
func (store *CassandraStore) InsertEntry(ctx context.Context, entry *filer.Entry) (err error) {
dir, name := entry.FullPath.DirAndName()
meta, err := entry.EncodeAttributesAndChunks()
@ -61,6 +65,10 @@ func (store *CassandraStore) InsertEntry(ctx context.Context, entry *filer2.Entr
return fmt.Errorf("encode %s: %s", entry.FullPath, err)
}
if len(entry.Chunks) > 50 {
meta = util.MaybeGzipData(meta)
}
if err := store.session.Query(
"INSERT INTO filemeta (directory,name,meta) VALUES(?,?,?) USING TTL ? ",
dir, name, meta, entry.TtlSec).Exec(); err != nil {
@ -70,12 +78,12 @@ func (store *CassandraStore) InsertEntry(ctx context.Context, entry *filer2.Entr
return nil
}
func (store *CassandraStore) UpdateEntry(ctx context.Context, entry *filer2.Entry) (err error) {
func (store *CassandraStore) UpdateEntry(ctx context.Context, entry *filer.Entry) (err error) {
return store.InsertEntry(ctx, entry)
}
func (store *CassandraStore) FindEntry(ctx context.Context, fullpath util.FullPath) (entry *filer2.Entry, err error) {
func (store *CassandraStore) FindEntry(ctx context.Context, fullpath util.FullPath) (entry *filer.Entry, err error) {
dir, name := fullpath.DirAndName()
var data []byte
@ -91,10 +99,10 @@ func (store *CassandraStore) FindEntry(ctx context.Context, fullpath util.FullPa
return nil, filer_pb.ErrNotFound
}
entry = &filer2.Entry{
entry = &filer.Entry{
FullPath: fullpath,
}
err = entry.DecodeAttributesAndChunks(data)
err = entry.DecodeAttributesAndChunks(util.MaybeDecompressData(data))
if err != nil {
return entry, fmt.Errorf("decode %s : %v", entry.FullPath, err)
}
@ -126,8 +134,12 @@ func (store *CassandraStore) DeleteFolderChildren(ctx context.Context, fullpath
return nil
}
func (store *CassandraStore) ListDirectoryPrefixedEntries(ctx context.Context, fullpath util.FullPath, startFileName string, inclusive bool, limit int, prefix string) (entries []*filer.Entry, err error) {
return nil, filer.ErrUnsupportedListDirectoryPrefixed
}
func (store *CassandraStore) ListDirectoryEntries(ctx context.Context, fullpath util.FullPath, startFileName string, inclusive bool,
limit int) (entries []*filer2.Entry, err error) {
limit int) (entries []*filer.Entry, err error) {
cqlStr := "SELECT NAME, meta FROM filemeta WHERE directory=? AND name>? ORDER BY NAME ASC LIMIT ?"
if inclusive {
@ -138,10 +150,10 @@ func (store *CassandraStore) ListDirectoryEntries(ctx context.Context, fullpath
var name string
iter := store.session.Query(cqlStr, string(fullpath), startFileName, limit).Iter()
for iter.Scan(&name, &data) {
entry := &filer2.Entry{
entry := &filer.Entry{
FullPath: util.NewFullPath(string(fullpath), name),
}
if decodeErr := entry.DecodeAttributesAndChunks(data); decodeErr != nil {
if decodeErr := entry.DecodeAttributesAndChunks(util.MaybeDecompressData(data)); decodeErr != nil {
err = decodeErr
glog.V(0).Infof("list %s : %v", entry.FullPath, err)
break

61
weed/filer/cassandra/cassandra_store_kv.go

@ -0,0 +1,61 @@
package cassandra
import (
"context"
"fmt"
"github.com/chrislusf/seaweedfs/weed/filer"
"github.com/gocql/gocql"
)
func (store *CassandraStore) KvPut(ctx context.Context, key []byte, value []byte) (err error) {
dir, name := genDirAndName(key)
if err := store.session.Query(
"INSERT INTO filemeta (directory,name,meta) VALUES(?,?,?) USING TTL ? ",
dir, name, value, 0).Exec(); err != nil {
return fmt.Errorf("kv insert: %s", err)
}
return nil
}
func (store *CassandraStore) KvGet(ctx context.Context, key []byte) (data []byte, err error) {
dir, name := genDirAndName(key)
if err := store.session.Query(
"SELECT meta FROM filemeta WHERE directory=? AND name=?",
dir, name).Consistency(gocql.One).Scan(&data); err != nil {
if err != gocql.ErrNotFound {
return nil, filer.ErrKvNotFound
}
}
if len(data) == 0 {
return nil, filer.ErrKvNotFound
}
return data, nil
}
func (store *CassandraStore) KvDelete(ctx context.Context, key []byte) (err error) {
dir, name := genDirAndName(key)
if err := store.session.Query(
"DELETE FROM filemeta WHERE directory=? AND name=?",
dir, name).Exec(); err != nil {
return fmt.Errorf("kv delete: %v", err)
}
return nil
}
func genDirAndName(key []byte) (dir string, name string) {
for len(key) < 8 {
key = append(key, 0)
}
dir = string(key[:8])
name = string(key[8:])
return
}

2
weed/filer2/configuration.go → weed/filer/configuration.go

@ -1,4 +1,4 @@
package filer2
package filer
import (
"os"

338
weed/filer/elastic/v7/elastic_store.go

@ -0,0 +1,338 @@
package elastic
import (
"context"
"fmt"
"math"
"strings"
"github.com/chrislusf/seaweedfs/weed/filer"
"github.com/chrislusf/seaweedfs/weed/glog"
"github.com/chrislusf/seaweedfs/weed/pb/filer_pb"
weed_util "github.com/chrislusf/seaweedfs/weed/util"
jsoniter "github.com/json-iterator/go"
elastic "github.com/olivere/elastic/v7"
)
var (
indexType = "_doc"
indexPrefix = ".seaweedfs_"
indexKV = ".seaweedfs_kv_entries"
kvMappings = ` {
"mappings": {
"enabled": false,
"properties": {
"Value":{
"type": "binary"
}
}
}
}`
)
type ESEntry struct {
ParentId string `json:"ParentId"`
Entry *filer.Entry
}
type ESKVEntry struct {
Value []byte `json:"Value"`
}
func init() {
filer.Stores = append(filer.Stores, &ElasticStore{})
}
type ElasticStore struct {
client *elastic.Client
maxPageSize int
}
func (store *ElasticStore) GetName() string {
return "elastic7"
}
func (store *ElasticStore) Initialize(configuration weed_util.Configuration, prefix string) (err error) {
options := []elastic.ClientOptionFunc{}
servers := configuration.GetStringSlice(prefix + "servers")
options = append(options, elastic.SetURL(servers...))
username := configuration.GetString(prefix + "username")
password := configuration.GetString(prefix + "password")
if username != "" && password != "" {
options = append(options, elastic.SetBasicAuth(username, password))
}
options = append(options, elastic.SetSniff(configuration.GetBool(prefix+"sniff_enabled")))
options = append(options, elastic.SetHealthcheck(configuration.GetBool(prefix+"healthcheck_enabled")))
store.maxPageSize = configuration.GetInt(prefix + "index.max_result_window")
if store.maxPageSize <= 0 {
store.maxPageSize = 10000
}
glog.Infof("filer store elastic endpoints: %v.", servers)
return store.initialize(options)
}
func (store *ElasticStore) initialize(options []elastic.ClientOptionFunc) (err error) {
ctx := context.Background()
store.client, err = elastic.NewClient(options...)
if err != nil {
return fmt.Errorf("init elastic %v.", err)
}
if ok, err := store.client.IndexExists(indexKV).Do(ctx); err == nil && !ok {
_, err = store.client.CreateIndex(indexKV).Body(kvMappings).Do(ctx)
if err != nil {
return fmt.Errorf("create index(%s) %v.", indexKV, err)
}
}
return nil
}
func (store *ElasticStore) BeginTransaction(ctx context.Context) (context.Context, error) {
return ctx, nil
}
func (store *ElasticStore) CommitTransaction(ctx context.Context) error {
return nil
}
func (store *ElasticStore) RollbackTransaction(ctx context.Context) error {
return nil
}
func (store *ElasticStore) ListDirectoryPrefixedEntries(ctx context.Context, fullpath weed_util.FullPath, startFileName string, inclusive bool, limit int, prefix string) (entries []*filer.Entry, err error) {
return nil, filer.ErrUnsupportedListDirectoryPrefixed
}
func (store *ElasticStore) InsertEntry(ctx context.Context, entry *filer.Entry) (err error) {
index := getIndex(entry.FullPath)
dir, _ := entry.FullPath.DirAndName()
id := weed_util.Md5String([]byte(entry.FullPath))
esEntry := &ESEntry{
ParentId: weed_util.Md5String([]byte(dir)),
Entry: entry,
}
value, err := jsoniter.Marshal(esEntry)
if err != nil {
glog.Errorf("insert entry(%s) %v.", string(entry.FullPath), err)
return fmt.Errorf("insert entry %v.", err)
}
_, err = store.client.Index().
Index(index).
Type(indexType).
Id(id).
BodyJson(string(value)).
Do(ctx)
if err != nil {
glog.Errorf("insert entry(%s) %v.", string(entry.FullPath), err)
return fmt.Errorf("insert entry %v.", err)
}
return nil
}
func (store *ElasticStore) UpdateEntry(ctx context.Context, entry *filer.Entry) (err error) {
return store.InsertEntry(ctx, entry)
}
func (store *ElasticStore) FindEntry(ctx context.Context, fullpath weed_util.FullPath) (entry *filer.Entry, err error) {
index := getIndex(fullpath)
id := weed_util.Md5String([]byte(fullpath))
searchResult, err := store.client.Get().
Index(index).
Type(indexType).
Id(id).
Do(ctx)
if elastic.IsNotFound(err) {
return nil, filer_pb.ErrNotFound
}
if searchResult != nil && searchResult.Found {
esEntry := &ESEntry{
ParentId: "",
Entry: &filer.Entry{},
}
err := jsoniter.Unmarshal(searchResult.Source, esEntry)
return esEntry.Entry, err
}
glog.Errorf("find entry(%s),%v.", string(fullpath), err)
return nil, filer_pb.ErrNotFound
}
func (store *ElasticStore) DeleteEntry(ctx context.Context, fullpath weed_util.FullPath) (err error) {
index := getIndex(fullpath)
id := weed_util.Md5String([]byte(fullpath))
if strings.Count(string(fullpath), "/") == 1 {
return store.deleteIndex(ctx, index)
}
return store.deleteEntry(ctx, index, id)
}
func (store *ElasticStore) deleteIndex(ctx context.Context, index string) (err error) {
deleteResult, err := store.client.DeleteIndex(index).Do(ctx)
if elastic.IsNotFound(err) || (err == nil && deleteResult.Acknowledged) {
return nil
}
glog.Errorf("delete index(%s) %v.", index, err)
return err
}
func (store *ElasticStore) deleteEntry(ctx context.Context, index, id string) (err error) {
deleteResult, err := store.client.Delete().
Index(index).
Type(indexType).
Id(id).
Do(ctx)
if err == nil {
if deleteResult.Result == "deleted" || deleteResult.Result == "not_found" {
return nil
}
}
glog.Errorf("delete entry(index:%s,_id:%s) %v.", index, id, err)
return fmt.Errorf("delete entry %v.", err)
}
func (store *ElasticStore) DeleteFolderChildren(ctx context.Context, fullpath weed_util.FullPath) (err error) {
if entries, err := store.ListDirectoryEntries(ctx, fullpath, "", false, math.MaxInt32); err == nil {
for _, entry := range entries {
store.DeleteEntry(ctx, entry.FullPath)
}
}
return nil
}
func (store *ElasticStore) ListDirectoryEntries(
ctx context.Context, fullpath weed_util.FullPath, startFileName string, inclusive bool, limit int,
) (entries []*filer.Entry, err error) {
if string(fullpath) == "/" {
return store.listRootDirectoryEntries(ctx, startFileName, inclusive, limit)
}
return store.listDirectoryEntries(ctx, fullpath, startFileName, inclusive, limit)
}
func (store *ElasticStore) listRootDirectoryEntries(ctx context.Context, startFileName string, inclusive bool, limit int) (entries []*filer.Entry, err error) {
indexResult, err := store.client.CatIndices().Do(ctx)
if err != nil {
glog.Errorf("list indices %v.", err)
return entries, err
}
for _, index := range indexResult {
if index.Index == indexKV {
continue
}
if strings.HasPrefix(index.Index, indexPrefix) {
if entry, err := store.FindEntry(ctx,
weed_util.FullPath("/"+strings.Replace(index.Index, indexPrefix, "", 1))); err == nil {
fileName := getFileName(entry.FullPath)
if fileName == startFileName && !inclusive {
continue
}
limit--
if limit < 0 {
break
}
entries = append(entries, entry)
}
}
}
return entries, nil
}
func (store *ElasticStore) listDirectoryEntries(
ctx context.Context, fullpath weed_util.FullPath, startFileName string, inclusive bool, limit int,
) (entries []*filer.Entry, err error) {
first := true
index := getIndex(fullpath)
nextStart := ""
parentId := weed_util.Md5String([]byte(fullpath))
if _, err := store.client.Refresh(index).Do(ctx); err != nil {
if elastic.IsNotFound(err) {
store.client.CreateIndex(index).Do(ctx)
return entries, nil
}
}
for {
result := &elastic.SearchResult{}
if (startFileName == "" && first) || inclusive {
if result, err = store.search(ctx, index, parentId); err != nil {
glog.Errorf("search (%s,%s,%t,%d) %v.", string(fullpath), startFileName, inclusive, limit, err)
return entries, err
}
} else {
fullPath := string(fullpath) + "/" + startFileName
if !first {
fullPath = nextStart
}
after := weed_util.Md5String([]byte(fullPath))
if result, err = store.searchAfter(ctx, index, parentId, after); err != nil {
glog.Errorf("searchAfter (%s,%s,%t,%d) %v.", string(fullpath), startFileName, inclusive, limit, err)
return entries, err
}
}
first = false
for _, hit := range result.Hits.Hits {
esEntry := &ESEntry{
ParentId: "",
Entry: &filer.Entry{},
}
if err := jsoniter.Unmarshal(hit.Source, esEntry); err == nil {
limit--
if limit < 0 {
return entries, nil
}
nextStart = string(esEntry.Entry.FullPath)
fileName := getFileName(esEntry.Entry.FullPath)
if fileName == startFileName && !inclusive {
continue
}
entries = append(entries, esEntry.Entry)
}
}
if len(result.Hits.Hits) < store.maxPageSize {
break
}
}
return entries, nil
}
func (store *ElasticStore) search(ctx context.Context, index, parentId string) (result *elastic.SearchResult, err error) {
if count, err := store.client.Count(index).Do(ctx); err == nil && count == 0 {
return &elastic.SearchResult{
Hits: &elastic.SearchHits{
Hits: make([]*elastic.SearchHit, 0)},
}, nil
}
queryResult, err := store.client.Search().
Index(index).
Query(elastic.NewMatchQuery("ParentId", parentId)).
Size(store.maxPageSize).
Sort("_id", false).
Do(ctx)
return queryResult, err
}
func (store *ElasticStore) searchAfter(ctx context.Context, index, parentId, after string) (result *elastic.SearchResult, err error) {
queryResult, err := store.client.Search().
Index(index).
Query(elastic.NewMatchQuery("ParentId", parentId)).
SearchAfter(after).
Size(store.maxPageSize).
Sort("_id", false).
Do(ctx)
return queryResult, err
}
func (store *ElasticStore) Shutdown() {
store.client.Stop()
}
func getIndex(fullpath weed_util.FullPath) string {
path := strings.Split(string(fullpath), "/")
if len(path) > 1 {
return indexPrefix + path[1]
}
return ""
}
func getFileName(fullpath weed_util.FullPath) string {
path := strings.Split(string(fullpath), "/")
if len(path) > 1 {
return path[len(path)-1]
}
return ""
}

Some files were not shown because too many files changed in this diff
