yourchanges
10 years ago
116 changed files with 1836 additions and 674 deletions
-
2.gitignore
-
5.project
-
1.travis.yml
-
19Dockerfile
-
6Dockerfile.go_build
-
42README.md
-
5docs/api.rst
-
22docs/benchmarks.rst
-
275docs/changelist.rst
-
2docs/clients.rst
-
12docs/conf.py
-
22docs/directories.rst
-
118docs/distributed_filer.rst
-
2docs/failover.rst
-
32docs/gettingstarted.rst
-
27docs/index.rst
-
85docs/ttl.rst
-
87go/filer/cassandra_store/cassandra_store.go
-
22go/filer/cassandra_store/schema.cql
-
6go/filer/client_operations.go
-
20go/filer/directory.go
-
0go/filer/embedded_filer/design.txt
-
15go/filer/embedded_filer/directory.go
-
22go/filer/embedded_filer/directory_in_map.go
-
2go/filer/embedded_filer/directory_test.go
-
12go/filer/embedded_filer/filer_embedded.go
-
16go/filer/embedded_filer/files_in_leveldb.go
-
17go/filer/filer.go
-
50go/filer/flat_namespace/flat_namespace_filer.go
-
9go/filer/flat_namespace/flat_namespace_store.go
-
48go/filer/redis_store/redis_store.go
-
2go/glog/convenient_api.go
-
3go/images/orientation.go
-
3go/images/resizing.go
-
5go/operation/assign_file_id.go
-
2go/operation/data_struts.go
-
3go/operation/delete_content.go
-
3go/operation/list_masters.go
-
9go/operation/lookup.go
-
3go/operation/submit.go
-
2go/operation/system_message.pb.go
-
3go/operation/system_message_test.go
-
3go/operation/upload_content.go
-
146go/security/guard.go
-
2go/sequence/sequence.go
-
2go/stats/disk.go
-
2go/stats/disk_notsupported.go
-
2go/stats/memory_notsupported.go
-
5go/storage/cdb_map.go
-
3go/storage/cdb_map_test.go
-
2go/storage/compact_map.go
-
5go/storage/compact_map_perf_test.go
-
3go/storage/compress.go
-
3go/storage/crc.go
-
5go/storage/file_id.go
-
14go/storage/needle.go
-
5go/storage/needle_map.go
-
5go/storage/needle_read_write.go
-
21go/storage/store.go
-
3go/storage/store_vacuum.go
-
34go/storage/volume.go
-
5go/storage/volume_info.go
-
3go/storage/volume_super_block.go
-
3go/storage/volume_vacuum.go
-
2go/storage/volume_version.go
-
3go/tools/read_index.go
-
5go/topology/allocate_volume.go
-
27go/topology/collection.go
-
2go/topology/data_center.go
-
8go/topology/data_node.go
-
5go/topology/node.go
-
5go/topology/store_replicate.go
-
38go/topology/topology.go
-
5go/topology/topology_event_handling.go
-
9go/topology/topology_map.go
-
21go/topology/topology_vacuum.go
-
9go/topology/volume_growth.go
-
5go/topology/volume_growth_test.go
-
10go/topology/volume_layout.go
-
8go/topology/volume_location_list.go
-
39go/util/concurrent_read_map.go
-
3go/util/config.go
-
4go/util/constants.go
-
3go/util/file_util.go
-
3go/util/net_timeout.go
-
159go/weed/benchmark.go
-
2go/weed/compact.go
-
5go/weed/download.go
-
7go/weed/export.go
-
26go/weed/filer.go
-
7go/weed/fix.go
-
16go/weed/master.go
-
2go/weed/mount.go
-
7go/weed/mount_std.go
-
35go/weed/server.go
-
3go/weed/shell.go
-
2go/weed/signal_handling_notsupported.go
-
3go/weed/upload.go
-
3go/weed/version.go
-
16go/weed/volume.go
@ -1 +1,3 @@ |
|||||
weed |
weed |
||||
|
tags |
||||
|
*.swp |
@ -1,6 +1,21 @@ |
|||||
FROM cydev/go |
|
||||
RUN go get code.google.com/p/weed-fs/go/weed |
|
||||
|
FROM progrium/busybox |
||||
|
|
||||
|
WORKDIR /opt/weed |
||||
|
|
||||
|
RUN opkg-install curl |
||||
|
RUN echo insecure >> ~/.curlrc |
||||
|
|
||||
|
RUN \ |
||||
|
curl -Lks https://bintray.com$(curl -Lk http://bintray.com/chrislusf/Weed-FS/seaweed/_latestVersion | grep linux_amd64.tar.gz | sed -n "/href/ s/.*href=['\"]\([^'\"]*\)['\"].*/\1/gp") | gunzip | tar -xf - -C /opt/weed/ && \ |
||||
|
mv weed_* bin && \ |
||||
|
chmod +x ./bin/weed |
||||
|
|
||||
EXPOSE 8080 |
EXPOSE 8080 |
||||
EXPOSE 9333 |
EXPOSE 9333 |
||||
|
|
||||
VOLUME /data |
VOLUME /data |
||||
|
|
||||
|
ENV WEED_HOME /opt/weed |
||||
|
ENV PATH ${PATH}:${WEED_HOME}/bin |
||||
|
|
||||
ENTRYPOINT ["weed"] |
ENTRYPOINT ["weed"] |
@ -0,0 +1,6 @@ |
|||||
|
FROM cydev/go |
||||
|
RUN go get github.com/chrislusf/weed-fs/go/weed |
||||
|
EXPOSE 8080 |
||||
|
EXPOSE 9333 |
||||
|
VOLUME /data |
||||
|
ENTRYPOINT ["weed"] |
@ -0,0 +1,275 @@ |
|||||
|
Change List |
||||
|
=================================== |
||||
|
|
||||
|
Introduction |
||||
|
############ |
||||
|
This file contains list of recent changes, important features, usage changes, data format changes, etc. Do read this if you upgrade. |
||||
|
|
||||
|
|
||||
|
v0.67 |
||||
|
##### |
||||
|
1. Increase "weed benchmark" performance to pump in more data. The bottleneck is on the client side. Duh... |
||||
|
|
||||
|
v0.65 |
||||
|
##### |
||||
|
|
||||
|
1. Reset the cluster configuration if "-peers" is not empty. |
||||
|
|
||||
|
v0.64 |
||||
|
##### |
||||
|
|
||||
|
1. Add TTL support! |
||||
|
2. filer: resolve directory log file error, avoid possible race condition |
||||
|
|
||||
|
v0.63 |
||||
|
##### |
||||
|
|
||||
|
1. Compiled with Go 1.3.1 to fix a rare crashing issue. |
||||
|
|
||||
|
v0.62 |
||||
|
##### |
||||
|
|
||||
|
1. Add support for Etag. |
||||
|
2. Add /admin/mv to move a file or a folder. |
||||
|
3. Add client Go API to pre-process the images. |
||||
|
|
||||
|
v0.61 |
||||
|
##### |
||||
|
|
||||
|
1. Reduce memory requirements for "weed fix" |
||||
|
2. Guess mime type by file name extensions when stored mime type is "application/octet-stream" |
||||
|
3. Added simple volume id lookup caching expiring by time. |
||||
|
|
||||
|
v0.60 |
||||
|
##### |
||||
|
|
||||
|
Fix file missing error caused by .idx file overwriting. The problem shows up if the weed volume server is restarted after 2 times. But the actual .idx file may have already been overwritten on second restart. |
||||
|
|
||||
|
To fix this issue, please run "weed fix -dir=... -volumeId=..." to re-generate the .idx file. |
||||
|
|
||||
|
v0.59 |
||||
|
##### |
||||
|
|
||||
|
1. Add option to automatically fix jpeg picture orientation. |
||||
|
2. Add volume id lookup caching |
||||
|
3. Support Partial Content and Range Requests. http status code == 206. |
||||
|
|
||||
|
v0.57 |
||||
|
##### |
||||
|
|
||||
|
Add hidden dynamic image resizing feature |
||||
|
|
||||
|
Add a hidden feature: for images (jpg/png/gif), if you append the url parameters &width=xxx or &height=xxx or both, the image will be dynamically resized. However, resizing the image causes high CPU and memory usage, so it is not recommended except for special use cases, and it is not documented anywhere else. |
||||
|
|
||||
|
v0.56 Major Command line options change |
||||
|
##### |
||||
|
|
||||
|
|
||||
|
Adjust command line options. |
||||
|
|
||||
|
1. switch to use -publicIp instead of -publicUrl |
||||
|
2. -ip can be empty. It will listen to all available interfaces. |
||||
|
3. For "weed server", these options are changed: |
||||
|
- -masterPort => -master.port |
||||
|
- -peers => -master.peers |
||||
|
- -mdir => -master.dir |
||||
|
- -volumeSizeLimitMB => -master.volumeSizeLimitMB |
||||
|
- -conf => -master.conf |
||||
|
- -defaultReplicaPlacement => -master.defaultReplicaPlacement |
||||
|
- -port => -volume.port |
||||
|
- -max => -volume.max |
||||
|
|
||||
|
v0.55 Recursive folder deletion for Filer |
||||
|
##### |
||||
|
|
||||
|
Now folders with sub folders or files can be deleted recursively. |
||||
|
|
||||
|
Also, for filer, avoid showing files under the first created directory when listing the root directory. |
||||
|
|
||||
|
v0.54 Misc improvements |
||||
|
##### |
||||
|
|
||||
|
No need to persist metadata for master sequence number generation. This shall avoid possible issues where files are lost due to duplicated sequence numbers generated in rare cases. |
||||
|
|
||||
|
More robust handling of "peers" in master node clustering mode. |
||||
|
|
||||
|
Added logging instructions. |
||||
|
|
||||
|
v0.53 Miscellaneous improvements |
||||
|
##### |
||||
|
|
||||
|
Added retry logic to wait for cluster peers during cluster bootstrapping. Previously the cluster bootstrapping was ordered. This made it tricky to deploy automatically and repeatedly. The fix makes the commands repeatable. |
||||
|
|
||||
|
Also, when growing volumes, additional preferred "rack" and "dataNode" parameters are also provided, works together with existing "dataCenter" parameter. |
||||
|
|
||||
|
Fix important bug where settings for non-"000" replications are read back wrong, if volume server is restarted. |
||||
|
|
||||
|
v0.52 Added "filer" server |
||||
|
##### |
||||
|
|
||||
|
A "weed filer" server is added, to provide more "common" file storage. Currently the fullFileName-to-fileId mapping is stored with an efficient embedded leveldb. So it's not linearly scalable yet. But it can handle LOTS of files. |
||||
|
|
||||
|
|
||||
|
.. code-block:: bash |
||||
|
|
||||
|
//POST a file and read it back |
||||
|
curl -F "filename=@README.md" "http://localhost:8888/path/to/sources/" |
||||
|
curl "http://localhost:8888/path/to/sources/README.md" |
||||
|
//POST a file with a new name and read it back |
||||
|
curl -F "filename=@Makefile" "http://localhost:8888/path/to/sources/new_name" |
||||
|
curl "http://localhost:8888/path/to/sources/new_name" |
||||
|
//list sub folders and files |
||||
|
curl "http://localhost:8888/path/to/sources/?pretty=y" |
||||
|
|
||||
|
|
||||
|
v0.51 Idle Timeout |
||||
|
##### |
||||
|
|
||||
|
Previously the timeout setting is "-readTimeout", which is the time limit of the whole http connection. This is inconvenient for large files or for slow internet connections. Now this option is replaced with "-idleTimeout", and default to 10 seconds. Ideally, you should not need to tweak it based on your use case. |
||||
|
|
||||
|
v0.50 Improved Locking |
||||
|
##### |
||||
|
|
||||
|
1. All read operation switched to thread-safe pread, no read locks now. |
||||
|
2. When vacuuming large volumes, a lock was preventing heartbeats to master node. This is fixed now. |
||||
|
3. Fix volume compaction error for collections. |
||||
|
|
||||
|
v0.49 Bug Fixes |
||||
|
##### |
||||
|
|
||||
|
With the new benchmark tool to bombard the system, many bugs are found and fixed, especially on clustering, http connection reuse. |
||||
|
|
||||
|
v0.48 added benchmark command! |
||||
|
##### |
||||
|
|
||||
|
Benchmark! Enough said. |
||||
|
|
||||
|
v0.47 Improving replication |
||||
|
##### |
||||
|
|
||||
|
Support more replication types. |
||||
|
|
||||
|
v0.46 Adding failover master server |
||||
|
##### |
||||
|
|
||||
|
Automatically fail over master servers! |
||||
|
|
||||
|
v0.46 Add "weed server" command |
||||
|
##### |
||||
|
|
||||
|
Now you can start one master server and one volume server in just one command! |
||||
|
|
||||
|
.. code-block:: bash |
||||
|
|
||||
|
weed server |
||||
|
|
||||
|
|
||||
|
v0.45 Add support for extra large file |
||||
|
##### |
||||
|
|
||||
|
For extra large file, this example will split the file into 100MB chunks. |
||||
|
|
||||
|
.. code-block:: bash |
||||
|
|
||||
|
weed upload -maxMB=100 the_file_name |
||||
|
|
||||
|
|
||||
|
Also, Added "download" command, for simple files or chunked files. |
||||
|
|
||||
|
.. code-block:: bash |
||||
|
|
||||
|
weed download file_id [file_id3](file_id2) |
||||
|
|
||||
|
|
||||
|
v0.34 Add support for multiple directories on volume server |
||||
|
##### |
||||
|
|
||||
|
For volume server, add support for multiple folders and multiple max limit. For example: |
||||
|
|
||||
|
.. code-block:: bash |
||||
|
|
||||
|
weed volume -dir=folder1,folder2,folder3 -max=7,8,9 |
||||
|
|
||||
|
|
||||
|
v0.33 Add Nicer URL support |
||||
|
##### |
||||
|
|
||||
|
For HTTP GET request |
||||
|
|
||||
|
.. code-block:: bash |
||||
|
|
||||
|
http://localhost:8080/3,01637037d6 |
||||
|
|
||||
|
Can also be retrieved by |
||||
|
|
||||
|
.. code-block:: bash |
||||
|
|
||||
|
http://localhost:8080/3/01637037d6/my_preferred_name.jpg |
||||
|
|
||||
|
|
||||
|
v0.32 Add support for Last-Modified header |
||||
|
##### |
||||
|
|
||||
|
The last modified timestamp is stored with 5 additional bytes. |
||||
|
|
||||
|
Return http code 304 if the file is not modified. |
||||
|
|
||||
|
Also, writes are more solid with the fix for issue#26. |
||||
|
|
||||
|
v0.31 Allocate File Key on specific data center |
||||
|
##### |
||||
|
|
||||
|
Volume servers can start with a specific data center name. |
||||
|
|
||||
|
.. code-block:: bash |
||||
|
|
||||
|
weed volume -dir=/tmp/1 -port=8080 -dataCenter=dc1 |
||||
|
weed volume -dir=/tmp/2 -port=8081 -dataCenter=dc2 |
||||
|
|
||||
|
Or the master server can determine the data center via volume server's IP address and settings in weed.conf file. |
||||
|
|
||||
|
Now when requesting a file key, an optional "dataCenter" parameter can limit the assigned volume to the specific data center. For example, this specifies that the assigned volume should be limited to "dc1": |
||||
|
|
||||
|
.. code-block:: bash |
||||
|
|
||||
|
http://localhost:9333/dir/assign?dataCenter=dc1 |
||||
|
|
||||
|
v0.26 Storing File Name and Mime Type |
||||
|
##### |
||||
|
|
||||
|
In order to keep one single disk read for each file, a new storage format is implemented to store: is gzipped or not, file name and mime type (used when downloading files), and possibly other future new attributes. The volumes with old storage format are treated as read only and deprecated. |
||||
|
|
||||
|
Also, you can pre-gzip and submit your file directly, for example, gzip "my.css" into "my.css.gz", and submit. In this case, "my.css" will be stored as the file name. This should save some transmission time, and allow you to force gzipped storage or customize the gzip compression level. |
||||
|
|
||||
|
v0.25 Adding reclaiming garbage spaces |
||||
|
|
||||
|
Garbage spaces are reclaimed by an automatic compacting process. Garbage spaces are generated when updating or deleting files. If they exceed a configurable threshold, 0.3 by default (meaning 30% of the used disk space is garbage), the volume will be marked as readonly, compacted and garbage spaces are reclaimed, and then marked as writable. |
||||
|
|
||||
|
v0.19 Adding rack and data center aware replication |
||||
|
##### |
||||
|
|
||||
|
Now when you have one rack, or multiple racks, or multiple data centers, you can choose your own replication strategy. |
||||
|
|
||||
|
v0.18 Detect disconnected volume servers |
||||
|
##### |
||||
|
|
||||
|
The disconnected volume servers would not be assigned when generating the file keys. Volume servers by default send a heartbeat to master server every 5~10 seconds. Master thinks the volume server is disconnected after 5 times of the heartbeat interval, or 25 seconds by default. |
||||
|
|
||||
|
v0.16 Change to single executable file to do everything |
||||
|
##### |
||||
|
|
||||
|
If you are using v0.15 or earlier, you would use |
||||
|
|
||||
|
.. code-block:: bash |
||||
|
|
||||
|
>weedvolume -dir="/tmp" -volumes=0-4 -mserver="localhost:9333" -port=8080 -publicUrl="localhost:8080" |
||||
|
|
||||
|
With v0.16 or later, you would need to do this instead: |
||||
|
|
||||
|
.. code-block:: bash |
||||
|
|
||||
|
>weed volume -dir="/tmp" -volumes=0-4 -mserver="localhost:9333" -port=8080 -publicUrl="localhost:8080" |
||||
|
|
||||
|
And more new commands, in addition to "server","volume","fix", etc, will be added. |
||||
|
|
||||
|
This provides a simple deliverable file, and the file size is much smaller since the Go language statically compiles the commands. Combining commands into one file avoids lots of duplication. |
@ -0,0 +1,118 @@ |
|||||
|
Distributed Filer |
||||
|
=========================== |
||||
|
|
||||
|
The default weed filer is in standalone mode, storing file metadata on disk. |
||||
|
It is quite efficient to go through deep directory path and can handle |
||||
|
millions of files. |
||||
|
|
||||
|
However, no SPOF is a must-have requirement for many projects. |
||||
|
|
||||
|
Luckily, SeaweedFS is so flexible that we can use a completely different way |
||||
|
to manage file metadata. |
||||
|
|
||||
|
This distributed filer uses Redis or Cassandra to store the metadata. |
||||
|
|
||||
|
Redis Setup |
||||
|
##################### |
||||
|
No setup required. |
||||
|
|
||||
|
Cassandra Setup |
||||
|
##################### |
||||
|
Here is the CQL to create the table for CassandraStore. |
||||
|
Optionally you can adjust the keyspace name and replication settings. |
||||
|
For production, you would want to set replication_factor to 3 |
||||
|
if there are at least 3 Cassandra servers. |
||||
|
|
||||
|
.. code-block:: bash |
||||
|
|
||||
|
create keyspace seaweed WITH replication = { |
||||
|
'class':'SimpleStrategy', |
||||
|
'replication_factor':1 |
||||
|
}; |
||||
|
|
||||
|
use seaweed; |
||||
|
|
||||
|
CREATE TABLE seaweed_files ( |
||||
|
path varchar, |
||||
|
fids list<varchar>, |
||||
|
PRIMARY KEY (path) |
||||
|
); |
||||
|
|
||||
|
|
||||
|
Sample usage |
||||
|
##################### |
||||
|
|
||||
|
To start a weed filer in distributed mode with Redis: |
||||
|
|
||||
|
.. code-block:: bash |
||||
|
|
||||
|
# assuming you already started weed master and weed volume |
||||
|
weed filer -redis.server=localhost:6379 |
||||
|
|
||||
|
To start a weed filer in distributed mode with Cassandra: |
||||
|
|
||||
|
.. code-block:: bash |
||||
|
|
||||
|
# assuming you already started weed master and weed volume |
||||
|
weed filer -cassandra.server=localhost |
||||
|
|
||||
|
Now you can add/delete files |
||||
|
|
||||
|
.. code-block:: bash |
||||
|
|
||||
|
# POST a file and read it back |
||||
|
curl -F "filename=@README.md" "http://localhost:8888/path/to/sources/" |
||||
|
curl "http://localhost:8888/path/to/sources/README.md" |
||||
|
# POST a file with a new name and read it back |
||||
|
curl -F "filename=@Makefile" "http://localhost:8888/path/to/sources/new_name" |
||||
|
curl "http://localhost:8888/path/to/sources/new_name" |
||||
|
|
||||
|
Limitation |
||||
|
############ |
||||
|
Listing sub folders and files is not supported because Redis or Cassandra |
||||
|
does not support prefix search. |
||||
|
|
||||
|
Flat Namespace Design |
||||
|
############ |
||||
|
Instead of using both directory and file metadata, this implementation uses |
||||
|
a flat namespace. |
||||
|
|
||||
|
If storing each directory metadata separately, there would be multiple |
||||
|
network round trips to fetch directory information for deep directories, |
||||
|
impeding system performance. |
||||
|
|
||||
|
A flat namespace would take more space because the parent directories are |
||||
|
repeatedly stored. But disk space is a lesser concern especially for |
||||
|
distributed systems. |
||||
|
|
||||
|
So either Redis or Cassandra is a simple file_full_path ~ file_id mapping. |
||||
|
(Actually Cassandra is a file_full_path ~ list_of_file_ids mapping |
||||
|
with the hope to support easy file appending for streaming files.) |
||||
|
|
||||
|
Complexity |
||||
|
################### |
||||
|
|
||||
|
For one file retrieval, the full_filename=>file_id lookup will be O(logN) |
||||
|
using Redis or Cassandra. But very likely the one additional network hop would |
||||
|
take longer than the actual lookup. |
||||
|
|
||||
|
Use Cases |
||||
|
######################### |
||||
|
|
||||
|
Clients can access one "weed filer" via HTTP, create files via HTTP POST, |
||||
|
read files via HTTP GET directly. |
||||
|
|
||||
|
Future |
||||
|
################### |
||||
|
|
||||
|
SeaweedFS can support other distributed databases. It will be better |
||||
|
if that database can support prefix search, in order to list files |
||||
|
under a directory. |
||||
|
|
||||
|
Help Wanted |
||||
|
######################## |
||||
|
|
||||
|
Please implement your preferred metadata store! |
||||
|
|
||||
|
Just follow the cassandra_store/cassandra_store.go file and send me a pull |
||||
|
request. I will handle the rest. |
@ -0,0 +1,85 @@ |
|||||
|
Store file with a Time To Live |
||||
|
=================== |
||||
|
|
||||
|
Introduction |
||||
|
############################# |
||||
|
|
||||
|
Seaweed is a key~file store, and files can optionally expire with a Time To Live (TTL). |
||||
|
|
||||
|
How to use it? |
||||
|
############################# |
||||
|
|
||||
|
Assume we want to store a file with TTL of 3 minutes. |
||||
|
|
||||
|
First, ask the master to assign a file id to a volume with a 3-minute TTL: |
||||
|
|
||||
|
.. code-block:: bash |
||||
|
|
||||
|
> curl http://localhost:9333/dir/assign?ttl=3m |
||||
|
{"count":1,"fid":"5,01637037d6","url":"127.0.0.1:8080","publicUrl":"localhost:8080"} |
||||
|
|
||||
|
Secondly, use the file id to store on the volume server |
||||
|
|
||||
|
.. code-block:: bash |
||||
|
|
||||
|
> curl -F "file=@x.go" http://127.0.0.1:8080/5,01637037d6?ttl=3m |
||||
|
|
||||
|
After writing, the file content will be returned as usual if read before the TTL expiry. But if read after the TTL expiry, the file will be reported as missing and return the http response status as not found. |
||||
|
|
||||
|
For next writes with ttl=3m, the same set of volumes with ttl=3m will be used until: |
||||
|
|
||||
|
1. the ttl=3m volumes are full. If so, new volumes will be created. |
||||
|
2. there are no write activities for 3 minutes. If so, these volumes will be stopped and deleted. |
||||
|
|
||||
|
Advanced Usage |
||||
|
############################# |
||||
|
|
||||
|
As you may have noticed, the "ttl=3m" is used twice! One for assigning file id, and one for uploading the actual file. The first one is for master to pick a matching volume, while the second one is written together with the file. |
||||
|
|
||||
|
These two TTL values are not required to be the same. As long as the volume TTL is larger than file TTL, it should be OK. |
||||
|
|
||||
|
This gives some flexibility to fine-tune the file TTL, while reducing the number of volume TTL variations, which simplifies managing the TTL volumes. |
||||
|
|
||||
|
Supported TTL format |
||||
|
############################# |
||||
|
|
||||
|
The TTL is in the format of one integer number followed by one unit. The unit can be 'm', 'h', 'd', 'w', 'M', 'y'. |
||||
|
|
||||
|
Supported TTL format examples: |
||||
|
|
||||
|
- 3m: 3 minutes |
||||
|
- 4h: 4 hours |
||||
|
- 5d: 5 days |
||||
|
- 6w: 6 weeks |
||||
|
- 7M: 7 months |
||||
|
- 8y: 8 years |
||||
|
|
||||
|
|
||||
|
How efficient is it? |
||||
|
############################# |
||||
|
|
||||
|
TTL seems easy to implement since we just need to report the file as missing if the time is over the TTL. However, the real difficulty is to efficiently reclaim disk space from expired files, similar to JVM memory garbage collection, which is a sophisticated piece of work with many man-years of effort. |
||||
|
|
||||
|
Memcached also supports TTL. It gets around this problem by putting entries into fixed-size slabs. If one slab is expired, no work is required and the slab can be overwritten right away. However, this fixed-size slab approach is not applicable to files since the file contents rarely fit in slabs exactly. |
||||
|
|
||||
|
Seaweed-FS efficiently resolves this disk space garbage collection problem with great simplicity. One of the key differences from a "normal" implementation is that the TTL is associated with the volume, as well as with each specific file. |
||||
|
|
||||
|
During the file id assigning step, the file id will be assigned to a volume with matching TTL. The volumes are checked periodically (every 5~10 seconds by default). If the latest expiration time has been reached, all the files in the whole volume will be all expired, and the volume can be safely deleted. |
||||
|
|
||||
|
Implementation Details |
||||
|
############################# |
||||
|
1. When assigning file key, the master would pick one TTL volume with matching TTL. If such volumes do not exist, create a few. |
||||
|
2. Volume servers will write the file with expiration time. When serving file, if the file is expired, the file will be reported as not found. |
||||
|
3. Volume servers will track each volume's largest expiration time, and stop reporting the expired volumes to the master server. |
||||
|
4. Master server will think the previously existing volumes are dead, and stop assigning write requests to them. |
||||
|
5. After about 10% of the TTL time, or at most 10 minutes, the volume servers will delete the expired volume. |
||||
|
|
||||
|
Deployment |
||||
|
############################# |
||||
|
|
||||
|
For deploying to production, the TTL volume maximum size should be taken into consideration. If the writes are frequent, the TTL volume will grow to the max volume size. So when the disk space is not ample enough, it's better to reduce the maximum volume size. |
||||
|
|
||||
|
It's recommended not to mix the TTL volumes and non TTL volumes in the same cluster. This is because the volume maximum size, default to 30GB, is configured on the volume master at the cluster level. |
||||
|
|
||||
|
We could implement the configuration for max volume size for each TTL. However, it could get fairly verbose. Maybe later if it is strongly desired. |
||||
|
|
@ -0,0 +1,87 @@ |
|||||
|
package cassandra_store |
||||
|
|
||||
|
import ( |
||||
|
"fmt" |
||||
|
|
||||
|
"github.com/chrislusf/weed-fs/go/glog" |
||||
|
|
||||
|
"github.com/gocql/gocql" |
||||
|
) |
||||
|
|
||||
|
/* |
||||
|
|
||||
|
Basically you need a table just like this: |
||||
|
|
||||
|
CREATE TABLE seaweed_files ( |
||||
|
path varchar, |
||||
|
fids list<varchar>, |
||||
|
PRIMARY KEY (path) |
||||
|
); |
||||
|
|
||||
|
Need to match flat_namespace.FlatNamespaceStore interface |
||||
|
Put(fullFileName string, fid string) (err error) |
||||
|
Get(fullFileName string) (fid string, err error) |
||||
|
Delete(fullFileName string) (fid string, err error) |
||||
|
|
||||
|
*/ |
||||
|
type CassandraStore struct { |
||||
|
cluster *gocql.ClusterConfig |
||||
|
session *gocql.Session |
||||
|
} |
||||
|
|
||||
|
func NewCassandraStore(keyspace string, hosts ...string) (c *CassandraStore, err error) { |
||||
|
c = &CassandraStore{} |
||||
|
c.cluster = gocql.NewCluster(hosts...) |
||||
|
c.cluster.Keyspace = keyspace |
||||
|
c.cluster.Consistency = gocql.Quorum |
||||
|
c.session, err = c.cluster.CreateSession() |
||||
|
if err != nil { |
||||
|
glog.V(0).Infof("Failed to open cassandra store, hosts %v, keyspace %s", hosts, keyspace) |
||||
|
} |
||||
|
return |
||||
|
} |
||||
|
|
||||
|
func (c *CassandraStore) Put(fullFileName string, fid string) (err error) { |
||||
|
var input []string |
||||
|
input = append(input, fid) |
||||
|
if err := c.session.Query( |
||||
|
`INSERT INTO seaweed_files (path, fids) VALUES (?, ?)`, |
||||
|
fullFileName, input).Exec(); err != nil { |
||||
|
glog.V(0).Infof("Failed to save file %s with id %s: %v", fullFileName, fid, err) |
||||
|
return err |
||||
|
} |
||||
|
return nil |
||||
|
} |
||||
|
func (c *CassandraStore) Get(fullFileName string) (fid string, err error) { |
||||
|
var output []string |
||||
|
if err := c.session.Query( |
||||
|
`select fids FROM seaweed_files WHERE path = ? LIMIT 1`, |
||||
|
fullFileName).Consistency(gocql.One).Scan(&output); err != nil { |
||||
|
if err != gocql.ErrNotFound { |
||||
|
glog.V(0).Infof("Failed to find file %s: %v", fullFileName, fid, err) |
||||
|
} |
||||
|
} |
||||
|
if len(output) == 0 { |
||||
|
return "", fmt.Errorf("No file id found for %s", fullFileName) |
||||
|
} |
||||
|
return output[0], nil |
||||
|
} |
||||
|
|
||||
|
// Currently the fid is not returned
|
||||
|
func (c *CassandraStore) Delete(fullFileName string) (fid string, err error) { |
||||
|
if err := c.session.Query( |
||||
|
`DELETE FROM seaweed_files WHERE path = ?`, |
||||
|
fullFileName).Exec(); err != nil { |
||||
|
if err != gocql.ErrNotFound { |
||||
|
glog.V(0).Infof("Failed to delete file %s: %v", fullFileName, err) |
||||
|
} |
||||
|
return "", err |
||||
|
} |
||||
|
return "", nil |
||||
|
} |
||||
|
|
||||
|
func (c *CassandraStore) Close() { |
||||
|
if c.session != nil { |
||||
|
c.session.Close() |
||||
|
} |
||||
|
} |
@ -0,0 +1,22 @@ |
|||||
|
/* |
||||
|
|
||||
|
Here is the CQL to create the table for CassandraStore. |
||||
|
|
||||
|
Optionally you can adjust the keyspace name and replication settings. |
||||
|
|
||||
|
For production server, very likely you want to set replication_factor to 3 |
||||
|
|
||||
|
*/ |
||||
|
|
||||
|
create keyspace seaweed WITH replication = { |
||||
|
'class':'SimpleStrategy', |
||||
|
'replication_factor':1 |
||||
|
}; |
||||
|
|
||||
|
use seaweed; |
||||
|
|
||||
|
CREATE TABLE seaweed_files ( |
||||
|
path varchar, |
||||
|
fids list<varchar>, |
||||
|
PRIMARY KEY (path) |
||||
|
); |
@ -1,20 +0,0 @@ |
|||||
package filer |
|
||||
|
|
||||
import () |
|
||||
|
|
||||
type DirectoryId int32 |
|
||||
|
|
||||
type DirectoryEntry struct { |
|
||||
Name string //dir name without path
|
|
||||
Id DirectoryId |
|
||||
} |
|
||||
|
|
||||
type DirectoryManager interface { |
|
||||
FindDirectory(dirPath string) (DirectoryId, error) |
|
||||
ListDirectories(dirPath string) (dirs []DirectoryEntry, err error) |
|
||||
MakeDirectory(currentDirPath string, dirName string) (DirectoryId, error) |
|
||||
MoveUnderDirectory(oldDirPath string, newParentDirPath string) error |
|
||||
DeleteDirectory(dirPath string) error |
|
||||
//functions used by FUSE
|
|
||||
FindDirectoryById(DirectoryId, error) |
|
||||
} |
|
@ -0,0 +1,15 @@ |
|||||
|
package embedded_filer |
||||
|
|
||||
|
import ( |
||||
|
"github.com/chrislusf/weed-fs/go/filer" |
||||
|
) |
||||
|
|
||||
|
type DirectoryManager interface { |
||||
|
FindDirectory(dirPath string) (filer.DirectoryId, error) |
||||
|
ListDirectories(dirPath string) (dirs []filer.DirectoryEntry, err error) |
||||
|
MakeDirectory(currentDirPath string, dirName string) (filer.DirectoryId, error) |
||||
|
MoveUnderDirectory(oldDirPath string, newParentDirPath string) error |
||||
|
DeleteDirectory(dirPath string) error |
||||
|
//functions used by FUSE
|
||||
|
FindDirectoryById(filer.DirectoryId, error) |
||||
|
} |
@ -1,4 +1,4 @@ |
|||||
package filer |
|
||||
|
package embedded_filer |
||||
|
|
||||
import ( |
import ( |
||||
"os" |
"os" |
@ -0,0 +1,50 @@ |
|||||
|
package flat_namespace |
||||
|
|
||||
|
import ( |
||||
|
"errors" |
||||
|
|
||||
|
"github.com/chrislusf/weed-fs/go/filer" |
||||
|
) |
||||
|
|
||||
|
type FlatNamesapceFiler struct { |
||||
|
master string |
||||
|
store FlatNamespaceStore |
||||
|
} |
||||
|
|
||||
|
var (
	// NotImplemented is returned by the flat namespace filer for operations
	// it does not support.
	NotImplemented = errors.New("Not Implemented for flat namespace meta data store!")
)
||||
|
|
||||
|
func NewFlatNamesapceFiler(master string, store FlatNamespaceStore) *FlatNamesapceFiler { |
||||
|
return &FlatNamesapceFiler{ |
||||
|
master: master, |
||||
|
store: store, |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
func (filer *FlatNamesapceFiler) CreateFile(fullFileName string, fid string) (err error) { |
||||
|
return filer.store.Put(fullFileName, fid) |
||||
|
} |
||||
|
func (filer *FlatNamesapceFiler) FindFile(fullFileName string) (fid string, err error) { |
||||
|
return filer.store.Get(fullFileName) |
||||
|
} |
||||
|
func (filer *FlatNamesapceFiler) FindDirectory(dirPath string) (dirId filer.DirectoryId, err error) { |
||||
|
return 0, NotImplemented |
||||
|
} |
||||
|
func (filer *FlatNamesapceFiler) ListDirectories(dirPath string) (dirs []filer.DirectoryEntry, err error) { |
||||
|
return nil, NotImplemented |
||||
|
} |
||||
|
func (filer *FlatNamesapceFiler) ListFiles(dirPath string, lastFileName string, limit int) (files []filer.FileEntry, err error) { |
||||
|
return nil, NotImplemented |
||||
|
} |
||||
|
func (filer *FlatNamesapceFiler) DeleteDirectory(dirPath string, recursive bool) (err error) { |
||||
|
return NotImplemented |
||||
|
} |
||||
|
|
||||
|
func (filer *FlatNamesapceFiler) DeleteFile(fullFileName string) (fid string, err error) { |
||||
|
return filer.store.Delete(fullFileName) |
||||
|
} |
||||
|
|
||||
|
func (filer *FlatNamesapceFiler) Move(fromPath string, toPath string) error { |
||||
|
return NotImplemented |
||||
|
} |
@ -0,0 +1,9 @@ |
|||||
|
package flat_namespace |
||||
|
|
||||
|
import () |
||||
|
|
||||
|
// FlatNamespaceStore is the key/value contract a backend must satisfy to be
// used as flat namespace storage: keys are full file names, values are file
// ids.
type FlatNamespaceStore interface {
	// Put associates fid with fullFileName.
	Put(fullFileName, fid string) (err error)
	// Get looks up the fid stored under fullFileName.
	Get(fullFileName string) (fid string, err error)
	// Delete removes the entry for fullFileName; fid is the value the
	// implementation chooses to report for the removed entry.
	Delete(fullFileName string) (fid string, err error)
}
@ -0,0 +1,48 @@ |
|||||
|
package redis_store |
||||
|
|
||||
|
import ( |
||||
|
redis "gopkg.in/redis.v2" |
||||
|
) |
||||
|
|
||||
|
type RedisStore struct { |
||||
|
Client *redis.Client |
||||
|
} |
||||
|
|
||||
|
func NewRedisStore(hostPort string, database int) *RedisStore { |
||||
|
client := redis.NewTCPClient(&redis.Options{ |
||||
|
Addr: hostPort, |
||||
|
Password: "", // no password set
|
||||
|
DB: int64(database), |
||||
|
}) |
||||
|
return &RedisStore{Client: client} |
||||
|
} |
||||
|
|
||||
|
func (s *RedisStore) Get(fullFileName string) (fid string, err error) { |
||||
|
fid, err = s.Client.Get(fullFileName).Result() |
||||
|
if err == redis.Nil { |
||||
|
err = nil |
||||
|
} |
||||
|
return fid, err |
||||
|
} |
||||
|
func (s *RedisStore) Put(fullFileName string, fid string) (err error) { |
||||
|
_, err = s.Client.Set(fullFileName, fid).Result() |
||||
|
if err == redis.Nil { |
||||
|
err = nil |
||||
|
} |
||||
|
return err |
||||
|
} |
||||
|
|
||||
|
// Currently the fid is not returned
|
||||
|
func (s *RedisStore) Delete(fullFileName string) (fid string, err error) { |
||||
|
_, err = s.Client.Del(fullFileName).Result() |
||||
|
if err == redis.Nil { |
||||
|
err = nil |
||||
|
} |
||||
|
return "", err |
||||
|
} |
||||
|
|
||||
|
func (c *RedisStore) Close() { |
||||
|
if c.Client != nil { |
||||
|
c.Client.Close() |
||||
|
} |
||||
|
} |
@ -1,7 +1,5 @@ |
|||||
package glog |
package glog |
||||
|
|
||||
import () |
|
||||
|
|
||||
/* |
/* |
||||
Copying the original glog because it is missing several convenient methods. |
Copying the original glog because it is missing several convenient methods. |
||||
1. remove nano time in log format |
1. remove nano time in log format |
||||
|
@ -1,7 +1,5 @@ |
|||||
package operation |
package operation |
||||
|
|
||||
import () |
|
||||
|
|
||||
type JoinResult struct { |
type JoinResult struct { |
||||
VolumeSizeLimit uint64 `json:"VolumeSizeLimit,omitempty"` |
VolumeSizeLimit uint64 `json:"VolumeSizeLimit,omitempty"` |
||||
Error string `json:"error,omitempty"` |
Error string `json:"error,omitempty"` |
||||
|
@ -0,0 +1,146 @@ |
|||||
|
package security |
||||
|
|
||||
|
import ( |
||||
|
"errors" |
||||
|
"fmt" |
||||
|
"net" |
||||
|
"net/http" |
||||
|
"strings" |
||||
|
"time" |
||||
|
|
||||
|
"github.com/chrislusf/weed-fs/go/glog" |
||||
|
"github.com/dgrijalva/jwt-go" |
||||
|
) |
||||
|
|
||||
|
// ErrUnauthorized is returned when a request carries no token, or a token
// that cannot be verified.
var ErrUnauthorized = errors.New("unauthorized token")
||||
|
|
||||
|
/*
Guard ensures data access security.

There are 2 ways to check access:
 1. White list. The request's remote IP address is compared against it.
 2. JSON Web Token (JWT) generated from secretKey.
    The jwt can come from:
    1. url parameter jwt=...
    2. request header "Authorization"
    3. cookie with the name "jwt"

The white list is checked first because it is easy.
Then the JWT is checked.

The Guard will also check these claims if provided:
 1. "exp" Expiration Time
 2. "nbf" Not Before

Generating JWT:
 1. use HS256 to sign
 2. optionally set "exp", "nbf" fields, in Unix time,
    the number of seconds elapsed since January 1, 1970 UTC.

Referenced:
https://github.com/pkieltyka/jwtauth/blob/master/jwtauth.go
*/
type Guard struct {
	// whiteList holds the host strings that are allowed without a token.
	whiteList []string
	// secretKey is the shared secret used to sign and verify tokens.
	secretKey string

	// isActive is true when a white list or a secret key is configured.
	isActive bool
}
||||
|
|
||||
|
func NewGuard(whiteList []string, secretKey string) *Guard { |
||||
|
g := &Guard{whiteList: whiteList, secretKey: secretKey} |
||||
|
g.isActive = len(g.whiteList) != 0 || len(g.secretKey) != 0 |
||||
|
return g |
||||
|
} |
||||
|
|
||||
|
func (g *Guard) Secure(f func(w http.ResponseWriter, r *http.Request)) func(w http.ResponseWriter, r *http.Request) { |
||||
|
if !g.isActive { |
||||
|
//if no security needed, just skip all checkings
|
||||
|
return f |
||||
|
} |
||||
|
return func(w http.ResponseWriter, r *http.Request) { |
||||
|
if err := g.doCheck(w, r); err != nil { |
||||
|
w.WriteHeader(http.StatusUnauthorized) |
||||
|
return |
||||
|
} |
||||
|
f(w, r) |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
func (g *Guard) NewToken() (tokenString string, err error) { |
||||
|
m := make(map[string]interface{}) |
||||
|
m["exp"] = time.Now().Unix() + 10 |
||||
|
return g.Encode(m) |
||||
|
} |
||||
|
|
||||
|
func (g *Guard) Encode(claims map[string]interface{}) (tokenString string, err error) { |
||||
|
if !g.isActive { |
||||
|
return "", nil |
||||
|
} |
||||
|
|
||||
|
t := jwt.New(jwt.GetSigningMethod("HS256")) |
||||
|
t.Claims = claims |
||||
|
return t.SignedString(g.secretKey) |
||||
|
} |
||||
|
|
||||
|
func (g *Guard) Decode(tokenString string) (token *jwt.Token, err error) { |
||||
|
return jwt.Parse(tokenString, func(token *jwt.Token) (interface{}, error) { |
||||
|
return g.secretKey, nil |
||||
|
}) |
||||
|
} |
||||
|
|
||||
|
func (g *Guard) doCheck(w http.ResponseWriter, r *http.Request) error { |
||||
|
if len(g.whiteList) != 0 { |
||||
|
host, _, err := net.SplitHostPort(r.RemoteAddr) |
||||
|
if err == nil { |
||||
|
for _, ip := range g.whiteList { |
||||
|
if ip == host { |
||||
|
return nil |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
if len(g.secretKey) != 0 { |
||||
|
|
||||
|
// Get token from query params
|
||||
|
tokenStr := r.URL.Query().Get("jwt") |
||||
|
|
||||
|
// Get token from authorization header
|
||||
|
if tokenStr == "" { |
||||
|
bearer := r.Header.Get("Authorization") |
||||
|
if len(bearer) > 7 && strings.ToUpper(bearer[0:6]) == "BEARER" { |
||||
|
tokenStr = bearer[7:] |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
// Get token from cookie
|
||||
|
if tokenStr == "" { |
||||
|
cookie, err := r.Cookie("jwt") |
||||
|
if err == nil { |
||||
|
tokenStr = cookie.Value |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
if tokenStr == "" { |
||||
|
return ErrUnauthorized |
||||
|
} |
||||
|
|
||||
|
// Verify the token
|
||||
|
token, err := g.Decode(tokenStr) |
||||
|
if err != nil { |
||||
|
glog.V(1).Infof("Token verification error from %s: %v", r.RemoteAddr, err) |
||||
|
return ErrUnauthorized |
||||
|
} |
||||
|
if !token.Valid { |
||||
|
glog.V(1).Infof("Token invliad from %s: %v", r.RemoteAddr, tokenStr) |
||||
|
return ErrUnauthorized |
||||
|
} |
||||
|
|
||||
|
} |
||||
|
|
||||
|
glog.V(1).Infof("No permission from %s", r.RemoteAddr) |
||||
|
return fmt.Errorf("No write permisson from %s", r.RemoteAddr) |
||||
|
} |
@ -0,0 +1,39 @@ |
|||||
|
package util |
||||
|
|
||||
|
import ( |
||||
|
"sync" |
||||
|
) |
||||
|
|
||||
|
// ConcurrentReadMap is a read-mostly map whose entries are created lazily and
// safely from multiple goroutines through Get.
//
// Items is exported, but accessing it directly bypasses the locking done by
// Get.
type ConcurrentReadMap struct {
	rmutex sync.RWMutex // guards Items: RLock for lookups, Lock for inserts
	mutex  sync.Mutex   // serializes entry creation so newEntry runs once per key
	Items  map[string]interface{}
}

// NewConcurrentReadMap returns an empty, ready-to-use map.
func NewConcurrentReadMap() *ConcurrentReadMap {
	return &ConcurrentReadMap{Items: make(map[string]interface{})}
}

// initMapEntry returns the value stored under key, creating it with newEntry
// if it is still missing once the creation lock is held.
func (m *ConcurrentReadMap) initMapEntry(key string, newEntry func() interface{}) (value interface{}) {
	m.mutex.Lock()
	defer m.mutex.Unlock()

	// Reading without rmutex is safe here: every write to Items happens
	// while m.mutex is held, so only concurrent readers can be active.
	if existing, ok := m.Items[key]; ok {
		return existing
	}

	// newEntry runs outside the write lock so it may itself look up keys
	// that already exist.
	value = newEntry()

	// The insert must exclude the readers in Get, which only hold rmutex.
	m.rmutex.Lock()
	m.Items[key] = value
	m.rmutex.Unlock()
	return value
}

// Get returns the value stored under key. If the key is absent, newEntry is
// called to build the value, which is stored and returned.
func (m *ConcurrentReadMap) Get(key string, newEntry func() interface{}) interface{} {
	m.rmutex.RLock()
	value, ok := m.Items[key]
	m.rmutex.RUnlock()
	if ok {
		return value
	}
	return m.initMapEntry(key, newEntry)
}
@ -1,7 +1,5 @@ |
|||||
package util |
package util |
||||
|
|
||||
import () |
|
||||
|
|
||||
const ( |
const ( |
||||
VERSION = "0.64" |
|
||||
|
VERSION = "0.67" |
||||
) |
) |
Some files were not shown because too many files changed in this diff
Write
Preview
Loading…
Cancel
Save
Reference in new issue