
Merge master into ec-disk-type-support

Merged changes from master including:
- Comments about topology collection strategy for EC evacuation
- Kept multi-disk-type iteration for evacuation
- Function signature now includes both diskType and writer parameters
chrislusf authored 4 days ago (commit f1c1e3fb36)
  1. .github/workflows/container_release_unified.yml (1)
  2. .github/workflows/helm_ci.yml (74)
  3. .github/workflows/s3tests.yml (10)
  4. .github/workflows/sftp-tests.yml (93)
  5. Makefile (4)
  6. README.md (65)
  7. docker/compose/local-s3tests-compose.yml (2)
  8. docker/compose/test-tarantool-filer.yml (2)
  9. docker/compose/test-ydb-filer.yml (2)
  10. k8s/charts/seaweedfs/templates/all-in-one/all-in-one-deployment.yaml (116)
  11. k8s/charts/seaweedfs/templates/all-in-one/all-in-one-pvc.yaml (23)
  12. k8s/charts/seaweedfs/templates/all-in-one/all-in-one-service.yml (16)
  13. k8s/charts/seaweedfs/templates/filer/filer-ingress.yaml (13)
  14. k8s/charts/seaweedfs/templates/filer/filer-statefulset.yaml (7)
  15. k8s/charts/seaweedfs/templates/master/master-statefulset.yaml (4)
  16. k8s/charts/seaweedfs/templates/s3/s3-deployment.yaml (7)
  17. k8s/charts/seaweedfs/templates/s3/s3-ingress.yaml (16)
  18. k8s/charts/seaweedfs/templates/shared/post-install-bucket-hook.yaml (71)
  19. k8s/charts/seaweedfs/templates/volume/volume-statefulset.yaml (4)
  20. k8s/charts/seaweedfs/values.yaml (102)
  21. test/foundationdb/docker-compose.arm64.yml (2)
  22. test/foundationdb/docker-compose.yml (2)
  23. test/postgres/docker-compose.yml (1)
  24. test/s3/cors/Makefile (3)
  25. test/s3/retention/Makefile (3)
  26. test/s3/retention/s3_object_lock_headers_test.go (2)
  27. test/s3/retention/s3_retention_test.go (13)
  28. test/s3/tagging/Makefile (3)
  29. test/s3/versioning/Makefile (7)
  30. test/sftp/Makefile (41)
  31. test/sftp/README.md (92)
  32. test/sftp/basic_test.go (652)
  33. test/sftp/framework.go (423)
  34. test/sftp/go.mod (17)
  35. test/sftp/go.sum (64)
  36. test/sftp/testdata/userstore.json (37)
  37. weed/admin/dash/admin_server.go (22)
  38. weed/admin/handlers/file_browser_handlers.go (240)
  39. weed/command/filer.go (4)
  40. weed/command/s3.go (6)
  41. weed/command/server.go (6)
  42. weed/command/volume.go (10)
  43. weed/credential/filer_etc/filer_etc_store.go (4)
  44. weed/filer/empty_folder_cleanup/cleanup_queue.go (207)
  45. weed/filer/empty_folder_cleanup/cleanup_queue_test.go (371)
  46. weed/filer/empty_folder_cleanup/empty_folder_cleaner.go (436)
  47. weed/filer/empty_folder_cleanup/empty_folder_cleaner_test.go (569)
  48. weed/filer/filer.go (8)
  49. weed/filer/filer_notify.go (39)
  50. weed/filer/filer_on_meta_event.go (39)
  51. weed/filer/filer_search.go (13)
  52. weed/filer/reader_at.go (132)
  53. weed/filer/reader_cache.go (78)
  54. weed/filer/reader_cache_test.go (505)
  55. weed/operation/upload_content.go (6)
  56. weed/pb/master.proto (2)
  57. weed/pb/master_pb/master.pb.go (26)
  58. weed/pb/server_address.go (12)
  59. weed/s3api/auth_signature_v4.go (1)
  60. weed/s3api/chunked_reader_v4.go (10)
  61. weed/s3api/chunked_reader_v4_test.go (144)
  62. weed/s3api/filer_multipart.go (5)
  63. weed/s3api/s3api_auth.go (16)
  64. weed/s3api/s3api_bucket_config.go (17)
  65. weed/s3api/s3api_bucket_handlers.go (74)
  66. weed/s3api/s3api_object_handlers.go (42)
  67. weed/s3api/s3api_object_handlers_delete.go (58)
  68. weed/s3api/s3api_object_handlers_list.go (8)
  69. weed/s3api/s3api_object_handlers_put.go (30)
  70. weed/s3api/s3api_object_retention.go (18)
  71. weed/s3api/s3api_server.go (1)
  72. weed/server/common.go (11)
  73. weed/server/master_grpc_server.go (4)
  74. weed/server/master_grpc_server_volume.go (2)
  75. weed/server/volume_grpc_copy.go (12)
  76. weed/server/volume_server.go (7)
  77. weed/server/volume_server_handlers_admin.go (25)
  78. weed/server/volume_server_handlers_read.go (2)
  79. weed/sftpd/sftp_file_writer.go (5)
  80. weed/sftpd/sftp_filer.go (82)
  81. weed/sftpd/sftp_server.go (24)
  82. weed/sftpd/sftp_server_test.go (103)
  83. weed/sftpd/sftp_service.go (4)
  84. weed/sftpd/user/filestore.go (5)
  85. weed/shell/command_volume_check_disk.go (55)
  86. weed/shell/command_volume_server_evacuate.go (45)
  87. weed/storage/needle/needle_parse_upload.go (6)
  88. weed/storage/store.go (12)
  89. weed/storage/store_ec_delete.go (17)
  90. weed/storage/store_load_balancing_test.go (2)
  91. weed/topology/data_node.go (1)
  92. weed/topology/rack.go (63)
  93. weed/topology/topology_test.go (119)
  94. weed/util/fullpath.go (13)
  95. weed/util/http/http_global_client_util.go (108)
  96. weed/util/net_timeout.go (105)
  97. weed/util/network.go (11)

.github/workflows/container_release_unified.yml (1)

@@ -223,3 +223,4 @@ jobs:
echo "✓ Successfully copied ${{ matrix.variant }} to Docker Hub"

.github/workflows/helm_ci.yml (74)

@@ -44,6 +44,80 @@ jobs:
- name: Run chart-testing (lint)
run: ct lint --target-branch ${{ github.event.repository.default_branch }} --all --validate-maintainers=false --chart-dirs k8s/charts
- name: Verify template rendering
run: |
set -e
CHART_DIR="k8s/charts/seaweedfs"
echo "=== Testing default configuration ==="
helm template test $CHART_DIR > /tmp/default.yaml
echo "✓ Default configuration renders successfully"
echo "=== Testing with S3 enabled ==="
helm template test $CHART_DIR --set s3.enabled=true > /tmp/s3.yaml
grep -q "kind: Deployment" /tmp/s3.yaml && grep -q "seaweedfs-s3" /tmp/s3.yaml
echo "✓ S3 deployment renders correctly"
echo "=== Testing with all-in-one mode ==="
helm template test $CHART_DIR --set allInOne.enabled=true > /tmp/allinone.yaml
grep -q "seaweedfs-all-in-one" /tmp/allinone.yaml
echo "✓ All-in-one deployment renders correctly"
echo "=== Testing with security enabled ==="
helm template test $CHART_DIR --set global.enableSecurity=true > /tmp/security.yaml
grep -q "security-config" /tmp/security.yaml
echo "✓ Security configuration renders correctly"
echo "=== Testing with monitoring enabled ==="
helm template test $CHART_DIR \
--set global.monitoring.enabled=true \
--set global.monitoring.gatewayHost=prometheus \
--set global.monitoring.gatewayPort=9091 > /tmp/monitoring.yaml
echo "✓ Monitoring configuration renders correctly"
echo "=== Testing with PVC storage ==="
helm template test $CHART_DIR \
--set master.data.type=persistentVolumeClaim \
--set master.data.size=10Gi \
--set master.data.storageClass=standard > /tmp/pvc.yaml
grep -q "PersistentVolumeClaim" /tmp/pvc.yaml
echo "✓ PVC configuration renders correctly"
echo "=== Testing with custom replicas ==="
helm template test $CHART_DIR \
--set master.replicas=3 \
--set filer.replicas=2 \
--set volume.replicas=3 > /tmp/replicas.yaml
echo "✓ Custom replicas configuration renders correctly"
echo "=== Testing filer with S3 gateway ==="
helm template test $CHART_DIR \
--set filer.s3.enabled=true \
--set filer.s3.enableAuth=true > /tmp/filer-s3.yaml
echo "✓ Filer S3 gateway renders correctly"
echo "=== Testing SFTP enabled ==="
helm template test $CHART_DIR --set sftp.enabled=true > /tmp/sftp.yaml
grep -q "seaweedfs-sftp" /tmp/sftp.yaml
echo "✓ SFTP deployment renders correctly"
echo "=== Testing ingress configurations ==="
helm template test $CHART_DIR \
--set master.ingress.enabled=true \
--set filer.ingress.enabled=true \
--set s3.enabled=true \
--set s3.ingress.enabled=true > /tmp/ingress.yaml
grep -q "kind: Ingress" /tmp/ingress.yaml
echo "✓ Ingress configurations render correctly"
echo "=== Testing COSI driver ==="
helm template test $CHART_DIR --set cosi.enabled=true > /tmp/cosi.yaml
grep -q "seaweedfs-cosi" /tmp/cosi.yaml
echo "✓ COSI driver renders correctly"
echo ""
echo "✅ All template rendering tests passed!"
- name: Create kind cluster
uses: helm/kind-action@v1.13.0

.github/workflows/s3tests.yml (10)

@@ -64,7 +64,7 @@ jobs:
-master.raftHashicorp -master.electionTimeout 1s -master.volumeSizeLimitMB=100 \
-volume.max=100 -volume.preStopSeconds=1 \
-master.port=9333 -volume.port=8080 -filer.port=8888 -s3.port=8000 -metricsPort=9324 \
-s3.allowEmptyFolder=false -s3.allowDeleteBucketNotEmpty=true -s3.config="$GITHUB_WORKSPACE/docker/compose/s3.json" -master.peers=none &
-s3.allowDeleteBucketNotEmpty=true -s3.config="$GITHUB_WORKSPACE/docker/compose/s3.json" -master.peers=none &
pid=$!
# Wait for all SeaweedFS components to be ready
@@ -368,7 +368,7 @@ jobs:
-master.raftHashicorp -master.electionTimeout 1s -master.volumeSizeLimitMB=100 \
-volume.max=100 -volume.preStopSeconds=1 \
-master.port=9334 -volume.port=8081 -filer.port=8889 -s3.port=8001 -metricsPort=9325 \
-s3.allowEmptyFolder=false -s3.allowDeleteBucketNotEmpty=true -s3.config="$GITHUB_WORKSPACE/docker/compose/s3.json" -master.peers=none &
-s3.allowDeleteBucketNotEmpty=true -s3.config="$GITHUB_WORKSPACE/docker/compose/s3.json" -master.peers=none &
pid=$!
# Wait for all SeaweedFS components to be ready
@@ -526,7 +526,7 @@ jobs:
-master.raftHashicorp -master.electionTimeout 1s -master.volumeSizeLimitMB=100 \
-volume.max=100 -volume.preStopSeconds=1 \
-master.port=9335 -volume.port=8082 -filer.port=8890 -s3.port=8002 -metricsPort=9326 \
-s3.allowEmptyFolder=false -s3.allowDeleteBucketNotEmpty=true -s3.config="$GITHUB_WORKSPACE/docker/compose/s3.json" -master.peers=none &
-s3.allowDeleteBucketNotEmpty=true -s3.config="$GITHUB_WORKSPACE/docker/compose/s3.json" -master.peers=none &
pid=$!
# Wait for all SeaweedFS components to be ready
@@ -636,7 +636,7 @@ jobs:
-master.raftHashicorp -master.electionTimeout 1s -master.volumeSizeLimitMB=100 \
-volume.max=100 -volume.preStopSeconds=1 \
-master.port=9336 -volume.port=8083 -filer.port=8891 -s3.port=8003 -metricsPort=9327 \
-s3.allowEmptyFolder=false -s3.allowDeleteBucketNotEmpty=true -s3.config="$GITHUB_WORKSPACE/docker/compose/s3.json" -master.peers=none &
-s3.allowDeleteBucketNotEmpty=true -s3.config="$GITHUB_WORKSPACE/docker/compose/s3.json" -master.peers=none &
pid=$!
# Wait for all SeaweedFS components to be ready
@@ -817,7 +817,7 @@ jobs:
-master.raftHashicorp -master.electionTimeout 1s -master.volumeSizeLimitMB=100 \
-volume.max=100 -volume.preStopSeconds=1 \
-master.port=9337 -volume.port=8085 -filer.port=8892 -s3.port=8004 -metricsPort=9328 \
-s3.allowEmptyFolder=false -s3.allowDeleteBucketNotEmpty=true -s3.config="$GITHUB_WORKSPACE/docker/compose/s3.json" \
-s3.allowDeleteBucketNotEmpty=true -s3.config="$GITHUB_WORKSPACE/docker/compose/s3.json" \
-master.peers=none \
> /tmp/seaweedfs-sql-server.log 2>&1 &
pid=$!

.github/workflows/sftp-tests.yml (93)

@@ -0,0 +1,93 @@
name: "SFTP Integration Tests"
on:
push:
branches: [ master, main ]
paths:
- 'weed/sftpd/**'
- 'weed/command/sftp.go'
- 'test/sftp/**'
- '.github/workflows/sftp-tests.yml'
pull_request:
branches: [ master, main ]
paths:
- 'weed/sftpd/**'
- 'weed/command/sftp.go'
- 'test/sftp/**'
- '.github/workflows/sftp-tests.yml'
concurrency:
group: ${{ github.head_ref }}/sftp-tests
cancel-in-progress: true
permissions:
contents: read
env:
GO_VERSION: '1.24'
TEST_TIMEOUT: '15m'
jobs:
sftp-integration:
name: SFTP Integration Testing
runs-on: ubuntu-22.04
timeout-minutes: 20
steps:
- name: Checkout code
uses: actions/checkout@v4
- name: Set up Go ${{ env.GO_VERSION }}
uses: actions/setup-go@v5
with:
go-version: ${{ env.GO_VERSION }}
- name: Install dependencies
run: |
sudo apt-get update
sudo apt-get install -y openssh-client
- name: Build SeaweedFS
run: |
cd weed
go build -o weed .
chmod +x weed
./weed version
- name: Run SFTP Integration Tests
run: |
cd test/sftp
echo "🧪 Running SFTP integration tests..."
echo "============================================"
# Install test dependencies
go mod download
# Run all SFTP tests
go test -v -timeout=${{ env.TEST_TIMEOUT }} ./...
echo "============================================"
echo "✅ SFTP integration tests completed"
- name: Test Summary
if: always()
run: |
echo "## 🔐 SFTP Integration Test Summary" >> $GITHUB_STEP_SUMMARY
echo "" >> $GITHUB_STEP_SUMMARY
echo "### Test Coverage" >> $GITHUB_STEP_SUMMARY
echo "- ✅ **HomeDir Path Translation**: User home directory mapping (fixes #7470)" >> $GITHUB_STEP_SUMMARY
echo "- ✅ **File Operations**: Upload, download, delete" >> $GITHUB_STEP_SUMMARY
echo "- ✅ **Directory Operations**: Create, list, remove" >> $GITHUB_STEP_SUMMARY
echo "- ✅ **Large File Handling**: 1MB+ file support" >> $GITHUB_STEP_SUMMARY
echo "- ✅ **Path Edge Cases**: Unicode, trailing slashes, .. paths" >> $GITHUB_STEP_SUMMARY
echo "- ✅ **Admin Access**: Root user verification" >> $GITHUB_STEP_SUMMARY
echo "" >> $GITHUB_STEP_SUMMARY
echo "### Test Configuration" >> $GITHUB_STEP_SUMMARY
echo "| User | HomeDir | Permissions |" >> $GITHUB_STEP_SUMMARY
echo "|------|---------|-------------|" >> $GITHUB_STEP_SUMMARY
echo "| admin | / | Full access |" >> $GITHUB_STEP_SUMMARY
echo "| testuser | /sftp/testuser | Home directory only |" >> $GITHUB_STEP_SUMMARY
echo "| readonly | /public | Read-only |" >> $GITHUB_STEP_SUMMARY

Makefile (4)

@@ -18,12 +18,12 @@ full_install: admin-generate
cd weed; go install -tags "elastic gocdk sqlite ydb tarantool tikv rclone"
server: install
weed -v 0 server -s3 -filer -filer.maxMB=64 -volume.max=0 -master.volumeSizeLimitMB=100 -volume.preStopSeconds=1 -s3.port=8000 -s3.allowEmptyFolder=false -s3.allowDeleteBucketNotEmpty=true -s3.config=./docker/compose/s3.json -metricsPort=9324
weed -v 0 server -s3 -filer -filer.maxMB=64 -volume.max=0 -master.volumeSizeLimitMB=100 -volume.preStopSeconds=1 -s3.port=8000 -s3.allowDeleteBucketNotEmpty=true -s3.config=./docker/compose/s3.json -metricsPort=9324
benchmark: install warp_install
pkill weed || true
pkill warp || true
weed server -debug=$(debug) -s3 -filer -volume.max=0 -master.volumeSizeLimitMB=100 -volume.preStopSeconds=1 -s3.port=8000 -s3.allowEmptyFolder=false -s3.allowDeleteBucketNotEmpty=false -s3.config=./docker/compose/s3.json &
weed server -debug=$(debug) -s3 -filer -volume.max=0 -master.volumeSizeLimitMB=100 -volume.preStopSeconds=1 -s3.port=8000 -s3.allowDeleteBucketNotEmpty=false -s3.config=./docker/compose/s3.json &
warp client &
while ! nc -z localhost 8000 ; do sleep 1 ; done
warp mixed --host=127.0.0.1:8000 --access-key=some_access_key1 --secret-key=some_secret_key1 --autoterm
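Note: every launch command touched by this merge drops `-s3.allowEmptyFolder`, and the file list adds a new `weed/filer/empty_folder_cleanup` package, so empty-folder handling appears to move from an opt-in S3 flag to an automatic filer-side cleaner.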

README.md (65)

@@ -592,65 +592,22 @@ Percentage of the requests served within a certain time (ms)
```
make benchmark
warp: Benchmark data written to "warp-mixed-2023-10-16[102354]-l70a.csv.zst"
Mixed operations.
Operation: DELETE, 10%, Concurrency: 20, Ran 4m59s.
* Throughput: 6.19 obj/s
warp: Benchmark data written to "warp-mixed-2025-12-05[194844]-kBpU.csv.zst"
Operation: GET, 45%, Concurrency: 20, Ran 5m0s.
* Throughput: 279.85 MiB/s, 27.99 obj/s
Mixed operations.
Operation: DELETE, 10%, Concurrency: 20, Ran 42s.
* Throughput: 55.13 obj/s
Operation: PUT, 15%, Concurrency: 20, Ran 5m0s.
* Throughput: 89.86 MiB/s, 8.99 obj/s
Operation: GET, 45%, Concurrency: 20, Ran 42s.
* Throughput: 2477.45 MiB/s, 247.75 obj/s
Operation: STAT, 30%, Concurrency: 20, Ran 5m0s.
* Throughput: 18.63 obj/s
Operation: PUT, 15%, Concurrency: 20, Ran 42s.
* Throughput: 825.85 MiB/s, 82.59 obj/s
Cluster Total: 369.74 MiB/s, 61.79 obj/s, 0 errors over 5m0s.
```
Operation: STAT, 30%, Concurrency: 20, Ran 42s.
* Throughput: 165.27 obj/s
To see segmented request statistics, use the --analyze.v parameter.
```
warp analyze --analyze.v warp-mixed-2023-10-16[102354]-l70a.csv.zst
18642 operations loaded... Done!
Mixed operations.
----------------------------------------
Operation: DELETE - total: 1854, 10.0%, Concurrency: 20, Ran 5m0s, starting 2023-10-16 10:23:57.115 +0500 +05
* Throughput: 6.19 obj/s
Requests considered: 1855:
* Avg: 104ms, 50%: 30ms, 90%: 207ms, 99%: 1.355s, Fastest: 1ms, Slowest: 4.613s, StdDev: 320ms
----------------------------------------
Operation: GET - total: 8388, 45.3%, Size: 10485760 bytes. Concurrency: 20, Ran 5m0s, starting 2023-10-16 10:23:57.12 +0500 +05
* Throughput: 279.77 MiB/s, 27.98 obj/s
Requests considered: 8389:
* Avg: 221ms, 50%: 106ms, 90%: 492ms, 99%: 1.739s, Fastest: 8ms, Slowest: 8.633s, StdDev: 383ms
* TTFB: Avg: 81ms, Best: 2ms, 25th: 24ms, Median: 39ms, 75th: 65ms, 90th: 171ms, 99th: 669ms, Worst: 4.783s StdDev: 163ms
* First Access: Avg: 240ms, 50%: 105ms, 90%: 511ms, 99%: 2.08s, Fastest: 12ms, Slowest: 8.633s, StdDev: 480ms
* First Access TTFB: Avg: 88ms, Best: 2ms, 25th: 24ms, Median: 38ms, 75th: 64ms, 90th: 179ms, 99th: 919ms, Worst: 4.783s StdDev: 199ms
* Last Access: Avg: 219ms, 50%: 106ms, 90%: 463ms, 99%: 1.782s, Fastest: 9ms, Slowest: 8.633s, StdDev: 416ms
* Last Access TTFB: Avg: 81ms, Best: 2ms, 25th: 24ms, Median: 39ms, 75th: 65ms, 90th: 161ms, 99th: 657ms, Worst: 4.783s StdDev: 176ms
----------------------------------------
Operation: PUT - total: 2688, 14.5%, Size: 10485760 bytes. Concurrency: 20, Ran 5m0s, starting 2023-10-16 10:23:57.115 +0500 +05
* Throughput: 89.83 MiB/s, 8.98 obj/s
Requests considered: 2689:
* Avg: 1.165s, 50%: 878ms, 90%: 2.015s, 99%: 5.74s, Fastest: 99ms, Slowest: 8.264s, StdDev: 968ms
----------------------------------------
Operation: STAT - total: 5586, 30.2%, Concurrency: 20, Ran 5m0s, starting 2023-10-16 10:23:57.113 +0500 +05
* Throughput: 18.63 obj/s
Requests considered: 5587:
* Avg: 15ms, 50%: 11ms, 90%: 34ms, 99%: 80ms, Fastest: 0s, Slowest: 245ms, StdDev: 17ms
* First Access: Avg: 14ms, 50%: 10ms, 90%: 33ms, 99%: 69ms, Fastest: 0s, Slowest: 203ms, StdDev: 16ms
* Last Access: Avg: 15ms, 50%: 11ms, 90%: 34ms, 99%: 74ms, Fastest: 0s, Slowest: 203ms, StdDev: 17ms
Cluster Total: 369.64 MiB/s, 61.77 obj/s, 0 errors over 5m0s.
Total Errors:0.
Cluster Total: 3302.88 MiB/s, 550.51 obj/s over 43s.
```
[Back to TOC](#table-of-contents)

docker/compose/local-s3tests-compose.yml (2)

@@ -24,7 +24,7 @@ services:
- 8888:8888
- 18888:18888
- 8000:8000
command: 'filer -master="master:9333" -s3 -s3.config=/etc/seaweedfs/s3.json -s3.port=8000 -s3.allowEmptyFolder=false -s3.allowDeleteBucketNotEmpty=false'
command: 'filer -master="master:9333" -s3 -s3.config=/etc/seaweedfs/s3.json -s3.port=8000 -s3.allowDeleteBucketNotEmpty=false'
volumes:
- ./s3.json:/etc/seaweedfs/s3.json
depends_on:

docker/compose/test-tarantool-filer.yml (2)

@@ -15,7 +15,7 @@ services:
s3:
image: chrislusf/seaweedfs:local
command: "server -ip=127.0.0.1 -filer -master.volumeSizeLimitMB=16 -volume.max=0 -volume -volume.preStopSeconds=1 -s3 -s3.config=/etc/seaweedfs/s3.json -s3.port=8000 -s3.allowEmptyFolder=false -s3.allowDeleteBucketNotEmpty=false"
command: "server -ip=127.0.0.1 -filer -master.volumeSizeLimitMB=16 -volume.max=0 -volume -volume.preStopSeconds=1 -s3 -s3.config=/etc/seaweedfs/s3.json -s3.port=8000 -s3.allowDeleteBucketNotEmpty=false"
volumes:
- ./s3.json:/etc/seaweedfs/s3.json
environment:

docker/compose/test-ydb-filer.yml (2)

@@ -20,7 +20,7 @@ services:
- 8888:8888
- 8000:8000
- 18888:18888
command: "server -ip=s3 -filer -master.volumeSizeLimitMB=16 -volume.max=0 -volume -volume.preStopSeconds=1 -s3 -s3.config=/etc/seaweedfs/s3.json -s3.port=8000 -s3.allowEmptyFolder=false -s3.allowDeleteBucketNotEmpty=false"
command: "server -ip=s3 -filer -master.volumeSizeLimitMB=16 -volume.max=0 -volume -volume.preStopSeconds=1 -s3 -s3.config=/etc/seaweedfs/s3.json -s3.port=8000 -s3.allowDeleteBucketNotEmpty=false"
volumes:
- ./s3.json:/etc/seaweedfs/s3.json
environment:

k8s/charts/seaweedfs/templates/all-in-one/all-in-one-deployment.yaml (116)

@@ -15,9 +15,9 @@ metadata:
{{- toYaml .Values.allInOne.annotations | nindent 4 }}
{{- end }}
spec:
replicas: 1
replicas: {{ .Values.allInOne.replicas | default 1 }}
strategy:
type: Recreate
type: {{ .Values.allInOne.updateStrategy.type | default "Recreate" }}
selector:
matchLabels:
app.kubernetes.io/name: {{ template "seaweedfs.name" . }}
@@ -130,12 +130,23 @@ spec:
value: {{ include "seaweedfs.cluster.masterAddress" . | quote }}
- name: {{ $clusterFilerKey }}
value: {{ include "seaweedfs.cluster.filerAddress" . | quote }}
{{- if .Values.allInOne.secretExtraEnvironmentVars }}
{{- range $key, $value := .Values.allInOne.secretExtraEnvironmentVars }}
- name: {{ $key }}
valueFrom:
{{ toYaml $value | nindent 16 }}
{{- end }}
{{- end }}
command:
- "/bin/sh"
- "-ec"
- |
/usr/bin/weed \
{{- if .Values.allInOne.loggingOverrideLevel }}
-v={{ .Values.allInOne.loggingOverrideLevel }} \
{{- else }}
-v={{ .Values.global.loggingLevel }} \
{{- end }}
server \
-dir=/data \
-master \
@@ -191,6 +202,9 @@ spec:
{{- else if .Values.master.metricsPort }}
-metricsPort={{ .Values.master.metricsPort }} \
{{- end }}
{{- if .Values.allInOne.metricsIp }}
-metricsIp={{ .Values.allInOne.metricsIp }} \
{{- end }}
-filer \
-filer.port={{ .Values.filer.port }} \
{{- if .Values.filer.disableDirListing }}
@@ -219,61 +233,75 @@ spec:
{{- end }}
{{- if .Values.allInOne.s3.enabled }}
-s3 \
-s3.port={{ .Values.s3.port }} \
{{- if .Values.s3.domainName }}
-s3.domainName={{ .Values.s3.domainName }} \
-s3.port={{ .Values.allInOne.s3.port | default .Values.s3.port }} \
{{- $domainName := .Values.allInOne.s3.domainName | default .Values.s3.domainName }}
{{- if $domainName }}
-s3.domainName={{ $domainName }} \
{{- end }}
{{- if .Values.global.enableSecurity }}
{{- if .Values.s3.httpsPort }}
-s3.port.https={{ .Values.s3.httpsPort }} \
{{- $httpsPort := .Values.allInOne.s3.httpsPort | default .Values.s3.httpsPort }}
{{- if $httpsPort }}
-s3.port.https={{ $httpsPort }} \
{{- end }}
-s3.cert.file=/usr/local/share/ca-certificates/client/tls.crt \
-s3.key.file=/usr/local/share/ca-certificates/client/tls.key \
{{- end }}
{{- if eq (typeOf .Values.s3.allowEmptyFolder) "bool" }}
-s3.allowEmptyFolder={{ .Values.s3.allowEmptyFolder }} \
{{- end }}
{{- if .Values.s3.enableAuth }}
{{- if or .Values.allInOne.s3.enableAuth .Values.s3.enableAuth .Values.filer.s3.enableAuth }}
-s3.config=/etc/sw/s3/seaweedfs_s3_config \
{{- end }}
{{- if .Values.s3.auditLogConfig }}
{{- $auditLogConfig := .Values.allInOne.s3.auditLogConfig | default .Values.s3.auditLogConfig }}
{{- if $auditLogConfig }}
-s3.auditLogConfig=/etc/sw/s3/s3_auditLogConfig.json \
{{- end }}
{{- end }}
{{- if .Values.allInOne.sftp.enabled }}
-sftp \
-sftp.port={{ .Values.sftp.port }} \
{{- if .Values.sftp.sshPrivateKey }}
-sftp.sshPrivateKey={{ .Values.sftp.sshPrivateKey }} \
-sftp.port={{ .Values.allInOne.sftp.port | default .Values.sftp.port }} \
{{- $sshPrivateKey := .Values.allInOne.sftp.sshPrivateKey | default .Values.sftp.sshPrivateKey }}
{{- if $sshPrivateKey }}
-sftp.sshPrivateKey={{ $sshPrivateKey }} \
{{- end }}
{{- if .Values.sftp.hostKeysFolder }}
-sftp.hostKeysFolder={{ .Values.sftp.hostKeysFolder }} \
{{- $hostKeysFolder := .Values.allInOne.sftp.hostKeysFolder | default .Values.sftp.hostKeysFolder }}
{{- if $hostKeysFolder }}
-sftp.hostKeysFolder={{ $hostKeysFolder }} \
{{- end }}
{{- if .Values.sftp.authMethods }}
-sftp.authMethods={{ .Values.sftp.authMethods }} \
{{- $authMethods := .Values.allInOne.sftp.authMethods | default .Values.sftp.authMethods }}
{{- if $authMethods }}
-sftp.authMethods={{ $authMethods }} \
{{- end }}
{{- if .Values.sftp.maxAuthTries }}
-sftp.maxAuthTries={{ .Values.sftp.maxAuthTries }} \
{{- $maxAuthTries := .Values.allInOne.sftp.maxAuthTries | default .Values.sftp.maxAuthTries }}
{{- if $maxAuthTries }}
-sftp.maxAuthTries={{ $maxAuthTries }} \
{{- end }}
{{- if .Values.sftp.bannerMessage }}
-sftp.bannerMessage="{{ .Values.sftp.bannerMessage }}" \
{{- $bannerMessage := .Values.allInOne.sftp.bannerMessage | default .Values.sftp.bannerMessage }}
{{- if $bannerMessage }}
-sftp.bannerMessage="{{ $bannerMessage }}" \
{{- end }}
{{- if .Values.sftp.loginGraceTime }}
-sftp.loginGraceTime={{ .Values.sftp.loginGraceTime }} \
{{- $loginGraceTime := .Values.allInOne.sftp.loginGraceTime | default .Values.sftp.loginGraceTime }}
{{- if $loginGraceTime }}
-sftp.loginGraceTime={{ $loginGraceTime }} \
{{- end }}
{{- if .Values.sftp.clientAliveInterval }}
-sftp.clientAliveInterval={{ .Values.sftp.clientAliveInterval }} \
{{- $clientAliveInterval := .Values.allInOne.sftp.clientAliveInterval | default .Values.sftp.clientAliveInterval }}
{{- if $clientAliveInterval }}
-sftp.clientAliveInterval={{ $clientAliveInterval }} \
{{- end }}
{{- if .Values.sftp.clientAliveCountMax }}
-sftp.clientAliveCountMax={{ .Values.sftp.clientAliveCountMax }} \
{{- $clientAliveCountMax := .Values.allInOne.sftp.clientAliveCountMax | default .Values.sftp.clientAliveCountMax }}
{{- if $clientAliveCountMax }}
-sftp.clientAliveCountMax={{ $clientAliveCountMax }} \
{{- end }}
{{- if or .Values.allInOne.sftp.enableAuth .Values.sftp.enableAuth }}
-sftp.userStoreFile=/etc/sw/sftp/seaweedfs_sftp_config \
{{- end }}
{{- end }}
{{- $extraArgsCount := len .Values.allInOne.extraArgs }}
{{- range $i, $arg := .Values.allInOne.extraArgs }}
{{ $arg | quote }}{{ if ne (add1 $i) $extraArgsCount }} \{{ end }}
{{- end }}
volumeMounts:
- name: data
mountPath: /data
{{- if and .Values.allInOne.s3.enabled (or .Values.s3.enableAuth .Values.filer.s3.enableAuth) }}
{{- if and .Values.allInOne.s3.enabled (or .Values.allInOne.s3.enableAuth .Values.s3.enableAuth .Values.filer.s3.enableAuth) }}
- name: config-s3-users
mountPath: /etc/sw/s3
readOnly: true
@@ -282,10 +310,12 @@ spec:
- name: config-ssh
mountPath: /etc/sw/ssh
readOnly: true
{{- if or .Values.allInOne.sftp.enableAuth .Values.sftp.enableAuth }}
- mountPath: /etc/sw/sftp
name: config-users
readOnly: true
{{- end }}
{{- end }}
{{- if .Values.filer.notificationConfig }}
- name: notification-config
mountPath: /etc/seaweedfs/notification.toml
@@ -332,15 +362,16 @@ spec:
- containerPort: {{ .Values.filer.grpcPort }}
name: swfs-fil-grpc
{{- if .Values.allInOne.s3.enabled }}
- containerPort: {{ .Values.s3.port }}
- containerPort: {{ .Values.allInOne.s3.port | default .Values.s3.port }}
name: swfs-s3
{{- if .Values.s3.httpsPort }}
- containerPort: {{ .Values.s3.httpsPort }}
{{- $httpsPort := .Values.allInOne.s3.httpsPort | default .Values.s3.httpsPort }}
{{- if $httpsPort }}
- containerPort: {{ $httpsPort }}
name: swfs-s3-tls
{{- end }}
{{- end }}
{{- if .Values.allInOne.sftp.enabled }}
- containerPort: {{ .Values.sftp.port }}
- containerPort: {{ .Values.allInOne.sftp.port | default .Values.sftp.port }}
name: swfs-sftp
{{- end }}
{{- if .Values.allInOne.metricsPort }}
@@ -352,7 +383,7 @@ spec:
httpGet:
path: {{ .Values.allInOne.readinessProbe.httpGet.path }}
port: {{ .Values.master.port }}
scheme: {{ .Values.allInOne.readinessProbe.scheme }}
scheme: {{ .Values.allInOne.readinessProbe.httpGet.scheme }}
initialDelaySeconds: {{ .Values.allInOne.readinessProbe.initialDelaySeconds }}
periodSeconds: {{ .Values.allInOne.readinessProbe.periodSeconds }}
successThreshold: {{ .Values.allInOne.readinessProbe.successThreshold }}
@@ -364,7 +395,7 @@ spec:
httpGet:
path: {{ .Values.allInOne.livenessProbe.httpGet.path }}
port: {{ .Values.master.port }}
scheme: {{ .Values.allInOne.livenessProbe.scheme }}
scheme: {{ .Values.allInOne.livenessProbe.httpGet.scheme }}
initialDelaySeconds: {{ .Values.allInOne.livenessProbe.initialDelaySeconds }}
periodSeconds: {{ .Values.allInOne.livenessProbe.periodSeconds }}
successThreshold: {{ .Values.allInOne.livenessProbe.successThreshold }}
@@ -389,26 +420,31 @@ spec:
path: {{ .Values.allInOne.data.hostPathPrefix }}/seaweedfs-all-in-one-data/
type: DirectoryOrCreate
{{- else if eq .Values.allInOne.data.type "persistentVolumeClaim" }}
persistentVolumeClaim:
claimName: {{ template "seaweedfs.name" . }}-all-in-one-data
{{- else if eq .Values.allInOne.data.type "existingClaim" }}
persistentVolumeClaim:
claimName: {{ .Values.allInOne.data.claimName }}
{{- else if eq .Values.allInOne.data.type "emptyDir" }}
emptyDir: {}
{{- end }}
{{- if and .Values.allInOne.s3.enabled (or .Values.s3.enableAuth .Values.filer.s3.enableAuth) }}
{{- if and .Values.allInOne.s3.enabled (or .Values.allInOne.s3.enableAuth .Values.s3.enableAuth .Values.filer.s3.enableAuth) }}
- name: config-s3-users
secret:
defaultMode: 420
secretName: {{ default (printf "%s-s3-secret" (include "seaweedfs.name" .)) (or .Values.s3.existingConfigSecret .Values.filer.s3.existingConfigSecret) }}
secretName: {{ default (printf "%s-s3-secret" (include "seaweedfs.name" .)) (or .Values.allInOne.s3.existingConfigSecret .Values.s3.existingConfigSecret .Values.filer.s3.existingConfigSecret) }}
{{- end }}
{{- if .Values.allInOne.sftp.enabled }}
- name: config-ssh
secret:
defaultMode: 420
secretName: {{ default (printf "%s-sftp-ssh-secret" (include "seaweedfs.name" .)) .Values.sftp.existingSshConfigSecret }}
secretName: {{ default (printf "%s-sftp-ssh-secret" (include "seaweedfs.name" .)) (or .Values.allInOne.sftp.existingSshConfigSecret .Values.sftp.existingSshConfigSecret) }}
{{- if or .Values.allInOne.sftp.enableAuth .Values.sftp.enableAuth }}
- name: config-users
secret:
defaultMode: 420
secretName: {{ default (printf "%s-sftp-secret" (include "seaweedfs.name" .)) .Values.sftp.existingConfigSecret }}
secretName: {{ default (printf "%s-sftp-secret" (include "seaweedfs.name" .)) (or .Values.allInOne.sftp.existingConfigSecret .Values.sftp.existingConfigSecret) }}
{{- end }}
{{- end }}
{{- if .Values.filer.notificationConfig }}
- name: notification-config
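Throughout this block the template resolves each S3/SFTP flag as `{{ .Values.allInOne.s3.X | default .Values.s3.X }}`, so an allInOne-level value wins and a null falls through to the global setting. A minimal, hypothetical values excerpt showing the pattern (port numbers are illustrative):

```yaml
s3:
  port: 8333            # global setting, still used by the standalone gateway
allInOne:
  enabled: true
  s3:
    enabled: true
    port: 8334          # overrides s3.port for the all-in-one pod only
    httpsPort: null     # null inherits s3.httpsPort
  sftp:
    enabled: true
    port: null          # null inherits sftp.port
```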

k8s/charts/seaweedfs/templates/all-in-one/all-in-one-pvc.yaml (23)

@@ -1,21 +1,28 @@
{{- if and .Values.allInOne.enabled (eq .Values.allInOne.data.type "persistentVolumeClaim") }}
{{- if .Values.allInOne.enabled }}
{{- if eq .Values.allInOne.data.type "persistentVolumeClaim" }}
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: {{ .Values.allInOne.data.claimName }}
name: {{ template "seaweedfs.name" . }}-all-in-one-data
namespace: {{ .Release.Namespace }}
labels:
app.kubernetes.io/name: {{ template "seaweedfs.name" . }}
helm.sh/chart: {{ .Chart.Name }}-{{ .Chart.Version | replace "+" "_" }}
app.kubernetes.io/managed-by: {{ .Release.Service }}
app.kubernetes.io/instance: {{ .Release.Name }}
app.kubernetes.io/component: seaweedfs-all-in-one
{{- if .Values.allInOne.annotations }}
{{- with .Values.allInOne.data.annotations }}
annotations:
{{- toYaml .Values.allInOne.annotations | nindent 4 }}
{{- toYaml . | nindent 4 }}
{{- end }}
spec:
accessModes:
- ReadWriteOnce
resources:
requests:
storage: {{ .Values.allInOne.data.size }}
{{- toYaml (.Values.allInOne.data.accessModes | default (list "ReadWriteOnce")) | nindent 4 }}
{{- if .Values.allInOne.data.storageClass }}
storageClassName: {{ .Values.allInOne.data.storageClass }}
{{- end }}
resources:
requests:
storage: {{ .Values.allInOne.data.size | default "10Gi" }}
{{- end }}
{{- end }}

k8s/charts/seaweedfs/templates/all-in-one/all-in-one-service.yml (16)

@@ -15,6 +15,7 @@ metadata:
{{- toYaml .Values.allInOne.service.annotations | nindent 4 }}
{{- end }}
spec:
type: {{ .Values.allInOne.service.type | default "ClusterIP" }}
internalTrafficPolicy: {{ .Values.allInOne.service.internalTrafficPolicy | default "Cluster" }}
ports:
# Master ports
@@ -50,13 +51,14 @@ spec:
# S3 ports (if enabled)
{{- if .Values.allInOne.s3.enabled }}
- name: "swfs-s3"
port: {{ if .Values.allInOne.s3.enabled }}{{ .Values.s3.port }}{{ else }}{{ .Values.filer.s3.port }}{{ end }}
targetPort: {{ if .Values.allInOne.s3.enabled }}{{ .Values.s3.port }}{{ else }}{{ .Values.filer.s3.port }}{{ end }}
port: {{ .Values.allInOne.s3.port | default .Values.s3.port }}
targetPort: {{ .Values.allInOne.s3.port | default .Values.s3.port }}
protocol: TCP
{{- if and .Values.allInOne.s3.enabled .Values.s3.httpsPort }}
{{- $httpsPort := .Values.allInOne.s3.httpsPort | default .Values.s3.httpsPort }}
{{- if $httpsPort }}
- name: "swfs-s3-tls"
port: {{ .Values.s3.httpsPort }}
targetPort: {{ .Values.s3.httpsPort }}
port: {{ $httpsPort }}
targetPort: {{ $httpsPort }}
protocol: TCP
{{- end }}
{{- end }}
@@ -64,8 +66,8 @@ spec:
# SFTP ports (if enabled)
{{- if .Values.allInOne.sftp.enabled }}
- name: "swfs-sftp"
port: {{ .Values.sftp.port }}
targetPort: {{ .Values.sftp.port }}
port: {{ .Values.allInOne.sftp.port | default .Values.sftp.port }}
targetPort: {{ .Values.allInOne.sftp.port | default .Values.sftp.port }}
protocol: TCP
{{- end }}

k8s/charts/seaweedfs/templates/filer/filer-ingress.yaml (13)

@@ -1,5 +1,8 @@
{{- if .Values.filer.enabled }}
{{- if .Values.filer.ingress.enabled }}
{{- /* Filer ingress works for both normal mode (filer.enabled) and all-in-one mode (allInOne.enabled) */}}
{{- $filerEnabled := or .Values.filer.enabled .Values.allInOne.enabled }}
{{- if and $filerEnabled .Values.filer.ingress.enabled }}
{{- /* Determine service name based on deployment mode */}}
{{- $serviceName := ternary (printf "%s-all-in-one" (include "seaweedfs.name" .)) (printf "%s-filer" (include "seaweedfs.name" .)) .Values.allInOne.enabled }}
{{- if semverCompare ">=1.19-0" .Capabilities.KubeVersion.GitVersion }}
apiVersion: networking.k8s.io/v1
{{- else if semverCompare ">=1.14-0" .Capabilities.KubeVersion.GitVersion }}
@@ -33,16 +36,14 @@ spec:
backend:
{{- if semverCompare ">=1.19-0" .Capabilities.KubeVersion.GitVersion }}
service:
name: {{ template "seaweedfs.name" . }}-filer
name: {{ $serviceName }}
port:
number: {{ .Values.filer.port }}
#name:
{{- else }}
serviceName: {{ template "seaweedfs.name" . }}-filer
serviceName: {{ $serviceName }}
servicePort: {{ .Values.filer.port }}
{{- end }}
{{- if .Values.filer.ingress.host }}
host: {{ .Values.filer.ingress.host }}
{{- end }}
{{- end }}
{{- end }}
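Given the `$serviceName` ternary above, the stock filer ingress values should now work under all-in-one mode as well; a sketch, with an assumed hostname:

```yaml
allInOne:
  enabled: true
filer:
  ingress:
    enabled: true
    host: filer.example.com
# The rendered Ingress backend becomes <name>-all-in-one rather than
# <name>-filer, still on filer.port.
```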

k8s/charts/seaweedfs/templates/filer/filer-statefulset.yaml (7)

@@ -213,9 +213,6 @@ spec:
-s3.cert.file=/usr/local/share/ca-certificates/client/tls.crt \
-s3.key.file=/usr/local/share/ca-certificates/client/tls.key \
{{- end }}
{{- if eq (typeOf .Values.filer.s3.allowEmptyFolder) "bool" }}
-s3.allowEmptyFolder={{ .Values.filer.s3.allowEmptyFolder }} \
{{- end }}
{{- if .Values.filer.s3.enableAuth }}
-s3.config=/etc/sw/seaweedfs_s3_config \
{{- end }}
@@ -289,7 +286,7 @@ spec:
httpGet:
path: {{ .Values.filer.readinessProbe.httpGet.path }}
port: {{ .Values.filer.port }}
scheme: {{ .Values.filer.readinessProbe.scheme }}
scheme: {{ .Values.filer.readinessProbe.httpGet.scheme }}
initialDelaySeconds: {{ .Values.filer.readinessProbe.initialDelaySeconds }}
periodSeconds: {{ .Values.filer.readinessProbe.periodSeconds }}
successThreshold: {{ .Values.filer.readinessProbe.successThreshold }}
@@ -301,7 +298,7 @@ spec:
httpGet:
path: {{ .Values.filer.livenessProbe.httpGet.path }}
port: {{ .Values.filer.port }}
scheme: {{ .Values.filer.livenessProbe.scheme }}
scheme: {{ .Values.filer.livenessProbe.httpGet.scheme }}
initialDelaySeconds: {{ .Values.filer.livenessProbe.initialDelaySeconds }}
periodSeconds: {{ .Values.filer.livenessProbe.periodSeconds }}
successThreshold: {{ .Values.filer.livenessProbe.successThreshold }}

k8s/charts/seaweedfs/templates/master/master-statefulset.yaml (4)

@@ -235,7 +235,7 @@ spec:
httpGet:
path: {{ .Values.master.readinessProbe.httpGet.path }}
port: {{ .Values.master.port }}
scheme: {{ .Values.master.readinessProbe.scheme }}
scheme: {{ .Values.master.readinessProbe.httpGet.scheme }}
initialDelaySeconds: {{ .Values.master.readinessProbe.initialDelaySeconds }}
periodSeconds: {{ .Values.master.readinessProbe.periodSeconds }}
successThreshold: {{ .Values.master.readinessProbe.successThreshold }}
@@ -247,7 +247,7 @@ spec:
httpGet:
path: {{ .Values.master.livenessProbe.httpGet.path }}
port: {{ .Values.master.port }}
scheme: {{ .Values.master.livenessProbe.scheme }}
scheme: {{ .Values.master.livenessProbe.httpGet.scheme }}
initialDelaySeconds: {{ .Values.master.livenessProbe.initialDelaySeconds }}
periodSeconds: {{ .Values.master.livenessProbe.periodSeconds }}
successThreshold: {{ .Values.master.livenessProbe.successThreshold }}
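The same one-line fix recurs in the filer, s3, and volume templates: the probe `scheme` is now read from under `httpGet` instead of the probe root. The values shape the templates expect is roughly the following (path and timings are illustrative, not taken from the chart defaults):

```yaml
master:
  readinessProbe:
    enabled: true
    httpGet:
      path: /cluster/status
      scheme: HTTP        # must sit under httpGet now
    initialDelaySeconds: 10
    periodSeconds: 45
    successThreshold: 2
    failureThreshold: 100
```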

k8s/charts/seaweedfs/templates/s3/s3-deployment.yaml (7)

@@ -143,9 +143,6 @@ spec:
{{- if .Values.s3.domainName }}
-domainName={{ .Values.s3.domainName }} \
{{- end }}
{{- if eq (typeOf .Values.s3.allowEmptyFolder) "bool" }}
-allowEmptyFolder={{ .Values.s3.allowEmptyFolder }} \
{{- end }}
{{- if .Values.s3.enableAuth }}
-config=/etc/sw/seaweedfs_s3_config \
{{- end }}
@@ -204,7 +201,7 @@ spec:
httpGet:
path: {{ .Values.s3.readinessProbe.httpGet.path }}
port: {{ .Values.s3.port }}
scheme: {{ .Values.s3.readinessProbe.scheme }}
scheme: {{ .Values.s3.readinessProbe.httpGet.scheme }}
initialDelaySeconds: {{ .Values.s3.readinessProbe.initialDelaySeconds }}
periodSeconds: {{ .Values.s3.readinessProbe.periodSeconds }}
successThreshold: {{ .Values.s3.readinessProbe.successThreshold }}
@@ -216,7 +213,7 @@ spec:
httpGet:
path: {{ .Values.s3.livenessProbe.httpGet.path }}
port: {{ .Values.s3.port }}
scheme: {{ .Values.s3.livenessProbe.scheme }}
scheme: {{ .Values.s3.livenessProbe.httpGet.scheme }}
initialDelaySeconds: {{ .Values.s3.livenessProbe.initialDelaySeconds }}
periodSeconds: {{ .Values.s3.livenessProbe.periodSeconds }}
successThreshold: {{ .Values.s3.livenessProbe.successThreshold }}

k8s/charts/seaweedfs/templates/s3/s3-ingress.yaml (16)

@@ -1,4 +1,9 @@
{{- if .Values.s3.ingress.enabled }}
{{- /* S3 ingress works for standalone S3 gateway (s3.enabled), S3 on Filer (filer.s3.enabled), and all-in-one mode (allInOne.s3.enabled) */}}
{{- $s3Enabled := or .Values.s3.enabled (and .Values.filer.s3.enabled (not .Values.allInOne.enabled)) (and .Values.allInOne.enabled .Values.allInOne.s3.enabled) }}
{{- if and $s3Enabled .Values.s3.ingress.enabled }}
{{- /* Determine service name based on deployment mode */}}
{{- $serviceName := ternary (printf "%s-all-in-one" (include "seaweedfs.name" .)) (printf "%s-s3" (include "seaweedfs.name" .)) .Values.allInOne.enabled }}
{{- $s3Port := .Values.allInOne.s3.port | default .Values.s3.port }}
{{- if semverCompare ">=1.19-0" .Capabilities.KubeVersion.GitVersion }}
apiVersion: networking.k8s.io/v1
{{- else if semverCompare ">=1.14-0" .Capabilities.KubeVersion.GitVersion }}
@@ -32,13 +37,12 @@ spec:
backend:
{{- if semverCompare ">=1.19-0" .Capabilities.KubeVersion.GitVersion }}
service:
name: {{ template "seaweedfs.name" . }}-s3
name: {{ $serviceName }}
port:
number: {{ .Values.s3.port }}
#name:
number: {{ $s3Port }}
{{- else }}
serviceName: {{ template "seaweedfs.name" . }}-s3
servicePort: {{ .Values.s3.port }}
serviceName: {{ $serviceName }}
servicePort: {{ $s3Port }}
{{- end }}
{{- if .Values.s3.ingress.host }}
host: {{ .Values.s3.ingress.host | quote }}
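The `$s3Enabled` guard admits three deployment shapes: the standalone gateway (`s3.enabled`), S3 embedded in the filer (`filer.s3.enabled`, with allInOne off), and all-in-one mode. For instance, a filer-embedded gateway behind this ingress might be configured as (hostname is an assumption):

```yaml
filer:
  s3:
    enabled: true
s3:
  enabled: false        # no standalone gateway
  ingress:
    enabled: true
    host: s3.example.com
```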

k8s/charts/seaweedfs/templates/shared/post-install-bucket-hook.yaml (71)

@@ -1,6 +1,32 @@
{{- if .Values.master.enabled }}
{{- if .Values.filer.s3.enabled }}
{{- if .Values.filer.s3.createBuckets }}
{{- /* Support bucket creation for both standalone filer.s3 and allInOne modes */}}
{{- $createBuckets := list }}
{{- $s3Enabled := false }}
{{- $enableAuth := false }}
{{- $existingConfigSecret := "" }}
{{- /* Check allInOne mode first */}}
{{- if .Values.allInOne.enabled }}
{{- if .Values.allInOne.s3.enabled }}
{{- $s3Enabled = true }}
{{- if .Values.allInOne.s3.createBuckets }}
{{- $createBuckets = .Values.allInOne.s3.createBuckets }}
{{- end }}
{{- $enableAuth = or .Values.allInOne.s3.enableAuth .Values.s3.enableAuth .Values.filer.s3.enableAuth }}
{{- $existingConfigSecret = or .Values.allInOne.s3.existingConfigSecret .Values.s3.existingConfigSecret .Values.filer.s3.existingConfigSecret }}
{{- end }}
{{- else if .Values.master.enabled }}
{{- /* Check standalone filer.s3 mode */}}
{{- if .Values.filer.s3.enabled }}
{{- $s3Enabled = true }}
{{- if .Values.filer.s3.createBuckets }}
{{- $createBuckets = .Values.filer.s3.createBuckets }}
{{- end }}
{{- $enableAuth = .Values.filer.s3.enableAuth }}
{{- $existingConfigSecret = .Values.filer.s3.existingConfigSecret }}
{{- end }}
{{- end }}
{{- if and $s3Enabled $createBuckets }}
---
apiVersion: batch/v1
kind: Job
@@ -32,9 +58,9 @@ spec:
- name: WEED_CLUSTER_DEFAULT
value: "sw"
- name: WEED_CLUSTER_SW_MASTER
value: "{{ template "seaweedfs.name" . }}-master.{{ .Release.Namespace }}:{{ .Values.master.port }}"
value: {{ include "seaweedfs.cluster.masterAddress" . | quote }}
- name: WEED_CLUSTER_SW_FILER
value: "{{ template "seaweedfs.name" . }}-filer-client.{{ .Release.Namespace }}:{{ .Values.filer.port }}"
value: {{ include "seaweedfs.cluster.filerAddress" . | quote }}
- name: POD_IP
valueFrom:
fieldRef:
@@ -71,24 +97,29 @@ spec:
echo "Service at $url failed to become ready within 5 minutes"
exit 1
}
{{- if .Values.allInOne.enabled }}
wait_for_service "http://$WEED_CLUSTER_SW_MASTER{{ .Values.allInOne.readinessProbe.httpGet.path }}"
wait_for_service "http://$WEED_CLUSTER_SW_FILER{{ .Values.filer.readinessProbe.httpGet.path }}"
{{- else }}
wait_for_service "http://$WEED_CLUSTER_SW_MASTER{{ .Values.master.readinessProbe.httpGet.path }}"
wait_for_service "http://$WEED_CLUSTER_SW_FILER{{ .Values.filer.readinessProbe.httpGet.path }}"
{{- range $reg, $props := $.Values.filer.s3.createBuckets }}
exec /bin/echo \
"s3.bucket.create --name {{ $props.name }}" |\
{{- end }}
{{- range $createBuckets }}
/bin/echo \
"s3.bucket.create --name {{ .name }}" |\
/usr/bin/weed shell
{{- end }}
{{- range $reg, $props := $.Values.filer.s3.createBuckets }}
{{- if $props.anonymousRead }}
exec /bin/echo \
{{- range $createBuckets }}
{{- if .anonymousRead }}
/bin/echo \
"s3.configure --user anonymous \
--buckets {{ $props.name }} \
--buckets {{ .name }} \
--actions Read \
--apply true" |\
/usr/bin/weed shell
{{- end }}
{{- end }}
{{- if .Values.filer.s3.enableAuth }}
{{- if $enableAuth }}
volumeMounts:
- name: config-users
mountPath: /etc/sw
@@ -106,17 +137,15 @@ spec:
{{- if .Values.filer.containerSecurityContext.enabled }}
securityContext: {{- omit .Values.filer.containerSecurityContext "enabled" | toYaml | nindent 12 }}
{{- end }}
{{- if .Values.filer.s3.enableAuth }}
{{- if $enableAuth }}
volumes:
- name: config-users
secret:
defaultMode: 420
{{- if not (empty .Values.filer.s3.existingConfigSecret) }}
secretName: {{ .Values.filer.s3.existingConfigSecret }}
{{- if $existingConfigSecret }}
secretName: {{ $existingConfigSecret }}
{{- else }}
secretName: seaweedfs-s3-secret
secretName: {{ template "seaweedfs.name" . }}-s3-secret
{{- end }}
{{- end }}
{{- end }}{{/** if .Values.filer.s3.enableAuth **/}}
{{- end }}{{/** if .Values.master.enabled **/}}
{{- end }}{{/** if .Values.filer.s3.enabled **/}}
{{- end }}{{/** if .Values.filer.s3.createBuckets **/}}
{{- end }}
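With the hook now honoring all-in-one mode, bucket bootstrapping no longer requires the standalone filer S3 path. A hypothetical values excerpt that would trigger the job, following the createBuckets schema documented in values.yaml:

```yaml
allInOne:
  enabled: true
  s3:
    enabled: true
    createBuckets:
      - name: bucket-a
        anonymousRead: true    # hook also runs s3.configure for anonymous Read
      - name: bucket-b
        anonymousRead: false
```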

k8s/charts/seaweedfs/templates/volume/volume-statefulset.yaml (4)

@@ -251,7 +251,7 @@ spec:
httpGet:
path: {{ $volume.readinessProbe.httpGet.path }}
port: {{ $volume.port }}
scheme: {{ $volume.readinessProbe.scheme }}
scheme: {{ $volume.readinessProbe.httpGet.scheme }}
initialDelaySeconds: {{ $volume.readinessProbe.initialDelaySeconds }}
periodSeconds: {{ $volume.readinessProbe.periodSeconds }}
successThreshold: {{ $volume.readinessProbe.successThreshold }}
@@ -263,7 +263,7 @@ spec:
httpGet:
path: {{ $volume.livenessProbe.httpGet.path }}
port: {{ $volume.port }}
scheme: {{ $volume.livenessProbe.scheme }}
scheme: {{ $volume.livenessProbe.httpGet.scheme }}
initialDelaySeconds: {{ $volume.livenessProbe.initialDelaySeconds }}
periodSeconds: {{ $volume.livenessProbe.periodSeconds }}
successThreshold: {{ $volume.livenessProbe.successThreshold }}

k8s/charts/seaweedfs/values.yaml (102)

@@ -856,8 +856,6 @@ filer:
port: 8333
# add additional https port
httpsPort: 0
# allow empty folders
allowEmptyFolder: false
# Suffix of the host name, {bucket}.{domainName}
domainName: ""
# enable user & permission to s3 (need to inject to all services)
@@ -885,8 +883,6 @@ s3:
httpsPort: 0
metricsPort: 9327
loggingOverrideLevel: null
# allow empty folders
allowEmptyFolder: true
# enable user & permission to s3 (need to inject to all services)
enableAuth: false
# set to the name of an existing kubernetes Secret with the s3 json config file
@@ -979,9 +975,9 @@ s3:
extraEnvironmentVars:
# Custom command line arguments to add to the s3 command
# Example to fix connection idle seconds:
extraArgs: ["-idleTimeout=30"]
# extraArgs: []
# Default idleTimeout is 120 seconds. Example to customize:
# extraArgs: ["-idleTimeout=300"]
extraArgs: []
# used to configure livenessProbe on s3 containers
#
@@ -1097,6 +1093,7 @@ allInOne:
enabled: false
imageOverride: null
restartPolicy: Always
replicas: 1 # Number of replicas (note: multiple replicas may require shared storage)
# Core configuration
idleTimeout: 30 # Connection idle seconds
@@ -1108,24 +1105,85 @@ allInOne:
metricsIp: "" # Metrics listen IP. If empty, defaults to bindAddress
loggingOverrideLevel: null # Override logging level
# Service configuration
# Custom command line arguments to add to the server command
# Example to fix IPv6 metrics connectivity issues:
# extraArgs: ["-metricsIp", "0.0.0.0"]
# Example with multiple args:
# extraArgs: ["-customFlag", "value", "-anotherFlag"]
extraArgs: []
# Update strategy configuration
# type: Recreate or RollingUpdate
# For single replica, Recreate is recommended to avoid data conflicts.
# For multiple replicas with RollingUpdate, you MUST use shared storage
# (e.g., data.type: persistentVolumeClaim with ReadWriteMany access mode)
# to avoid data loss or inconsistency between pods.
updateStrategy:
type: Recreate
# S3 gateway configuration
# Note: Most parameters below default to null, which means they inherit from
# the global s3.* settings. Set explicit values here to override for allInOne only.
s3:
enabled: false # Whether to enable S3 gateway
port: null # S3 gateway port (null inherits from s3.port)
httpsPort: null # S3 gateway HTTPS port (null inherits from s3.httpsPort)
domainName: null # Suffix of the host name (null inherits from s3.domainName)
enableAuth: false # Enable user & permission to S3
# Set to the name of an existing kubernetes Secret with the s3 json config file
# should have a secret key called seaweedfs_s3_config with an inline json config
existingConfigSecret: null
auditLogConfig: null # S3 audit log configuration (null inherits from s3.auditLogConfig)
# You may specify buckets to be created during the install process.
# Buckets may be exposed publicly by setting `anonymousRead` to `true`
# createBuckets:
# - name: bucket-a
# anonymousRead: true
# - name: bucket-b
# anonymousRead: false
# SFTP server configuration
# Note: Most parameters below default to null, which means they inherit from
# the global sftp.* settings. Set explicit values here to override for allInOne only.
sftp:
enabled: false # Whether to enable SFTP server
port: null # SFTP port (null inherits from sftp.port)
sshPrivateKey: null # Path to SSH private key (null inherits from sftp.sshPrivateKey)
hostKeysFolder: null # Path to SSH host keys folder (null inherits from sftp.hostKeysFolder)
authMethods: null # Comma-separated auth methods (null inherits from sftp.authMethods)
maxAuthTries: null # Maximum authentication attempts (null inherits from sftp.maxAuthTries)
bannerMessage: null # Banner message (null inherits from sftp.bannerMessage)
loginGraceTime: null # Login grace time (null inherits from sftp.loginGraceTime)
clientAliveInterval: null # Client keep-alive interval (null inherits from sftp.clientAliveInterval)
clientAliveCountMax: null # Maximum missed keep-alive messages (null inherits from sftp.clientAliveCountMax)
enableAuth: false # Enable SFTP authentication
# Set to the name of an existing kubernetes Secret with the sftp json config file
existingConfigSecret: null
# Set to the name of an existing kubernetes Secret with the SSH keys
existingSshConfigSecret: null
# Service settings
service:
annotations: {} # Annotations for the service
type: ClusterIP # Service type (ClusterIP, NodePort, LoadBalancer)
internalTrafficPolicy: Cluster # Internal traffic policy
# Note: For ingress in all-in-one mode, use the standard s3.ingress and
# filer.ingress settings. The templates automatically detect all-in-one mode
# and point to the correct service (seaweedfs-all-in-one instead of
# seaweedfs-s3 or seaweedfs-filer).
# Storage configuration
data:
type: "emptyDir" # Options: "hostPath", "persistentVolumeClaim", "emptyDir"
type: "emptyDir" # Options: "hostPath", "persistentVolumeClaim", "emptyDir", "existingClaim"
hostPathPrefix: /mnt/data # Path prefix for hostPath volumes
claimName: seaweedfs-data-pvc # Name of the PVC to use
size: "" # Size of the PVC
storageClass: "" # Storage class for the PVC
claimName: seaweedfs-data-pvc # Name of the PVC to use (for existingClaim type)
size: null # Size of the PVC (null defaults to 10Gi for persistentVolumeClaim type)
storageClass: null # Storage class for the PVC (null uses cluster default)
# accessModes for the PVC. Default is ["ReadWriteOnce"].
# For multi-replica deployments, use ["ReadWriteMany"] with a compatible storage class.
accessModes: []
annotations: {} # Annotations for the PVC
# Health checks
readinessProbe:
@@ -1154,6 +1212,18 @@ allInOne:
# Additional resources
extraEnvironmentVars: {} # Additional environment variables
# Secret environment variables (for database credentials, etc.)
# Example:
# secretExtraEnvironmentVars:
# WEED_POSTGRES_USERNAME:
# secretKeyRef:
# name: postgres-credentials
# key: username
# WEED_POSTGRES_PASSWORD:
# secretKeyRef:
# name: postgres-credentials
# key: password
secretExtraEnvironmentVars: {}
extraVolumeMounts: "" # Additional volume mounts
extraVolumes: "" # Additional volumes
initContainers: "" # Init containers
@@ -1173,7 +1243,7 @@ allInOne:
matchLabels:
app.kubernetes.io/name: {{ template "seaweedfs.name" . }}
app.kubernetes.io/instance: {{ .Release.Name }}
app.kubernetes.io/component: master
app.kubernetes.io/component: seaweedfs-all-in-one
topologyKey: kubernetes.io/hostname
# Topology Spread Constraints Settings
@@ -1181,16 +1251,16 @@ allInOne:
# for a PodSpec. By Default no constraints are set.
topologySpreadConstraints: ""
# Toleration Settings for master pods
# Toleration Settings for pods
# This should be a multi-line string matching the Toleration array
# in a PodSpec.
tolerations: ""
# nodeSelector labels for master pod assignment, formatted as a multi-line string.
# nodeSelector labels for pod assignment, formatted as a multi-line string.
# ref: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#nodeselector
nodeSelector: ""
# Used to assign priority to master pods
# Used to assign priority to pods
# ref: https://kubernetes.io/docs/concepts/configuration/pod-priority-preemption/
priorityClassName: ""
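Putting the new knobs together, a multi-replica all-in-one deployment per the RollingUpdate warning above would pair the new replica and strategy fields with shared RWX storage; a sketch, where the storage class name is an assumption:

```yaml
allInOne:
  enabled: true
  replicas: 2
  updateStrategy:
    type: RollingUpdate
  data:
    type: persistentVolumeClaim
    size: 20Gi
    storageClass: nfs-client        # assumed ReadWriteMany-capable class
    accessModes: ["ReadWriteMany"]
```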

test/foundationdb/docker-compose.arm64.yml (2)

@@ -147,7 +147,7 @@ services:
- "8888:8888"
- "8333:8333"
- "18888:18888"
command: "server -ip=seaweedfs -filer -master.volumeSizeLimitMB=16 -volume.max=0 -volume -volume.preStopSeconds=1 -s3 -s3.config=/etc/seaweedfs/s3.json -s3.port=8333 -s3.allowEmptyFolder=false -s3.allowDeleteBucketNotEmpty=false"
command: "server -ip=seaweedfs -filer -master.volumeSizeLimitMB=16 -volume.max=0 -volume -volume.preStopSeconds=1 -s3 -s3.config=/etc/seaweedfs/s3.json -s3.port=8333 -s3.allowDeleteBucketNotEmpty=false"
volumes:
- ./s3.json:/etc/seaweedfs/s3.json
- ./filer.toml:/etc/seaweedfs/filer.toml

test/foundationdb/docker-compose.yml (2)

@@ -116,7 +116,7 @@ services:
- WEED_FOUNDATIONDB_MAX_RETRY_DELAY
- WEED_MASTER_VOLUME_GROWTH_COPY_1=1
- WEED_MASTER_VOLUME_GROWTH_COPY_OTHER=1
command: "weed server -ip=seaweedfs -filer -master.volumeSizeLimitMB=16 -volume.max=0 -volume -volume.preStopSeconds=1 -s3 -s3.config=/etc/seaweedfs/s3.json -s3.port=8333 -s3.allowEmptyFolder=false -s3.allowDeleteBucketNotEmpty=false"
command: "weed server -ip=seaweedfs -filer -master.volumeSizeLimitMB=16 -volume.max=0 -volume -volume.preStopSeconds=1 -s3 -s3.config=/etc/seaweedfs/s3.json -s3.port=8333 -s3.allowDeleteBucketNotEmpty=false"
configs:
fdb.cluster:

test/postgres/docker-compose.yml (1)

@@ -30,7 +30,6 @@ services:
- -s3=true
- -s3.port=8333
- -webdav=false
- -s3.allowEmptyFolder=false
- -mq.broker=true
- -mq.agent=true
- -ip=seaweedfs

test/s3/cors/Makefile (3)

@@ -79,12 +79,11 @@ start-server: check-deps
@echo "🔍 DEBUG: Creating volume directory..."
@mkdir -p ./test-volume-data
@echo "🔍 DEBUG: Launching SeaweedFS server in background..."
@echo "🔍 DEBUG: Command: $(WEED_BINARY) server -debug -s3 -s3.port=$(S3_PORT) -s3.allowEmptyFolder=false -s3.allowDeleteBucketNotEmpty=true -s3.config=../../../docker/compose/s3.json -filer -filer.maxMB=64 -master.volumeSizeLimitMB=50 -volume.max=100 -dir=./test-volume-data -volume.preStopSeconds=1 -metricsPort=9324"
@echo "🔍 DEBUG: Command: $(WEED_BINARY) server -debug -s3 -s3.port=$(S3_PORT) -s3.allowDeleteBucketNotEmpty=true -s3.config=../../../docker/compose/s3.json -filer -filer.maxMB=64 -master.volumeSizeLimitMB=50 -volume.max=100 -dir=./test-volume-data -volume.preStopSeconds=1 -metricsPort=9324"
@$(WEED_BINARY) server \
-debug \
-s3 \
-s3.port=$(S3_PORT) \
-s3.allowEmptyFolder=false \
-s3.allowDeleteBucketNotEmpty=true \
-s3.config=../../../docker/compose/s3.json \
-filer \

test/s3/retention/Makefile (3)

@@ -81,12 +81,11 @@ start-server: check-deps
@echo "🔍 DEBUG: Creating volume directory..."
@mkdir -p ./test-volume-data
@echo "🔍 DEBUG: Launching SeaweedFS server in background..."
@echo "🔍 DEBUG: Command: $(WEED_BINARY) server -debug -s3 -s3.port=$(S3_PORT) -s3.allowEmptyFolder=false -s3.allowDeleteBucketNotEmpty=true -s3.config=../../../docker/compose/s3.json -filer -filer.maxMB=64 -master.volumeSizeLimitMB=50 -volume.max=100 -dir=./test-volume-data -volume.preStopSeconds=1 -metricsPort=9324"
@echo "🔍 DEBUG: Command: $(WEED_BINARY) server -debug -s3 -s3.port=$(S3_PORT) -s3.allowDeleteBucketNotEmpty=true -s3.config=../../../docker/compose/s3.json -filer -filer.maxMB=64 -master.volumeSizeLimitMB=50 -volume.max=100 -dir=./test-volume-data -volume.preStopSeconds=1 -metricsPort=9324"
@$(WEED_BINARY) server \
-debug \
-s3 \
-s3.port=$(S3_PORT) \
-s3.allowEmptyFolder=false \
-s3.allowDeleteBucketNotEmpty=true \
-s3.config=../../../docker/compose/s3.json \
-filer \

test/s3/retention/s3_object_lock_headers_test.go (2)

@@ -236,7 +236,7 @@ func TestObjectLockHeadersNonVersionedBucket(t *testing.T) {
bucketName := getNewBucketName()
// Create regular bucket without object lock/versioning
createBucket(t, client, bucketName)
createBucketWithoutObjectLock(t, client, bucketName)
defer deleteBucket(t, client, bucketName)
key := "test-non-versioned"

test/s3/retention/s3_retention_test.go (13)

@@ -69,8 +69,19 @@ func getNewBucketName() string {
return fmt.Sprintf("%s%d", defaultConfig.BucketPrefix, timestamp)
}
// createBucket creates a new bucket for testing
// createBucket creates a new bucket for testing with Object Lock enabled
// Object Lock is required for retention and legal hold functionality per AWS S3 specification
func createBucket(t *testing.T, client *s3.Client, bucketName string) {
_, err := client.CreateBucket(context.TODO(), &s3.CreateBucketInput{
Bucket: aws.String(bucketName),
ObjectLockEnabledForBucket: aws.Bool(true),
})
require.NoError(t, err)
}
// createBucketWithoutObjectLock creates a new bucket without Object Lock enabled
// Use this only for tests that specifically need to verify non-Object-Lock bucket behavior
func createBucketWithoutObjectLock(t *testing.T, client *s3.Client, bucketName string) {
_, err := client.CreateBucket(context.TODO(), &s3.CreateBucketInput{
Bucket: aws.String(bucketName),
})

test/s3/tagging/Makefile (3)

@@ -77,7 +77,7 @@ start-server: check-deps
@echo "🔍 DEBUG: Creating volume directory..."
@mkdir -p ./test-volume-data
@echo "🔍 DEBUG: Launching SeaweedFS server in background..."
@echo "🔍 DEBUG: Command: $(WEED_BINARY) server -filer -filer.maxMB=64 -s3 -ip.bind 0.0.0.0 -dir=./test-volume-data -master.raftHashicorp -master.electionTimeout 1s -master.volumeSizeLimitMB=100 -volume.max=100 -volume.preStopSeconds=1 -master.port=$(MASTER_PORT) -volume.port=$(VOLUME_PORT) -filer.port=$(FILER_PORT) -s3.port=$(S3_PORT) -metricsPort=9329 -s3.allowEmptyFolder=false -s3.allowDeleteBucketNotEmpty=true -s3.config=../../../docker/compose/s3.json -master.peers=none"
@echo "🔍 DEBUG: Command: $(WEED_BINARY) server -filer -filer.maxMB=64 -s3 -ip.bind 0.0.0.0 -dir=./test-volume-data -master.raftHashicorp -master.electionTimeout 1s -master.volumeSizeLimitMB=100 -volume.max=100 -volume.preStopSeconds=1 -master.port=$(MASTER_PORT) -volume.port=$(VOLUME_PORT) -filer.port=$(FILER_PORT) -s3.port=$(S3_PORT) -metricsPort=9329 -s3.allowDeleteBucketNotEmpty=true -s3.config=../../../docker/compose/s3.json -master.peers=none"
@$(WEED_BINARY) server \
-filer \
-filer.maxMB=64 \
@@ -94,7 +94,6 @@ start-server: check-deps
-filer.port=$(FILER_PORT) \
-s3.port=$(S3_PORT) \
-metricsPort=9329 \
-s3.allowEmptyFolder=false \
-s3.allowDeleteBucketNotEmpty=true \
-s3.config=../../../docker/compose/s3.json \
-master.peers=none \

test/s3/versioning/Makefile (7)

@@ -81,12 +81,11 @@ start-server: check-deps
@echo "🔍 DEBUG: Creating volume directory..."
@mkdir -p ./test-volume-data
@echo "🔍 DEBUG: Launching SeaweedFS server in background..."
@echo "🔍 DEBUG: Command: $(WEED_BINARY) server -debug -s3 -s3.port=$(S3_PORT) -s3.allowEmptyFolder=false -s3.allowDeleteBucketNotEmpty=true -s3.config=../../../docker/compose/s3.json -filer -filer.maxMB=64 -master.volumeSizeLimitMB=50 -volume.max=100 -dir=./test-volume-data -volume.preStopSeconds=1 -metricsPort=9324"
@echo "🔍 DEBUG: Command: $(WEED_BINARY) server -debug -s3 -s3.port=$(S3_PORT) -s3.allowDeleteBucketNotEmpty=true -s3.config=../../../docker/compose/s3.json -filer -filer.maxMB=64 -master.volumeSizeLimitMB=50 -volume.max=100 -dir=./test-volume-data -volume.preStopSeconds=1 -metricsPort=9324"
@$(WEED_BINARY) server \
-debug \
-s3 \
-s3.port=$(S3_PORT) \
-s3.allowEmptyFolder=false \
-s3.allowDeleteBucketNotEmpty=true \
-s3.config=../../../docker/compose/s3.json \
-filer \
@ -222,7 +221,7 @@ test-with-server: start-server
test-versioning-with-configs: check-deps
@echo "Testing with different S3 configurations..."
@echo "Testing with empty folder allowed..."
@$(WEED_BINARY) server -s3 -s3.port=$(S3_PORT) -s3.allowEmptyFolder=true -filer -master.volumeSizeLimitMB=100 -volume.max=100 > weed-test-config1.log 2>&1 & echo $$! > weed-config1.pid
@$(WEED_BINARY) server -s3 -s3.port=$(S3_PORT) -filer -master.volumeSizeLimitMB=100 -volume.max=100 > weed-test-config1.log 2>&1 & echo $$! > weed-config1.pid
@sleep 5
@go test -v -timeout=5m -run "TestVersioningBasicWorkflow" . || true
@if [ -f weed-config1.pid ]; then kill -TERM $$(cat weed-config1.pid) 2>/dev/null || true; rm -f weed-config1.pid; fi
@ -268,7 +267,6 @@ debug-server:
-debug \
-s3 \
-s3.port=$(S3_PORT) \
-s3.allowEmptyFolder=false \
-s3.allowDeleteBucketNotEmpty=true \
-s3.config=../../../docker/compose/s3.json \
-filer \
@ -317,7 +315,6 @@ start-server-simple: check-deps
-debug \
-s3 \
-s3.port=$(S3_PORT) \
-s3.allowEmptyFolder=false \
-s3.allowDeleteBucketNotEmpty=true \
-s3.config=../../../docker/compose/s3.json \
-filer \

41
test/sftp/Makefile

@ -0,0 +1,41 @@
.PHONY: all build test test-verbose test-short test-homedir test-debug clean deps tidy
all: build test
# Build the weed binary first
build:
cd ../../weed && go build -o weed .
# Install test dependencies
deps:
go mod download
# Run all tests
test: build deps
go test -timeout 5m ./...
# Run tests with verbose output
test-verbose: build deps
go test -v -timeout 5m ./...
# Run quick tests only (skip integration tests)
test-short: deps
go test -short -v ./...
# Run specific test
test-homedir: build deps
go test -v -timeout 5m -run TestHomeDirPathTranslation ./...
# Run tests with debug output from SeaweedFS
test-debug: build deps
go test -v -timeout 5m ./... 2>&1 | tee test.log
# Clean up test artifacts
clean:
rm -f test.log
go clean -testcache
# Update go.sum
tidy:
go mod tidy

92
test/sftp/README.md

@ -0,0 +1,92 @@
# SeaweedFS SFTP Integration Tests
This directory contains integration tests for the SeaweedFS SFTP server.
## Prerequisites
1. Build the SeaweedFS binary:
```bash
cd ../../weed
go build -o weed .
```
2. Ensure `ssh-keygen` is available (for generating test SSH host keys)
## Running Tests
### Run all tests
```bash
make test
```
### Run tests with verbose output
```bash
make test-verbose
```
### Run a specific test
```bash
go test -v -run TestHomeDirPathTranslation
```
### Skip long-running tests
```bash
go test -short ./...
```
## Test Structure
- `framework.go` - Test framework that starts a SeaweedFS cluster with an SFTP server (see the usage sketch below)
- `basic_test.go` - Basic SFTP operation tests, including:
  - HomeDir path translation (fixes issue #7470)
  - File upload/download
  - Directory operations
  - Large file handling
  - Edge cases
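For orientation, here is a minimal sketch (imports elided) of the setup/teardown pattern the tests follow; all names come from `framework.go` in this directory:

```go
func TestExample(t *testing.T) {
	if testing.Short() {
		t.Skip("skipping integration test in short mode")
	}
	config := DefaultTestConfig()
	config.EnableDebug = testing.Verbose()

	fw := NewSftpTestFramework(t, config)
	require.NoError(t, fw.Setup(config), "failed to setup test framework")
	defer fw.Cleanup()

	// Credentials come from testdata/userstore.json.
	sftpClient, sshConn, err := fw.ConnectSFTP("testuser", "testuserpassword")
	require.NoError(t, err)
	defer sshConn.Close()
	defer sftpClient.Close()

	// ... exercise SFTP operations through sftpClient ...
}
```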
## Test Configuration
Tests use `testdata/userstore.json`, which defines the following test users:
| Username | Password | HomeDir | Permissions |
|----------|----------|---------|-------------|
| admin | adminpassword | / | Full access |
| testuser | testuserpassword | /sftp/testuser | Full access to home |
| readonly | readonlypassword | /public | Read-only |
## Key Tests
### TestHomeDirPathTranslation
Tests the fix for [issue #7470](https://github.com/seaweedfs/seaweedfs/issues/7470) where
users with a non-root HomeDir (e.g., `/sftp/testuser`) could not upload files to `/`
because the path wasn't being translated to their home directory.
The test verifies:
- Uploading to `/` correctly maps to the user's HomeDir
- Creating directories at `/` works
- Listing `/` shows the user's home directory contents
- All path operations respect the HomeDir translation
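Conceptually, the server joins the user-visible path onto the configured HomeDir. The following standalone model is illustrative only (`toFilerPath` is a hypothetical helper, not the server's actual code):

```go
package main

import (
	"fmt"
	"path"
)

// toFilerPath models the mapping: the user's virtual root "/" corresponds
// to homeDir on the filer. Hypothetical helper for illustration only.
func toFilerPath(homeDir, requested string) string {
	return path.Join(homeDir, path.Clean("/"+requested))
}

func main() {
	fmt.Println(toFilerPath("/sftp/testuser", "/test_upload.txt"))
	// Output: /sftp/testuser/test_upload.txt
}
```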
## Debugging
To debug test failures:
1. Enable verbose output:
```bash
go test -v -run TestName
```
2. Keep test artifacts (don't cleanup):
```go
config := DefaultTestConfig()
config.SkipCleanup = true
```
3. Enable debug logging:
```go
config := DefaultTestConfig()
config.EnableDebug = true
```

652
test/sftp/basic_test.go

@ -0,0 +1,652 @@
package sftp
import (
"bytes"
"io"
"path"
"testing"
"github.com/stretchr/testify/require"
)
// TestHomeDirPathTranslation tests that SFTP operations correctly translate
// paths relative to the user's HomeDir.
// This exercises the fix for https://github.com/seaweedfs/seaweedfs/issues/7470
func TestHomeDirPathTranslation(t *testing.T) {
if testing.Short() {
t.Skip("skipping integration test in short mode")
}
config := DefaultTestConfig()
config.EnableDebug = testing.Verbose()
fw := NewSftpTestFramework(t, config)
err := fw.Setup(config)
require.NoError(t, err, "failed to setup test framework")
defer fw.Cleanup()
// Test with user "testuser" who has HomeDir="/sftp/testuser"
// When they upload to "/", it should actually go to "/sftp/testuser"
sftpClient, sshConn, err := fw.ConnectSFTP("testuser", "testuserpassword")
require.NoError(t, err, "failed to connect as testuser")
defer sshConn.Close()
defer sftpClient.Close()
// Test 1: Upload file to "/" (should map to /sftp/testuser/)
t.Run("UploadToRoot", func(t *testing.T) {
testContent := []byte("Hello from SFTP test!")
filename := "test_upload.txt"
// Create file at "/" from user's perspective
file, err := sftpClient.Create("/" + filename)
require.NoError(t, err, "should be able to create file at /")
_, err = file.Write(testContent)
require.NoError(t, err, "should be able to write to file")
err = file.Close()
require.NoError(t, err, "should be able to close file")
// Verify file exists and has correct content
readFile, err := sftpClient.Open("/" + filename)
require.NoError(t, err, "should be able to open file")
defer readFile.Close()
content, err := io.ReadAll(readFile)
require.NoError(t, err, "should be able to read file")
require.Equal(t, testContent, content, "file content should match")
// Clean up
err = sftpClient.Remove("/" + filename)
require.NoError(t, err, "should be able to remove file")
})
// Test 2: Create directory at "/" (should map to /sftp/testuser/)
t.Run("CreateDirAtRoot", func(t *testing.T) {
dirname := "test_dir"
err := sftpClient.Mkdir("/" + dirname)
require.NoError(t, err, "should be able to create directory at /")
// Verify directory exists
info, err := sftpClient.Stat("/" + dirname)
require.NoError(t, err, "should be able to stat directory")
require.True(t, info.IsDir(), "should be a directory")
// Clean up
err = sftpClient.RemoveDirectory("/" + dirname)
require.NoError(t, err, "should be able to remove directory")
})
// Test 3: List directory at "/" (should list /sftp/testuser/)
t.Run("ListRoot", func(t *testing.T) {
// Create a test file first
testContent := []byte("list test content")
filename := "list_test.txt"
file, err := sftpClient.Create("/" + filename)
require.NoError(t, err)
_, err = file.Write(testContent)
require.NoError(t, err)
file.Close()
// List root directory
files, err := sftpClient.ReadDir("/")
require.NoError(t, err, "should be able to list root directory")
// Should find our test file
found := false
for _, f := range files {
if f.Name() == filename {
found = true
break
}
}
require.True(t, found, "should find test file in listing")
// Clean up
err = sftpClient.Remove("/" + filename)
require.NoError(t, err)
})
// Test 4: Nested directory operations
t.Run("NestedOperations", func(t *testing.T) {
// Create nested directory structure
err := sftpClient.MkdirAll("/nested/dir/structure")
require.NoError(t, err, "should be able to create nested directories")
// Create file in nested directory
testContent := []byte("nested file content")
file, err := sftpClient.Create("/nested/dir/structure/file.txt")
require.NoError(t, err)
_, err = file.Write(testContent)
require.NoError(t, err)
file.Close()
// Verify file exists
readFile, err := sftpClient.Open("/nested/dir/structure/file.txt")
require.NoError(t, err)
content, err := io.ReadAll(readFile)
require.NoError(t, err)
readFile.Close()
require.Equal(t, testContent, content)
// Clean up
err = sftpClient.Remove("/nested/dir/structure/file.txt")
require.NoError(t, err)
err = sftpClient.RemoveDirectory("/nested/dir/structure")
require.NoError(t, err)
err = sftpClient.RemoveDirectory("/nested/dir")
require.NoError(t, err)
err = sftpClient.RemoveDirectory("/nested")
require.NoError(t, err)
})
// Test 5: Rename operation
t.Run("RenameFile", func(t *testing.T) {
testContent := []byte("rename test content")
file, err := sftpClient.Create("/original.txt")
require.NoError(t, err)
_, err = file.Write(testContent)
require.NoError(t, err)
file.Close()
// Rename file
err = sftpClient.Rename("/original.txt", "/renamed.txt")
require.NoError(t, err, "should be able to rename file")
// Verify old file doesn't exist
_, err = sftpClient.Stat("/original.txt")
require.Error(t, err, "original file should not exist")
// Verify new file exists with correct content
readFile, err := sftpClient.Open("/renamed.txt")
require.NoError(t, err, "renamed file should exist")
content, err := io.ReadAll(readFile)
require.NoError(t, err)
readFile.Close()
require.Equal(t, testContent, content)
// Clean up
err = sftpClient.Remove("/renamed.txt")
require.NoError(t, err)
})
}
// TestAdminRootAccess tests that admin user with HomeDir="/" can access everything
func TestAdminRootAccess(t *testing.T) {
if testing.Short() {
t.Skip("skipping integration test in short mode")
}
config := DefaultTestConfig()
config.EnableDebug = testing.Verbose()
fw := NewSftpTestFramework(t, config)
err := fw.Setup(config)
require.NoError(t, err, "failed to setup test framework")
defer fw.Cleanup()
// Connect as admin with HomeDir="/"
sftpClient, sshConn, err := fw.ConnectSFTP("admin", "adminpassword")
require.NoError(t, err, "failed to connect as admin")
defer sshConn.Close()
defer sftpClient.Close()
// Admin should be able to create directories anywhere
t.Run("CreateAnyDirectory", func(t *testing.T) {
// Create the user's home directory structure
err := sftpClient.MkdirAll("/sftp/testuser")
require.NoError(t, err, "admin should be able to create any directory")
// Create file in that directory
testContent := []byte("admin created this")
file, err := sftpClient.Create("/sftp/testuser/admin_file.txt")
require.NoError(t, err)
_, err = file.Write(testContent)
require.NoError(t, err)
file.Close()
// Verify file exists
info, err := sftpClient.Stat("/sftp/testuser/admin_file.txt")
require.NoError(t, err)
require.False(t, info.IsDir())
// Clean up
err = sftpClient.Remove("/sftp/testuser/admin_file.txt")
require.NoError(t, err)
})
}
// TestLargeFileUpload tests uploading larger files through SFTP
func TestLargeFileUpload(t *testing.T) {
if testing.Short() {
t.Skip("skipping integration test in short mode")
}
config := DefaultTestConfig()
config.EnableDebug = testing.Verbose()
fw := NewSftpTestFramework(t, config)
err := fw.Setup(config)
require.NoError(t, err, "failed to setup test framework")
defer fw.Cleanup()
sftpClient, sshConn, err := fw.ConnectSFTP("testuser", "testuserpassword")
require.NoError(t, err, "failed to connect as testuser")
defer sshConn.Close()
defer sftpClient.Close()
// Create a 1MB file
t.Run("Upload1MB", func(t *testing.T) {
size := 1024 * 1024 // 1MB
testData := bytes.Repeat([]byte("A"), size)
file, err := sftpClient.Create("/large_file.bin")
require.NoError(t, err)
n, err := file.Write(testData)
require.NoError(t, err)
require.Equal(t, size, n)
file.Close()
// Verify file size
info, err := sftpClient.Stat("/large_file.bin")
require.NoError(t, err)
require.Equal(t, int64(size), info.Size())
// Verify content
readFile, err := sftpClient.Open("/large_file.bin")
require.NoError(t, err)
content, err := io.ReadAll(readFile)
require.NoError(t, err)
readFile.Close()
require.Equal(t, testData, content)
// Clean up
err = sftpClient.Remove("/large_file.bin")
require.NoError(t, err)
})
}
// TestStatOperations tests Stat and Lstat operations
func TestStatOperations(t *testing.T) {
if testing.Short() {
t.Skip("skipping integration test in short mode")
}
config := DefaultTestConfig()
config.EnableDebug = testing.Verbose()
fw := NewSftpTestFramework(t, config)
err := fw.Setup(config)
require.NoError(t, err, "failed to setup test framework")
defer fw.Cleanup()
sftpClient, sshConn, err := fw.ConnectSFTP("testuser", "testuserpassword")
require.NoError(t, err, "failed to connect as testuser")
defer sshConn.Close()
defer sftpClient.Close()
// Create a test file
testContent := []byte("stat test content")
file, err := sftpClient.Create("/stat_test.txt")
require.NoError(t, err)
_, err = file.Write(testContent)
require.NoError(t, err)
file.Close()
t.Run("StatFile", func(t *testing.T) {
info, err := sftpClient.Stat("/stat_test.txt")
require.NoError(t, err)
require.Equal(t, "stat_test.txt", info.Name())
require.Equal(t, int64(len(testContent)), info.Size())
require.False(t, info.IsDir())
})
t.Run("StatDirectory", func(t *testing.T) {
err := sftpClient.Mkdir("/stat_dir")
require.NoError(t, err)
info, err := sftpClient.Stat("/stat_dir")
require.NoError(t, err)
require.Equal(t, "stat_dir", info.Name())
require.True(t, info.IsDir())
// Clean up
err = sftpClient.RemoveDirectory("/stat_dir")
require.NoError(t, err)
})
t.Run("StatRoot", func(t *testing.T) {
// Should be able to stat "/" which maps to user's home directory
info, err := sftpClient.Stat("/")
require.NoError(t, err, "should be able to stat root (home) directory")
require.True(t, info.IsDir(), "root should be a directory")
})
// Clean up
err = sftpClient.Remove("/stat_test.txt")
require.NoError(t, err)
}
// TestWalk tests walking directory trees
func TestWalk(t *testing.T) {
if testing.Short() {
t.Skip("skipping integration test in short mode")
}
config := DefaultTestConfig()
config.EnableDebug = testing.Verbose()
fw := NewSftpTestFramework(t, config)
err := fw.Setup(config)
require.NoError(t, err, "failed to setup test framework")
defer fw.Cleanup()
sftpClient, sshConn, err := fw.ConnectSFTP("testuser", "testuserpassword")
require.NoError(t, err, "failed to connect as testuser")
defer sshConn.Close()
defer sftpClient.Close()
// Create directory structure
err = sftpClient.MkdirAll("/walk/a/b")
require.NoError(t, err)
err = sftpClient.MkdirAll("/walk/c")
require.NoError(t, err)
// Create files
for _, p := range []string{"/walk/file1.txt", "/walk/a/file2.txt", "/walk/a/b/file3.txt", "/walk/c/file4.txt"} {
file, err := sftpClient.Create(p)
require.NoError(t, err)
file.Write([]byte("test"))
file.Close()
}
t.Run("WalkEntireTree", func(t *testing.T) {
var paths []string
walker := sftpClient.Walk("/walk")
for walker.Step() {
if walker.Err() != nil {
continue
}
paths = append(paths, walker.Path())
}
// Should find all directories and files
require.Contains(t, paths, "/walk")
require.Contains(t, paths, "/walk/a")
require.Contains(t, paths, "/walk/a/b")
require.Contains(t, paths, "/walk/c")
})
// Clean up
for _, p := range []string{"/walk/file1.txt", "/walk/a/file2.txt", "/walk/a/b/file3.txt", "/walk/c/file4.txt"} {
require.NoError(t, sftpClient.Remove(p))
}
for _, p := range []string{"/walk/a/b", "/walk/a", "/walk/c", "/walk"} {
require.NoError(t, sftpClient.RemoveDirectory(p))
}
}
// TestCurrentWorkingDirectory tests that Getwd and Chdir work correctly
func TestCurrentWorkingDirectory(t *testing.T) {
if testing.Short() {
t.Skip("skipping integration test in short mode")
}
config := DefaultTestConfig()
config.EnableDebug = testing.Verbose()
fw := NewSftpTestFramework(t, config)
err := fw.Setup(config)
require.NoError(t, err, "failed to setup test framework")
defer fw.Cleanup()
sftpClient, sshConn, err := fw.ConnectSFTP("testuser", "testuserpassword")
require.NoError(t, err, "failed to connect as testuser")
defer sshConn.Close()
defer sftpClient.Close()
// Create test directory
err = sftpClient.Mkdir("/cwd_test")
require.NoError(t, err)
t.Run("GetCurrentDir", func(t *testing.T) {
cwd, err := sftpClient.Getwd()
require.NoError(t, err)
// The initial working directory should be the user's home directory
// which from the user's perspective is "/"
require.Equal(t, "/", cwd, "initial working directory should be the virtual root")
})
t.Run("ChangeAndCreate", func(t *testing.T) {
// Create file in subdirectory using relative path after chdir
// Note: pkg/sftp doesn't support Chdir, so we test using absolute paths
file, err := sftpClient.Create("/cwd_test/relative_file.txt")
require.NoError(t, err)
file.Write([]byte("test"))
file.Close()
// Verify using absolute path
_, err = sftpClient.Stat("/cwd_test/relative_file.txt")
require.NoError(t, err)
// Clean up
sftpClient.Remove("/cwd_test/relative_file.txt")
})
// Clean up
err = sftpClient.RemoveDirectory("/cwd_test")
require.NoError(t, err)
}
// TestPathEdgeCases tests various edge cases in path handling
func TestPathEdgeCases(t *testing.T) {
if testing.Short() {
t.Skip("skipping integration test in short mode")
}
config := DefaultTestConfig()
config.EnableDebug = testing.Verbose()
fw := NewSftpTestFramework(t, config)
err := fw.Setup(config)
require.NoError(t, err, "failed to setup test framework")
defer fw.Cleanup()
sftpClient, sshConn, err := fw.ConnectSFTP("testuser", "testuserpassword")
require.NoError(t, err, "failed to connect as testuser")
defer sshConn.Close()
defer sftpClient.Close()
t.Run("PathWithDotDot", func(t *testing.T) {
// Create directory structure
err := sftpClient.MkdirAll("/edge/subdir")
require.NoError(t, err)
// Create file using path with ..
file, err := sftpClient.Create("/edge/subdir/../file.txt")
require.NoError(t, err)
file.Write([]byte("test"))
file.Close()
// Verify file was created in /edge
_, err = sftpClient.Stat("/edge/file.txt")
require.NoError(t, err, "file should be created in parent directory")
// Clean up
sftpClient.Remove("/edge/file.txt")
sftpClient.RemoveDirectory("/edge/subdir")
sftpClient.RemoveDirectory("/edge")
})
t.Run("PathWithTrailingSlash", func(t *testing.T) {
err := sftpClient.Mkdir("/trailing")
require.NoError(t, err)
// Stat with trailing slash
info, err := sftpClient.Stat("/trailing/")
require.NoError(t, err)
require.True(t, info.IsDir())
// Clean up
sftpClient.RemoveDirectory("/trailing")
})
t.Run("CreateFileAtRootPath", func(t *testing.T) {
// This is the exact scenario from issue #7470
// User with HomeDir="/sftp/testuser" uploads to "/"
file, err := sftpClient.Create("/issue7470.txt")
require.NoError(t, err, "should be able to create file at / (issue #7470)")
file.Write([]byte("This tests the fix for issue #7470"))
file.Close()
// Verify
_, err = sftpClient.Stat("/issue7470.txt")
require.NoError(t, err)
// Clean up
sftpClient.Remove("/issue7470.txt")
})
// Security test: path traversal attacks should be blocked
t.Run("PathTraversalPrevention", func(t *testing.T) {
// User's HomeDir is "/sftp/testuser"
// Attempting to escape via "../.." should NOT create files outside home directory
// First, create a valid file to ensure we can write
validFile, err := sftpClient.Create("/valid.txt")
require.NoError(t, err)
validFile.Write([]byte("valid"))
validFile.Close()
// Try various path traversal attempts
// These should either:
// 1. Be blocked (error returned), OR
// 2. Be safely resolved to stay within home directory
traversalPaths := []string{
"/../escape.txt",
"/../../escape.txt",
"/../../../escape.txt",
"/subdir/../../escape.txt",
"/./../../escape.txt",
}
for _, traversalPath := range traversalPaths {
t.Run(traversalPath, func(t *testing.T) {
// Note: The pkg/sftp client sanitizes paths locally before sending them to the server.
// So "/../escape.txt" becomes "/escape.txt" on the wire.
// Therefore, we cannot trigger the server-side path traversal block with this client.
// Instead, we verify that the file is created successfully within the jail (contained).
// The server-side protection logic is verified in unit tests (sftpd/sftp_server_test.go).
file, err := sftpClient.Create(traversalPath)
require.NoError(t, err, "creation should succeed because client sanitizes path")
file.Close()
// Clean up
err = sftpClient.Remove(traversalPath)
require.NoError(t, err)
})
}
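// Aside (editor's illustration, not part of the original test): the
// client-side sanitization described above is consistent with path.Clean
// semantics, e.g. path.Clean("/../escape.txt") == "/escape.txt" and
// path.Clean("/subdir/../../escape.txt") == "/escape.txt".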
// Clean up
sftpClient.Remove("/valid.txt")
})
}
// TestFileContent tests reading and writing file content correctly
func TestFileContent(t *testing.T) {
if testing.Short() {
t.Skip("skipping integration test in short mode")
}
config := DefaultTestConfig()
config.EnableDebug = testing.Verbose()
fw := NewSftpTestFramework(t, config)
err := fw.Setup(config)
require.NoError(t, err, "failed to setup test framework")
defer fw.Cleanup()
sftpClient, sshConn, err := fw.ConnectSFTP("testuser", "testuserpassword")
require.NoError(t, err, "failed to connect as testuser")
defer sshConn.Close()
defer sftpClient.Close()
t.Run("BinaryContent", func(t *testing.T) {
// Create binary data with all byte values
data := make([]byte, 256)
for i := 0; i < 256; i++ {
data[i] = byte(i)
}
file, err := sftpClient.Create("/binary.bin")
require.NoError(t, err)
n, err := file.Write(data)
require.NoError(t, err)
require.Equal(t, 256, n)
file.Close()
// Read back
readFile, err := sftpClient.Open("/binary.bin")
require.NoError(t, err)
content, err := io.ReadAll(readFile)
require.NoError(t, err)
readFile.Close()
require.Equal(t, data, content, "binary content should match")
// Clean up
sftpClient.Remove("/binary.bin")
})
t.Run("EmptyFile", func(t *testing.T) {
file, err := sftpClient.Create("/empty.txt")
require.NoError(t, err)
file.Close()
info, err := sftpClient.Stat("/empty.txt")
require.NoError(t, err)
require.Equal(t, int64(0), info.Size())
// Clean up
sftpClient.Remove("/empty.txt")
})
t.Run("UnicodeFilename", func(t *testing.T) {
filename := "/文件名.txt"
content := []byte("Unicode content: 你好世界")
file, err := sftpClient.Create(filename)
require.NoError(t, err)
file.Write(content)
file.Close()
// Read back
readFile, err := sftpClient.Open(filename)
require.NoError(t, err)
readContent, err := io.ReadAll(readFile)
require.NoError(t, err)
readFile.Close()
require.Equal(t, content, readContent)
// Verify in listing
files, err := sftpClient.ReadDir("/")
require.NoError(t, err)
found := false
for _, f := range files {
if f.Name() == path.Base(filename) {
found = true
break
}
}
require.True(t, found, "should find unicode filename in listing")
// Clean up
sftpClient.Remove(filename)
})
}

423
test/sftp/framework.go

@ -0,0 +1,423 @@
package sftp
import (
"fmt"
"net"
"os"
"os/exec"
"path/filepath"
"runtime"
"syscall"
"testing"
"time"
"github.com/pkg/sftp"
"github.com/stretchr/testify/require"
"golang.org/x/crypto/ssh"
)
// SftpTestFramework provides utilities for SFTP integration testing
type SftpTestFramework struct {
t *testing.T
tempDir string
dataDir string
masterProcess *os.Process
volumeProcess *os.Process
filerProcess *os.Process
sftpProcess *os.Process
masterAddr string
volumeAddr string
filerAddr string
sftpAddr string
weedBinary string
userStoreFile string
hostKeyFile string
isSetup bool
skipCleanup bool
}
// TestConfig holds configuration for SFTP tests
type TestConfig struct {
NumVolumes int
EnableDebug bool
SkipCleanup bool // for debugging failed tests
UserStoreFile string
}
// DefaultTestConfig returns a default configuration for SFTP tests
func DefaultTestConfig() *TestConfig {
return &TestConfig{
NumVolumes: 3,
EnableDebug: false,
SkipCleanup: false,
UserStoreFile: "",
}
}
// NewSftpTestFramework creates a new SFTP testing framework
func NewSftpTestFramework(t *testing.T, config *TestConfig) *SftpTestFramework {
if config == nil {
config = DefaultTestConfig()
}
tempDir, err := os.MkdirTemp("", "seaweedfs_sftp_test_")
require.NoError(t, err)
// Generate SSH host key for SFTP server
hostKeyFile := filepath.Join(tempDir, "ssh_host_key")
cmd := exec.Command("ssh-keygen", "-t", "ed25519", "-f", hostKeyFile, "-N", "")
err = cmd.Run()
require.NoError(t, err, "failed to generate SSH host key")
// Use provided userstore or copy the test one
userStoreFile := config.UserStoreFile
if userStoreFile == "" {
// Copy test userstore to temp dir
userStoreFile = filepath.Join(tempDir, "userstore.json")
testDataPath := findTestDataPath()
input, err := os.ReadFile(filepath.Join(testDataPath, "userstore.json"))
require.NoError(t, err, "failed to read test userstore.json")
err = os.WriteFile(userStoreFile, input, 0644)
require.NoError(t, err, "failed to write userstore.json")
}
return &SftpTestFramework{
t: t,
tempDir: tempDir,
dataDir: filepath.Join(tempDir, "data"),
masterAddr: "127.0.0.1:19333",
volumeAddr: "127.0.0.1:18080",
filerAddr: "127.0.0.1:18888",
sftpAddr: "127.0.0.1:12022",
weedBinary: findWeedBinary(),
userStoreFile: userStoreFile,
hostKeyFile: hostKeyFile,
isSetup: false,
}
}
// Setup starts SeaweedFS cluster with SFTP server
func (f *SftpTestFramework) Setup(config *TestConfig) error {
if f.isSetup {
return fmt.Errorf("framework already setup")
}
// Create all data directories
dirs := []string{
f.dataDir,
filepath.Join(f.dataDir, "master"),
filepath.Join(f.dataDir, "volume"),
}
for _, dir := range dirs {
if err := os.MkdirAll(dir, 0755); err != nil {
return fmt.Errorf("failed to create directory %s: %v", dir, err)
}
}
// Start master
if err := f.startMaster(config); err != nil {
return fmt.Errorf("failed to start master: %v", err)
}
// Wait for master to be ready
if err := f.waitForService(f.masterAddr, 30*time.Second); err != nil {
return fmt.Errorf("master not ready: %v", err)
}
// Start volume server
if err := f.startVolumeServer(config); err != nil {
return fmt.Errorf("failed to start volume server: %v", err)
}
// Wait for volume server to be ready
if err := f.waitForService(f.volumeAddr, 30*time.Second); err != nil {
return fmt.Errorf("volume server not ready: %v", err)
}
// Start filer
if err := f.startFiler(config); err != nil {
return fmt.Errorf("failed to start filer: %v", err)
}
// Wait for filer to be ready
if err := f.waitForService(f.filerAddr, 30*time.Second); err != nil {
return fmt.Errorf("filer not ready: %v", err)
}
// Start SFTP server
if err := f.startSftpServer(config); err != nil {
return fmt.Errorf("failed to start SFTP server: %v", err)
}
// Wait for SFTP server to be ready
if err := f.waitForService(f.sftpAddr, 30*time.Second); err != nil {
return fmt.Errorf("SFTP server not ready: %v", err)
}
// Additional wait for all services to stabilize (gRPC endpoints)
time.Sleep(500 * time.Millisecond)
f.skipCleanup = config.SkipCleanup
f.isSetup = true
return nil
}
// Cleanup stops all processes and removes temporary files
func (f *SftpTestFramework) Cleanup() {
// Stop processes in reverse order
processes := []*os.Process{f.sftpProcess, f.filerProcess, f.volumeProcess, f.masterProcess}
for _, proc := range processes {
if proc != nil {
proc.Signal(syscall.SIGTERM)
proc.Wait()
}
}
// Remove temp directory
if !f.skipCleanup {
os.RemoveAll(f.tempDir)
}
}
// GetSftpAddr returns the SFTP server address
func (f *SftpTestFramework) GetSftpAddr() string {
return f.sftpAddr
}
// GetFilerAddr returns the filer address
func (f *SftpTestFramework) GetFilerAddr() string {
return f.filerAddr
}
// ConnectSFTP creates an SFTP client connection with the given credentials
func (f *SftpTestFramework) ConnectSFTP(username, password string) (*sftp.Client, *ssh.Client, error) {
// Load the known host public key for verification
hostKeyCallback, err := f.getHostKeyCallback()
if err != nil {
return nil, nil, fmt.Errorf("failed to get host key callback: %v", err)
}
config := &ssh.ClientConfig{
User: username,
Auth: []ssh.AuthMethod{
ssh.Password(password),
},
HostKeyCallback: hostKeyCallback,
Timeout: 5 * time.Second,
}
sshConn, err := ssh.Dial("tcp", f.sftpAddr, config)
if err != nil {
return nil, nil, fmt.Errorf("failed to connect SSH: %v", err)
}
sftpClient, err := sftp.NewClient(sshConn)
if err != nil {
sshConn.Close()
return nil, nil, fmt.Errorf("failed to create SFTP client: %v", err)
}
return sftpClient, sshConn, nil
}
// getHostKeyCallback returns a callback that verifies the server's host key
// matches the known test server key we generated
func (f *SftpTestFramework) getHostKeyCallback() (ssh.HostKeyCallback, error) {
// Read the public key file generated alongside the private key
pubKeyFile := f.hostKeyFile + ".pub"
pubKeyBytes, err := os.ReadFile(pubKeyFile)
if err != nil {
return nil, fmt.Errorf("failed to read host public key: %v", err)
}
// Parse the public key
pubKey, _, _, _, err := ssh.ParseAuthorizedKey(pubKeyBytes)
if err != nil {
return nil, fmt.Errorf("failed to parse host public key: %v", err)
}
// Return a callback that verifies the server key matches our known key
return ssh.FixedHostKey(pubKey), nil
}
// startMaster starts the SeaweedFS master server
func (f *SftpTestFramework) startMaster(config *TestConfig) error {
args := []string{
"master",
"-ip=127.0.0.1",
"-port=19333",
"-mdir=" + filepath.Join(f.dataDir, "master"),
"-raftBootstrap",
"-peers=none",
}
cmd := exec.Command(f.weedBinary, args...)
cmd.Dir = f.tempDir
if config.EnableDebug {
cmd.Stdout = os.Stdout
cmd.Stderr = os.Stderr
}
if err := cmd.Start(); err != nil {
return err
}
f.masterProcess = cmd.Process
return nil
}
// startVolumeServer starts SeaweedFS volume server
func (f *SftpTestFramework) startVolumeServer(config *TestConfig) error {
args := []string{
"volume",
"-mserver=" + f.masterAddr,
"-ip=127.0.0.1",
"-port=18080",
"-dir=" + filepath.Join(f.dataDir, "volume"),
fmt.Sprintf("-max=%d", config.NumVolumes),
}
cmd := exec.Command(f.weedBinary, args...)
cmd.Dir = f.tempDir
if config.EnableDebug {
cmd.Stdout = os.Stdout
cmd.Stderr = os.Stderr
}
if err := cmd.Start(); err != nil {
return err
}
f.volumeProcess = cmd.Process
return nil
}
// startFiler starts the SeaweedFS filer server
func (f *SftpTestFramework) startFiler(config *TestConfig) error {
args := []string{
"filer",
"-master=" + f.masterAddr,
"-ip=127.0.0.1",
"-port=18888",
}
cmd := exec.Command(f.weedBinary, args...)
cmd.Dir = f.tempDir
if config.EnableDebug {
cmd.Stdout = os.Stdout
cmd.Stderr = os.Stderr
}
if err := cmd.Start(); err != nil {
return err
}
f.filerProcess = cmd.Process
return nil
}
// startSftpServer starts the SeaweedFS SFTP server
func (f *SftpTestFramework) startSftpServer(config *TestConfig) error {
args := []string{
"sftp",
"-filer=" + f.filerAddr,
"-ip.bind=127.0.0.1",
"-port=12022",
"-sshPrivateKey=" + f.hostKeyFile,
"-userStoreFile=" + f.userStoreFile,
}
cmd := exec.Command(f.weedBinary, args...)
cmd.Dir = f.tempDir
if config.EnableDebug {
cmd.Stdout = os.Stdout
cmd.Stderr = os.Stderr
}
if err := cmd.Start(); err != nil {
return err
}
f.sftpProcess = cmd.Process
return nil
}
// waitForService waits for a service to be available
func (f *SftpTestFramework) waitForService(addr string, timeout time.Duration) error {
deadline := time.Now().Add(timeout)
for time.Now().Before(deadline) {
conn, err := net.DialTimeout("tcp", addr, 1*time.Second)
if err == nil {
conn.Close()
return nil
}
time.Sleep(100 * time.Millisecond)
}
return fmt.Errorf("service at %s not ready within timeout", addr)
}
// findWeedBinary locates the weed binary
// Prefers local build over system-installed weed to ensure we test the latest code
func findWeedBinary() string {
// Get the directory where this source file is located
// This ensures we find the locally built weed binary first
_, thisFile, _, ok := runtime.Caller(0)
if ok {
thisDir := filepath.Dir(thisFile)
// From test/sftp/, the weed binary should be at ../../weed/weed
candidates := []string{
filepath.Join(thisDir, "../../weed/weed"),
filepath.Join(thisDir, "../weed/weed"),
}
for _, candidate := range candidates {
if _, err := os.Stat(candidate); err == nil {
abs, _ := filepath.Abs(candidate)
return abs
}
}
}
// Try relative paths from current working directory
cwd, _ := os.Getwd()
candidates := []string{
filepath.Join(cwd, "../../weed/weed"),
filepath.Join(cwd, "../weed/weed"),
filepath.Join(cwd, "./weed"),
}
for _, candidate := range candidates {
if _, err := os.Stat(candidate); err == nil {
abs, _ := filepath.Abs(candidate)
return abs
}
}
// Fallback to PATH only if local build not found
if path, err := exec.LookPath("weed"); err == nil {
return path
}
// Default fallback
return "weed"
}
// findTestDataPath locates the testdata directory
func findTestDataPath() string {
// Get the directory where this source file is located
_, thisFile, _, ok := runtime.Caller(0)
if ok {
thisDir := filepath.Dir(thisFile)
testDataPath := filepath.Join(thisDir, "testdata")
if _, err := os.Stat(testDataPath); err == nil {
return testDataPath
}
}
// Try relative paths from current working directory
cwd, _ := os.Getwd()
candidates := []string{
filepath.Join(cwd, "testdata"),
filepath.Join(cwd, "../sftp/testdata"),
filepath.Join(cwd, "test/sftp/testdata"),
}
for _, candidate := range candidates {
if _, err := os.Stat(candidate); err == nil {
return candidate
}
}
return "./testdata"
}

17
test/sftp/go.mod

@ -0,0 +1,17 @@
module seaweedfs-sftp-tests
go 1.24.0
require (
github.com/pkg/sftp v1.13.7
github.com/stretchr/testify v1.10.0
golang.org/x/crypto v0.45.0
)
require (
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/kr/fs v0.1.0 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
golang.org/x/sys v0.38.0 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
)

64
test/sftp/go.sum

@ -0,0 +1,64 @@
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/kr/fs v0.1.0 h1:Jskdu9ieNAYnjxsi0LbQp1ulIKZV1LAFgK1tWhpZgl8=
github.com/kr/fs v0.1.0/go.mod h1:FFnZGqtBN9Gxj7eW1uZ42v5BccTP0vu6NEaFoC2HwRg=
github.com/pkg/sftp v1.13.7 h1:uv+I3nNJvlKZIQGSr8JVQLNHFU9YhhNpvC14Y6KgmSM=
github.com/pkg/sftp v1.13.7/go.mod h1:KMKI0t3T6hfA+lTR/ssZdunHo+uwq7ghoN09/FSu3DY=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA=
github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
golang.org/x/crypto v0.17.0/go.mod h1:gCAAfMLgwOJRpTjQ2zCCt2OcSfYMTeZVSRtQlPC7Nq4=
golang.org/x/crypto v0.45.0 h1:jMBrvKuj23MTlT0bQEOBcAE0mjg8mK9RXFhRH6nyF3Q=
golang.org/x/crypto v0.45.0/go.mod h1:XTGrrkGJve7CYK7J8PEww4aY7gM3qMCElcJQ8n8JdX4=
golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs=
golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg=
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.15.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/sys v0.38.0 h1:3yZWxaJjBmCWXqhN1qh02AkOnCQ1poK6oF+a7xWL6Gc=
golang.org/x/sys v0.38.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k=
golang.org/x/term v0.8.0/go.mod h1:xPskH00ivmX89bAKVGSKKtLOWNx2+17Eiy94tnKShWo=
golang.org/x/term v0.15.0/go.mod h1:BDl952bC7+uMoWR75FIrCDx79TPU9oHkTZ9yRbYOrX0=
golang.org/x/term v0.37.0 h1:8EGAD0qCmHYZg6J17DvsMy9/wJ7/D/4pV/wfnld5lTU=
golang.org/x/term v0.37.0/go.mod h1:5pB4lxRNYYVZuTLmy8oR2BH8dflOR+IbTYFD8fi3254=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8=
golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=

37
test/sftp/testdata/userstore.json

@ -0,0 +1,37 @@
[
{
"Username": "admin",
"Password": "adminpassword",
"PublicKeys": [],
"HomeDir": "/",
"Permissions": {
"/": ["*"]
},
"Uid": 0,
"Gid": 0
},
{
"Username": "testuser",
"Password": "testuserpassword",
"PublicKeys": [],
"HomeDir": "/sftp/testuser",
"Permissions": {
"/sftp/testuser": ["*"]
},
"Uid": 1001,
"Gid": 1001
},
{
"Username": "readonly",
"Password": "readonlypassword",
"PublicKeys": [],
"HomeDir": "/public",
"Permissions": {
"/public": ["read", "list"]
},
"Uid": 1002,
"Gid": 1002
}
]

22
weed/admin/dash/admin_server.go

@ -99,28 +99,22 @@ func NewAdminServer(masters string, templateFS http.FileSystem, dataDir string)
// Continue without credential manager - will fall back to legacy approach
} else {
server.credentialManager = credentialManager
glog.V(0).Infof("Credential manager initialized with store type: %s", credentialManager.GetStore().GetName())
// For stores that need filer address function, set them
// For stores that need filer address function, configure them
if store := credentialManager.GetStore(); store != nil {
if filerFuncSetter, ok := store.(interface {
SetFilerAddressFunc(func() pb.ServerAddress, grpc.DialOption)
}); ok {
// Set up a goroutine to configure filer address function once we discover filers
go func() {
for {
filerAddr := server.GetFilerAddress()
if filerAddr != "" {
// Configure the function to dynamically return the current active filer (HA-aware)
// Configure the filer address function to dynamically return the current active filer
// This function will be called each time credentials need to be loaded/saved,
// so it will automatically use whatever filer is currently available (HA-aware)
filerFuncSetter.SetFilerAddressFunc(func() pb.ServerAddress {
return pb.ServerAddress(server.GetFilerAddress())
}, server.grpcDialOption)
glog.V(1).Infof("Set filer address function for credential manager: %s", filerAddr)
break
}
glog.V(1).Infof("Waiting for filer discovery for credential manager...")
time.Sleep(5 * time.Second)
}
}()
glog.V(0).Infof("Credential store configured with dynamic filer address function")
} else {
glog.V(0).Infof("Credential store %s does not support filer address function", store.GetName())
}
}
}
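The hunk above drops the polling goroutine in favor of a closure that resolves the filer address at call time. A self-contained sketch of that late-binding pattern, with hypothetical stand-ins for `pb.ServerAddress` and the credential store:

```go
package main

import "fmt"

// ServerAddress and Store are hypothetical stand-ins for illustration.
type ServerAddress string

type Store struct{ addrFn func() ServerAddress }

func (s *Store) SetFilerAddressFunc(fn func() ServerAddress) { s.addrFn = fn }

func main() {
	var current ServerAddress // filer not yet discovered
	store := &Store{}
	// Register once; the closure re-resolves the address on every use,
	// so no background goroutine has to wait for filer discovery.
	store.SetFilerAddressFunc(func() ServerAddress { return current })

	current = "filer-1:8888" // discovery happens later
	fmt.Println(store.addrFn()) // filer-1:8888
}
```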

240
weed/admin/handlers/file_browser_handlers.go

@ -5,10 +5,12 @@ import (
"context"
"fmt"
"io"
"mime"
"mime/multipart"
"net"
"net/http"
"os"
"path"
"path/filepath"
"strconv"
"strings"
@ -20,15 +22,37 @@ import (
"github.com/seaweedfs/seaweedfs/weed/admin/view/layout"
"github.com/seaweedfs/seaweedfs/weed/glog"
"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
"github.com/seaweedfs/seaweedfs/weed/util"
"github.com/seaweedfs/seaweedfs/weed/util/http/client"
)
type FileBrowserHandlers struct {
adminServer *dash.AdminServer
httpClient *client.HTTPClient
}
func NewFileBrowserHandlers(adminServer *dash.AdminServer) *FileBrowserHandlers {
// Create HTTP client with TLS support from https.client configuration
// The client is created without a timeout - each operation will set its own timeout
// If TLS is enabled but misconfigured, fail fast to alert the operator immediately
// rather than silently falling back to HTTP and causing confusing runtime errors
httpClient, err := client.NewHttpClient(client.Client)
if err != nil {
glog.Fatalf("Failed to create HTTPS client for file browser: %v", err)
}
return &FileBrowserHandlers{
adminServer: adminServer,
httpClient: httpClient,
}
}
// newClientWithTimeout creates a temporary http.Client with the specified timeout,
// reusing the TLS transport from the shared httpClient.
func (h *FileBrowserHandlers) newClientWithTimeout(timeout time.Duration) http.Client {
return http.Client{
Transport: h.httpClient.Client.Transport,
Timeout: timeout,
}
}
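The helper above shares one TLS-configured transport across operations while letting each call site choose its own timeout. A minimal standalone version of the same idea (the 60s/10s values mirror the upload and text-sniffing call sites in this file):

```go
package main

import (
	"net/http"
	"time"
)

// clientWithTimeout reuses an existing Transport (connection pool, TLS
// configuration) and varies only the per-operation timeout.
func clientWithTimeout(base *http.Client, d time.Duration) http.Client {
	return http.Client{Transport: base.Transport, Timeout: d}
}

func main() {
	base := &http.Client{Transport: http.DefaultTransport}
	upload := clientWithTimeout(base, 60*time.Second) // large file uploads
	probe := clientWithTimeout(base, 10*time.Second)  // quick content checks
	_, _ = upload, probe
}
```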
@ -245,8 +269,12 @@ func (h *FileBrowserHandlers) UploadFile(c *gin.Context) {
continue
}
// Create full path for the file
fullPath := filepath.Join(currentPath, fileName)
// Normalize Windows-style backslashes to forward slashes
fileName = util.CleanWindowsPath(fileName)
// Create full path for the file using path.Join for URL path semantics
// path.Join handles double slashes and is not OS-specific like filepath.Join
fullPath := path.Join(currentPath, fileName)
if !strings.HasPrefix(fullPath, "/") {
fullPath = "/" + fullPath
}
@ -327,8 +355,10 @@ func (h *FileBrowserHandlers) uploadFileToFiler(filePath string, fileHeader *mul
var body bytes.Buffer
writer := multipart.NewWriter(&body)
// Create form file field
part, err := writer.CreateFormFile("file", fileHeader.Filename)
// Create form file field with normalized base filename
// Use path.Base (not filepath.Base) since cleanFilePath uses URL path semantics
baseFileName := path.Base(cleanFilePath)
part, err := writer.CreateFormFile("file", baseFileName)
if err != nil {
return fmt.Errorf("failed to create form file: %w", err)
}
@ -345,8 +375,15 @@ func (h *FileBrowserHandlers) uploadFileToFiler(filePath string, fileHeader *mul
return fmt.Errorf("failed to close multipart writer: %w", err)
}
// Create the upload URL with validated components
uploadURL := fmt.Sprintf("http://%s%s", filerAddress, cleanFilePath)
// Create the upload URL - the httpClient will normalize to the correct scheme (http/https)
// based on the https.client configuration in security.toml
uploadURL := fmt.Sprintf("%s%s", filerAddress, cleanFilePath)
// Normalize the URL scheme based on TLS configuration
uploadURL, err = h.httpClient.NormalizeHttpScheme(uploadURL)
if err != nil {
return fmt.Errorf("failed to normalize URL scheme: %w", err)
}
// Create HTTP request
req, err := http.NewRequest("POST", uploadURL, &body)
@ -357,11 +394,11 @@ func (h *FileBrowserHandlers) uploadFileToFiler(filePath string, fileHeader *mul
// Set content type with boundary
req.Header.Set("Content-Type", writer.FormDataContentType())
// Send request
client := &http.Client{Timeout: 60 * time.Second} // Increased timeout for larger files
// Send request using TLS-aware HTTP client with 60s timeout for large file uploads
// lgtm[go/ssrf]
// Safe: filerAddress validated by validateFilerAddress() to match configured filer
// Safe: cleanFilePath validated and cleaned by validateAndCleanFilePath() to prevent path traversal
client := h.newClientWithTimeout(60 * time.Second)
resp, err := client.Do(req)
if err != nil {
return fmt.Errorf("failed to upload file: %w", err)
@ -423,8 +460,12 @@ func (h *FileBrowserHandlers) validateAndCleanFilePath(filePath string) (string,
return "", fmt.Errorf("file path cannot be empty")
}
// Normalize Windows-style backslashes to forward slashes
filePath = util.CleanWindowsPath(filePath)
// Clean the path to remove any .. or . components
cleanPath := filepath.Clean(filePath)
// Use path.Clean (not filepath.Clean) since this is a URL path
cleanPath := path.Clean(filePath)
// Ensure the path starts with /
if !strings.HasPrefix(cleanPath, "/") {
@ -444,7 +485,57 @@ func (h *FileBrowserHandlers) validateAndCleanFilePath(filePath string) (string,
return cleanPath, nil
}
// DownloadFile handles file download requests
// fetchFileContent fetches file content from the filer and returns the content or an error.
func (h *FileBrowserHandlers) fetchFileContent(filePath string, timeout time.Duration) (string, error) {
filerAddress := h.adminServer.GetFilerAddress()
if filerAddress == "" {
return "", fmt.Errorf("filer address not configured")
}
if err := h.validateFilerAddress(filerAddress); err != nil {
return "", fmt.Errorf("invalid filer address configuration: %w", err)
}
cleanFilePath, err := h.validateAndCleanFilePath(filePath)
if err != nil {
return "", err
}
// Create the file URL with proper scheme based on TLS configuration
fileURL := fmt.Sprintf("%s%s", filerAddress, cleanFilePath)
fileURL, err = h.httpClient.NormalizeHttpScheme(fileURL)
if err != nil {
return "", fmt.Errorf("failed to construct file URL: %w", err)
}
// lgtm[go/ssrf]
// Safe: filerAddress validated by validateFilerAddress() to match configured filer
// Safe: cleanFilePath validated and cleaned by validateAndCleanFilePath() to prevent path traversal
client := h.newClientWithTimeout(timeout)
resp, err := client.Get(fileURL)
if err != nil {
return "", fmt.Errorf("failed to fetch file from filer: %w", err)
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
body, err := io.ReadAll(resp.Body)
if err != nil {
return "", fmt.Errorf("filer returned status %d but failed to read response body: %w", resp.StatusCode, err)
}
return "", fmt.Errorf("filer returned status %d: %s", resp.StatusCode, string(body))
}
contentBytes, err := io.ReadAll(resp.Body)
if err != nil {
return "", fmt.Errorf("failed to read file content: %w", err)
}
return string(contentBytes), nil
}
// DownloadFile handles file download requests by proxying through the Admin UI server
// This ensures mTLS works correctly since the Admin UI server has the client certificates
func (h *FileBrowserHandlers) DownloadFile(c *gin.Context) {
filePath := c.Query("path")
if filePath == "" {
@ -459,6 +550,12 @@ func (h *FileBrowserHandlers) DownloadFile(c *gin.Context) {
return
}
// Validate filer address to prevent SSRF
if err := h.validateFilerAddress(filerAddress); err != nil {
c.JSON(http.StatusInternalServerError, gin.H{"error": "Invalid filer address configuration"})
return
}
// Validate and sanitize the file path
cleanFilePath, err := h.validateAndCleanFilePath(filePath)
if err != nil {
@ -466,16 +563,66 @@ func (h *FileBrowserHandlers) DownloadFile(c *gin.Context) {
return
}
// Create the download URL
downloadURL := fmt.Sprintf("http://%s%s", filerAddress, cleanFilePath)
// Create the download URL with proper scheme based on TLS configuration
downloadURL := fmt.Sprintf("%s%s", filerAddress, cleanFilePath)
downloadURL, err = h.httpClient.NormalizeHttpScheme(downloadURL)
if err != nil {
c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to construct download URL: " + err.Error()})
return
}
// Proxy the download through the Admin UI server to support mTLS
// lgtm[go/ssrf]
// Safe: filerAddress validated by validateFilerAddress() to match configured filer
// Safe: cleanFilePath validated and cleaned by validateAndCleanFilePath() to prevent path traversal
// Use request context so download is cancelled when client disconnects
req, err := http.NewRequestWithContext(c.Request.Context(), "GET", downloadURL, nil)
if err != nil {
c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to create request: " + err.Error()})
return
}
client := h.newClientWithTimeout(5 * time.Minute) // Longer timeout for large file downloads
resp, err := client.Do(req)
if err != nil {
c.JSON(http.StatusBadGateway, gin.H{"error": "Failed to fetch file from filer: " + err.Error()})
return
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
body, err := io.ReadAll(resp.Body)
if err != nil {
c.JSON(resp.StatusCode, gin.H{"error": fmt.Sprintf("Filer returned status %d but failed to read response body: %v", resp.StatusCode, err)})
return
}
c.JSON(resp.StatusCode, gin.H{"error": fmt.Sprintf("Filer returned status %d: %s", resp.StatusCode, string(body))})
return
}
// Set headers for file download
fileName := filepath.Base(cleanFilePath)
c.Header("Content-Disposition", fmt.Sprintf("attachment; filename=\"%s\"", fileName))
c.Header("Content-Type", "application/octet-stream")
// Use mime.FormatMediaType for RFC 6266 compliant Content-Disposition,
// properly handling non-ASCII characters and special characters
c.Header("Content-Disposition", mime.FormatMediaType("attachment", map[string]string{"filename": fileName}))
// Proxy the request to filer
c.Redirect(http.StatusFound, downloadURL)
// Use content type from filer response, or default to octet-stream
contentType := resp.Header.Get("Content-Type")
if contentType == "" {
contentType = "application/octet-stream"
}
c.Header("Content-Type", contentType)
// Set content length if available
if resp.ContentLength > 0 {
c.Header("Content-Length", fmt.Sprintf("%d", resp.ContentLength))
}
// Stream the response body to the client
c.Status(http.StatusOK)
_, err = io.Copy(c.Writer, resp.Body)
if err != nil {
glog.Errorf("Error streaming file download: %v", err)
}
}
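`mime.FormatMediaType` is a standard-library call; a quick illustration of why it replaces the hand-formatted header (non-ASCII filenames are encoded rather than breaking the quoted form):

```go
package main

import (
	"fmt"
	"mime"
)

func main() {
	fmt.Println(mime.FormatMediaType("attachment",
		map[string]string{"filename": "report.pdf"}))
	// attachment; filename=report.pdf

	fmt.Println(mime.FormatMediaType("attachment",
		map[string]string{"filename": "文件名.txt"}))
	// Non-ASCII values come out in the RFC 2231 extended form,
	// filename*=utf-8''<percent-encoded>, which clients decode correctly.
}
```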
// ViewFile handles file viewing requests (for text files, images, etc.)
@ -559,46 +706,13 @@ func (h *FileBrowserHandlers) ViewFile(c *gin.Context) {
viewable = false
reason = "File too large for viewing (>1MB)"
} else {
// Get file content from filer
filerAddress := h.adminServer.GetFilerAddress()
if filerAddress != "" {
// Validate filer address to prevent SSRF
if err := h.validateFilerAddress(filerAddress); err != nil {
viewable = false
reason = "Invalid filer address configuration"
} else {
cleanFilePath, err := h.validateAndCleanFilePath(filePath)
if err == nil {
fileURL := fmt.Sprintf("http://%s%s", filerAddress, cleanFilePath)
client := &http.Client{Timeout: 30 * time.Second}
// lgtm[go/ssrf]
// Safe: filerAddress validated by validateFilerAddress() to match configured filer
// Safe: cleanFilePath validated and cleaned by validateAndCleanFilePath() to prevent path traversal
resp, err := client.Get(fileURL)
if err == nil && resp.StatusCode == http.StatusOK {
defer resp.Body.Close()
contentBytes, err := io.ReadAll(resp.Body)
if err == nil {
content = string(contentBytes)
viewable = true
} else {
viewable = false
reason = "Failed to read file content"
}
} else {
viewable = false
reason = "Failed to fetch file from filer"
}
} else {
viewable = false
reason = "Invalid file path"
}
}
} else {
viewable = false
reason = "Filer address not configured"
// Fetch file content from filer
var err error
content, err = h.fetchFileContent(filePath, 30*time.Second)
if err != nil {
reason = err.Error()
}
viewable = (err == nil)
}
} else {
// Not a text file, but might be viewable as image or PDF
@ -893,18 +1007,28 @@ func (h *FileBrowserHandlers) isLikelyTextFile(filePath string, maxCheckSize int
return false
}
fileURL := fmt.Sprintf("http://%s%s", filerAddress, cleanFilePath)
// Create the file URL with proper scheme based on TLS configuration
fileURL := fmt.Sprintf("%s%s", filerAddress, cleanFilePath)
fileURL, err = h.httpClient.NormalizeHttpScheme(fileURL)
if err != nil {
glog.Errorf("Failed to normalize URL scheme: %v", err)
return false
}
client := &http.Client{Timeout: 10 * time.Second}
// lgtm[go/ssrf]
// Safe: filerAddress validated by validateFilerAddress() to match configured filer
// Safe: cleanFilePath validated and cleaned by validateAndCleanFilePath() to prevent path traversal
client := h.newClientWithTimeout(10 * time.Second)
resp, err := client.Get(fileURL)
if err != nil || resp.StatusCode != http.StatusOK {
if err != nil {
return false
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
return false
}
// Read first few bytes to check if it's text
buffer := make([]byte, min(maxCheckSize, 512))
n, err := resp.Body.Read(buffer)

4
weed/command/filer.go

@ -122,13 +122,13 @@ func init() {
filerS3Options.tlsCertificate = cmdFiler.Flag.String("s3.cert.file", "", "path to the TLS certificate file")
filerS3Options.config = cmdFiler.Flag.String("s3.config", "", "path to the config file")
filerS3Options.auditLogConfig = cmdFiler.Flag.String("s3.auditLogConfig", "", "path to the audit log config file")
filerS3Options.allowEmptyFolder = cmdFiler.Flag.Bool("s3.allowEmptyFolder", true, "allow empty folders")
cmdFiler.Flag.Bool("s3.allowEmptyFolder", true, "deprecated, ignored. Empty folder cleanup is now automatic.")
filerS3Options.allowDeleteBucketNotEmpty = cmdFiler.Flag.Bool("s3.allowDeleteBucketNotEmpty", true, "allow recursive deleting all entries along with bucket")
filerS3Options.localSocket = cmdFiler.Flag.String("s3.localSocket", "", "default to /tmp/seaweedfs-s3-<port>.sock")
filerS3Options.tlsCACertificate = cmdFiler.Flag.String("s3.cacert.file", "", "path to the TLS CA certificate file")
filerS3Options.tlsVerifyClientCert = cmdFiler.Flag.Bool("s3.tlsVerifyClientCert", false, "whether to verify the client's certificate")
filerS3Options.bindIp = cmdFiler.Flag.String("s3.ip.bind", "", "ip address to bind to. If empty, default to same as -ip.bind option.")
filerS3Options.idleTimeout = cmdFiler.Flag.Int("s3.idleTimeout", 10, "connection idle seconds")
filerS3Options.idleTimeout = cmdFiler.Flag.Int("s3.idleTimeout", 120, "connection idle seconds")
filerS3Options.concurrentUploadLimitMB = cmdFiler.Flag.Int("s3.concurrentUploadLimitMB", 128, "limit total concurrent upload size for S3")
filerS3Options.concurrentFileUploadLimit = cmdFiler.Flag.Int("s3.concurrentFileUploadLimit", 0, "limit number of concurrent file uploads for S3, 0 means unlimited")

6
weed/command/s3.go

@ -49,7 +49,6 @@ type S3Options struct {
tlsVerifyClientCert *bool
metricsHttpPort *int
metricsHttpIp *string
allowEmptyFolder *bool
allowDeleteBucketNotEmpty *bool
auditLogConfig *string
localFilerSocket *string
@ -80,11 +79,11 @@ func init() {
s3StandaloneOptions.tlsVerifyClientCert = cmdS3.Flag.Bool("tlsVerifyClientCert", false, "whether to verify the client's certificate")
s3StandaloneOptions.metricsHttpPort = cmdS3.Flag.Int("metricsPort", 0, "Prometheus metrics listen port")
s3StandaloneOptions.metricsHttpIp = cmdS3.Flag.String("metricsIp", "", "metrics listen ip. If empty, default to same as -ip.bind option.")
s3StandaloneOptions.allowEmptyFolder = cmdS3.Flag.Bool("allowEmptyFolder", true, "allow empty folders")
cmdS3.Flag.Bool("allowEmptyFolder", true, "deprecated, ignored. Empty folder cleanup is now automatic.")
s3StandaloneOptions.allowDeleteBucketNotEmpty = cmdS3.Flag.Bool("allowDeleteBucketNotEmpty", true, "allow recursively deleting all entries along with the bucket")
s3StandaloneOptions.localFilerSocket = cmdS3.Flag.String("localFilerSocket", "", "local filer socket path")
s3StandaloneOptions.localSocket = cmdS3.Flag.String("localSocket", "", "defaults to /tmp/seaweedfs-s3-<port>.sock")
s3StandaloneOptions.idleTimeout = cmdS3.Flag.Int("idleTimeout", 10, "connection idle seconds")
s3StandaloneOptions.idleTimeout = cmdS3.Flag.Int("idleTimeout", 120, "connection idle seconds")
s3StandaloneOptions.concurrentUploadLimitMB = cmdS3.Flag.Int("concurrentUploadLimitMB", 128, "limit total concurrent upload size")
s3StandaloneOptions.concurrentFileUploadLimit = cmdS3.Flag.Int("concurrentFileUploadLimit", 0, "limit number of concurrent file uploads, 0 means unlimited")
}
@@ -273,7 +272,6 @@ func (s3opt *S3Options) startS3Server() bool {
AllowedOrigins: strings.Split(*s3opt.allowedOrigins, ","),
BucketsPath: filerBucketsPath,
GrpcDialOption: grpcDialOption,
AllowEmptyFolder: *s3opt.allowEmptyFolder,
AllowDeleteBucketNotEmpty: *s3opt.allowDeleteBucketNotEmpty,
LocalFilerSocket: localFilerSocket,
DataCenter: *s3opt.dataCenter,
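The idle-timeout default rises from 10 to 120 seconds across the filer, s3, and server commands. A hedged sketch of how such a setting typically maps onto Go's `net/http` server (the actual wiring inside weed is not shown in this diff):

package main

import (
	"net/http"
	"time"
)

// newS3Server is illustrative only: IdleTimeout bounds how long a
// keep-alive connection may sit unused before the server closes it.
// A 120s default keeps S3 client connection pools warm instead of
// churning TCP connections every 10 seconds.
func newS3Server(idleSeconds int) *http.Server {
	return &http.Server{
		Addr:        ":8333",
		IdleTimeout: time.Duration(idleSeconds) * time.Second,
	}
}

func main() {
	srv := newS3Server(120)
	_ = srv // start with srv.ListenAndServe() in real code
}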

6
weed/command/server.go

@@ -133,11 +133,13 @@ func init() {
serverOptions.v.port = cmdServer.Flag.Int("volume.port", 8080, "volume server http listen port")
serverOptions.v.portGrpc = cmdServer.Flag.Int("volume.port.grpc", 0, "volume server grpc listen port")
serverOptions.v.publicPort = cmdServer.Flag.Int("volume.port.public", 0, "volume server public port")
serverOptions.v.id = cmdServer.Flag.String("volume.id", "", "volume server id. If empty, defaults to ip:port")
serverOptions.v.indexType = cmdServer.Flag.String("volume.index", "memory", "Choose [memory|leveldb|leveldbMedium|leveldbLarge] mode for memory~performance balance.")
serverOptions.v.diskType = cmdServer.Flag.String("volume.disk", "", "[hdd|ssd|<tag>] hard drive or solid state drive or any tag")
serverOptions.v.fixJpgOrientation = cmdServer.Flag.Bool("volume.images.fix.orientation", false, "Adjust jpg orientation when uploading.")
serverOptions.v.readMode = cmdServer.Flag.String("volume.readMode", "proxy", "[local|proxy|redirect] how to deal with non-local volume: 'not found|read in remote node|redirect volume location'.")
serverOptions.v.compactionMBPerSecond = cmdServer.Flag.Int("volume.compactionMBps", 0, "limit compaction speed in mega bytes per second")
serverOptions.v.maintenanceMBPerSecond = cmdServer.Flag.Int("volume.maintenanceMBps", 0, "limit maintenance (replication / balance) IO rate in MB/s. Defaults to 0, meaning no limit.")
serverOptions.v.fileSizeLimitMB = cmdServer.Flag.Int("volume.fileSizeLimitMB", 256, "limit file size to avoid out of memory")
serverOptions.v.ldbTimeout = cmdServer.Flag.Int64("volume.index.leveldbTimeout", 0, "alive time for leveldb (defaults to 0). If a volume's leveldb is not accessed within ldbTimeout hours, it will be offloaded to reduce open files and memory consumption.")
serverOptions.v.concurrentUploadLimitMB = cmdServer.Flag.Int("volume.concurrentUploadLimitMB", 64, "limit total concurrent upload size")
@@ -164,11 +166,11 @@ func init() {
s3Options.config = cmdServer.Flag.String("s3.config", "", "path to the config file")
s3Options.iamConfig = cmdServer.Flag.String("s3.iam.config", "", "path to the advanced IAM config file for S3. Overrides -iam.config if both are provided.")
s3Options.auditLogConfig = cmdServer.Flag.String("s3.auditLogConfig", "", "path to the audit log config file")
s3Options.allowEmptyFolder = cmdServer.Flag.Bool("s3.allowEmptyFolder", true, "allow empty folders")
cmdServer.Flag.Bool("s3.allowEmptyFolder", true, "deprecated, ignored. Empty folder cleanup is now automatic.")
s3Options.allowDeleteBucketNotEmpty = cmdServer.Flag.Bool("s3.allowDeleteBucketNotEmpty", true, "allow recursively deleting all entries along with the bucket")
s3Options.localSocket = cmdServer.Flag.String("s3.localSocket", "", "defaults to /tmp/seaweedfs-s3-<port>.sock")
s3Options.bindIp = cmdServer.Flag.String("s3.ip.bind", "", "ip address to bind to. If empty, default to same as -ip.bind option.")
s3Options.idleTimeout = cmdServer.Flag.Int("s3.idleTimeout", 10, "connection idle seconds")
s3Options.idleTimeout = cmdServer.Flag.Int("s3.idleTimeout", 120, "connection idle seconds")
s3Options.concurrentUploadLimitMB = cmdServer.Flag.Int("s3.concurrentUploadLimitMB", 128, "limit total concurrent upload size for S3")
s3Options.concurrentFileUploadLimit = cmdServer.Flag.Int("s3.concurrentFileUploadLimit", 0, "limit number of concurrent file uploads for S3, 0 means unlimited")

10
weed/command/volume.go

@@ -41,6 +41,7 @@ type VolumeServerOptions struct {
folderMaxLimits []int32
idxFolder *string
ip *string
id *string
publicUrl *string
bindIp *string
mastersString *string
@@ -57,6 +58,7 @@ type VolumeServerOptions struct {
cpuProfile *string
memProfile *string
compactionMBPerSecond *int
maintenanceMBPerSecond *int
fileSizeLimitMB *int
concurrentUploadLimitMB *int
concurrentDownloadLimitMB *int
@@ -78,6 +80,7 @@ func init() {
v.portGrpc = cmdVolume.Flag.Int("port.grpc", 0, "grpc listen port")
v.publicPort = cmdVolume.Flag.Int("port.public", 0, "port opened to public")
v.ip = cmdVolume.Flag.String("ip", util.DetectedHostAddress(), "ip or server name, also used as identifier")
v.id = cmdVolume.Flag.String("id", "", "volume server id. If empty, defaults to ip:port")
v.publicUrl = cmdVolume.Flag.String("publicUrl", "", "Publicly accessible address")
v.bindIp = cmdVolume.Flag.String("ip.bind", "", "ip address to bind to. If empty, default to same as -ip option.")
v.mastersString = cmdVolume.Flag.String("master", "localhost:9333", "comma-separated master servers")
@@ -94,6 +97,7 @@ func init() {
v.cpuProfile = cmdVolume.Flag.String("cpuprofile", "", "cpu profile output file")
v.memProfile = cmdVolume.Flag.String("memprofile", "", "memory profile output file")
v.compactionMBPerSecond = cmdVolume.Flag.Int("compactionMBps", 0, "limit background compaction or copying speed in mega bytes per second")
v.maintenanceMBPerSecond = cmdVolume.Flag.Int("maintenanceMBps", 0, "limit maintenance (replication / balance) IO rate in MB/s. Defaults to 0, meaning no limit.")
v.fileSizeLimitMB = cmdVolume.Flag.Int("fileSizeLimitMB", 256, "limit file size to avoid out of memory")
v.ldbTimeout = cmdVolume.Flag.Int64("index.leveldbTimeout", 0, "alive time for leveldb (defaults to 0). If a volume's leveldb is not accessed within ldbTimeout hours, it will be offloaded to reduce open files and memory consumption.")
v.concurrentUploadLimitMB = cmdVolume.Flag.Int("concurrentUploadLimitMB", 256, "limit total concurrent upload size")
@@ -253,8 +257,11 @@ func (v VolumeServerOptions) startVolumeServer(volumeFolders, maxVolumeCounts, v
volumeNeedleMapKind = storage.NeedleMapLevelDbLarge
}
// Determine volume server ID: if not specified, use ip:port
volumeServerId := util.GetVolumeServerId(*v.id, *v.ip, *v.port)
volumeServer := weed_server.NewVolumeServer(volumeMux, publicVolumeMux,
*v.ip, *v.port, *v.portGrpc, *v.publicUrl,
*v.ip, *v.port, *v.portGrpc, *v.publicUrl, volumeServerId,
v.folders, v.folderMaxLimits, minFreeSpaces, diskTypes,
*v.idxFolder,
volumeNeedleMapKind,
@@ -262,6 +269,7 @@ func (v VolumeServerOptions) startVolumeServer(volumeFolders, maxVolumeCounts, v
v.whiteList,
*v.fixJpgOrientation, *v.readMode,
*v.compactionMBPerSecond,
*v.maintenanceMBPerSecond,
*v.fileSizeLimitMB,
int64(*v.concurrentUploadLimitMB)*1024*1024,
int64(*v.concurrentDownloadLimitMB)*1024*1024,
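`util.GetVolumeServerId` itself is not part of this diff; a plausible reimplementation of its documented behavior ("if empty, defaults to ip:port") looks like this — the helper name and exact fallback format here are assumptions:

package main

import "fmt"

// getVolumeServerID is a hypothetical stand-in for util.GetVolumeServerId:
// an explicit -id wins; otherwise the server identifies itself as "ip:port".
func getVolumeServerID(id, ip string, port int) string {
	if id != "" {
		return id
	}
	return fmt.Sprintf("%s:%d", ip, port)
}

func main() {
	fmt.Println(getVolumeServerID("", "10.0.0.5", 8080))      // 10.0.0.5:8080
	fmt.Println(getVolumeServerID("vol-a", "10.0.0.5", 8080)) // vol-a
}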

4
weed/credential/filer_etc/filer_etc_store.go

@@ -58,7 +58,7 @@ func (store *FilerEtcStore) withFilerClient(fn func(client filer_pb.SeaweedFiler
store.mu.RLock()
if store.filerAddressFunc == nil {
store.mu.RUnlock()
return fmt.Errorf("filer_etc: filer address function not configured")
return fmt.Errorf("filer_etc: filer not yet available - please wait for filer discovery to complete and try again")
}
filerAddress := store.filerAddressFunc()
@@ -66,7 +66,7 @@ func (store *FilerEtcStore) withFilerClient(fn func(client filer_pb.SeaweedFiler
store.mu.RUnlock()
if filerAddress == "" {
return fmt.Errorf("filer_etc: filer address is empty")
return fmt.Errorf("filer_etc: no filer discovered yet - please ensure a filer is running and accessible")
}
// Use the pb.WithGrpcFilerClient helper similar to existing code

207
weed/filer/empty_folder_cleanup/cleanup_queue.go

@@ -0,0 +1,207 @@
package empty_folder_cleanup
import (
"container/list"
"sync"
"time"
)
// CleanupQueue manages a deduplicated queue of folders pending cleanup.
// It uses a doubly-linked list ordered by event time (oldest at front) and a map for O(1) deduplication.
// Processing is triggered when:
// - Queue size reaches maxSize, OR
// - Oldest item exceeds maxAge
type CleanupQueue struct {
mu sync.Mutex
items *list.List // Linked list of *queueItem ordered by time (front = oldest)
itemsMap map[string]*list.Element // folder -> list element for O(1) lookup
maxSize int // Max queue size before triggering cleanup
maxAge time.Duration // Max age before triggering cleanup
}
// queueItem represents an item in the cleanup queue
type queueItem struct {
folder string
queueTime time.Time
}
// NewCleanupQueue creates a new CleanupQueue with the specified limits
func NewCleanupQueue(maxSize int, maxAge time.Duration) *CleanupQueue {
return &CleanupQueue{
items: list.New(),
itemsMap: make(map[string]*list.Element),
maxSize: maxSize,
maxAge: maxAge,
}
}
// Add adds a folder to the queue with the specified event time.
// The item is inserted in time-sorted order (oldest at front) to handle out-of-order events.
// If folder already exists with an older time, the time is updated and position adjusted.
// Returns true if the folder was newly added, false if it was updated.
func (q *CleanupQueue) Add(folder string, eventTime time.Time) bool {
q.mu.Lock()
defer q.mu.Unlock()
// Check if folder already exists
if elem, exists := q.itemsMap[folder]; exists {
existingItem := elem.Value.(*queueItem)
// Only update if new event is later
if eventTime.After(existingItem.queueTime) {
// Remove from current position
q.items.Remove(elem)
// Re-insert with new time in sorted position
newElem := q.insertSorted(folder, eventTime)
q.itemsMap[folder] = newElem
}
return false
}
// Insert new folder in sorted position
elem := q.insertSorted(folder, eventTime)
q.itemsMap[folder] = elem
return true
}
// insertSorted inserts an item in the correct position to maintain time ordering (oldest at front)
func (q *CleanupQueue) insertSorted(folder string, eventTime time.Time) *list.Element {
item := &queueItem{
folder: folder,
queueTime: eventTime,
}
// Find the correct position (insert before the first item with a later time)
for elem := q.items.Back(); elem != nil; elem = elem.Prev() {
existingItem := elem.Value.(*queueItem)
if !eventTime.Before(existingItem.queueTime) {
// Insert after this element
return q.items.InsertAfter(item, elem)
}
}
// This item is the oldest, insert at front
return q.items.PushFront(item)
}
// Remove removes a specific folder from the queue (e.g., when a file is created).
// Returns true if the folder was found and removed.
func (q *CleanupQueue) Remove(folder string) bool {
q.mu.Lock()
defer q.mu.Unlock()
elem, exists := q.itemsMap[folder]
if !exists {
return false
}
q.items.Remove(elem)
delete(q.itemsMap, folder)
return true
}
// ShouldProcess returns true if the queue should be processed.
// This is true when:
// - Queue size >= maxSize, OR
// - Oldest item age > maxAge
func (q *CleanupQueue) ShouldProcess() bool {
q.mu.Lock()
defer q.mu.Unlock()
return q.shouldProcessLocked()
}
// shouldProcessLocked checks if processing is needed (caller must hold lock)
func (q *CleanupQueue) shouldProcessLocked() bool {
if q.items.Len() == 0 {
return false
}
// Check if queue is full
if q.items.Len() >= q.maxSize {
return true
}
// Check if oldest item exceeds max age
front := q.items.Front()
if front != nil {
item := front.Value.(*queueItem)
if time.Since(item.queueTime) > q.maxAge {
return true
}
}
return false
}
// Pop removes and returns the oldest folder from the queue.
// Returns the folder and true if an item was available, or empty string and false if queue is empty.
func (q *CleanupQueue) Pop() (string, bool) {
q.mu.Lock()
defer q.mu.Unlock()
front := q.items.Front()
if front == nil {
return "", false
}
item := front.Value.(*queueItem)
q.items.Remove(front)
delete(q.itemsMap, item.folder)
return item.folder, true
}
// Peek returns the oldest folder without removing it.
// Returns the folder and queue time if available, or empty values if queue is empty.
func (q *CleanupQueue) Peek() (folder string, queueTime time.Time, ok bool) {
q.mu.Lock()
defer q.mu.Unlock()
front := q.items.Front()
if front == nil {
return "", time.Time{}, false
}
item := front.Value.(*queueItem)
return item.folder, item.queueTime, true
}
// Len returns the current queue size.
func (q *CleanupQueue) Len() int {
q.mu.Lock()
defer q.mu.Unlock()
return q.items.Len()
}
// Contains checks if a folder is in the queue.
func (q *CleanupQueue) Contains(folder string) bool {
q.mu.Lock()
defer q.mu.Unlock()
_, exists := q.itemsMap[folder]
return exists
}
// Clear removes all items from the queue.
func (q *CleanupQueue) Clear() {
q.mu.Lock()
defer q.mu.Unlock()
q.items.Init()
q.itemsMap = make(map[string]*list.Element)
}
// OldestAge returns the age of the oldest item in the queue, or 0 if empty.
func (q *CleanupQueue) OldestAge() time.Duration {
q.mu.Lock()
defer q.mu.Unlock()
front := q.items.Front()
if front == nil {
return 0
}
item := front.Value.(*queueItem)
return time.Since(item.queueTime)
}
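A short usage sketch of the API above, written as if it lived in the same `empty_folder_cleanup` package (paths and times are illustrative):

package empty_folder_cleanup

import (
	"fmt"
	"time"
)

func ExampleCleanupQueue() {
	q := NewCleanupQueue(1000, 10*time.Minute)
	now := time.Now()

	q.Add("/buckets/b1/photos", now)                  // true: newly queued
	q.Add("/buckets/b1/photos", now.Add(time.Second)) // false: deduplicated, time updated
	q.Remove("/buckets/b1/photos")                    // a create event cancels the pending cleanup

	q.Add("/buckets/b1/tmp", now.Add(-11*time.Minute)) // already older than maxAge
	for q.ShouldProcess() {
		folder, ok := q.Pop()
		if !ok {
			break
		}
		fmt.Println("cleanup candidate:", folder)
	}
	// Output:
	// cleanup candidate: /buckets/b1/tmp
}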

371
weed/filer/empty_folder_cleanup/cleanup_queue_test.go

@@ -0,0 +1,371 @@
package empty_folder_cleanup
import (
"testing"
"time"
)
func TestCleanupQueue_Add(t *testing.T) {
q := NewCleanupQueue(100, 10*time.Minute)
now := time.Now()
// Add first item
if !q.Add("/buckets/b1/folder1", now) {
t.Error("expected Add to return true for new item")
}
if q.Len() != 1 {
t.Errorf("expected len 1, got %d", q.Len())
}
// Add second item with later time
if !q.Add("/buckets/b1/folder2", now.Add(1*time.Second)) {
t.Error("expected Add to return true for new item")
}
if q.Len() != 2 {
t.Errorf("expected len 2, got %d", q.Len())
}
// Add duplicate with newer time - should update and reposition
if q.Add("/buckets/b1/folder1", now.Add(2*time.Second)) {
t.Error("expected Add to return false for existing item")
}
if q.Len() != 2 {
t.Errorf("expected len 2 after duplicate, got %d", q.Len())
}
// folder1 should now be at the back (newer time) - verify by popping
first, _ := q.Pop()
second, _ := q.Pop()
if first != "/buckets/b1/folder2" || second != "/buckets/b1/folder1" {
t.Errorf("expected folder1 to be moved to back, got %s, %s", first, second)
}
}
func TestCleanupQueue_Add_OutOfOrder(t *testing.T) {
q := NewCleanupQueue(100, 10*time.Minute)
baseTime := time.Now()
// Add items out of order
q.Add("/buckets/b1/folder3", baseTime.Add(3*time.Second))
q.Add("/buckets/b1/folder1", baseTime.Add(1*time.Second))
q.Add("/buckets/b1/folder2", baseTime.Add(2*time.Second))
// Items should be in time order (oldest first) - verify by popping
expected := []string{"/buckets/b1/folder1", "/buckets/b1/folder2", "/buckets/b1/folder3"}
for i, exp := range expected {
folder, ok := q.Pop()
if !ok || folder != exp {
t.Errorf("at index %d: expected %s, got %s", i, exp, folder)
}
}
}
func TestCleanupQueue_Add_DuplicateWithOlderTime(t *testing.T) {
q := NewCleanupQueue(100, 10*time.Minute)
baseTime := time.Now()
// Add folder at t=5
q.Add("/buckets/b1/folder1", baseTime.Add(5*time.Second))
// Try to add same folder with older time - should NOT update
q.Add("/buckets/b1/folder1", baseTime.Add(2*time.Second))
// Time should remain at t=5
_, queueTime, _ := q.Peek()
if queueTime != baseTime.Add(5*time.Second) {
t.Errorf("expected time to remain unchanged, got %v", queueTime)
}
}
func TestCleanupQueue_Remove(t *testing.T) {
q := NewCleanupQueue(100, 10*time.Minute)
now := time.Now()
q.Add("/buckets/b1/folder1", now)
q.Add("/buckets/b1/folder2", now.Add(1*time.Second))
q.Add("/buckets/b1/folder3", now.Add(2*time.Second))
// Remove middle item
if !q.Remove("/buckets/b1/folder2") {
t.Error("expected Remove to return true for existing item")
}
if q.Len() != 2 {
t.Errorf("expected len 2, got %d", q.Len())
}
if q.Contains("/buckets/b1/folder2") {
t.Error("removed item should not be in queue")
}
// Remove non-existent item
if q.Remove("/buckets/b1/nonexistent") {
t.Error("expected Remove to return false for non-existent item")
}
// Verify order is preserved by popping
folder1, _ := q.Pop()
folder3, _ := q.Pop()
if folder1 != "/buckets/b1/folder1" || folder3 != "/buckets/b1/folder3" {
t.Errorf("unexpected order: %s, %s", folder1, folder3)
}
}
func TestCleanupQueue_Pop(t *testing.T) {
q := NewCleanupQueue(100, 10*time.Minute)
now := time.Now()
// Pop from empty queue
folder, ok := q.Pop()
if ok {
t.Error("expected Pop to return false for empty queue")
}
if folder != "" {
t.Errorf("expected empty folder, got %s", folder)
}
// Add items and pop in order
q.Add("/buckets/b1/folder1", now)
q.Add("/buckets/b1/folder2", now.Add(1*time.Second))
q.Add("/buckets/b1/folder3", now.Add(2*time.Second))
folder, ok = q.Pop()
if !ok || folder != "/buckets/b1/folder1" {
t.Errorf("expected folder1, got %s (ok=%v)", folder, ok)
}
folder, ok = q.Pop()
if !ok || folder != "/buckets/b1/folder2" {
t.Errorf("expected folder2, got %s (ok=%v)", folder, ok)
}
folder, ok = q.Pop()
if !ok || folder != "/buckets/b1/folder3" {
t.Errorf("expected folder3, got %s (ok=%v)", folder, ok)
}
// Queue should be empty now
if q.Len() != 0 {
t.Errorf("expected empty queue, got len %d", q.Len())
}
}
func TestCleanupQueue_Peek(t *testing.T) {
q := NewCleanupQueue(100, 10*time.Minute)
now := time.Now()
// Peek empty queue
folder, _, ok := q.Peek()
if ok {
t.Error("expected Peek to return false for empty queue")
}
// Add item and peek
q.Add("/buckets/b1/folder1", now)
folder, queueTime, ok := q.Peek()
if !ok || folder != "/buckets/b1/folder1" {
t.Errorf("expected folder1, got %s (ok=%v)", folder, ok)
}
if queueTime != now {
t.Errorf("expected queue time %v, got %v", now, queueTime)
}
// Peek should not remove item
if q.Len() != 1 {
t.Errorf("Peek should not remove item, len=%d", q.Len())
}
}
func TestCleanupQueue_Contains(t *testing.T) {
q := NewCleanupQueue(100, 10*time.Minute)
now := time.Now()
q.Add("/buckets/b1/folder1", now)
if !q.Contains("/buckets/b1/folder1") {
t.Error("expected Contains to return true")
}
if q.Contains("/buckets/b1/folder2") {
t.Error("expected Contains to return false for non-existent")
}
}
func TestCleanupQueue_ShouldProcess_MaxSize(t *testing.T) {
q := NewCleanupQueue(3, 10*time.Minute)
now := time.Now()
// Empty queue
if q.ShouldProcess() {
t.Error("empty queue should not need processing")
}
// Add items below max
q.Add("/buckets/b1/folder1", now)
q.Add("/buckets/b1/folder2", now.Add(1*time.Second))
if q.ShouldProcess() {
t.Error("queue below max should not need processing")
}
// Add item to reach max
q.Add("/buckets/b1/folder3", now.Add(2*time.Second))
if !q.ShouldProcess() {
t.Error("queue at max should need processing")
}
}
func TestCleanupQueue_ShouldProcess_MaxAge(t *testing.T) {
q := NewCleanupQueue(100, 100*time.Millisecond) // Short max age for testing
// Add item with old event time
oldTime := time.Now().Add(-1 * time.Second) // 1 second ago
q.Add("/buckets/b1/folder1", oldTime)
// Item is older than maxAge, should need processing
if !q.ShouldProcess() {
t.Error("old item should trigger processing")
}
// Clear and add fresh item
q.Clear()
q.Add("/buckets/b1/folder2", time.Now())
// Fresh item should not trigger processing
if q.ShouldProcess() {
t.Error("fresh item should not trigger processing")
}
}
func TestCleanupQueue_Clear(t *testing.T) {
q := NewCleanupQueue(100, 10*time.Minute)
now := time.Now()
q.Add("/buckets/b1/folder1", now)
q.Add("/buckets/b1/folder2", now.Add(1*time.Second))
q.Add("/buckets/b1/folder3", now.Add(2*time.Second))
q.Clear()
if q.Len() != 0 {
t.Errorf("expected empty queue after Clear, got len %d", q.Len())
}
if q.Contains("/buckets/b1/folder1") {
t.Error("queue should not contain items after Clear")
}
}
func TestCleanupQueue_OldestAge(t *testing.T) {
q := NewCleanupQueue(100, 10*time.Minute)
// Empty queue
if q.OldestAge() != 0 {
t.Error("empty queue should have zero oldest age")
}
// Add item with time in the past
oldTime := time.Now().Add(-5 * time.Minute)
q.Add("/buckets/b1/folder1", oldTime)
// Age should be approximately 5 minutes
age := q.OldestAge()
if age < 4*time.Minute || age > 6*time.Minute {
t.Errorf("expected ~5m age, got %v", age)
}
}
func TestCleanupQueue_TimeOrder(t *testing.T) {
q := NewCleanupQueue(100, 10*time.Minute)
baseTime := time.Now()
// Add items in order
items := []string{
"/buckets/b1/a",
"/buckets/b1/b",
"/buckets/b1/c",
"/buckets/b1/d",
"/buckets/b1/e",
}
for i, item := range items {
q.Add(item, baseTime.Add(time.Duration(i)*time.Second))
}
// Pop should return in time order
for i, expected := range items {
got, ok := q.Pop()
if !ok {
t.Errorf("Pop %d: expected item, got empty", i)
}
if got != expected {
t.Errorf("Pop %d: expected %s, got %s", i, expected, got)
}
}
}
func TestCleanupQueue_DuplicateWithNewerTime(t *testing.T) {
q := NewCleanupQueue(100, 10*time.Minute)
baseTime := time.Now()
// Add items
q.Add("/buckets/b1/folder1", baseTime)
q.Add("/buckets/b1/folder2", baseTime.Add(1*time.Second))
q.Add("/buckets/b1/folder3", baseTime.Add(2*time.Second))
// Add duplicate with newer time - should update and reposition
q.Add("/buckets/b1/folder1", baseTime.Add(3*time.Second))
// folder1 should now be at the back (newest time) - verify by popping
expected := []string{"/buckets/b1/folder2", "/buckets/b1/folder3", "/buckets/b1/folder1"}
for i, exp := range expected {
folder, ok := q.Pop()
if !ok || folder != exp {
t.Errorf("at index %d: expected %s, got %s", i, exp, folder)
}
}
}
func TestCleanupQueue_Concurrent(t *testing.T) {
q := NewCleanupQueue(1000, 10*time.Minute)
done := make(chan bool)
now := time.Now()
// Concurrent adds
go func() {
for i := 0; i < 100; i++ {
q.Add("/buckets/b1/folder"+string(rune('A'+i%26)), now.Add(time.Duration(i)*time.Millisecond))
}
done <- true
}()
// Concurrent removes
go func() {
for i := 0; i < 50; i++ {
q.Remove("/buckets/b1/folder" + string(rune('A'+i%26)))
}
done <- true
}()
// Concurrent pops
go func() {
for i := 0; i < 30; i++ {
q.Pop()
}
done <- true
}()
// Concurrent reads
go func() {
for i := 0; i < 100; i++ {
q.Len()
q.Contains("/buckets/b1/folderA")
q.ShouldProcess()
}
done <- true
}()
// Wait for all goroutines
for i := 0; i < 4; i++ {
<-done
}
// Just verify no panic occurred and queue is in consistent state
_ = q.Len()
}
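The concurrency test above joins its goroutines by counting receives on a `done` channel; an equivalent join with `sync.WaitGroup` is slightly more idiomatic when no values flow back. A hedged sketch in the same package:

package empty_folder_cleanup

import (
	"sync"
	"time"
)

// waitGroupVariant exercises the same queue operations concurrently,
// but joins the workers with a WaitGroup instead of a counted channel.
func waitGroupVariant(q *CleanupQueue) {
	jobs := []func(){
		func() { q.Add("/buckets/b1/folderA", time.Now()) },
		func() { q.Remove("/buckets/b1/folderA") },
		func() { q.Pop() },
		func() { q.Len() },
	}
	var wg sync.WaitGroup
	for _, job := range jobs {
		wg.Add(1)
		go func(fn func()) {
			defer wg.Done()
			fn()
		}(job)
	}
	wg.Wait()
}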

436
weed/filer/empty_folder_cleanup/empty_folder_cleaner.go

@@ -0,0 +1,436 @@
package empty_folder_cleanup
import (
"context"
"strings"
"sync"
"time"
"github.com/seaweedfs/seaweedfs/weed/cluster/lock_manager"
"github.com/seaweedfs/seaweedfs/weed/glog"
"github.com/seaweedfs/seaweedfs/weed/pb"
"github.com/seaweedfs/seaweedfs/weed/util"
)
const (
DefaultMaxCountCheck = 1000
DefaultCacheExpiry = 5 * time.Minute
DefaultQueueMaxSize = 1000
DefaultQueueMaxAge = 10 * time.Minute
DefaultProcessorSleep = 10 * time.Second // How often to check queue
)
// FilerOperations defines the filer operations needed by EmptyFolderCleaner
type FilerOperations interface {
CountDirectoryEntries(ctx context.Context, dirPath util.FullPath, limit int) (count int, err error)
DeleteEntryMetaAndData(ctx context.Context, p util.FullPath, isRecursive, ignoreRecursiveError, shouldDeleteChunks, isFromOtherCluster bool, signatures []int32, ifNotModifiedAfter int64) error
}
// folderState tracks the state of a folder for empty folder cleanup
type folderState struct {
roughCount int // Cached rough count (up to maxCountCheck)
lastAddTime time.Time // Last time an item was added
lastDelTime time.Time // Last time an item was deleted
lastCheck time.Time // Last time we checked the actual count
}
// EmptyFolderCleaner handles asynchronous cleanup of empty folders
// Each filer owns specific folders via consistent hashing based on the peer filer list
type EmptyFolderCleaner struct {
filer FilerOperations
lockRing *lock_manager.LockRing
host pb.ServerAddress
// Folder state tracking
mu sync.RWMutex
folderCounts map[string]*folderState // Rough count cache
// Cleanup queue (thread-safe, has its own lock)
cleanupQueue *CleanupQueue
// Configuration
maxCountCheck int // Max items to count (1000)
cacheExpiry time.Duration // How long to keep cache entries
processorSleep time.Duration // How often processor checks queue
bucketPath string // e.g., "/buckets"
// Control
enabled bool
stopCh chan struct{}
}
// NewEmptyFolderCleaner creates a new EmptyFolderCleaner
func NewEmptyFolderCleaner(filer FilerOperations, lockRing *lock_manager.LockRing, host pb.ServerAddress, bucketPath string) *EmptyFolderCleaner {
efc := &EmptyFolderCleaner{
filer: filer,
lockRing: lockRing,
host: host,
folderCounts: make(map[string]*folderState),
cleanupQueue: NewCleanupQueue(DefaultQueueMaxSize, DefaultQueueMaxAge),
maxCountCheck: DefaultMaxCountCheck,
cacheExpiry: DefaultCacheExpiry,
processorSleep: DefaultProcessorSleep,
bucketPath: bucketPath,
enabled: true,
stopCh: make(chan struct{}),
}
go efc.cacheEvictionLoop()
go efc.cleanupProcessor()
return efc
}
// SetEnabled enables or disables the cleaner
func (efc *EmptyFolderCleaner) SetEnabled(enabled bool) {
efc.mu.Lock()
defer efc.mu.Unlock()
efc.enabled = enabled
}
// IsEnabled returns whether the cleaner is enabled
func (efc *EmptyFolderCleaner) IsEnabled() bool {
efc.mu.RLock()
defer efc.mu.RUnlock()
return efc.enabled
}
// ownsFolder checks if this filer owns the folder via consistent hashing
func (efc *EmptyFolderCleaner) ownsFolder(folder string) bool {
servers := efc.lockRing.GetSnapshot()
if len(servers) <= 1 {
return true // Single filer case
}
return efc.hashKeyToServer(folder, servers) == efc.host
}
// hashKeyToServer uses consistent hashing to map a folder to a server
func (efc *EmptyFolderCleaner) hashKeyToServer(key string, servers []pb.ServerAddress) pb.ServerAddress {
if len(servers) == 0 {
return ""
}
x := util.HashStringToLong(key)
if x < 0 {
x = -x
}
x = x % int64(len(servers))
return servers[x]
}
// OnDeleteEvent is called when a file or directory is deleted
// Both file and directory deletions count towards making the parent folder empty
// eventTime is the time when the delete event occurred (for proper ordering)
func (efc *EmptyFolderCleaner) OnDeleteEvent(directory string, entryName string, isDirectory bool, eventTime time.Time) {
// Skip if not under bucket path (must be at least /buckets/<bucket>/...)
if efc.bucketPath != "" && !isUnderBucketPath(directory, efc.bucketPath) {
return
}
// Check if we own this folder
if !efc.ownsFolder(directory) {
glog.V(4).Infof("EmptyFolderCleaner: not owner of %s, skipping", directory)
return
}
efc.mu.Lock()
defer efc.mu.Unlock()
// Check enabled inside lock to avoid race with Stop()
if !efc.enabled {
return
}
glog.V(3).Infof("EmptyFolderCleaner: delete event in %s/%s (isDir=%v)", directory, entryName, isDirectory)
// Update cached count (create entry if needed)
state, exists := efc.folderCounts[directory]
if !exists {
state = &folderState{}
efc.folderCounts[directory] = state
}
if state.roughCount > 0 {
state.roughCount--
}
state.lastDelTime = eventTime
// Only add to cleanup queue if roughCount suggests folder might be empty
if state.roughCount > 0 {
glog.V(3).Infof("EmptyFolderCleaner: skipping queue for %s, roughCount=%d", directory, state.roughCount)
return
}
// Add to cleanup queue with event time (handles out-of-order events)
if efc.cleanupQueue.Add(directory, eventTime) {
glog.V(3).Infof("EmptyFolderCleaner: queued %s for cleanup", directory)
}
}
// OnCreateEvent is called when a file or directory is created
// Both file and directory creations cancel pending cleanup for the parent folder
func (efc *EmptyFolderCleaner) OnCreateEvent(directory string, entryName string, isDirectory bool) {
// Skip if not under bucket path (must be at least /buckets/<bucket>/...)
if efc.bucketPath != "" && !isUnderBucketPath(directory, efc.bucketPath) {
return
}
efc.mu.Lock()
defer efc.mu.Unlock()
// Check enabled inside lock to avoid race with Stop()
if !efc.enabled {
return
}
// Update cached count only if already tracked (no need to track new folders)
if state, exists := efc.folderCounts[directory]; exists {
state.roughCount++
state.lastAddTime = time.Now()
}
// Remove from cleanup queue (cancel pending cleanup)
if efc.cleanupQueue.Remove(directory) {
glog.V(3).Infof("EmptyFolderCleaner: cancelled cleanup for %s due to new entry", directory)
}
}
// cleanupProcessor runs in background and processes the cleanup queue
func (efc *EmptyFolderCleaner) cleanupProcessor() {
ticker := time.NewTicker(efc.processorSleep)
defer ticker.Stop()
for {
select {
case <-efc.stopCh:
return
case <-ticker.C:
efc.processCleanupQueue()
}
}
}
// processCleanupQueue processes items from the cleanup queue
func (efc *EmptyFolderCleaner) processCleanupQueue() {
// Check if we should process
if !efc.cleanupQueue.ShouldProcess() {
return
}
glog.V(3).Infof("EmptyFolderCleaner: processing cleanup queue (len=%d, age=%v)",
efc.cleanupQueue.Len(), efc.cleanupQueue.OldestAge())
// Process all items that are ready
for efc.cleanupQueue.Len() > 0 {
// Check if still enabled
if !efc.IsEnabled() {
return
}
// Pop the oldest item
folder, ok := efc.cleanupQueue.Pop()
if !ok {
break
}
// Execute cleanup for this folder
efc.executeCleanup(folder)
// If queue is no longer full and oldest item is not old enough, stop processing
if !efc.cleanupQueue.ShouldProcess() {
break
}
}
}
// executeCleanup performs the actual cleanup of an empty folder
func (efc *EmptyFolderCleaner) executeCleanup(folder string) {
efc.mu.Lock()
// Quick check: if we have cached count and it's > 0, skip
if state, exists := efc.folderCounts[folder]; exists {
if state.roughCount > 0 {
glog.V(3).Infof("EmptyFolderCleaner: skipping %s, cached count=%d", folder, state.roughCount)
efc.mu.Unlock()
return
}
// If there was an add after our delete, skip
if !state.lastAddTime.IsZero() && state.lastAddTime.After(state.lastDelTime) {
glog.V(3).Infof("EmptyFolderCleaner: skipping %s, add happened after delete", folder)
efc.mu.Unlock()
return
}
}
efc.mu.Unlock()
// Re-check ownership (topology might have changed)
if !efc.ownsFolder(folder) {
glog.V(3).Infof("EmptyFolderCleaner: no longer owner of %s, skipping", folder)
return
}
// Check if folder is actually empty (count up to maxCountCheck)
ctx := context.Background()
count, err := efc.countItems(ctx, folder)
if err != nil {
glog.V(2).Infof("EmptyFolderCleaner: error counting items in %s: %v", folder, err)
return
}
efc.mu.Lock()
// Update cache
if _, exists := efc.folderCounts[folder]; !exists {
efc.folderCounts[folder] = &folderState{}
}
efc.folderCounts[folder].roughCount = count
efc.folderCounts[folder].lastCheck = time.Now()
efc.mu.Unlock()
if count > 0 {
glog.V(3).Infof("EmptyFolderCleaner: folder %s has %d items, not empty", folder, count)
return
}
// Delete the empty folder
glog.V(2).Infof("EmptyFolderCleaner: deleting empty folder %s", folder)
if err := efc.deleteFolder(ctx, folder); err != nil {
glog.V(2).Infof("EmptyFolderCleaner: failed to delete empty folder %s: %v", folder, err)
return
}
// Clean up cache entry
efc.mu.Lock()
delete(efc.folderCounts, folder)
efc.mu.Unlock()
// Note: No need to recursively check parent folder here.
// The deletion of this folder will generate a metadata event,
// which will trigger OnDeleteEvent for the parent folder.
}
// countItems counts items in a folder (up to maxCountCheck)
func (efc *EmptyFolderCleaner) countItems(ctx context.Context, folder string) (int, error) {
return efc.filer.CountDirectoryEntries(ctx, util.FullPath(folder), efc.maxCountCheck)
}
// deleteFolder deletes an empty folder
func (efc *EmptyFolderCleaner) deleteFolder(ctx context.Context, folder string) error {
return efc.filer.DeleteEntryMetaAndData(ctx, util.FullPath(folder), false, false, false, false, nil, 0)
}
// isUnderPath checks if child is under parent path
func isUnderPath(child, parent string) bool {
if parent == "" || parent == "/" {
return true
}
// Ensure parent ends without slash for proper prefix matching
if len(parent) > 0 && parent[len(parent)-1] == '/' {
parent = parent[:len(parent)-1]
}
// Child must start with parent and then have a / or be exactly parent
if len(child) < len(parent) {
return false
}
if child[:len(parent)] != parent {
return false
}
if len(child) == len(parent) {
return true
}
return child[len(parent)] == '/'
}
// isUnderBucketPath checks if directory is inside a bucket (under /buckets/<bucket>/...)
// This ensures we only clean up folders inside buckets, not the buckets themselves
func isUnderBucketPath(directory, bucketPath string) bool {
if bucketPath == "" {
return true
}
// Ensure bucketPath ends without slash
if len(bucketPath) > 0 && bucketPath[len(bucketPath)-1] == '/' {
bucketPath = bucketPath[:len(bucketPath)-1]
}
// Directory must be under bucketPath
if !isUnderPath(directory, bucketPath) {
return false
}
// Directory must be at least /buckets/<bucket>/<something>
// i.e., depth must be at least bucketPath depth + 2
// For /buckets (depth 1), we need at least /buckets/mybucket/folder (depth 3)
bucketPathDepth := strings.Count(bucketPath, "/")
directoryDepth := strings.Count(directory, "/")
return directoryDepth >= bucketPathDepth+2
}
// cacheEvictionLoop periodically removes stale entries from folderCounts
func (efc *EmptyFolderCleaner) cacheEvictionLoop() {
ticker := time.NewTicker(efc.cacheExpiry)
defer ticker.Stop()
for {
select {
case <-efc.stopCh:
return
case <-ticker.C:
efc.evictStaleCacheEntries()
}
}
}
// evictStaleCacheEntries removes cache entries that haven't been accessed recently
func (efc *EmptyFolderCleaner) evictStaleCacheEntries() {
efc.mu.Lock()
defer efc.mu.Unlock()
now := time.Now()
expiredCount := 0
for folder, state := range efc.folderCounts {
// Skip if folder is in cleanup queue
if efc.cleanupQueue.Contains(folder) {
continue
}
// Find the most recent activity time for this folder
lastActivity := state.lastCheck
if state.lastAddTime.After(lastActivity) {
lastActivity = state.lastAddTime
}
if state.lastDelTime.After(lastActivity) {
lastActivity = state.lastDelTime
}
// Evict if no activity within cache expiry period
if now.Sub(lastActivity) > efc.cacheExpiry {
delete(efc.folderCounts, folder)
expiredCount++
}
}
if expiredCount > 0 {
glog.V(3).Infof("EmptyFolderCleaner: evicted %d stale cache entries", expiredCount)
}
}
// Stop stops the cleaner and cancels all pending tasks
func (efc *EmptyFolderCleaner) Stop() {
close(efc.stopCh)
efc.mu.Lock()
defer efc.mu.Unlock()
efc.enabled = false
efc.cleanupQueue.Clear()
efc.folderCounts = make(map[string]*folderState) // Clear cache on stop
}
// GetPendingCleanupCount returns the number of pending cleanup tasks (for testing)
func (efc *EmptyFolderCleaner) GetPendingCleanupCount() int {
return efc.cleanupQueue.Len()
}
// GetCachedFolderCount returns the cached count for a folder (for testing)
func (efc *EmptyFolderCleaner) GetCachedFolderCount(folder string) (int, bool) {
efc.mu.RLock()
defer efc.mu.RUnlock()
if state, exists := efc.folderCounts[folder]; exists {
return state.roughCount, true
}
return 0, false
}
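To see the ownership rule in action: every filer evaluates the same hash over the same peer snapshot, so each folder lands on exactly one owner (the property the ownsFolder tests below verify). A standalone sketch with `hash/fnv` standing in for `util.HashStringToLong` — any stable string hash works, as long as all peers agree on it:

package main

import (
	"fmt"
	"hash/fnv"
)

// hashKeyToServer mirrors the modulo-based ownership rule above; the
// fnv hash is an assumption, not the hash weed actually uses.
func hashKeyToServer(key string, servers []string) string {
	if len(servers) == 0 {
		return ""
	}
	h := fnv.New64a()
	h.Write([]byte(key))
	return servers[h.Sum64()%uint64(len(servers))]
}

func main() {
	servers := []string{"filer1:8888", "filer2:8888", "filer3:8888"}
	for _, folder := range []string{"/buckets/b1/a", "/buckets/b1/b", "/buckets/b2/x"} {
		fmt.Printf("%s -> %s\n", folder, hashKeyToServer(folder, servers))
	}
}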

569
weed/filer/empty_folder_cleanup/empty_folder_cleaner_test.go

@@ -0,0 +1,569 @@
package empty_folder_cleanup
import (
"testing"
"time"
"github.com/seaweedfs/seaweedfs/weed/cluster/lock_manager"
"github.com/seaweedfs/seaweedfs/weed/pb"
)
func Test_isUnderPath(t *testing.T) {
tests := []struct {
name string
child string
parent string
expected bool
}{
{"child under parent", "/buckets/mybucket/folder/file.txt", "/buckets", true},
{"child is parent", "/buckets", "/buckets", true},
{"child not under parent", "/other/path", "/buckets", false},
{"empty parent", "/any/path", "", true},
{"root parent", "/any/path", "/", true},
{"parent with trailing slash", "/buckets/mybucket", "/buckets/", true},
{"similar prefix but not under", "/buckets-other/file", "/buckets", false},
{"deeply nested", "/buckets/a/b/c/d/e/f", "/buckets/a/b", true},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
result := isUnderPath(tt.child, tt.parent)
if result != tt.expected {
t.Errorf("isUnderPath(%q, %q) = %v, want %v", tt.child, tt.parent, result, tt.expected)
}
})
}
}
func Test_isUnderBucketPath(t *testing.T) {
tests := []struct {
name string
directory string
bucketPath string
expected bool
}{
// Should NOT process - bucket path itself
{"bucket path itself", "/buckets", "/buckets", false},
// Should NOT process - bucket directory (immediate child)
{"bucket directory", "/buckets/mybucket", "/buckets", false},
// Should process - folder inside bucket
{"folder in bucket", "/buckets/mybucket/folder", "/buckets", true},
// Should process - nested folder
{"nested folder", "/buckets/mybucket/a/b/c", "/buckets", true},
// Should NOT process - outside buckets
{"outside buckets", "/other/path", "/buckets", false},
// Empty bucket path allows all
{"empty bucket path", "/any/path", "", true},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
result := isUnderBucketPath(tt.directory, tt.bucketPath)
if result != tt.expected {
t.Errorf("isUnderBucketPath(%q, %q) = %v, want %v", tt.directory, tt.bucketPath, result, tt.expected)
}
})
}
}
func TestEmptyFolderCleaner_ownsFolder(t *testing.T) {
// Create a LockRing with multiple servers
lockRing := lock_manager.NewLockRing(5 * time.Second)
servers := []pb.ServerAddress{
"filer1:8888",
"filer2:8888",
"filer3:8888",
}
lockRing.SetSnapshot(servers)
// Create cleaner for filer1
cleaner1 := &EmptyFolderCleaner{
lockRing: lockRing,
host: "filer1:8888",
}
// Create cleaner for filer2
cleaner2 := &EmptyFolderCleaner{
lockRing: lockRing,
host: "filer2:8888",
}
// Create cleaner for filer3
cleaner3 := &EmptyFolderCleaner{
lockRing: lockRing,
host: "filer3:8888",
}
// Test that exactly one filer owns each folder
testFolders := []string{
"/buckets/mybucket/folder1",
"/buckets/mybucket/folder2",
"/buckets/mybucket/folder3",
"/buckets/mybucket/a/b/c",
"/buckets/otherbucket/x",
}
for _, folder := range testFolders {
ownCount := 0
if cleaner1.ownsFolder(folder) {
ownCount++
}
if cleaner2.ownsFolder(folder) {
ownCount++
}
if cleaner3.ownsFolder(folder) {
ownCount++
}
if ownCount != 1 {
t.Errorf("folder %q owned by %d filers, expected exactly 1", folder, ownCount)
}
}
}
func TestEmptyFolderCleaner_ownsFolder_singleServer(t *testing.T) {
// Create a LockRing with a single server
lockRing := lock_manager.NewLockRing(5 * time.Second)
lockRing.SetSnapshot([]pb.ServerAddress{"filer1:8888"})
cleaner := &EmptyFolderCleaner{
lockRing: lockRing,
host: "filer1:8888",
}
// Single filer should own all folders
testFolders := []string{
"/buckets/mybucket/folder1",
"/buckets/mybucket/folder2",
"/buckets/otherbucket/x",
}
for _, folder := range testFolders {
if !cleaner.ownsFolder(folder) {
t.Errorf("single filer should own folder %q", folder)
}
}
}
func TestEmptyFolderCleaner_ownsFolder_emptyRing(t *testing.T) {
// Create an empty LockRing
lockRing := lock_manager.NewLockRing(5 * time.Second)
cleaner := &EmptyFolderCleaner{
lockRing: lockRing,
host: "filer1:8888",
}
// With empty ring, should own all folders
if !cleaner.ownsFolder("/buckets/mybucket/folder") {
t.Error("should own folder with empty ring")
}
}
func TestEmptyFolderCleaner_OnCreateEvent_cancelsCleanup(t *testing.T) {
lockRing := lock_manager.NewLockRing(5 * time.Second)
lockRing.SetSnapshot([]pb.ServerAddress{"filer1:8888"})
cleaner := &EmptyFolderCleaner{
lockRing: lockRing,
host: "filer1:8888",
bucketPath: "/buckets",
enabled: true,
folderCounts: make(map[string]*folderState),
cleanupQueue: NewCleanupQueue(1000, 10*time.Minute),
stopCh: make(chan struct{}),
}
folder := "/buckets/mybucket/testfolder"
now := time.Now()
// Simulate delete event
cleaner.OnDeleteEvent(folder, "file.txt", false, now)
// Check that cleanup is queued
if cleaner.GetPendingCleanupCount() != 1 {
t.Errorf("expected 1 pending cleanup, got %d", cleaner.GetPendingCleanupCount())
}
// Simulate create event
cleaner.OnCreateEvent(folder, "newfile.txt", false)
// Check that cleanup is cancelled
if cleaner.GetPendingCleanupCount() != 0 {
t.Errorf("expected 0 pending cleanups after create, got %d", cleaner.GetPendingCleanupCount())
}
cleaner.Stop()
}
func TestEmptyFolderCleaner_OnDeleteEvent_deduplication(t *testing.T) {
lockRing := lock_manager.NewLockRing(5 * time.Second)
lockRing.SetSnapshot([]pb.ServerAddress{"filer1:8888"})
cleaner := &EmptyFolderCleaner{
lockRing: lockRing,
host: "filer1:8888",
bucketPath: "/buckets",
enabled: true,
folderCounts: make(map[string]*folderState),
cleanupQueue: NewCleanupQueue(1000, 10*time.Minute),
stopCh: make(chan struct{}),
}
folder := "/buckets/mybucket/testfolder"
now := time.Now()
// Simulate multiple delete events for same folder
for i := 0; i < 5; i++ {
cleaner.OnDeleteEvent(folder, "file"+string(rune('0'+i))+".txt", false, now.Add(time.Duration(i)*time.Second))
}
// Check that only 1 cleanup is queued (deduplicated)
if cleaner.GetPendingCleanupCount() != 1 {
t.Errorf("expected 1 pending cleanup after deduplication, got %d", cleaner.GetPendingCleanupCount())
}
cleaner.Stop()
}
func TestEmptyFolderCleaner_OnDeleteEvent_multipleFolders(t *testing.T) {
lockRing := lock_manager.NewLockRing(5 * time.Second)
lockRing.SetSnapshot([]pb.ServerAddress{"filer1:8888"})
cleaner := &EmptyFolderCleaner{
lockRing: lockRing,
host: "filer1:8888",
bucketPath: "/buckets",
enabled: true,
folderCounts: make(map[string]*folderState),
cleanupQueue: NewCleanupQueue(1000, 10*time.Minute),
stopCh: make(chan struct{}),
}
now := time.Now()
// Delete files in different folders
cleaner.OnDeleteEvent("/buckets/mybucket/folder1", "file.txt", false, now)
cleaner.OnDeleteEvent("/buckets/mybucket/folder2", "file.txt", false, now.Add(1*time.Second))
cleaner.OnDeleteEvent("/buckets/mybucket/folder3", "file.txt", false, now.Add(2*time.Second))
// Each folder should be queued
if cleaner.GetPendingCleanupCount() != 3 {
t.Errorf("expected 3 pending cleanups, got %d", cleaner.GetPendingCleanupCount())
}
cleaner.Stop()
}
func TestEmptyFolderCleaner_OnDeleteEvent_notOwner(t *testing.T) {
lockRing := lock_manager.NewLockRing(5 * time.Second)
lockRing.SetSnapshot([]pb.ServerAddress{"filer1:8888", "filer2:8888"})
// Create cleaner for filer that doesn't own the folder
cleaner := &EmptyFolderCleaner{
lockRing: lockRing,
host: "filer1:8888",
bucketPath: "/buckets",
enabled: true,
folderCounts: make(map[string]*folderState),
cleanupQueue: NewCleanupQueue(1000, 10*time.Minute),
stopCh: make(chan struct{}),
}
now := time.Now()
// Try many folders, looking for one that filer1 doesn't own
foundNonOwned := false
for i := 0; i < 100; i++ {
folder := "/buckets/mybucket/folder" + string(rune('0'+i%10)) + string(rune('0'+i/10))
if !cleaner.ownsFolder(folder) {
// This folder is not owned by filer1
cleaner.OnDeleteEvent(folder, "file.txt", false, now)
if cleaner.GetPendingCleanupCount() != 0 {
t.Errorf("non-owner should not queue cleanup for folder %s", folder)
}
foundNonOwned = true
break
}
}
if !foundNonOwned {
t.Skip("could not find a folder not owned by filer1")
}
cleaner.Stop()
}
func TestEmptyFolderCleaner_OnDeleteEvent_disabled(t *testing.T) {
lockRing := lock_manager.NewLockRing(5 * time.Second)
lockRing.SetSnapshot([]pb.ServerAddress{"filer1:8888"})
cleaner := &EmptyFolderCleaner{
lockRing: lockRing,
host: "filer1:8888",
bucketPath: "/buckets",
enabled: false, // Disabled
folderCounts: make(map[string]*folderState),
cleanupQueue: NewCleanupQueue(1000, 10*time.Minute),
stopCh: make(chan struct{}),
}
folder := "/buckets/mybucket/testfolder"
now := time.Now()
// Simulate delete event
cleaner.OnDeleteEvent(folder, "file.txt", false, now)
// Check that no cleanup is queued when disabled
if cleaner.GetPendingCleanupCount() != 0 {
t.Errorf("disabled cleaner should not queue cleanup, got %d", cleaner.GetPendingCleanupCount())
}
cleaner.Stop()
}
func TestEmptyFolderCleaner_OnDeleteEvent_directoryDeletion(t *testing.T) {
lockRing := lock_manager.NewLockRing(5 * time.Second)
lockRing.SetSnapshot([]pb.ServerAddress{"filer1:8888"})
cleaner := &EmptyFolderCleaner{
lockRing: lockRing,
host: "filer1:8888",
bucketPath: "/buckets",
enabled: true,
folderCounts: make(map[string]*folderState),
cleanupQueue: NewCleanupQueue(1000, 10*time.Minute),
stopCh: make(chan struct{}),
}
folder := "/buckets/mybucket/testfolder"
now := time.Now()
// Simulate directory delete event - should trigger cleanup
// because subdirectory deletion also makes parent potentially empty
cleaner.OnDeleteEvent(folder, "subdir", true, now)
// Check that cleanup IS queued for directory deletion
if cleaner.GetPendingCleanupCount() != 1 {
t.Errorf("directory deletion should trigger cleanup, got %d", cleaner.GetPendingCleanupCount())
}
cleaner.Stop()
}
func TestEmptyFolderCleaner_cachedCounts(t *testing.T) {
lockRing := lock_manager.NewLockRing(5 * time.Second)
lockRing.SetSnapshot([]pb.ServerAddress{"filer1:8888"})
cleaner := &EmptyFolderCleaner{
lockRing: lockRing,
host: "filer1:8888",
bucketPath: "/buckets",
enabled: true,
folderCounts: make(map[string]*folderState),
cleanupQueue: NewCleanupQueue(1000, 10*time.Minute),
stopCh: make(chan struct{}),
}
folder := "/buckets/mybucket/testfolder"
// Initialize cached count
cleaner.folderCounts[folder] = &folderState{roughCount: 5}
// Simulate create events
cleaner.OnCreateEvent(folder, "newfile1.txt", false)
cleaner.OnCreateEvent(folder, "newfile2.txt", false)
// Check cached count increased
count, exists := cleaner.GetCachedFolderCount(folder)
if !exists {
t.Error("cached folder count should exist")
}
if count != 7 {
t.Errorf("expected cached count 7, got %d", count)
}
// Simulate delete events
now := time.Now()
cleaner.OnDeleteEvent(folder, "file1.txt", false, now)
cleaner.OnDeleteEvent(folder, "file2.txt", false, now.Add(1*time.Second))
// Check cached count decreased
count, exists = cleaner.GetCachedFolderCount(folder)
if !exists {
t.Error("cached folder count should exist")
}
if count != 5 {
t.Errorf("expected cached count 5, got %d", count)
}
cleaner.Stop()
}
func TestEmptyFolderCleaner_Stop(t *testing.T) {
lockRing := lock_manager.NewLockRing(5 * time.Second)
lockRing.SetSnapshot([]pb.ServerAddress{"filer1:8888"})
cleaner := &EmptyFolderCleaner{
lockRing: lockRing,
host: "filer1:8888",
bucketPath: "/buckets",
enabled: true,
folderCounts: make(map[string]*folderState),
cleanupQueue: NewCleanupQueue(1000, 10*time.Minute),
stopCh: make(chan struct{}),
}
now := time.Now()
// Queue some cleanups
cleaner.OnDeleteEvent("/buckets/mybucket/folder1", "file1.txt", false, now)
cleaner.OnDeleteEvent("/buckets/mybucket/folder2", "file2.txt", false, now.Add(1*time.Second))
cleaner.OnDeleteEvent("/buckets/mybucket/folder3", "file3.txt", false, now.Add(2*time.Second))
// Verify cleanups are queued
if cleaner.GetPendingCleanupCount() < 1 {
t.Error("expected at least 1 pending cleanup before stop")
}
// Stop the cleaner
cleaner.Stop()
// Verify all cleanups are cancelled
if cleaner.GetPendingCleanupCount() != 0 {
t.Errorf("expected 0 pending cleanups after stop, got %d", cleaner.GetPendingCleanupCount())
}
}
func TestEmptyFolderCleaner_cacheEviction(t *testing.T) {
lockRing := lock_manager.NewLockRing(5 * time.Second)
lockRing.SetSnapshot([]pb.ServerAddress{"filer1:8888"})
cleaner := &EmptyFolderCleaner{
lockRing: lockRing,
host: "filer1:8888",
bucketPath: "/buckets",
enabled: true,
folderCounts: make(map[string]*folderState),
cleanupQueue: NewCleanupQueue(1000, 10*time.Minute),
cacheExpiry: 100 * time.Millisecond, // Short expiry for testing
stopCh: make(chan struct{}),
}
folder1 := "/buckets/mybucket/folder1"
folder2 := "/buckets/mybucket/folder2"
folder3 := "/buckets/mybucket/folder3"
// Add some cache entries with old timestamps
oldTime := time.Now().Add(-1 * time.Hour)
cleaner.folderCounts[folder1] = &folderState{roughCount: 5, lastCheck: oldTime}
cleaner.folderCounts[folder2] = &folderState{roughCount: 3, lastCheck: oldTime}
// folder3 has recent activity
cleaner.folderCounts[folder3] = &folderState{roughCount: 2, lastCheck: time.Now()}
// Verify all entries exist
if len(cleaner.folderCounts) != 3 {
t.Errorf("expected 3 cache entries, got %d", len(cleaner.folderCounts))
}
// Run eviction
cleaner.evictStaleCacheEntries()
// Verify stale entries are evicted
if len(cleaner.folderCounts) != 1 {
t.Errorf("expected 1 cache entry after eviction, got %d", len(cleaner.folderCounts))
}
// Verify the recent entry still exists
if _, exists := cleaner.folderCounts[folder3]; !exists {
t.Error("expected folder3 to still exist in cache")
}
// Verify stale entries are removed
if _, exists := cleaner.folderCounts[folder1]; exists {
t.Error("expected folder1 to be evicted")
}
if _, exists := cleaner.folderCounts[folder2]; exists {
t.Error("expected folder2 to be evicted")
}
cleaner.Stop()
}
func TestEmptyFolderCleaner_cacheEviction_skipsEntriesInQueue(t *testing.T) {
lockRing := lock_manager.NewLockRing(5 * time.Second)
lockRing.SetSnapshot([]pb.ServerAddress{"filer1:8888"})
cleaner := &EmptyFolderCleaner{
lockRing: lockRing,
host: "filer1:8888",
bucketPath: "/buckets",
enabled: true,
folderCounts: make(map[string]*folderState),
cleanupQueue: NewCleanupQueue(1000, 10*time.Minute),
cacheExpiry: 100 * time.Millisecond,
stopCh: make(chan struct{}),
}
folder := "/buckets/mybucket/folder"
oldTime := time.Now().Add(-1 * time.Hour)
// Add a stale cache entry
cleaner.folderCounts[folder] = &folderState{roughCount: 0, lastCheck: oldTime}
// Also add to cleanup queue
cleaner.cleanupQueue.Add(folder, time.Now())
// Run eviction
cleaner.evictStaleCacheEntries()
// Verify entry is NOT evicted because it's in cleanup queue
if _, exists := cleaner.folderCounts[folder]; !exists {
t.Error("expected folder to still exist in cache (is in cleanup queue)")
}
cleaner.Stop()
}
func TestEmptyFolderCleaner_queueFIFOOrder(t *testing.T) {
lockRing := lock_manager.NewLockRing(5 * time.Second)
lockRing.SetSnapshot([]pb.ServerAddress{"filer1:8888"})
cleaner := &EmptyFolderCleaner{
lockRing: lockRing,
host: "filer1:8888",
bucketPath: "/buckets",
enabled: true,
folderCounts: make(map[string]*folderState),
cleanupQueue: NewCleanupQueue(1000, 10*time.Minute),
stopCh: make(chan struct{}),
}
now := time.Now()
// Add folders in order
folders := []string{
"/buckets/mybucket/folder1",
"/buckets/mybucket/folder2",
"/buckets/mybucket/folder3",
}
for i, folder := range folders {
cleaner.OnDeleteEvent(folder, "file.txt", false, now.Add(time.Duration(i)*time.Second))
}
// Verify queue length
if cleaner.GetPendingCleanupCount() != 3 {
t.Errorf("expected 3 queued folders, got %d", cleaner.GetPendingCleanupCount())
}
// Verify time-sorted order by popping
for i, expected := range folders {
folder, ok := cleaner.cleanupQueue.Pop()
if !ok || folder != expected {
t.Errorf("expected folder %s at index %d, got %s", expected, i, folder)
}
}
cleaner.Stop()
}

8
weed/filer/filer.go

@@ -11,6 +11,7 @@ import (
"github.com/seaweedfs/seaweedfs/weed/s3api/s3bucket"
"github.com/seaweedfs/seaweedfs/weed/cluster/lock_manager"
"github.com/seaweedfs/seaweedfs/weed/filer/empty_folder_cleanup"
"github.com/seaweedfs/seaweedfs/weed/cluster"
"github.com/seaweedfs/seaweedfs/weed/pb"
@@ -56,6 +57,7 @@ type Filer struct {
MaxFilenameLength uint32
deletionQuit chan struct{}
DeletionRetryQueue *DeletionRetryQueue
EmptyFolderCleaner *empty_folder_cleanup.EmptyFolderCleaner
}
func NewFiler(masters pb.ServerDiscovery, grpcDialOption grpc.DialOption, filerHost pb.ServerAddress, filerGroup string, collection string, replication string, dataCenter string, maxFilenameLength uint32, notifyFn func()) *Filer {
@@ -116,6 +118,9 @@ func (f *Filer) AggregateFromPeers(self pb.ServerAddress, existingNodes []*maste
f.Dlm.LockRing.SetSnapshot(snapshot)
glog.V(0).Infof("%s aggregate from peers %+v", self, snapshot)
// Initialize the empty folder cleaner using the same LockRing as Dlm for consistent hashing
f.EmptyFolderCleaner = empty_folder_cleanup.NewEmptyFolderCleaner(f, f.Dlm.LockRing, self, f.DirBucketsPath)
f.MetaAggregator = NewMetaAggregator(f, self, f.GrpcDialOption)
f.MasterClient.SetOnPeerUpdateFn(func(update *master_pb.ClusterNodeUpdate, startFrom time.Time) {
if update.NodeType != cluster.FilerType {
@@ -506,6 +511,9 @@ func (f *Filer) IsDirectoryEmpty(ctx context.Context, dirPath util.FullPath) (bo
func (f *Filer) Shutdown() {
close(f.deletionQuit)
if f.EmptyFolderCleaner != nil {
f.EmptyFolderCleaner.Stop()
}
f.LocalMetaLogBuffer.ShutdownLogBuffer()
f.Store.Shutdown()
}

39
weed/filer/filer_notify.go

@@ -66,6 +66,10 @@ func (f *Filer) NotifyUpdateEvent(ctx context.Context, oldEntry, newEntry *Entry
f.logMetaEvent(ctx, fullpath, eventNotification)
// Trigger empty folder cleanup for local events
// Remote events are handled via MetaAggregator.onMetadataChangeEvent
f.triggerLocalEmptyFolderCleanup(oldEntry, newEntry)
}
func (f *Filer) logMetaEvent(ctx context.Context, fullpath string, eventNotification *filer_pb.EventNotification) {
@@ -89,6 +93,41 @@ func (f *Filer) logMetaEvent(ctx context.Context, fullpath string, eventNotifica
}
// triggerLocalEmptyFolderCleanup triggers empty folder cleanup for local events
// This is needed because onMetadataChangeEvent is only called for remote peer events
func (f *Filer) triggerLocalEmptyFolderCleanup(oldEntry, newEntry *Entry) {
if f.EmptyFolderCleaner == nil || !f.EmptyFolderCleaner.IsEnabled() {
return
}
eventTime := time.Now()
// Handle delete events (oldEntry exists, newEntry is nil)
if oldEntry != nil && newEntry == nil {
dir, name := oldEntry.FullPath.DirAndName()
f.EmptyFolderCleaner.OnDeleteEvent(dir, name, oldEntry.IsDirectory(), eventTime)
}
// Handle create events (oldEntry is nil, newEntry exists)
if oldEntry == nil && newEntry != nil {
dir, name := newEntry.FullPath.DirAndName()
f.EmptyFolderCleaner.OnCreateEvent(dir, name, newEntry.IsDirectory())
}
// Handle rename/move events (both exist but paths differ)
if oldEntry != nil && newEntry != nil {
oldDir, oldName := oldEntry.FullPath.DirAndName()
newDir, newName := newEntry.FullPath.DirAndName()
if oldDir != newDir || oldName != newName {
// Treat old location as delete
f.EmptyFolderCleaner.OnDeleteEvent(oldDir, oldName, oldEntry.IsDirectory(), eventTime)
// Treat new location as create
f.EmptyFolderCleaner.OnCreateEvent(newDir, newName, newEntry.IsDirectory())
}
}
}
func (f *Filer) logFlushFunc(logBuffer *log_buffer.LogBuffer, startTime, stopTime time.Time, buf []byte, minOffset, maxOffset int64) {
if len(buf) == 0 {

39
weed/filer/filer_on_meta_event.go

@@ -2,6 +2,7 @@ package filer
import (
"bytes"
"time"
"github.com/seaweedfs/seaweedfs/weed/glog"
"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
@@ -13,6 +14,7 @@ func (f *Filer) onMetadataChangeEvent(event *filer_pb.SubscribeMetadataResponse)
f.maybeReloadFilerConfiguration(event)
f.maybeReloadRemoteStorageConfigurationAndMapping(event)
f.onBucketEvents(event)
f.onEmptyFolderCleanupEvents(event)
}
func (f *Filer) onBucketEvents(event *filer_pb.SubscribeMetadataResponse) {
@@ -32,6 +34,43 @@ func (f *Filer) onBucketEvents(event *filer_pb.SubscribeMetadataResponse) {
}
}
// onEmptyFolderCleanupEvents handles create/delete events for empty folder cleanup
func (f *Filer) onEmptyFolderCleanupEvents(event *filer_pb.SubscribeMetadataResponse) {
if f.EmptyFolderCleaner == nil || !f.EmptyFolderCleaner.IsEnabled() {
return
}
message := event.EventNotification
directory := event.Directory
eventTime := time.Unix(0, event.TsNs)
// Handle delete events - trigger folder cleanup check
if filer_pb.IsDelete(event) && message.OldEntry != nil {
f.EmptyFolderCleaner.OnDeleteEvent(directory, message.OldEntry.Name, message.OldEntry.IsDirectory, eventTime)
}
// Handle create events - cancel pending cleanup for the folder
if filer_pb.IsCreate(event) && message.NewEntry != nil {
f.EmptyFolderCleaner.OnCreateEvent(directory, message.NewEntry.Name, message.NewEntry.IsDirectory)
}
// Handle rename/move events
if filer_pb.IsRename(event) {
// Treat the old location as a delete
if message.OldEntry != nil {
f.EmptyFolderCleaner.OnDeleteEvent(directory, message.OldEntry.Name, message.OldEntry.IsDirectory, eventTime)
}
// Treat the new location as a create
if message.NewEntry != nil {
newDir := message.NewParentPath
if newDir == "" {
newDir = directory
}
f.EmptyFolderCleaner.OnCreateEvent(newDir, message.NewEntry.Name, message.NewEntry.IsDirectory)
}
}
}
func (f *Filer) maybeReloadFilerConfiguration(event *filer_pb.SubscribeMetadataResponse) {
if DirectoryEtcSeaweedFS != event.Directory {
if DirectoryEtcSeaweedFS != event.EventNotification.NewParentPath {

13
weed/filer/filer_search.go

@ -41,6 +41,19 @@ func (f *Filer) ListDirectoryEntries(ctx context.Context, p util.FullPath, start
return entries, hasMore, err
}
// CountDirectoryEntries counts entries in a directory up to limit
func (f *Filer) CountDirectoryEntries(ctx context.Context, p util.FullPath, limit int) (count int, err error) {
entries, hasMore, err := f.ListDirectoryEntries(ctx, p, "", false, int64(limit), "", "", "")
if err != nil {
return 0, err
}
count = len(entries)
if hasMore {
count = limit // at least limit entries exist; more were available beyond the listing
}
return count, nil
}
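A usage sketch for a hypothetical caller holding a *Filer: because the count saturates at limit once hasMore is set, callers can only distinguish "fewer than limit" from "at least limit", which is exactly what an emptiness check needs.

    // Is the directory empty? Listing a single entry is enough to decide.
    count, err := f.CountDirectoryEntries(ctx, util.FullPath("/bucket/dir"), 1)
    if err == nil && count == 0 {
        // /bucket/dir had no entries at the time of the check
    }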
// For now, prefix and namePattern are mutually exclusive
func (f *Filer) StreamListDirectoryEntries(ctx context.Context, p util.FullPath, startFileName string, inclusive bool, limit int64, prefix string, namePattern string, namePatternExclude string, eachEntryFunc ListEachEntryFunc) (lastFileName string, err error) {
if strings.HasSuffix(string(p), "/") && len(p) > 1 {

132
weed/filer/reader_at.go

@ -7,6 +7,8 @@ import (
"math/rand"
"sync"
"golang.org/x/sync/errgroup"
"github.com/seaweedfs/seaweedfs/weed/glog"
"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
"github.com/seaweedfs/seaweedfs/weed/util"
@ -19,6 +21,11 @@ import (
// the prefetch count is derived from the -concurrentReaders option.
const DefaultPrefetchCount = 4
// minReadConcurrency is the minimum number of parallel chunk fetches.
// This ensures at least some parallelism even when prefetchCount is low,
// improving throughput for reads spanning multiple chunks.
const minReadConcurrency = 4
type ChunkReadAt struct {
masterClient *wdclient.MasterClient
chunkViews *IntervalList[*ChunkView]
@ -175,67 +182,139 @@ func (c *ChunkReadAt) ReadAtWithTime(ctx context.Context, p []byte, offset int64
return c.doReadAt(ctx, p, offset)
}
// chunkReadTask represents a single chunk read operation for parallel processing
type chunkReadTask struct {
chunk *ChunkView
bufferStart int64 // start position in the output buffer
bufferEnd int64 // end position in the output buffer
chunkOffset uint64 // offset within the chunk to read from
bytesRead int
modifiedTsNs int64
}
func (c *ChunkReadAt) doReadAt(ctx context.Context, p []byte, offset int64) (n int, ts int64, err error) {
// Collect all chunk read tasks
var tasks []*chunkReadTask
var gaps []struct{ start, length int64 } // gaps that need zero-filling
startOffset, remaining := offset, int64(len(p))
var nextChunks *Interval[*ChunkView]
var lastChunk *Interval[*ChunkView]
for x := c.chunkViews.Front(); x != nil; x = x.Next {
chunk := x.Value
if remaining <= 0 {
break
}
if x.Next != nil {
nextChunks = x.Next
}
lastChunk = x
// Handle gap before this chunk
if startOffset < chunk.ViewOffset {
gap := chunk.ViewOffset - startOffset
glog.V(4).Infof("zero [%d,%d)", startOffset, chunk.ViewOffset)
n += zero(p, startOffset-offset, gap)
gaps = append(gaps, struct{ start, length int64 }{startOffset - offset, gap})
startOffset, remaining = chunk.ViewOffset, remaining-gap
if remaining <= 0 {
break
}
}
// fmt.Printf(">>> doReadAt [%d,%d), chunk[%d,%d)\n", offset, offset+int64(len(p)), chunk.ViewOffset, chunk.ViewOffset+int64(chunk.ViewSize))
chunkStart, chunkStop := max(chunk.ViewOffset, startOffset), min(chunk.ViewOffset+int64(chunk.ViewSize), startOffset+remaining)
if chunkStart >= chunkStop {
continue
}
// glog.V(4).Infof("read [%d,%d), %d/%d chunk %s [%d,%d)", chunkStart, chunkStop, i, len(c.chunkViews), chunk.FileId, chunk.ViewOffset-chunk.Offset, chunk.ViewOffset-chunk.Offset+int64(chunk.ViewSize))
bufferOffset := chunkStart - chunk.ViewOffset + chunk.OffsetInChunk
ts = chunk.ModifiedTsNs
copied, err := c.readChunkSliceAt(ctx, p[startOffset-offset:chunkStop-chunkStart+startOffset-offset], chunk, nextChunks, uint64(bufferOffset))
if err != nil {
glog.Errorf("fetching chunk %+v: %v\n", chunk, err)
return copied, ts, err
tasks = append(tasks, &chunkReadTask{
chunk: chunk,
bufferStart: startOffset - offset,
bufferEnd: chunkStop - chunkStart + startOffset - offset,
chunkOffset: uint64(bufferOffset),
})
startOffset, remaining = chunkStop, remaining-(chunkStop-chunkStart)
}
// Zero-fill gaps
for _, gap := range gaps {
glog.V(4).Infof("zero [%d,%d)", offset+gap.start, offset+gap.start+gap.length)
n += zero(p, gap.start, gap.length)
}
// If only one chunk or random access mode, use sequential reading
if len(tasks) <= 1 || c.readerPattern.IsRandomMode() {
for _, task := range tasks {
copied, readErr := c.readChunkSliceAt(ctx, p[task.bufferStart:task.bufferEnd], task.chunk, nil, task.chunkOffset)
ts = max(ts, task.chunk.ModifiedTsNs)
if readErr != nil {
glog.Errorf("fetching chunk %+v: %v\n", task.chunk, readErr)
return n + copied, ts, readErr
}
n += copied
startOffset, remaining = startOffset+int64(copied), remaining-int64(copied)
}
} else {
// Parallel chunk fetching for multiple chunks
// This significantly improves throughput when chunks are on different volume servers
g, gCtx := errgroup.WithContext(ctx)
// Limit concurrency to avoid overwhelming the system
concurrency := c.prefetchCount
if concurrency < minReadConcurrency {
concurrency = minReadConcurrency
}
if concurrency > len(tasks) {
concurrency = len(tasks)
}
g.SetLimit(concurrency)
for _, task := range tasks {
g.Go(func() error {
// Read directly into the correct position in the output buffer
copied, readErr := c.readChunkSliceAtForParallel(gCtx, p[task.bufferStart:task.bufferEnd], task.chunk, task.chunkOffset)
task.bytesRead = copied
task.modifiedTsNs = task.chunk.ModifiedTsNs
return readErr
})
}
// Wait for all chunk reads to complete
if waitErr := g.Wait(); waitErr != nil {
err = waitErr
}
// Aggregate results (each task wrote into its own disjoint buffer range, so no reassembly is needed)
for _, task := range tasks {
n += task.bytesRead
ts = max(ts, task.modifiedTsNs)
}
if err != nil {
return n, ts, err
}
}
// glog.V(4).Infof("doReadAt [%d,%d), n:%v, err:%v", offset, offset+int64(len(p)), n, err)
// Trigger prefetch for sequential reads
if lastChunk != nil && lastChunk.Next != nil && c.prefetchCount > 0 && !c.readerPattern.IsRandomMode() {
c.readerCache.MaybeCache(lastChunk.Next, c.prefetchCount)
}
// zero the remaining bytes if a gap exists at the end of the last chunk (or a fully sparse file)
if err == nil && remaining > 0 {
// Zero the remaining bytes if a gap exists at the end
if remaining > 0 {
var delta int64
if c.fileSize >= startOffset {
delta = min(remaining, c.fileSize-startOffset)
startOffset -= offset
}
bufStart := startOffset - offset
if delta > 0 {
glog.V(4).Infof("zero2 [%d,%d) of file size %d bytes", startOffset, startOffset+delta, c.fileSize)
n += zero(p, startOffset, delta)
n += zero(p, bufStart, delta)
}
}
}
if err == nil && offset+int64(len(p)) >= c.fileSize {
err = io.EOF
}
// fmt.Printf("~~~ filled %d, err: %v\n\n", n, err)
return
}
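The parallel branch is safe without locking the output buffer because every task writes to a disjoint sub-slice of p. A stripped-down sketch of that pattern, where fetchChunk is a hypothetical stand-in for readChunkSliceAtForParallel (and which, like doReadAt, assumes Go 1.22+ per-iteration loop variables):

    func readParallel(ctx context.Context, p []byte, tasks []*chunkReadTask, concurrency int) error {
        g, gCtx := errgroup.WithContext(ctx)
        g.SetLimit(concurrency) // bounded fan-out, as computed in doReadAt
        for _, t := range tasks {
            g.Go(func() error {
                // t owns p[t.bufferStart:t.bufferEnd]; the ranges never overlap,
                // so concurrent goroutines need no synchronization on p.
                _, err := fetchChunk(gCtx, p[t.bufferStart:t.bufferEnd], t.chunk)
                return err
            })
        }
        return g.Wait() // first non-nil error, if any
    }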
func (c *ChunkReadAt) readChunkSliceAt(ctx context.Context, buffer []byte, chunkView *ChunkView, nextChunkViews *Interval[*ChunkView], offset uint64) (n int, err error) {
@ -249,7 +328,7 @@ func (c *ChunkReadAt) readChunkSliceAt(ctx context.Context, buffer []byte, chunk
}
shouldCache := (uint64(chunkView.ViewOffset) + chunkView.ChunkSize) <= c.readerCache.chunkCache.GetMaxFilePartSizeInCache()
n, err = c.readerCache.ReadChunkAt(buffer, chunkView.FileId, chunkView.CipherKey, chunkView.IsGzipped, int64(offset), int(chunkView.ChunkSize), shouldCache)
n, err = c.readerCache.ReadChunkAt(ctx, buffer, chunkView.FileId, chunkView.CipherKey, chunkView.IsGzipped, int64(offset), int(chunkView.ChunkSize), shouldCache)
if c.lastChunkFid != chunkView.FileId {
if chunkView.OffsetInChunk == 0 { // start of a new chunk
if c.lastChunkFid != "" {
@ -266,6 +345,13 @@ func (c *ChunkReadAt) readChunkSliceAt(ctx context.Context, buffer []byte, chunk
return
}
// readChunkSliceAtForParallel is a simplified version for parallel chunk fetching
// It doesn't update lastChunkFid or trigger prefetch (handled by the caller)
func (c *ChunkReadAt) readChunkSliceAtForParallel(ctx context.Context, buffer []byte, chunkView *ChunkView, offset uint64) (n int, err error) {
shouldCache := (uint64(chunkView.ViewOffset) + chunkView.ChunkSize) <= c.readerCache.chunkCache.GetMaxFilePartSizeInCache()
return c.readerCache.ReadChunkAt(ctx, buffer, chunkView.FileId, chunkView.CipherKey, chunkView.IsGzipped, int64(offset), int(chunkView.ChunkSize), shouldCache)
}
func zero(buffer []byte, start, length int64) int {
if length <= 0 {
return 0

78
weed/filer/reader_cache.go

@ -35,6 +35,7 @@ type SingleChunkCacher struct {
shouldCache bool
wg sync.WaitGroup
cacheStartedCh chan struct{}
done chan struct{} // signals when download is complete
}
func NewReaderCache(limit int, chunkCache chunk_cache.ChunkCache, lookupFileIdFn wdclient.LookupFileIdFunctionType) *ReaderCache {
@ -93,14 +94,18 @@ func (rc *ReaderCache) MaybeCache(chunkViews *Interval[*ChunkView], count int) {
return
}
func (rc *ReaderCache) ReadChunkAt(buffer []byte, fileId string, cipherKey []byte, isGzipped bool, offset int64, chunkSize int, shouldCache bool) (int, error) {
func (rc *ReaderCache) ReadChunkAt(ctx context.Context, buffer []byte, fileId string, cipherKey []byte, isGzipped bool, offset int64, chunkSize int, shouldCache bool) (int, error) {
rc.Lock()
if cacher, found := rc.downloaders[fileId]; found {
if n, err := cacher.readChunkAt(buffer, offset); n != 0 && err == nil {
rc.Unlock()
n, err := cacher.readChunkAt(ctx, buffer, offset)
if n > 0 || err != nil {
return n, err
}
// If n=0 and err=nil, the cacher couldn't provide data for this offset.
// Fall through to try chunkCache.
rc.Lock()
}
if shouldCache || rc.lookupFileIdFn == nil {
n, err := rc.chunkCache.ReadChunkAt(buffer, fileId, uint64(offset))
@ -134,7 +139,7 @@ func (rc *ReaderCache) ReadChunkAt(buffer []byte, fileId string, cipherKey []byt
rc.downloaders[fileId] = cacher
rc.Unlock()
return cacher.readChunkAt(buffer, offset)
return cacher.readChunkAt(ctx, buffer, offset)
}
func (rc *ReaderCache) UnCache(fileId string) {
@ -166,38 +171,53 @@ func newSingleChunkCacher(parent *ReaderCache, fileId string, cipherKey []byte,
chunkSize: chunkSize,
shouldCache: shouldCache,
cacheStartedCh: make(chan struct{}),
done: make(chan struct{}),
}
}
// startCaching downloads the chunk data in the background.
// It does NOT hold the lock during the HTTP download to allow concurrent readers
// to wait efficiently using the done channel.
func (s *SingleChunkCacher) startCaching() {
s.wg.Add(1)
defer s.wg.Done()
s.Lock()
defer s.Unlock()
defer close(s.done) // guarantee completion signal even on panic
s.cacheStartedCh <- struct{}{} // means this has been started
s.cacheStartedCh <- struct{}{} // signal that we've started
// Note: We intentionally use context.Background() here, NOT a request-specific context.
// The downloaded chunk is a shared resource - multiple concurrent readers may be waiting
// for this same download to complete. If we used a request context and that request was
// cancelled, it would abort the download and cause errors for all other waiting readers.
// The download should always complete once started to serve all potential consumers.
// Lookup file ID without holding the lock
urlStrings, err := s.parent.lookupFileIdFn(context.Background(), s.chunkFileId)
if err != nil {
s.Lock()
s.err = fmt.Errorf("operation LookupFileId %s failed, err: %v", s.chunkFileId, err)
s.Unlock()
return
}
s.data = mem.Allocate(s.chunkSize)
_, s.err = util_http.RetriedFetchChunkData(context.Background(), s.data, urlStrings, s.cipherKey, s.isGzipped, true, 0, s.chunkFileId)
if s.err != nil {
mem.Free(s.data)
s.data = nil
return
}
// Allocate buffer and download without holding the lock
// This allows multiple downloads to proceed in parallel
data := mem.Allocate(s.chunkSize)
_, fetchErr := util_http.RetriedFetchChunkData(context.Background(), data, urlStrings, s.cipherKey, s.isGzipped, true, 0, s.chunkFileId)
// Now acquire lock to update state
s.Lock()
if fetchErr != nil {
mem.Free(data)
s.err = fetchErr
} else {
s.data = data
if s.shouldCache {
s.parent.chunkCache.SetChunk(s.chunkFileId, s.data)
}
atomic.StoreInt64(&s.completedTimeNew, time.Now().UnixNano())
return
}
s.Unlock()
}
func (s *SingleChunkCacher) destroy() {
@ -209,13 +229,34 @@ func (s *SingleChunkCacher) destroy() {
if s.data != nil {
mem.Free(s.data)
s.data = nil
close(s.cacheStartedCh)
}
}
func (s *SingleChunkCacher) readChunkAt(buf []byte, offset int64) (int, error) {
// readChunkAt reads data from the cached chunk.
// It waits for the download to complete if it's still in progress.
// The ctx parameter allows the reader to cancel its wait (but the download continues
// for other readers - see comment in startCaching about shared resource semantics).
func (s *SingleChunkCacher) readChunkAt(ctx context.Context, buf []byte, offset int64) (int, error) {
s.wg.Add(1)
defer s.wg.Done()
// Wait for download to complete, but allow reader cancellation.
// Prioritize checking done first - if data is already available,
// return it even if context is also cancelled.
select {
case <-s.done:
// Download already completed, proceed immediately
default:
// Download not complete, wait for it or context cancellation
select {
case <-s.done:
// Download completed
case <-ctx.Done():
// Reader cancelled while waiting - download continues for other readers
return 0, ctx.Err()
}
}
s.Lock()
defer s.Unlock()
@ -228,5 +269,4 @@ func (s *SingleChunkCacher) readChunkAt(buf []byte, offset int64) (int, error) {
}
return copy(buf, s.data[offset:]), nil
}
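Taken together, startCaching and readChunkAt form a download-once, read-many pattern. A self-contained sketch of just that skeleton, where download is a hypothetical stand-in for the lookup-and-fetch sequence above:

    // download stands in for lookupFileIdFn + RetriedFetchChunkData.
    func startShared(download func(context.Context) error) (done chan struct{}) {
        done = make(chan struct{})
        go func() {
            defer close(done)                  // always signal, even on error paths
            _ = download(context.Background()) // deliberately not a reader's ctx
        }()
        return done
    }

    // Each reader waits with its own ctx; abandoning the wait does not stop the
    // shared download, and completed data wins over an already-cancelled context.
    func waitForChunk(ctx context.Context, done <-chan struct{}) error {
        select {
        case <-done:
            return nil
        default:
            select {
            case <-done:
                return nil
            case <-ctx.Done():
                return ctx.Err()
            }
        }
    }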

505
weed/filer/reader_cache_test.go

@ -0,0 +1,505 @@
package filer
import (
"context"
"fmt"
"sync"
"sync/atomic"
"testing"
"time"
)
// mockChunkCacheForReaderCache implements chunk cache for testing
type mockChunkCacheForReaderCache struct {
data map[string][]byte
hitCount int32
mu sync.Mutex
}
func newMockChunkCacheForReaderCache() *mockChunkCacheForReaderCache {
return &mockChunkCacheForReaderCache{
data: make(map[string][]byte),
}
}
func (m *mockChunkCacheForReaderCache) GetChunk(fileId string, minSize uint64) []byte {
m.mu.Lock()
defer m.mu.Unlock()
if d, ok := m.data[fileId]; ok {
atomic.AddInt32(&m.hitCount, 1)
return d
}
return nil
}
func (m *mockChunkCacheForReaderCache) ReadChunkAt(data []byte, fileId string, offset uint64) (int, error) {
m.mu.Lock()
defer m.mu.Unlock()
if d, ok := m.data[fileId]; ok && int(offset) < len(d) {
atomic.AddInt32(&m.hitCount, 1)
n := copy(data, d[offset:])
return n, nil
}
return 0, nil
}
func (m *mockChunkCacheForReaderCache) SetChunk(fileId string, data []byte) {
m.mu.Lock()
defer m.mu.Unlock()
m.data[fileId] = data
}
func (m *mockChunkCacheForReaderCache) GetMaxFilePartSizeInCache() uint64 {
return 1024 * 1024 // 1MB
}
func (m *mockChunkCacheForReaderCache) IsInCache(fileId string, lockNeeded bool) bool {
m.mu.Lock()
defer m.mu.Unlock()
_, ok := m.data[fileId]
return ok
}
// TestReaderCacheContextCancellation tests that a reader can cancel its wait
// while the download continues for other readers
func TestReaderCacheContextCancellation(t *testing.T) {
cache := newMockChunkCacheForReaderCache()
// Create a ReaderCache - we can't easily test the full flow without mocking HTTP,
// but we can test the context cancellation in readChunkAt
rc := NewReaderCache(10, cache, nil)
defer rc.destroy()
// Pre-populate cache to avoid HTTP calls
testData := []byte("test data for context cancellation")
cache.SetChunk("test-file-1", testData)
// Test that context cancellation works
ctx, cancel := context.WithCancel(context.Background())
buffer := make([]byte, len(testData))
n, err := rc.ReadChunkAt(ctx, buffer, "test-file-1", nil, false, 0, len(testData), true)
if err != nil {
t.Errorf("Expected no error, got: %v", err)
}
if n != len(testData) {
t.Errorf("Expected %d bytes, got %d", len(testData), n)
}
// Cancel context and verify it doesn't affect already completed reads
cancel()
// Subsequent read with cancelled context should still work from cache
buffer2 := make([]byte, len(testData))
n2, err2 := rc.ReadChunkAt(ctx, buffer2, "test-file-1", nil, false, 0, len(testData), true)
// Note: a chunk-cache hit does not consult the context, so this read may
// still succeed after cancellation; we only verify it neither hangs nor panics.
_ = n2
_ = err2
}
// TestReaderCacheFallbackToChunkCache tests that when a cacher returns n=0, err=nil,
// we fall back to the chunkCache
func TestReaderCacheFallbackToChunkCache(t *testing.T) {
cache := newMockChunkCacheForReaderCache()
// Pre-populate the chunk cache with data
testData := []byte("fallback test data that should be found in chunk cache")
cache.SetChunk("fallback-file", testData)
rc := NewReaderCache(10, cache, nil)
defer rc.destroy()
// Read should hit the chunk cache
buffer := make([]byte, len(testData))
n, err := rc.ReadChunkAt(context.Background(), buffer, "fallback-file", nil, false, 0, len(testData), true)
if err != nil {
t.Errorf("Expected no error, got: %v", err)
}
if n != len(testData) {
t.Errorf("Expected %d bytes, got %d", len(testData), n)
}
// Verify cache was hit
if cache.hitCount == 0 {
t.Error("Expected chunk cache to be hit")
}
}
// TestReaderCacheMultipleReadersWaitForSameChunk tests that multiple readers
// can wait for the same chunk download to complete
func TestReaderCacheMultipleReadersWaitForSameChunk(t *testing.T) {
cache := newMockChunkCacheForReaderCache()
// Pre-populate cache so we don't need HTTP
testData := make([]byte, 1024)
for i := range testData {
testData[i] = byte(i % 256)
}
cache.SetChunk("shared-chunk", testData)
rc := NewReaderCache(10, cache, nil)
defer rc.destroy()
// Launch multiple concurrent readers for the same chunk
numReaders := 10
var wg sync.WaitGroup
errors := make(chan error, numReaders)
bytesRead := make(chan int, numReaders)
for i := 0; i < numReaders; i++ {
wg.Add(1)
go func() {
defer wg.Done()
buffer := make([]byte, len(testData))
n, err := rc.ReadChunkAt(context.Background(), buffer, "shared-chunk", nil, false, 0, len(testData), true)
if err != nil {
errors <- err
}
bytesRead <- n
}()
}
wg.Wait()
close(errors)
close(bytesRead)
// Check for errors
for err := range errors {
t.Errorf("Reader got error: %v", err)
}
// Verify all readers got the expected data
for n := range bytesRead {
if n != len(testData) {
t.Errorf("Expected %d bytes, got %d", len(testData), n)
}
}
}
// TestReaderCachePartialRead tests reading at different offsets
func TestReaderCachePartialRead(t *testing.T) {
cache := newMockChunkCacheForReaderCache()
testData := []byte("0123456789ABCDEFGHIJ")
cache.SetChunk("partial-read-file", testData)
rc := NewReaderCache(10, cache, nil)
defer rc.destroy()
tests := []struct {
name string
offset int64
size int
expected []byte
}{
{"read from start", 0, 5, []byte("01234")},
{"read from middle", 5, 5, []byte("56789")},
{"read to end", 15, 5, []byte("FGHIJ")},
{"read single byte", 10, 1, []byte("A")},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
buffer := make([]byte, tt.size)
n, err := rc.ReadChunkAt(context.Background(), buffer, "partial-read-file", nil, false, tt.offset, len(testData), true)
if err != nil {
t.Errorf("Expected no error, got: %v", err)
}
if n != tt.size {
t.Errorf("Expected %d bytes, got %d", tt.size, n)
}
if string(buffer[:n]) != string(tt.expected) {
t.Errorf("Expected %q, got %q", tt.expected, buffer[:n])
}
})
}
}
// TestReaderCacheCleanup tests that old downloaders are cleaned up
func TestReaderCacheCleanup(t *testing.T) {
cache := newMockChunkCacheForReaderCache()
// Create cache with limit of 3
rc := NewReaderCache(3, cache, nil)
defer rc.destroy()
// Add data for multiple files
for i := 0; i < 5; i++ {
fileId := string(rune('A' + i))
data := []byte("data for file " + fileId)
cache.SetChunk(fileId, data)
}
// Read from multiple files - should trigger cleanup when exceeding limit
for i := 0; i < 5; i++ {
fileId := string(rune('A' + i))
buffer := make([]byte, 20)
_, err := rc.ReadChunkAt(context.Background(), buffer, fileId, nil, false, 0, 20, true)
if err != nil {
t.Errorf("Read error for file %s: %v", fileId, err)
}
}
// Cache should still work - reads should succeed
for i := 0; i < 5; i++ {
fileId := string(rune('A' + i))
buffer := make([]byte, 20)
n, err := rc.ReadChunkAt(context.Background(), buffer, fileId, nil, false, 0, 20, true)
if err != nil {
t.Errorf("Second read error for file %s: %v", fileId, err)
}
if n == 0 {
t.Errorf("Expected data for file %s, got 0 bytes", fileId)
}
}
}
// TestSingleChunkCacherDoneSignal tests that done channel is always closed
func TestSingleChunkCacherDoneSignal(t *testing.T) {
cache := newMockChunkCacheForReaderCache()
rc := NewReaderCache(10, cache, nil)
defer rc.destroy()
// Test that we can read even when data is in cache (done channel should work)
testData := []byte("done signal test")
cache.SetChunk("done-signal-test", testData)
// Multiple goroutines reading same chunk
var wg sync.WaitGroup
for i := 0; i < 5; i++ {
wg.Add(1)
go func() {
defer wg.Done()
buffer := make([]byte, len(testData))
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
defer cancel()
n, err := rc.ReadChunkAt(ctx, buffer, "done-signal-test", nil, false, 0, len(testData), true)
if err != nil && err != context.DeadlineExceeded {
t.Errorf("Unexpected error: %v", err)
}
if n == 0 && err == nil {
t.Error("Got 0 bytes with no error")
}
}()
}
// Should complete without hanging
done := make(chan struct{})
go func() {
wg.Wait()
close(done)
}()
select {
case <-done:
// Success
case <-time.After(10 * time.Second):
t.Fatal("Test timed out - done channel may not be signaled correctly")
}
}
// ============================================================================
// Tests that exercise SingleChunkCacher concurrency logic
// ============================================================================
//
// These tests use blocking lookupFileIdFn to exercise the wait/cancellation
// logic in SingleChunkCacher without requiring HTTP calls.
// TestSingleChunkCacherLookupError tests handling of lookup errors
func TestSingleChunkCacherLookupError(t *testing.T) {
cache := newMockChunkCacheForReaderCache()
// Lookup function that returns an error
lookupFn := func(ctx context.Context, fileId string) ([]string, error) {
return nil, fmt.Errorf("lookup failed for %s", fileId)
}
rc := NewReaderCache(10, cache, lookupFn)
defer rc.destroy()
buffer := make([]byte, 100)
_, err := rc.ReadChunkAt(context.Background(), buffer, "error-test", nil, false, 0, 100, true)
if err == nil {
t.Error("Expected an error, got nil")
}
}
// TestSingleChunkCacherContextCancellationDuringLookup tests that a reader can
// cancel its wait while the lookup is in progress. This exercises the actual
// SingleChunkCacher wait/cancel logic.
func TestSingleChunkCacherContextCancellationDuringLookup(t *testing.T) {
cache := newMockChunkCacheForReaderCache()
lookupStarted := make(chan struct{})
lookupCanFinish := make(chan struct{})
// Lookup function that blocks to simulate slow operation
lookupFn := func(ctx context.Context, fileId string) ([]string, error) {
close(lookupStarted)
<-lookupCanFinish // Block until test allows completion
return nil, fmt.Errorf("lookup completed but reader should have cancelled")
}
rc := NewReaderCache(10, cache, lookupFn)
defer rc.destroy()
defer close(lookupCanFinish) // Ensure cleanup
ctx, cancel := context.WithCancel(context.Background())
readResult := make(chan error, 1)
go func() {
buffer := make([]byte, 100)
_, err := rc.ReadChunkAt(ctx, buffer, "cancel-during-lookup", nil, false, 0, 100, true)
readResult <- err
}()
// Wait for lookup to start, then cancel the reader's context
select {
case <-lookupStarted:
cancel() // Cancel the reader while lookup is blocked
case <-time.After(5 * time.Second):
t.Fatal("Lookup never started")
}
// Read should return with context.Canceled
select {
case err := <-readResult:
if err != context.Canceled {
t.Errorf("Expected context.Canceled, got: %v", err)
}
case <-time.After(5 * time.Second):
t.Fatal("Read did not complete after context cancellation")
}
}
// TestSingleChunkCacherMultipleReadersWaitForDownload tests that multiple readers
// can wait for the same SingleChunkCacher download to complete. When lookup fails,
// all readers should receive the same error.
func TestSingleChunkCacherMultipleReadersWaitForDownload(t *testing.T) {
cache := newMockChunkCacheForReaderCache()
lookupStarted := make(chan struct{})
lookupCanFinish := make(chan struct{})
var lookupStartedOnce sync.Once
// Lookup function that blocks to simulate slow operation
lookupFn := func(ctx context.Context, fileId string) ([]string, error) {
lookupStartedOnce.Do(func() { close(lookupStarted) })
<-lookupCanFinish
return nil, fmt.Errorf("simulated lookup error")
}
rc := NewReaderCache(10, cache, lookupFn)
defer rc.destroy()
numReaders := 5
var wg sync.WaitGroup
errors := make(chan error, numReaders)
// Start multiple readers for the same chunk
for i := 0; i < numReaders; i++ {
wg.Add(1)
go func() {
defer wg.Done()
buffer := make([]byte, 100)
_, err := rc.ReadChunkAt(context.Background(), buffer, "shared-chunk", nil, false, 0, 100, true)
errors <- err
}()
}
// Wait for lookup to start, then allow completion
select {
case <-lookupStarted:
close(lookupCanFinish)
case <-time.After(5 * time.Second):
close(lookupCanFinish)
t.Fatal("Lookup never started")
}
wg.Wait()
close(errors)
// All readers should receive an error
errorCount := 0
for err := range errors {
if err != nil {
errorCount++
}
}
if errorCount != numReaders {
t.Errorf("Expected %d errors, got %d", numReaders, errorCount)
}
}
// TestSingleChunkCacherOneReaderCancelsOthersContinue tests that when one reader
// cancels, other readers waiting on the same chunk continue to wait.
func TestSingleChunkCacherOneReaderCancelsOthersContinue(t *testing.T) {
cache := newMockChunkCacheForReaderCache()
lookupStarted := make(chan struct{})
lookupCanFinish := make(chan struct{})
var lookupStartedOnce sync.Once
lookupFn := func(ctx context.Context, fileId string) ([]string, error) {
lookupStartedOnce.Do(func() { close(lookupStarted) })
<-lookupCanFinish
return nil, fmt.Errorf("simulated error after delay")
}
rc := NewReaderCache(10, cache, lookupFn)
defer rc.destroy()
cancelledReaderDone := make(chan error, 1)
otherReaderDone := make(chan error, 1)
ctx, cancel := context.WithCancel(context.Background())
// Start reader that will be cancelled
go func() {
buffer := make([]byte, 100)
_, err := rc.ReadChunkAt(ctx, buffer, "shared-chunk-2", nil, false, 0, 100, true)
cancelledReaderDone <- err
}()
// Start reader that will NOT be cancelled
go func() {
buffer := make([]byte, 100)
_, err := rc.ReadChunkAt(context.Background(), buffer, "shared-chunk-2", nil, false, 0, 100, true)
otherReaderDone <- err
}()
// Wait for lookup to start
select {
case <-lookupStarted:
case <-time.After(5 * time.Second):
t.Fatal("Lookup never started")
}
// Cancel the first reader
cancel()
// First reader should complete with context.Canceled quickly
select {
case err := <-cancelledReaderDone:
if err != context.Canceled {
t.Errorf("Cancelled reader: expected context.Canceled, got: %v", err)
}
case <-time.After(2 * time.Second):
t.Error("Cancelled reader did not complete quickly")
}
// Allow the download to complete
close(lookupCanFinish)
// Other reader should eventually complete (with error since lookup returns error)
select {
case err := <-otherReaderDone:
if err == nil || err == context.Canceled {
t.Errorf("Other reader: expected non-nil non-cancelled error, got: %v", err)
}
// Expected: "simulated error after delay"
case <-time.After(5 * time.Second):
t.Error("Other reader did not complete")
}
}

6
weed/operation/upload_content.go

@ -90,7 +90,6 @@ func (uploadResult *UploadResult) ToPbFileChunkWithSSE(fileId string, offset int
}
var (
fileNameEscaper = strings.NewReplacer(`\`, `\\`, `"`, `\"`, "\n", "")
uploader *Uploader
uploaderErr error
once sync.Once
@ -336,8 +335,9 @@ func (uploader *Uploader) upload_content(ctx context.Context, fillBufferFunction
body_writer = multipart.NewWriter(option.BytesBuffer)
}
h := make(textproto.MIMEHeader)
filename := fileNameEscaper.Replace(option.Filename)
h.Set("Content-Disposition", fmt.Sprintf(`form-data; name="file"; filename="%s"`, filename))
// Use mime.FormatMediaType for RFC 6266 compliant Content-Disposition,
// properly handling non-ASCII characters and special characters
h.Set("Content-Disposition", mime.FormatMediaType("form-data", map[string]string{"name": "file", "filename": option.Filename}))
h.Set("Idempotency-Key", option.UploadUrl)
if option.MimeType == "" {
option.MimeType = mime.TypeByExtension(strings.ToLower(filepath.Ext(option.Filename)))
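Roughly what the two approaches emit for a non-ASCII filename, based on the standard library's documented RFC 2231/6266 behavior (exact attribute order may vary):

    // Old escaper: backslashes/quotes escaped and newlines dropped, but
    // non-ASCII bytes sent raw:
    //   form-data; name="file"; filename="résumé.pdf"
    // mime.FormatMediaType applies RFC 2231/6266 extended encoding instead:
    //   form-data; filename*=utf-8''r%C3%A9sum%C3%A9.pdf; name=file
    fmt.Println(mime.FormatMediaType("form-data",
        map[string]string{"name": "file", "filename": "résumé.pdf"}))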

2
weed/pb/master.proto

@ -81,6 +81,7 @@ message Heartbeat {
map<string, uint32> max_volume_counts = 4;
uint32 grpc_port = 20;
repeated string location_uuids = 21;
string id = 22; // volume server id, independent of ip:port for stable identification
}
message HeartbeatResponse {
@ -289,6 +290,7 @@ message DataNodeInfo {
string id = 1;
map<string, DiskInfo> diskInfos = 2;
uint32 grpc_port = 3;
string address = 4; // ip:port for connecting to the volume server
}
message RackInfo {
string id = 1;

26
weed/pb/master_pb/master.pb.go

@ -44,6 +44,7 @@ type Heartbeat struct {
MaxVolumeCounts map[string]uint32 `protobuf:"bytes,4,rep,name=max_volume_counts,json=maxVolumeCounts,proto3" json:"max_volume_counts,omitempty" protobuf_key:"bytes,1,opt,name=key" protobuf_val:"varint,2,opt,name=value"`
GrpcPort uint32 `protobuf:"varint,20,opt,name=grpc_port,json=grpcPort,proto3" json:"grpc_port,omitempty"`
LocationUuids []string `protobuf:"bytes,21,rep,name=location_uuids,json=locationUuids,proto3" json:"location_uuids,omitempty"`
Id string `protobuf:"bytes,22,opt,name=id,proto3" json:"id,omitempty"` // volume server id, independent of ip:port for stable identification
unknownFields protoimpl.UnknownFields
sizeCache protoimpl.SizeCache
}
@ -204,6 +205,13 @@ func (x *Heartbeat) GetLocationUuids() []string {
return nil
}
func (x *Heartbeat) GetId() string {
if x != nil {
return x.Id
}
return ""
}
type HeartbeatResponse struct {
state protoimpl.MessageState `protogen:"open.v1"`
VolumeSizeLimit uint64 `protobuf:"varint,1,opt,name=volume_size_limit,json=volumeSizeLimit,proto3" json:"volume_size_limit,omitempty"`
@ -2039,6 +2047,7 @@ type DataNodeInfo struct {
Id string `protobuf:"bytes,1,opt,name=id,proto3" json:"id,omitempty"`
DiskInfos map[string]*DiskInfo `protobuf:"bytes,2,rep,name=diskInfos,proto3" json:"diskInfos,omitempty" protobuf_key:"bytes,1,opt,name=key" protobuf_val:"bytes,2,opt,name=value"`
GrpcPort uint32 `protobuf:"varint,3,opt,name=grpc_port,json=grpcPort,proto3" json:"grpc_port,omitempty"`
Address string `protobuf:"bytes,4,opt,name=address,proto3" json:"address,omitempty"` // ip:port for connecting to the volume server
unknownFields protoimpl.UnknownFields
sizeCache protoimpl.SizeCache
}
@ -2094,6 +2103,13 @@ func (x *DataNodeInfo) GetGrpcPort() uint32 {
return 0
}
func (x *DataNodeInfo) GetAddress() string {
if x != nil {
return x.Address
}
return ""
}
type RackInfo struct {
state protoimpl.MessageState `protogen:"open.v1"`
Id string `protobuf:"bytes,1,opt,name=id,proto3" json:"id,omitempty"`
@ -4038,7 +4054,7 @@ var File_master_proto protoreflect.FileDescriptor
const file_master_proto_rawDesc = "" +
"\n" +
"\fmaster.proto\x12\tmaster_pb\"\xc0\a\n" +
"\fmaster.proto\x12\tmaster_pb\"\xd0\a\n" +
"\tHeartbeat\x12\x0e\n" +
"\x02ip\x18\x01 \x01(\tR\x02ip\x12\x12\n" +
"\x04port\x18\x02 \x01(\rR\x04port\x12\x1d\n" +
@ -4063,7 +4079,8 @@ const file_master_proto_rawDesc = "" +
"\x10has_no_ec_shards\x18\x13 \x01(\bR\rhasNoEcShards\x12U\n" +
"\x11max_volume_counts\x18\x04 \x03(\v2).master_pb.Heartbeat.MaxVolumeCountsEntryR\x0fmaxVolumeCounts\x12\x1b\n" +
"\tgrpc_port\x18\x14 \x01(\rR\bgrpcPort\x12%\n" +
"\x0elocation_uuids\x18\x15 \x03(\tR\rlocationUuids\x1aB\n" +
"\x0elocation_uuids\x18\x15 \x03(\tR\rlocationUuids\x12\x0e\n" +
"\x02id\x18\x16 \x01(\tR\x02id\x1aB\n" +
"\x14MaxVolumeCountsEntry\x12\x10\n" +
"\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n" +
"\x05value\x18\x02 \x01(\rR\x05value:\x028\x01\"\xcd\x02\n" +
@ -4254,11 +4271,12 @@ const file_master_proto_rawDesc = "" +
"\fvolume_infos\x18\x06 \x03(\v2#.master_pb.VolumeInformationMessageR\vvolumeInfos\x12P\n" +
"\x0eec_shard_infos\x18\a \x03(\v2*.master_pb.VolumeEcShardInformationMessageR\fecShardInfos\x12.\n" +
"\x13remote_volume_count\x18\b \x01(\x03R\x11remoteVolumeCount\x12\x17\n" +
"\adisk_id\x18\t \x01(\rR\x06diskId\"\xd4\x01\n" +
"\adisk_id\x18\t \x01(\rR\x06diskId\"\xee\x01\n" +
"\fDataNodeInfo\x12\x0e\n" +
"\x02id\x18\x01 \x01(\tR\x02id\x12D\n" +
"\tdiskInfos\x18\x02 \x03(\v2&.master_pb.DataNodeInfo.DiskInfosEntryR\tdiskInfos\x12\x1b\n" +
"\tgrpc_port\x18\x03 \x01(\rR\bgrpcPort\x1aQ\n" +
"\tgrpc_port\x18\x03 \x01(\rR\bgrpcPort\x12\x18\n" +
"\aaddress\x18\x04 \x01(\tR\aaddress\x1aQ\n" +
"\x0eDiskInfosEntry\x12\x10\n" +
"\x03key\x18\x01 \x01(\tR\x03key\x12)\n" +
"\x05value\x18\x02 \x01(\v2\x13.master_pb.DiskInfoR\x05value:\x028\x01\"\xf0\x01\n" +

12
weed/pb/server_address.go

@ -2,11 +2,12 @@ package pb
import (
"fmt"
"github.com/seaweedfs/seaweedfs/weed/pb/master_pb"
"github.com/seaweedfs/seaweedfs/weed/util"
"net"
"strconv"
"strings"
"github.com/seaweedfs/seaweedfs/weed/pb/master_pb"
"github.com/seaweedfs/seaweedfs/weed/util"
)
type ServerAddress string
@ -32,7 +33,12 @@ func NewServerAddressWithGrpcPort(address string, grpcPort int) ServerAddress {
}
func NewServerAddressFromDataNode(dn *master_pb.DataNodeInfo) ServerAddress {
return NewServerAddressWithGrpcPort(dn.Id, int(dn.GrpcPort))
// Use Address field if available (new behavior), fall back to Id for backward compatibility
addr := dn.Address
if addr == "" {
addr = dn.Id // backward compatibility: old nodes use ip:port as id
}
return NewServerAddressWithGrpcPort(addr, int(dn.GrpcPort))
}
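A hedged example of the fallback in action (field values are illustrative):

    // Old master payload: no Address, Id carries ip:port.
    legacy := &master_pb.DataNodeInfo{Id: "10.0.0.5:8080", GrpcPort: 18080}
    // New payload: Id is a stable identifier, Address carries ip:port.
    current := &master_pb.DataNodeInfo{Id: "vol-node-1", Address: "10.0.0.5:8080", GrpcPort: 18080}
    // Both resolve to the same dialable address.
    _ = NewServerAddressFromDataNode(legacy)  // "10.0.0.5:8080" (grpc 18080)
    _ = NewServerAddressFromDataNode(current) // "10.0.0.5:8080" (grpc 18080)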
func NewServerAddressFromLocation(dn *master_pb.Location) ServerAddress {

1
weed/s3api/auth_signature_v4.go

@ -55,6 +55,7 @@ func (iam *IdentityAccessManagement) reqSignatureV4Verify(r *http.Request) (*Ide
const (
emptySHA256 = "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
streamingContentSHA256 = "STREAMING-AWS4-HMAC-SHA256-PAYLOAD"
streamingContentSHA256Trailer = "STREAMING-AWS4-HMAC-SHA256-PAYLOAD-TRAILER"
streamingUnsignedPayload = "STREAMING-UNSIGNED-PAYLOAD-TRAILER"
unsignedPayload = "UNSIGNED-PAYLOAD"
// Limit for IAM/STS request body size to prevent DoS attacks

10
weed/s3api/chunked_reader_v4.go

@ -53,8 +53,8 @@ func (iam *IdentityAccessManagement) calculateSeedSignature(r *http.Request) (cr
// This check ensures we only proceed for streaming uploads.
switch authInfo.HashedPayload {
case streamingContentSHA256:
glog.V(3).Infof("streaming content sha256")
case streamingContentSHA256, streamingContentSHA256Trailer:
glog.V(3).Infof("streaming content sha256 (with trailer: %v)", authInfo.HashedPayload == streamingContentSHA256Trailer)
case streamingUnsignedPayload:
glog.V(3).Infof("streaming unsigned payload")
default:
@ -87,9 +87,9 @@ func (iam *IdentityAccessManagement) newChunkedReader(req *http.Request) (io.Rea
var errCode s3err.ErrorCode
switch contentSha256Header {
// Payload for STREAMING signature should be 'STREAMING-AWS4-HMAC-SHA256-PAYLOAD'
case streamingContentSHA256:
glog.V(3).Infof("streaming content sha256")
// Payload for STREAMING signature should be 'STREAMING-AWS4-HMAC-SHA256-PAYLOAD' or 'STREAMING-AWS4-HMAC-SHA256-PAYLOAD-TRAILER'
case streamingContentSHA256, streamingContentSHA256Trailer:
glog.V(3).Infof("streaming content sha256 (with trailer: %v)", contentSha256Header == streamingContentSHA256Trailer)
credential, seedSignature, region, service, seedDate, errCode = iam.calculateSeedSignature(req)
if errCode != s3err.ErrNone {
return nil, errCode

144
weed/s3api/chunked_reader_v4_test.go

@ -234,6 +234,150 @@ func TestSignedStreamingUpload(t *testing.T) {
assert.Equal(t, chunk1Data+chunk2Data, string(data))
}
// createTrailerStreamingRequest creates a streaming upload request with trailer for testing.
// If useValidTrailerSignature is true, uses a correctly calculated trailer signature;
// otherwise uses an intentionally wrong signature for negative testing.
func createTrailerStreamingRequest(t *testing.T, useValidTrailerSignature bool) (*http.Request, string) {
chunk1Data := "hello world\n"
chunk1DataLen := len(chunk1Data)
chunk1DataLenHex := fmt.Sprintf("%x", chunk1DataLen)
// Use current time for signatures
now := time.Now().UTC()
amzDate := now.Format(iso8601Format)
dateStamp := now.Format(yyyymmdd)
// Calculate seed signature
scope := dateStamp + "/" + defaultRegion + "/s3/aws4_request"
// Build canonical request for seed signature
hashedPayload := "STREAMING-AWS4-HMAC-SHA256-PAYLOAD-TRAILER"
canonicalHeaders := "content-encoding:aws-chunked\n" +
"host:s3.amazonaws.com\n" +
"x-amz-content-sha256:" + hashedPayload + "\n" +
"x-amz-date:" + amzDate + "\n" +
fmt.Sprintf("x-amz-decoded-content-length:%d\n", chunk1DataLen) +
"x-amz-trailer:x-amz-checksum-crc32\n"
signedHeaders := "content-encoding;host;x-amz-content-sha256;x-amz-date;x-amz-decoded-content-length;x-amz-trailer"
canonicalRequest := "PUT\n" +
"/test-bucket/test-object\n" +
"\n" +
canonicalHeaders + "\n" +
signedHeaders + "\n" +
hashedPayload
canonicalRequestHash := getSHA256Hash([]byte(canonicalRequest))
stringToSign := "AWS4-HMAC-SHA256\n" + amzDate + "\n" + scope + "\n" + canonicalRequestHash
signingKey := getSigningKey(defaultSecretAccessKey, dateStamp, defaultRegion, "s3")
seedSignature := getSignature(signingKey, stringToSign)
// Calculate chunk signatures
chunk1Hash := getSHA256Hash([]byte(chunk1Data))
chunk1StringToSign := "AWS4-HMAC-SHA256-PAYLOAD\n" + amzDate + "\n" + scope + "\n" +
seedSignature + "\n" + emptySHA256 + "\n" + chunk1Hash
chunk1Signature := getSignature(signingKey, chunk1StringToSign)
// Final chunk (0 bytes)
finalStringToSign := "AWS4-HMAC-SHA256-PAYLOAD\n" + amzDate + "\n" + scope + "\n" +
chunk1Signature + "\n" + emptySHA256 + "\n" + emptySHA256
finalSignature := getSignature(signingKey, finalStringToSign)
// Calculate CRC32 checksum for trailer
crcWriter := crc32.NewIEEE()
_, crcErr := crcWriter.Write([]byte(chunk1Data))
assert.NoError(t, crcErr)
checksum := crcWriter.Sum(nil)
base64EncodedChecksum := base64.StdEncoding.EncodeToString(checksum)
// The on-wire trailer format uses \r\n (HTTP/aws-chunked convention)
trailerOnWire := "x-amz-checksum-crc32:" + base64EncodedChecksum + "\r\n"
// Calculate or use wrong trailer signature
var trailerSignature string
if useValidTrailerSignature {
// The canonical trailer content uses \n for signing (per AWS SigV4 spec)
trailerCanonical := "x-amz-checksum-crc32:" + base64EncodedChecksum + "\n"
trailerHash := getSHA256Hash([]byte(trailerCanonical))
trailerStringToSign := "AWS4-HMAC-SHA256-TRAILER\n" + amzDate + "\n" + scope + "\n" +
finalSignature + "\n" + trailerHash
trailerSignature = getSignature(signingKey, trailerStringToSign)
} else {
// Intentionally wrong signature for negative testing
trailerSignature = "0000000000000000000000000000000000000000000000000000000000000000"
}
// Build the chunked payload with trailer and trailer signature
payload := fmt.Sprintf("%s;chunk-signature=%s\r\n%s\r\n", chunk1DataLenHex, chunk1Signature, chunk1Data) +
fmt.Sprintf("0;chunk-signature=%s\r\n", finalSignature) +
trailerOnWire +
"x-amz-trailer-signature:" + trailerSignature + "\r\n" +
"\r\n"
// Create the request
req, err := http.NewRequest("PUT", "http://s3.amazonaws.com/test-bucket/test-object",
bytes.NewReader([]byte(payload)))
assert.NoError(t, err)
req.Header.Set("Host", "s3.amazonaws.com")
req.Header.Set("x-amz-date", amzDate)
req.Header.Set("x-amz-content-sha256", hashedPayload)
req.Header.Set("Content-Encoding", "aws-chunked")
req.Header.Set("x-amz-decoded-content-length", fmt.Sprintf("%d", chunk1DataLen))
req.Header.Set("x-amz-trailer", "x-amz-checksum-crc32")
authHeader := fmt.Sprintf("AWS4-HMAC-SHA256 Credential=%s/%s, SignedHeaders=%s, Signature=%s",
defaultAccessKeyId, scope, signedHeaders, seedSignature)
req.Header.Set("Authorization", authHeader)
return req, chunk1Data
}
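For reference, the payload assembled by this helper has the following on-wire shape; the chunk length "c" is hex for the 12-byte body, and signatures are abbreviated:

    c;chunk-signature=<chunk1Signature>\r\n
    hello world\n
    \r\n
    0;chunk-signature=<finalSignature>\r\n
    x-amz-checksum-crc32:<base64 CRC32>\r\n
    x-amz-trailer-signature:<trailerSignature>\r\n
    \r\n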
// TestSignedStreamingUploadWithTrailer tests streaming uploads with signed chunks and trailers
// This tests the STREAMING-AWS4-HMAC-SHA256-PAYLOAD-TRAILER content-sha256 header value
// which is used by AWS SDK v2 when checksum validation is enabled
func TestSignedStreamingUploadWithTrailer(t *testing.T) {
iam := setupIam()
req, expectedData := createTrailerStreamingRequest(t, true)
// Test the chunked reader
reader, errCode := iam.newChunkedReader(req)
assert.Equal(t, s3err.ErrNone, errCode)
assert.NotNil(t, reader)
// Read and verify the payload
data, err := io.ReadAll(reader)
assert.NoError(t, err)
assert.Equal(t, expectedData, string(data))
}
// TestSignedStreamingUploadWithTrailerInvalidSignature tests behavior with invalid trailer signatures.
// This is a negative test case for trailer signature validation. It currently verifies that an invalid
// signature doesn't break content reading, and is prepared for when validation is implemented.
func TestSignedStreamingUploadWithTrailerInvalidSignature(t *testing.T) {
iam := setupIam()
req, expectedData := createTrailerStreamingRequest(t, false)
// Test the chunked reader - it should be created successfully
reader, errCode := iam.newChunkedReader(req)
assert.Equal(t, s3err.ErrNone, errCode)
assert.NotNil(t, reader)
// Read the payload. Trailer signature validation may not yet be implemented;
// this test documents the current behavior and will catch regressions if
// validation is added in the future.
data, err := io.ReadAll(reader)
// Note: If trailer signature validation is implemented, this should fail with an error
// For now, we just verify the content is correctly extracted
if err != nil {
assert.Contains(t, err.Error(), "signature", "Error should indicate signature mismatch")
} else {
// If no error, content should still be correct (trailer sig validation not yet implemented)
assert.Equal(t, expectedData, string(data))
}
}
// TestSignedStreamingUploadInvalidSignature tests that invalid chunk signatures are rejected
// This is a negative test case to ensure signature validation is actually working
func TestSignedStreamingUploadInvalidSignature(t *testing.T) {

5
weed/s3api/filer_multipart.go

@ -187,7 +187,10 @@ func (s3a *S3ApiServer) completeMultipartUpload(r *http.Request, input *s3.Compl
sort.Ints(completedPartNumbers)
uploadDirectory := s3a.genUploadsFolder(*input.Bucket) + "/" + *input.UploadId
entries, _, err := s3a.list(uploadDirectory, "", "", false, 0)
// Use explicit limit to ensure all parts are listed (up to S3's max of 10,000 parts)
// Previously limit=0 relied on server's DirListingLimit default (1000 in weed server mode),
// which caused CompleteMultipartUpload to fail for uploads with more than 1000 parts.
entries, _, err := s3a.list(uploadDirectory, "", "", false, s3_constants.MaxS3MultipartParts+1)
if err != nil {
glog.Errorf("completeMultipartUpload %s %s error: %v, entries:%d", *input.Bucket, *input.UploadId, err, len(entries))
stats.S3HandlerCounter.WithLabelValues(stats.ErrorCompletedNoSuchUpload).Inc()

16
weed/s3api/s3api_auth.go

@ -48,14 +48,22 @@ func isRequestPostPolicySignatureV4(r *http.Request) bool {
}
// Verify if the request has AWS Streaming Signature Version '4'. This is only valid for 'PUT' operation.
// Supports both with and without trailer variants:
// - STREAMING-AWS4-HMAC-SHA256-PAYLOAD (original)
// - STREAMING-AWS4-HMAC-SHA256-PAYLOAD-TRAILER (with trailing checksums)
func isRequestSignStreamingV4(r *http.Request) bool {
return r.Header.Get("x-amz-content-sha256") == streamingContentSHA256 &&
r.Method == http.MethodPut
if r.Method != http.MethodPut {
return false
}
contentSha256 := r.Header.Get("x-amz-content-sha256")
return contentSha256 == streamingContentSHA256 || contentSha256 == streamingContentSHA256Trailer
}
func isRequestUnsignedStreaming(r *http.Request) bool {
return r.Header.Get("x-amz-content-sha256") == streamingUnsignedPayload &&
r.Method == http.MethodPut
if r.Method != http.MethodPut {
return false
}
return r.Header.Get("x-amz-content-sha256") == streamingUnsignedPayload
}
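A minimal request that now takes the signed-streaming path (a sketch; host, key, and body are placeholders):

    body := strings.NewReader("...aws-chunked payload...")
    req, _ := http.NewRequest(http.MethodPut, "http://s3.example.com/bucket/key", body)
    req.Header.Set("x-amz-content-sha256", "STREAMING-AWS4-HMAC-SHA256-PAYLOAD-TRAILER")
    // isRequestSignStreamingV4(req) == true (previously only the non-TRAILER
    // value matched). With "STREAMING-UNSIGNED-PAYLOAD-TRAILER" instead, the
    // request would satisfy isRequestUnsignedStreaming.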
// Authorization type.

17
weed/s3api/s3api_bucket_config.go

@ -514,12 +514,27 @@ func (s3a *S3ApiServer) isVersioningConfigured(bucket string) (bool, error) {
return config.Versioning != "" || config.ObjectLockConfig != nil, nil
}
// isObjectLockEnabled checks if Object Lock is enabled for a bucket (with caching)
func (s3a *S3ApiServer) isObjectLockEnabled(bucket string) (bool, error) {
config, errCode := s3a.getBucketConfig(bucket)
if errCode != s3err.ErrNone {
if errCode == s3err.ErrNoSuchBucket {
return false, filer_pb.ErrNotFound
}
return false, fmt.Errorf("failed to get bucket config: %v", errCode)
}
return config.ObjectLockConfig != nil, nil
}
// getVersioningState returns the detailed versioning state for a bucket
func (s3a *S3ApiServer) getVersioningState(bucket string) (string, error) {
config, errCode := s3a.getBucketConfig(bucket)
if errCode != s3err.ErrNone {
if errCode == s3err.ErrNoSuchBucket {
return "", nil
// Signal to callers that the bucket does not exist so they can
// decide whether to auto-create it (e.g., in PUT handlers).
return "", filer_pb.ErrNotFound
}
glog.Errorf("getVersioningState: failed to get bucket config for %s: %v", bucket, errCode)
return "", fmt.Errorf("failed to get bucket config: %v", errCode)

74
weed/s3api/s3api_bucket_handlers.go

@ -244,46 +244,64 @@ func (s3a *S3ApiServer) PutBucketHandler(w http.ResponseWriter, r *http.Request)
return
}
// create the folder for bucket, but lazily create actual collection
if err := s3a.mkdir(s3a.option.BucketsPath, bucket, setBucketOwner(r)); err != nil {
glog.Errorf("PutBucketHandler mkdir: %v", err)
s3err.WriteErrorResponse(w, r, s3err.ErrInternalError)
return
}
// Check for x-amz-bucket-object-lock-enabled header BEFORE creating bucket
// This allows us to create the bucket with Object Lock configuration atomically
objectLockEnabled := strings.EqualFold(r.Header.Get(s3_constants.AmzBucketObjectLockEnabled), "true")
// Capture any Object Lock configuration error from within the callback
// The mkdir callback doesn't support returning errors, so we capture it here
var objectLockSetupError error
// Create the folder for bucket with all settings atomically
// This ensures Object Lock configuration is set in the same CreateEntry call,
// preventing race conditions where the bucket exists without Object Lock enabled
if err := s3a.mkdir(s3a.option.BucketsPath, bucket, func(entry *filer_pb.Entry) {
// Set bucket owner
setBucketOwner(r)(entry)
// Remove bucket from negative cache after successful creation
if s3a.bucketConfigCache != nil {
s3a.bucketConfigCache.RemoveNegativeCache(bucket)
}
// Set Object Lock configuration atomically during bucket creation
if objectLockEnabled {
glog.V(3).Infof("PutBucketHandler: enabling Object Lock and Versioning for bucket %s atomically", bucket)
// Check for x-amz-bucket-object-lock-enabled header (S3 standard compliance)
if objectLockHeaderValue := r.Header.Get(s3_constants.AmzBucketObjectLockEnabled); strings.EqualFold(objectLockHeaderValue, "true") {
glog.V(3).Infof("PutBucketHandler: enabling Object Lock and Versioning for bucket %s due to x-amz-bucket-object-lock-enabled header", bucket)
if entry.Extended == nil {
entry.Extended = make(map[string][]byte)
}
// Atomically update the configuration of the specified bucket. See the updateBucketConfig
// function definition for detailed documentation on parameters and behavior.
errCode := s3a.updateBucketConfig(bucket, func(bucketConfig *BucketConfig) error {
// Enable versioning (required for Object Lock)
bucketConfig.Versioning = s3_constants.VersioningEnabled
entry.Extended[s3_constants.ExtVersioningKey] = []byte(s3_constants.VersioningEnabled)
// Create basic Object Lock configuration (enabled without default retention)
// Create and store Object Lock configuration
objectLockConfig := &ObjectLockConfiguration{
ObjectLockEnabled: s3_constants.ObjectLockEnabled,
}
// Set the cached Object Lock configuration
bucketConfig.ObjectLockConfig = objectLockConfig
if err := StoreObjectLockConfigurationInExtended(entry, objectLockConfig); err != nil {
glog.Errorf("PutBucketHandler: failed to store Object Lock config for bucket %s: %v", bucket, err)
objectLockSetupError = err
// Note: The entry will still be created, but we'll roll it back below
} else {
glog.V(3).Infof("PutBucketHandler: set ObjectLockConfig for bucket %s: %+v", bucket, objectLockConfig)
}
}
}); err != nil {
glog.Errorf("PutBucketHandler mkdir: %v", err)
s3err.WriteErrorResponse(w, r, s3err.ErrInternalError)
return
}
return nil
})
if errCode != s3err.ErrNone {
glog.Errorf("PutBucketHandler: failed to enable Object Lock for bucket %s: %v", bucket, errCode)
s3err.WriteErrorResponse(w, r, errCode)
// If Object Lock setup failed, roll back the bucket creation
// This ensures we don't leave a bucket without the requested Object Lock configuration
if objectLockSetupError != nil {
glog.Errorf("PutBucketHandler: rolling back bucket %s creation due to Object Lock setup failure: %v", bucket, objectLockSetupError)
if deleteErr := s3a.rm(s3a.option.BucketsPath, bucket, true, true); deleteErr != nil {
glog.Errorf("PutBucketHandler: failed to rollback bucket %s after Object Lock setup failure: %v", bucket, deleteErr)
}
s3err.WriteErrorResponse(w, r, s3err.ErrInternalError)
return
}
glog.V(3).Infof("PutBucketHandler: enabled Object Lock and Versioning for bucket %s", bucket)
// Remove bucket from negative cache after successful creation
if s3a.bucketConfigCache != nil {
s3a.bucketConfigCache.RemoveNegativeCache(bucket)
}
w.Header().Set("Location", "/"+bucket)

42
weed/s3api/s3api_object_handlers.go

@ -659,16 +659,14 @@ func (s3a *S3ApiServer) GetObjectHandler(w http.ResponseWriter, r *http.Request)
glog.V(3).Infof("GetObject: Set PartsCount=%d for multipart GET with PartNumber=%d", partsCount, partNumber)
// Calculate the byte range for this part
// Note: ETag is NOT overridden - AWS S3 returns the complete object's ETag
// even when requesting a specific part via PartNumber
var startOffset, endOffset int64
if partInfo != nil {
// Use part boundaries from metadata (accurate for multi-chunk parts)
startOffset = objectEntryForSSE.Chunks[partInfo.StartChunk].Offset
lastChunk := objectEntryForSSE.Chunks[partInfo.EndChunk-1]
endOffset = lastChunk.Offset + int64(lastChunk.Size) - 1
// Override ETag with the part's ETag from metadata
w.Header().Set("ETag", "\""+partInfo.ETag+"\"")
glog.V(3).Infof("GetObject: Override ETag with part %d ETag: %s (from metadata)", partNumber, partInfo.ETag)
} else {
// Fallback: assume 1:1 part-to-chunk mapping (backward compatibility)
chunkIndex := partNumber - 1
@ -680,15 +678,6 @@ func (s3a *S3ApiServer) GetObjectHandler(w http.ResponseWriter, r *http.Request)
partChunk := objectEntryForSSE.Chunks[chunkIndex]
startOffset = partChunk.Offset
endOffset = partChunk.Offset + int64(partChunk.Size) - 1
// Override ETag with chunk's ETag (fallback)
if partChunk.ETag != "" {
if md5Bytes, decodeErr := base64.StdEncoding.DecodeString(partChunk.ETag); decodeErr == nil {
partETag := fmt.Sprintf("%x", md5Bytes)
w.Header().Set("ETag", "\""+partETag+"\"")
glog.V(3).Infof("GetObject: Override ETag with part %d ETag: %s (fallback from chunk)", partNumber, partETag)
}
}
}
// Check if client supplied a Range header - if so, apply it within the part's boundaries
@ -2266,7 +2255,7 @@ func (s3a *S3ApiServer) HeadObjectHandler(w http.ResponseWriter, r *http.Request
if partNumberStr != "" {
if partNumber, parseErr := strconv.Atoi(partNumberStr); parseErr == nil && partNumber > 0 {
// Get actual parts count from metadata (not chunk count)
partsCount, partInfo := s3a.getMultipartInfo(objectEntryForSSE, partNumber)
partsCount, _ := s3a.getMultipartInfo(objectEntryForSSE, partNumber)
// Validate part number
if partNumber > partsCount {
@ -2276,31 +2265,10 @@ func (s3a *S3ApiServer) HeadObjectHandler(w http.ResponseWriter, r *http.Request
}
// Set parts count header
// Note: ETag is NOT overridden - AWS S3 returns the complete object's ETag
// even when requesting a specific part via PartNumber
w.Header().Set(s3_constants.AmzMpPartsCount, strconv.Itoa(partsCount))
glog.V(3).Infof("HeadObject: Set PartsCount=%d for part %d", partsCount, partNumber)
// Override ETag with the part's ETag
if partInfo != nil {
// Use part ETag from metadata (accurate for multi-chunk parts)
w.Header().Set("ETag", "\""+partInfo.ETag+"\"")
glog.V(3).Infof("HeadObject: Override ETag with part %d ETag: %s (from metadata)", partNumber, partInfo.ETag)
} else {
// Fallback: use chunk's ETag (backward compatibility)
chunkIndex := partNumber - 1
if chunkIndex >= len(objectEntryForSSE.Chunks) {
glog.Warningf("HeadObject: Part %d chunk index %d out of range (chunks: %d)", partNumber, chunkIndex, len(objectEntryForSSE.Chunks))
s3err.WriteErrorResponse(w, r, s3err.ErrInvalidPart)
return
}
partChunk := objectEntryForSSE.Chunks[chunkIndex]
if partChunk.ETag != "" {
if md5Bytes, decodeErr := base64.StdEncoding.DecodeString(partChunk.ETag); decodeErr == nil {
partETag := fmt.Sprintf("%x", md5Bytes)
w.Header().Set("ETag", "\""+partETag+"\"")
glog.V(3).Infof("HeadObject: Override ETag with part %d ETag: %s (fallback from chunk)", partNumber, partETag)
}
}
}
}
}
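The resulting behavior matches AWS: a part-number request returns the whole object's ETag alongside the parts count. An illustration, assuming the conventional multipart ETag form (MD5 of the part MD5s, suffixed with the part count):

    // HEAD /bucket/obj?partNumber=2     (object uploaded in 3 parts)
    //   ETag: "9b2cf535f27731c974343645a3985328-3"   // object ETag, not part 2's
    //   x-amz-mp-parts-count: 3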

58
weed/s3api/s3api_object_handlers_delete.go

@ -1,12 +1,10 @@
package s3api
import (
"context"
"encoding/xml"
"fmt"
"io"
"net/http"
"slices"
"strings"
"github.com/seaweedfs/seaweedfs/weed/filer"
@ -127,23 +125,11 @@ func (s3a *S3ApiServer) DeleteObjectHandler(w http.ResponseWriter, r *http.Reque
dir, name := target.DirAndName()
err := s3a.WithFilerClient(false, func(client filer_pb.SeaweedFilerClient) error {
// Use operation context that won't be cancelled if request terminates
// This ensures deletion completes atomically to avoid inconsistent state
opCtx := context.WithoutCancel(r.Context())
if err := doDeleteEntry(client, dir, name, true, false); err != nil {
return err
}
// Cleanup empty directories
if !s3a.option.AllowEmptyFolder && strings.LastIndex(object, "/") > 0 {
bucketPath := fmt.Sprintf("%s/%s", s3a.option.BucketsPath, bucket)
// Recursively delete empty parent directories, stop at bucket path
filer_pb.DoDeleteEmptyParentDirectories(opCtx, client, util.FullPath(dir), util.FullPath(bucketPath), nil)
}
return nil
return doDeleteEntry(client, dir, name, true, false)
// Note: Empty folder cleanup is now handled asynchronously by EmptyFolderCleaner
// which listens to metadata events and uses consistent hashing for coordination
})
if err != nil {
s3err.WriteErrorResponse(w, r, s3err.ErrInternalError)
return
@ -222,8 +208,6 @@ func (s3a *S3ApiServer) DeleteMultipleObjectsHandler(w http.ResponseWriter, r *h
var deleteErrors []DeleteError
var auditLog *s3err.AccessLog
directoriesWithDeletion := make(map[string]bool)
if s3err.Logger != nil {
auditLog = s3err.GetAccessLog(r, http.StatusNoContent, s3err.ErrNone)
}
@ -245,10 +229,6 @@ func (s3a *S3ApiServer) DeleteMultipleObjectsHandler(w http.ResponseWriter, r *h
versioningConfigured := (versioningState != "")
s3a.WithFilerClient(false, func(client filer_pb.SeaweedFilerClient) error {
// Use operation context that won't be cancelled if request terminates
// This ensures batch deletion completes atomically to avoid inconsistent state
opCtx := context.WithoutCancel(r.Context())
// delete file entries
for _, object := range deleteObjects.Objects {
if object.Key == "" {
@ -357,10 +337,6 @@ func (s3a *S3ApiServer) DeleteMultipleObjectsHandler(w http.ResponseWriter, r *h
err := doDeleteEntry(client, parentDirectoryPath, entryName, isDeleteData, isRecursive)
if err == nil {
// Track directory for empty directory cleanup
if !s3a.option.AllowEmptyFolder {
directoriesWithDeletion[parentDirectoryPath] = true
}
deletedObjects = append(deletedObjects, object)
} else if strings.Contains(err.Error(), filer.MsgFailDelNonEmptyFolder) {
deletedObjects = append(deletedObjects, object)
@ -380,30 +356,8 @@ func (s3a *S3ApiServer) DeleteMultipleObjectsHandler(w http.ResponseWriter, r *h
}
}
// Cleanup empty directories - optimize by processing deepest first
if !s3a.option.AllowEmptyFolder && len(directoriesWithDeletion) > 0 {
bucketPath := fmt.Sprintf("%s/%s", s3a.option.BucketsPath, bucket)
// Collect and sort directories by depth (deepest first) to avoid redundant checks
var allDirs []string
for dirPath := range directoriesWithDeletion {
allDirs = append(allDirs, dirPath)
}
// Sort by depth (deeper directories first)
slices.SortFunc(allDirs, func(a, b string) int {
return strings.Count(b, "/") - strings.Count(a, "/")
})
// Track already-checked directories to avoid redundant work
checked := make(map[string]bool)
for _, dirPath := range allDirs {
if !checked[dirPath] {
// Recursively delete empty parent directories, stop at bucket path
// Mark this directory and all its parents as checked during recursion
filer_pb.DoDeleteEmptyParentDirectories(opCtx, client, util.FullPath(dirPath), util.FullPath(bucketPath), checked)
}
}
}
// Note: Empty folder cleanup is now handled asynchronously by EmptyFolderCleaner
// which listens to metadata events and uses consistent hashing for coordination
return nil
})
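The EmptyFolderCleaner referenced in these notes lives in weed/filer/empty_folder_cleanup, added elsewhere in this merge. As a rough sketch of the coordination idea (hypothetical names, not the actual implementation): consistent hashing lets each filer deterministically claim cleanup ownership of a directory, so multiple filers never race on the same parent chain.

```go
package main

import (
	"fmt"
	"hash/fnv"
	"sort"
)

// ownsDirectory reports whether this filer should handle cleanup for dirPath.
// Every peer sorts the same peer list and hashes the same key, so exactly
// one of them claims each directory with no coordination traffic.
func ownsDirectory(self string, peers []string, dirPath string) bool {
	sorted := append([]string(nil), peers...)
	sort.Strings(sorted)
	h := fnv.New32a()
	h.Write([]byte(dirPath))
	owner := sorted[h.Sum32()%uint32(len(sorted))]
	return owner == self
}

func main() {
	peers := []string{"filer-a:8888", "filer-b:8888", "filer-c:8888"}
	fmt.Println(ownsDirectory("filer-a:8888", peers, "/buckets/b1/photos/2024"))
}
```

Any hash function works here, as long as all peers agree on the ordering of the peer list.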

8
weed/s3api/s3api_object_handlers_list.go

@ -554,16 +554,8 @@ func (s3a *S3ApiServer) doListFilerEntries(client filer_pb.SeaweedFilerClient, d
}
// println("doListFilerEntries2 nextMarker", nextMarker)
} else {
var isEmpty bool
if !s3a.option.AllowEmptyFolder && entry.IsOlderDir() {
//if isEmpty, err = s3a.ensureDirectoryAllEmpty(client, dir, entry.Name); err != nil {
// glog.Errorf("check empty folder %s: %v", dir, err)
//}
}
if !isEmpty {
eachEntryFn(dir, entry)
}
}
} else {
eachEntryFn(dir, entry)
// glog.V(4).Infof("List File Entries %s, file: %s, maxKeys %d", dir, entry.Name, cursor.maxKeys)

30
weed/s3api/s3api_object_handlers_put.go

@ -30,14 +30,14 @@ import (
// Object lock validation errors
var (
ErrObjectLockVersioningRequired = errors.New("object lock headers can only be used on versioned buckets")
ErrObjectLockVersioningRequired = errors.New("object lock headers can only be used on buckets with Object Lock enabled")
ErrInvalidObjectLockMode = errors.New("invalid object lock mode")
ErrInvalidLegalHoldStatus = errors.New("invalid legal hold status")
ErrInvalidRetentionDateFormat = errors.New("invalid retention until date format")
ErrRetentionDateMustBeFuture = errors.New("retain until date must be in the future")
ErrObjectLockModeRequiresDate = errors.New("object lock mode requires retention until date")
ErrRetentionDateRequiresMode = errors.New("retention until date requires object lock mode")
ErrGovernanceBypassVersioningRequired = errors.New("governance bypass header can only be used on versioned buckets")
ErrGovernanceBypassVersioningRequired = errors.New("governance bypass header can only be used on buckets with Object Lock enabled")
ErrInvalidObjectLockDuration = errors.New("object lock duration must be greater than 0 days")
ErrObjectLockDurationExceeded = errors.New("object lock duration exceeds maximum allowed days")
ErrObjectLockConfigurationMissingEnabled = errors.New("object lock configuration must specify ObjectLockEnabled")
@ -159,8 +159,16 @@ func (s3a *S3ApiServer) PutObjectHandler(w http.ResponseWriter, r *http.Request)
glog.V(3).Infof("PutObjectHandler: bucket=%s, object=%s, versioningState='%s', versioningEnabled=%v, versioningConfigured=%v", bucket, object, versioningState, versioningEnabled, versioningConfigured)
// Check if Object Lock is enabled for this bucket
objectLockEnabled, err := s3a.isObjectLockEnabled(bucket)
if err != nil && !errors.Is(err, filer_pb.ErrNotFound) {
glog.Errorf("Error checking Object Lock status for bucket %s: %v", bucket, err)
s3err.WriteErrorResponse(w, r, s3err.ErrInternalError)
return
}
// Validate object lock headers before processing
if err := s3a.validateObjectLockHeaders(r, versioningEnabled); err != nil {
if err := s3a.validateObjectLockHeaders(r, objectLockEnabled); err != nil {
glog.V(2).Infof("PutObjectHandler: object lock header validation failed for bucket %s, object %s: %v", bucket, object, err)
s3err.WriteErrorResponse(w, r, mapValidationErrorToS3Error(err))
return
@ -1311,7 +1319,8 @@ func (s3a *S3ApiServer) applyBucketDefaultRetention(bucket string, entry *filer_
}
// validateObjectLockHeaders validates object lock headers in PUT requests
func (s3a *S3ApiServer) validateObjectLockHeaders(r *http.Request, versioningEnabled bool) error {
// objectLockEnabled should be true only if the bucket has Object Lock configured
func (s3a *S3ApiServer) validateObjectLockHeaders(r *http.Request, objectLockEnabled bool) error {
// Extract object lock headers from request
mode := r.Header.Get(s3_constants.AmzObjectLockMode)
retainUntilDateStr := r.Header.Get(s3_constants.AmzObjectLockRetainUntilDate)
@ -1320,8 +1329,11 @@ func (s3a *S3ApiServer) validateObjectLockHeaders(r *http.Request, versioningEna
// Check if any object lock headers are present
hasObjectLockHeaders := mode != "" || retainUntilDateStr != "" || legalHold != ""
// Object lock headers can only be used on versioned buckets
if hasObjectLockHeaders && !versioningEnabled {
// Object lock headers can only be used on buckets with Object Lock enabled
// Per AWS S3: Object Lock can only be enabled at bucket creation, and once enabled,
// objects can have retention/legal-hold metadata. Without Object Lock enabled,
// these headers must be rejected.
if hasObjectLockHeaders && !objectLockEnabled {
return ErrObjectLockVersioningRequired
}
@ -1362,11 +1374,11 @@ func (s3a *S3ApiServer) validateObjectLockHeaders(r *http.Request, versioningEna
}
}
// Check for governance bypass header - only valid for versioned buckets
// Check for governance bypass header - only valid for buckets with Object Lock enabled
bypassGovernance := r.Header.Get("x-amz-bypass-governance-retention") == "true"
// Governance bypass headers are only valid for versioned buckets (like object lock headers)
if bypassGovernance && !versioningEnabled {
// Governance bypass headers are only valid for buckets with Object Lock enabled
if bypassGovernance && !objectLockEnabled {
return ErrGovernanceBypassVersioningRequired
}
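To make the new gate concrete: a PUT carrying any object-lock header is rejected unless the bucket has Object Lock enabled, regardless of its versioning state. Below is a minimal standalone mirror of the check (the standard AWS header names are assumed to match s3_constants; this is not the handler itself):

```go
package main

import (
	"errors"
	"fmt"
	"net/http"
	"net/http/httptest"
)

var errLockRequiresObjectLock = errors.New("object lock headers can only be used on buckets with Object Lock enabled")

// checkLockHeaders mirrors the gate in validateObjectLockHeaders.
func checkLockHeaders(r *http.Request, objectLockEnabled bool) error {
	hasLockHeaders := r.Header.Get("X-Amz-Object-Lock-Mode") != "" ||
		r.Header.Get("X-Amz-Object-Lock-Retain-Until-Date") != "" ||
		r.Header.Get("X-Amz-Object-Lock-Legal-Hold") != ""
	if hasLockHeaders && !objectLockEnabled {
		return errLockRequiresObjectLock
	}
	return nil
}

func main() {
	r := httptest.NewRequest(http.MethodPut, "/bucket/key", nil)
	r.Header.Set("X-Amz-Object-Lock-Mode", "GOVERNANCE")
	fmt.Println(checkLockHeaders(r, false)) // rejected: bucket lacks Object Lock
	fmt.Println(checkLockHeaders(r, true))  // <nil>: allowed
}
```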

18
weed/s3api/s3api_object_retention.go

@ -586,10 +586,26 @@ func (s3a *S3ApiServer) evaluateGovernanceBypassRequest(r *http.Request, bucket,
// enforceObjectLockProtections enforces object lock protections for operations
func (s3a *S3ApiServer) enforceObjectLockProtections(request *http.Request, bucket, object, versionId string, governanceBypassAllowed bool) error {
// Quick check: if bucket doesn't have Object Lock enabled, skip the expensive entry lookup
// This optimization avoids a filer gRPC call for every DELETE operation on buckets without Object Lock
objectLockEnabled, err := s3a.isObjectLockEnabled(bucket)
if err != nil {
if errors.Is(err, filer_pb.ErrNotFound) {
// Bucket does not exist, so no protections to enforce
return nil
}
// For other errors, we can't determine lock status, so we should fail.
glog.Errorf("enforceObjectLockProtections: failed to check object lock for bucket %s: %v", bucket, err)
return err
}
if !objectLockEnabled {
// Object Lock is not enabled on this bucket, no protections to enforce
return nil
}
// Get the object entry to check both retention and legal hold
// For delete operations without versionId, we need to check the latest version
var entry *filer_pb.Entry
var err error
if versionId != "" {
// Check specific version

1
weed/s3api/s3api_server.go

@ -43,7 +43,6 @@ type S3ApiServerOption struct {
AllowedOrigins []string
BucketsPath string
GrpcDialOption grpc.DialOption
AllowEmptyFolder bool
AllowDeleteBucketNotEmpty bool
LocalFilerSocket string
DataCenter string

11
weed/server/common.go

@ -9,9 +9,9 @@ import (
"fmt"
"io"
"io/fs"
"mime"
"mime/multipart"
"net/http"
"net/url"
"path/filepath"
"strconv"
"strings"
@ -286,14 +286,15 @@ func adjustHeaderContentDisposition(w http.ResponseWriter, r *http.Request, file
return
}
if filename != "" {
filename = url.QueryEscape(filename)
contentDisposition := "inline"
dispositionType := "inline"
if r.FormValue("dl") != "" {
if dl, _ := strconv.ParseBool(r.FormValue("dl")); dl {
contentDisposition = "attachment"
dispositionType = "attachment"
}
}
w.Header().Set("Content-Disposition", contentDisposition+`; filename="`+fileNameEscaper.Replace(filename)+`"`)
// Use mime.FormatMediaType for RFC 6266 compliant Content-Disposition,
// properly handling non-ASCII characters and special characters
w.Header().Set("Content-Disposition", mime.FormatMediaType(dispositionType, map[string]string{"filename": filename}))
}
}
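For reference, mime.FormatMediaType emits RFC 2231 extended parameters for non-ASCII filenames, which is what makes the header RFC 6266 compliant. Expected standard-library output is shown in comments:

```go
package main

import (
	"fmt"
	"mime"
)

func main() {
	// Plain ASCII stays a simple (quoted only when necessary) parameter.
	fmt.Println(mime.FormatMediaType("attachment", map[string]string{"filename": "report.pdf"}))
	// attachment; filename=report.pdf

	// Non-ASCII becomes an RFC 2231 extended parameter.
	fmt.Println(mime.FormatMediaType("inline", map[string]string{"filename": "résumé.pdf"}))
	// inline; filename*=utf-8''r%C3%A9sum%C3%A9.pdf
}
```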

4
weed/server/master_grpc_server.go

@ -137,8 +137,8 @@ func (ms *MasterServer) SendHeartbeat(stream master_pb.Seaweed_SendHeartbeatServ
dcName, rackName := ms.Topo.Configuration.Locate(heartbeat.Ip, heartbeat.DataCenter, heartbeat.Rack)
dc := ms.Topo.GetOrCreateDataCenter(dcName)
rack := dc.GetOrCreateRack(rackName)
dn = rack.GetOrCreateDataNode(heartbeat.Ip, int(heartbeat.Port), int(heartbeat.GrpcPort), heartbeat.PublicUrl, heartbeat.MaxVolumeCounts)
glog.V(0).Infof("added volume server %d: %v:%d %v", dn.Counter, heartbeat.GetIp(), heartbeat.GetPort(), heartbeat.LocationUuids)
dn = rack.GetOrCreateDataNode(heartbeat.Ip, int(heartbeat.Port), int(heartbeat.GrpcPort), heartbeat.PublicUrl, heartbeat.Id, heartbeat.MaxVolumeCounts)
glog.V(0).Infof("added volume server %d: %v (id=%s, ip=%v:%d) %v", dn.Counter, dn.Id(), heartbeat.Id, heartbeat.GetIp(), heartbeat.GetPort(), heartbeat.LocationUuids)
uuidlist, err := ms.RegisterUuids(heartbeat)
if err != nil {
if stream_err := stream.Send(&master_pb.HeartbeatResponse{

2
weed/server/master_grpc_server_volume.go

@ -253,7 +253,7 @@ func (ms *MasterServer) LookupEcVolume(ctx context.Context, req *master_pb.Looku
var locations []*master_pb.Location
for _, dn := range shardLocations {
locations = append(locations, &master_pb.Location{
Url: string(dn.Id()),
Url: dn.Url(),
PublicUrl: dn.PublicUrl,
DataCenter: dn.GetDataCenterId(),
})

12
weed/server/volume_grpc_copy.go

@ -115,7 +115,7 @@ func (vs *VolumeServer) VolumeCopy(req *volume_server_pb.VolumeCopyRequest, stre
var sendErr error
var ioBytePerSecond int64
if req.IoBytePerSecond <= 0 {
ioBytePerSecond = vs.compactionBytePerSecond
ioBytePerSecond = vs.maintenanceBytePerSecond
} else {
ioBytePerSecond = req.IoBytePerSecond
}
@ -199,7 +199,7 @@ func (vs *VolumeServer) VolumeCopy(req *volume_server_pb.VolumeCopyRequest, stre
}
func (vs *VolumeServer) doCopyFile(client volume_server_pb.VolumeServerClient, isEcVolume bool, collection string, vid, compactRevision uint32, stopOffset uint64, baseFileName, ext string, isAppend, ignoreSourceFileNotFound bool, progressFn storage.ProgressFunc) (modifiedTsNs int64, err error) {
return vs.doCopyFileWithThrottler(client, isEcVolume, collection, vid, compactRevision, stopOffset, baseFileName, ext, isAppend, ignoreSourceFileNotFound, progressFn, util.NewWriteThrottler(vs.compactionBytePerSecond))
return vs.doCopyFileWithThrottler(client, isEcVolume, collection, vid, compactRevision, stopOffset, baseFileName, ext, isAppend, ignoreSourceFileNotFound, progressFn, util.NewWriteThrottler(vs.maintenanceBytePerSecond))
}
func (vs *VolumeServer) doCopyFileWithThrottler(client volume_server_pb.VolumeServerClient, isEcVolume bool, collection string, vid, compactRevision uint32, stopOffset uint64, baseFileName, ext string, isAppend, ignoreSourceFileNotFound bool, progressFn storage.ProgressFunc, throttler *util.WriteThrottler) (modifiedTsNs int64, err error) {
@ -264,7 +264,7 @@ func writeToFile(client volume_server_pb.VolumeServer_CopyFileClient, fileName s
}
dst, err := os.OpenFile(fileName, flags, 0644)
if err != nil {
return modifiedTsNs, nil
return modifiedTsNs, fmt.Errorf("open file %s: %w", fileName, err)
}
defer dst.Close()
@ -278,9 +278,11 @@ func writeToFile(client volume_server_pb.VolumeServer_CopyFileClient, fileName s
modifiedTsNs = resp.ModifiedTsNs
}
if receiveErr != nil {
return modifiedTsNs, fmt.Errorf("receiving %s: %v", fileName, receiveErr)
return modifiedTsNs, fmt.Errorf("receiving %s: %w", fileName, receiveErr)
}
if _, writeErr := dst.Write(resp.FileContent); writeErr != nil {
return modifiedTsNs, fmt.Errorf("write file %s: %w", fileName, writeErr)
}
dst.Write(resp.FileContent)
progressedBytes += int64(len(resp.FileContent))
if progressFn != nil {
if !progressFn(progressedBytes) {

7
weed/server/volume_server.go

@ -47,6 +47,7 @@ type VolumeServer struct {
FixJpgOrientation bool
ReadMode string
compactionBytePerSecond int64
maintenanceBytePerSecond int64
metricsAddress string
metricsIntervalSec int
fileSizeLimitBytes int64
@ -55,7 +56,7 @@ type VolumeServer struct {
}
func NewVolumeServer(adminMux, publicMux *http.ServeMux, ip string,
port int, grpcPort int, publicUrl string,
port int, grpcPort int, publicUrl string, id string,
folders []string, maxCounts []int32, minFreeSpaces []util.MinFreeSpace, diskTypes []types.DiskType,
idxFolder string,
needleMapKind storage.NeedleMapKind,
@ -65,6 +66,7 @@ func NewVolumeServer(adminMux, publicMux *http.ServeMux, ip string,
fixJpgOrientation bool,
readMode string,
compactionMBPerSecond int,
maintenanceMBPerSecond int,
fileSizeLimitMB int,
concurrentUploadLimit int64,
concurrentDownloadLimit int64,
@ -94,6 +96,7 @@ func NewVolumeServer(adminMux, publicMux *http.ServeMux, ip string,
ReadMode: readMode,
grpcDialOption: security.LoadClientTLS(util.GetViper(), "grpc.volume"),
compactionBytePerSecond: int64(compactionMBPerSecond) * 1024 * 1024,
maintenanceBytePerSecond: int64(maintenanceMBPerSecond) * 1024 * 1024,
fileSizeLimitBytes: int64(fileSizeLimitMB) * 1024 * 1024,
isHeartbeating: true,
stopChan: make(chan bool),
@ -114,7 +117,7 @@ func NewVolumeServer(adminMux, publicMux *http.ServeMux, ip string,
vs.checkWithMaster()
vs.store = storage.NewStore(vs.grpcDialOption, ip, port, grpcPort, publicUrl, folders, maxCounts, minFreeSpaces, idxFolder, vs.needleMapKind, diskTypes, ldbTimeout)
vs.store = storage.NewStore(vs.grpcDialOption, ip, port, grpcPort, publicUrl, id, folders, maxCounts, minFreeSpaces, idxFolder, vs.needleMapKind, diskTypes, ldbTimeout)
vs.guard = security.NewGuard(whiteList, signingKey, expiresAfterSec, readSigningKey, readExpiresAfterSec)
handleStaticResources(adminMux)

25
weed/server/volume_server_handlers_admin.go

@ -4,28 +4,33 @@ import (
"net/http"
"path/filepath"
"github.com/seaweedfs/seaweedfs/weed/topology"
"github.com/seaweedfs/seaweedfs/weed/util/version"
"github.com/seaweedfs/seaweedfs/weed/pb/volume_server_pb"
"github.com/seaweedfs/seaweedfs/weed/stats"
)
// healthzHandler checks the local health of the volume server.
// It only checks local conditions to avoid cascading failures when remote
// volume servers go down. Previously, this handler checked if all replicated
// volumes could reach their remote replicas, which caused healthy volume
// servers to fail health checks when a peer went down.
// See https://github.com/seaweedfs/seaweedfs/issues/6823
func (vs *VolumeServer) healthzHandler(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Server", "SeaweedFS Volume "+version.VERSION)
volumeInfos := vs.store.VolumeInfos()
for _, vinfo := range volumeInfos {
if len(vinfo.Collection) == 0 {
continue
}
if vinfo.ReplicaPlacement.GetCopyCount() > 1 {
_, err := topology.GetWritableRemoteReplications(vs.store, vs.grpcDialOption, vinfo.Id, vs.GetMaster)
if err != nil {
// Check if the server is shutting down
if vs.store.IsStopping() {
w.WriteHeader(http.StatusServiceUnavailable)
return
}
// Check if we can communicate with master
if !vs.isHeartbeating {
w.WriteHeader(http.StatusServiceUnavailable)
return
}
}
w.WriteHeader(http.StatusOK)
}
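A probe against this endpoint now reflects only the local server's state. Minimal check (the address and /healthz path are assumptions based on the handler name):

```go
package main

import (
	"fmt"
	"net/http"
)

func main() {
	resp, err := http.Get("http://localhost:8080/healthz")
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	// 200 while serving; 503 only when this server is stopping or has lost
	// its master heartbeat, never because a peer replica is unreachable.
	fmt.Println(resp.StatusCode)
}
```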

2
weed/server/volume_server_handlers_read.go

@ -34,8 +34,6 @@ import (
const reqIsProxied = "proxied"
var fileNameEscaper = strings.NewReplacer(`\`, `\\`, `"`, `\"`)
func NotFound(w http.ResponseWriter) {
stats.VolumeServerHandlerCounter.WithLabelValues(stats.ErrorGetNotFound).Inc()
w.WriteHeader(http.StatusNotFound)

5
weed/sftpd/sftp_file_writer.go

@ -72,6 +72,7 @@ func (l listerat) ListAt(ls []os.FileInfo, offset int64) (int, error) {
type SeaweedSftpFileWriter struct {
fs SftpServer
req *sftp.Request
absPath string // Absolute path after HomeDir translation
mu sync.Mutex
tmpFile *os.File
permissions os.FileMode
@ -105,6 +106,6 @@ func (w *SeaweedSftpFileWriter) Close() error {
return err
}
// Stream the file instead of loading it
return w.fs.putFile(w.req.Filepath, w.tmpFile, w.fs.user)
// Stream the file to the absolute path (after HomeDir translation)
return w.fs.putFile(w.absPath, w.tmpFile, w.fs.user)
}

82
weed/sftpd/sftp_filer.go

@ -100,18 +100,26 @@ func (fs *SftpServer) withTimeoutContext(fn func(ctx context.Context) error) err
// ==================== Command Dispatcher ====================
func (fs *SftpServer) dispatchCmd(r *sftp.Request) error {
glog.V(0).Infof("Dispatch: %s %s", r.Method, r.Filepath)
absPath, err := fs.toAbsolutePath(r.Filepath)
if err != nil {
return err
}
glog.V(1).Infof("Dispatch: %s %s (absolute: %s)", r.Method, r.Filepath, absPath)
switch r.Method {
case "Remove":
return fs.removeEntry(r)
return fs.removeEntry(absPath)
case "Rename":
return fs.renameEntry(r)
absTarget, err := fs.toAbsolutePath(r.Target)
if err != nil {
return err
}
return fs.renameEntry(absPath, absTarget)
case "Mkdir":
return fs.makeDir(r)
return fs.makeDir(absPath)
case "Rmdir":
return fs.removeDir(r)
return fs.removeDir(absPath)
case "Setstat":
return fs.setFileStat(r)
return fs.setFileStatWithRequest(absPath, r)
default:
return fmt.Errorf("unsupported: %s", r.Method)
}
@ -120,10 +128,14 @@ func (fs *SftpServer) dispatchCmd(r *sftp.Request) error {
// ==================== File Operations ====================
func (fs *SftpServer) readFile(r *sftp.Request) (io.ReaderAt, error) {
if err := fs.checkFilePermission(r.Filepath, "read"); err != nil {
absPath, err := fs.toAbsolutePath(r.Filepath)
if err != nil {
return nil, err
}
if err := fs.checkFilePermission(absPath, "read"); err != nil {
return nil, err
}
entry, err := fs.getEntry(r.Filepath)
entry, err := fs.getEntry(absPath)
if err != nil {
return nil, err
}
@ -131,7 +143,11 @@ func (fs *SftpServer) readFile(r *sftp.Request) (io.ReaderAt, error) {
}
func (fs *SftpServer) newFileWriter(r *sftp.Request) (io.WriterAt, error) {
dir, _ := util.FullPath(r.Filepath).DirAndName()
absPath, err := fs.toAbsolutePath(r.Filepath)
if err != nil {
return nil, err
}
dir, _ := util.FullPath(absPath).DirAndName()
if err := fs.checkFilePermission(dir, "write"); err != nil {
glog.Errorf("Permission denied for %s", dir)
return nil, err
@ -145,6 +161,7 @@ func (fs *SftpServer) newFileWriter(r *sftp.Request) (io.WriterAt, error) {
return &SeaweedSftpFileWriter{
fs: *fs,
req: r,
absPath: absPath,
tmpFile: tmpFile,
permissions: 0644,
uid: fs.user.Uid,
@ -153,16 +170,20 @@ func (fs *SftpServer) newFileWriter(r *sftp.Request) (io.WriterAt, error) {
}, nil
}
func (fs *SftpServer) removeEntry(r *sftp.Request) error {
return fs.deleteEntry(r.Filepath, false)
func (fs *SftpServer) removeEntry(absPath string) error {
return fs.deleteEntry(absPath, false)
}
func (fs *SftpServer) renameEntry(r *sftp.Request) error {
if err := fs.checkFilePermission(r.Filepath, "rename"); err != nil {
func (fs *SftpServer) renameEntry(absPath, absTarget string) error {
if err := fs.checkFilePermission(absPath, "rename"); err != nil {
return err
}
targetDir, _ := util.FullPath(absTarget).DirAndName()
if err := fs.checkFilePermission(targetDir, "write"); err != nil {
return err
}
oldDir, oldName := util.FullPath(r.Filepath).DirAndName()
newDir, newName := util.FullPath(r.Target).DirAndName()
oldDir, oldName := util.FullPath(absPath).DirAndName()
newDir, newName := util.FullPath(absTarget).DirAndName()
return fs.callWithClient(false, func(ctx context.Context, client filer_pb.SeaweedFilerClient) error {
_, err := client.AtomicRenameEntry(ctx, &filer_pb.AtomicRenameEntryRequest{
OldDirectory: oldDir, OldName: oldName,
@ -172,15 +193,15 @@ func (fs *SftpServer) renameEntry(r *sftp.Request) error {
})
}
func (fs *SftpServer) setFileStat(r *sftp.Request) error {
if err := fs.checkFilePermission(r.Filepath, "write"); err != nil {
func (fs *SftpServer) setFileStatWithRequest(absPath string, r *sftp.Request) error {
if err := fs.checkFilePermission(absPath, "write"); err != nil {
return err
}
entry, err := fs.getEntry(r.Filepath)
entry, err := fs.getEntry(absPath)
if err != nil {
return err
}
dir, _ := util.FullPath(r.Filepath).DirAndName()
dir, _ := util.FullPath(absPath).DirAndName()
// apply attrs
if r.AttrFlags().Permissions {
entry.Attributes.FileMode = uint32(r.Attributes().FileMode())
@ -201,18 +222,22 @@ func (fs *SftpServer) setFileStat(r *sftp.Request) error {
// ==================== Directory Operations ====================
func (fs *SftpServer) listDir(r *sftp.Request) (sftp.ListerAt, error) {
if err := fs.checkFilePermission(r.Filepath, "list"); err != nil {
absPath, err := fs.toAbsolutePath(r.Filepath)
if err != nil {
return nil, err
}
if err := fs.checkFilePermission(absPath, "list"); err != nil {
return nil, err
}
if r.Method == "Stat" || r.Method == "Lstat" {
entry, err := fs.getEntry(r.Filepath)
entry, err := fs.getEntry(absPath)
if err != nil {
return nil, err
}
fi := &EnhancedFileInfo{FileInfo: FileInfoFromEntry(entry), uid: entry.Attributes.Uid, gid: entry.Attributes.Gid}
return listerat([]os.FileInfo{fi}), nil
}
return fs.listAllPages(r.Filepath)
return fs.listAllPages(absPath)
}
func (fs *SftpServer) listAllPages(dirPath string) (sftp.ListerAt, error) {
@ -259,18 +284,19 @@ func (fs *SftpServer) fetchDirectoryPage(dirPath, start string) ([]os.FileInfo,
}
// makeDir creates a new directory with proper permissions.
func (fs *SftpServer) makeDir(r *sftp.Request) error {
func (fs *SftpServer) makeDir(absPath string) error {
if fs.user == nil {
return fmt.Errorf("cannot create directory: no user info")
}
dir, name := util.FullPath(r.Filepath).DirAndName()
if err := fs.checkFilePermission(r.Filepath, "mkdir"); err != nil {
dir, name := util.FullPath(absPath).DirAndName()
if err := fs.checkFilePermission(dir, "write"); err != nil {
return err
}
// default mode and ownership
err := filer_pb.Mkdir(context.Background(), fs, string(dir), name, func(entry *filer_pb.Entry) {
mode := uint32(0755 | os.ModeDir)
if strings.HasPrefix(r.Filepath, fs.user.HomeDir) {
// Defensive check: all paths should be under HomeDir after toAbsolutePath translation
if absPath == fs.user.HomeDir || strings.HasPrefix(absPath, fs.user.HomeDir+"/") {
mode = uint32(0700 | os.ModeDir)
}
entry.Attributes.FileMode = mode
@ -288,8 +314,8 @@ func (fs *SftpServer) makeDir(r *sftp.Request) error {
}
// removeDir deletes a directory.
func (fs *SftpServer) removeDir(r *sftp.Request) error {
return fs.deleteEntry(r.Filepath, false)
func (fs *SftpServer) removeDir(absPath string) error {
return fs.deleteEntry(absPath, false)
}
func (fs *SftpServer) putFile(filepath string, reader io.Reader, user *user.User) error {

24
weed/sftpd/sftp_server.go

@ -6,6 +6,8 @@ import (
"fmt"
"io"
"os"
"path"
"strings"
"time"
"github.com/pkg/sftp"
@ -37,6 +39,28 @@ func NewSftpServer(filerAddr pb.ServerAddress, grpcDialOption grpc.DialOption, d
}
}
// toAbsolutePath translates a user-relative path to an absolute filer path.
// When a user has HomeDir="/sftp/user", their view of "/" maps to "/sftp/user".
// This implements chroot-like behavior where the user's home directory
// becomes their root.
func (fs *SftpServer) toAbsolutePath(userPath string) (string, error) {
// If user has root as home directory, no translation needed
if fs.user.HomeDir == "" || fs.user.HomeDir == "/" {
return path.Clean(userPath), nil
}
// Concatenate home directory with user path, then clean to resolve any ".." components
p := path.Join(fs.user.HomeDir, strings.TrimPrefix(userPath, "/"))
// Security check: ensure the final path is within the home directory.
// This prevents path traversal attacks like `../..` that could escape the chroot jail.
if !strings.HasPrefix(p, fs.user.HomeDir+"/") && p != fs.user.HomeDir {
return "", fmt.Errorf("path traversal attempt: %s resolves to %s which is outside home dir %s", userPath, p, fs.user.HomeDir)
}
return p, nil
}
// Fileread is invoked for “get” requests.
func (fs *SftpServer) Fileread(req *sftp.Request) (io.ReaderAt, error) {
return fs.readFile(req)

103
weed/sftpd/sftp_server_test.go

@ -0,0 +1,103 @@
package sftpd
import (
"testing"
"github.com/seaweedfs/seaweedfs/weed/sftpd/user"
"github.com/stretchr/testify/assert"
)
func stringPtr(s string) *string {
return &s
}
func TestToAbsolutePath(t *testing.T) {
tests := []struct {
name string
homeDir *string // Use pointer to distinguish between unset and empty
userPath string
expected string
expectError bool
}{
{
name: "normal path",
userPath: "/foo.txt",
expected: "/sftp/testuser/foo.txt",
},
{
name: "root path",
userPath: "/",
expected: "/sftp/testuser",
},
{
name: "path with dot",
userPath: "/./foo.txt",
expected: "/sftp/testuser/foo.txt",
},
{
name: "path traversal attempts",
userPath: "/../foo.txt",
expectError: true,
},
{
name: "path traversal attempts 2",
userPath: "../../foo.txt",
expectError: true,
},
{
name: "path traversal attempts 3",
userPath: "/subdir/../../foo.txt",
expectError: true,
},
{
name: "empty path",
userPath: "",
expected: "/sftp/testuser",
},
{
name: "multiple slashes",
userPath: "//foo.txt",
expected: "/sftp/testuser/foo.txt",
},
{
name: "trailing slash",
userPath: "/foo/",
expected: "/sftp/testuser/foo",
},
{
name: "empty HomeDir passthrough",
homeDir: stringPtr(""),
userPath: "/foo.txt",
expected: "/foo.txt",
},
{
name: "root HomeDir passthrough",
homeDir: stringPtr("/"),
userPath: "/foo.txt",
expected: "/foo.txt",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
homeDir := "/sftp/testuser" // default
if tt.homeDir != nil {
homeDir = *tt.homeDir
}
fs := &SftpServer{
user: &user.User{
HomeDir: homeDir,
},
}
got, err := fs.toAbsolutePath(tt.userPath)
if tt.expectError {
assert.Error(t, err)
} else {
assert.NoError(t, err)
assert.Equal(t, tt.expected, got)
}
})
}
}

4
weed/sftpd/sftp_service.go

@ -284,8 +284,8 @@ func (s *SFTPService) handleChannel(newChannel ssh.NewChannel, fs *SftpServer) {
// handleSFTP starts the SFTP server on the SSH channel.
func (s *SFTPService) handleSFTP(channel ssh.Channel, fs *SftpServer) {
// Create server options with initial working directory set to user's home
serverOptions := sftp.WithStartDirectory(fs.user.HomeDir)
// Start at virtual root "/" - toAbsolutePath translates this to the user's HomeDir
serverOptions := sftp.WithStartDirectory("/")
server := sftp.NewRequestServer(channel, sftp.Handlers{
FileGet: fs,
FilePut: fs,

5
weed/sftpd/user/filestore.go

@ -5,6 +5,7 @@ import (
"encoding/json"
"fmt"
"os"
"path"
"sync"
"golang.org/x/crypto/ssh"
@ -99,6 +100,10 @@ func (s *FileStore) loadUsers() error {
user.PublicKeys[i] = string(pubKey.Marshal())
}
}
// Clean HomeDir to handle trailing slashes and normalize path
if user.HomeDir != "" {
user.HomeDir = path.Clean(user.HomeDir)
}
s.users[user.Username] = user
}

55
weed/shell/command_volume_check_disk.go

@ -64,9 +64,9 @@ func (c *commandVolumeCheckDisk) Help() string {
append entries in B and not in A to A
optionally, for each non-writable volume replica A
if volume is not full
prune late volume entries not matching its index file
select a writable volume replica B
if entries in A don't match B
prune late volume entries not matching its index file
append missing entries from B into A
mark the volume as writable (healthy)
@ -179,9 +179,16 @@ func (vcd *volumeCheckDisk) checkWritableVolumes(volumeReplicas map[uint32][]*Vo
writableReplicas = append(writableReplicas[:1], writableReplicas[2:]...)
continue
}
if err := vcd.syncTwoReplicas(a, b, true); err != nil {
vcd.write("sync volume %d on %s and %s: %v", a.info.Id, a.location.dataNode.Id, b.location.dataNode.Id, err)
modified, err := vcd.syncTwoReplicas(a, b, true)
if err != nil {
vcd.write("failed to sync volumes %d on %s and %s: %v", a.info.Id, a.location.dataNode.Id, b.location.dataNode.Id, err)
} else {
if modified {
vcd.write("synced %s and %s for volume %d", a.location.dataNode.Id, b.location.dataNode.Id, a.info.Id)
}
}
// always choose the larger volume to be the source
if a.info.FileCount > b.info.FileCount {
writableReplicas = append(writableReplicas[:1], writableReplicas[2:]...)
@ -280,19 +287,25 @@ func (vcd *volumeCheckDisk) checkReadOnlyVolumes(volumeReplicas map[uint32][]*Vo
return err
}
// ...fix it...
// TODO: test whether syncTwoReplicas() is enough to prune garbage entries on broken volumes.
if err := vcd.syncTwoReplicas(source, r, false); err != nil {
vcd.write("sync read-only volume %d on %s from %s: %v\n", vid, r.location.dataNode.Id, source.location.dataNode.Id, err)
// ...try to fix it...
// TODO: test whether syncTwoReplicas() is enough to prune garbage entries on broken volumes...
modified, err := vcd.syncTwoReplicas(source, r, false)
if err != nil {
vcd.write("sync read-only volume %d on %s from %s: %v", vid, r.location.dataNode.Id, source.location.dataNode.Id, err)
// ...or revert it to read-only, if something went wrong.
// TODO: we should keep unchanged volumes as read-only, so we don't modify valid volumes which are full.
if roErr := vcd.makeVolumeReadonly(vid, r); roErr != nil {
return fmt.Errorf("failed to make volume %d on %s readonly after: %v: %v", vid, r.location.dataNode.Id, err, roErr)
return fmt.Errorf("failed to revert volume %d on %s to readonly after: %v: %v", vid, r.location.dataNode.Id, err, roErr)
}
vcd.write("volume %d on %s is now read-only\n", vid, r.location.dataNode.Id)
return err
} else {
if modified {
vcd.write("volume %d on %s is now synced to %d and writable", vid, r.location.dataNode.Id, source.location.dataNode.Id)
} else {
// ...or revert to read-only, if no changes were made.
if err := vcd.makeVolumeReadonly(vid, r); err != nil {
return fmt.Errorf("failed to revert volume %d on %s to readonly: %v", vid, r.location.dataNode.Id, err)
}
}
}
return nil
@ -411,35 +424,39 @@ func (vcd *volumeCheckDisk) shouldSkipVolume(a, b *VolumeReplica) (bool, error)
// syncTwoReplicas attempts to sync all entries from a source volume replica into a target. If bi-directional mode
// is enabled, changes from target are also synced back into the source.
func (vcd *volumeCheckDisk) syncTwoReplicas(source, target *VolumeReplica, bidi bool) (err error) {
// Returns true if source and/or target were modified, false otherwise.
func (vcd *volumeCheckDisk) syncTwoReplicas(source, target *VolumeReplica, bidi bool) (modified bool, err error) {
sourceHasChanges, targetHasChanges := true, true
const maxIterations = 5
iteration := 0
modified = false
for (sourceHasChanges || targetHasChanges) && iteration < maxIterations {
iteration++
vcd.writeVerbose("sync iteration %d/%d for volume %d", iteration, maxIterations, source.info.Id)
prevSourceHasChanges, prevTargetHasChanges := sourceHasChanges, targetHasChanges
if sourceHasChanges, targetHasChanges, err = vcd.checkBoth(source, target, bidi); err != nil {
return err
return modified, err
}
modified = modified || sourceHasChanges || targetHasChanges
// Detect if we're stuck in a loop with no progress
if iteration > 1 && prevSourceHasChanges == sourceHasChanges && prevTargetHasChanges == targetHasChanges && (sourceHasChanges || targetHasChanges) {
vcd.write("volume %d sync is not making progress between %s and %s after iteration %d, stopping to prevent infinite loop",
source.info.Id, source.location.dataNode.Id, target.location.dataNode.Id, iteration)
return fmt.Errorf("sync not making progress after %d iterations", iteration)
return modified, fmt.Errorf("sync not making progress after %d iterations", iteration)
}
}
if iteration >= maxIterations && (sourceHasChanges || targetHasChanges) {
vcd.write("volume %d sync reached maximum iterations (%d) between %s and %s, may need manual intervention",
source.info.Id, maxIterations, source.location.dataNode.Id, target.location.dataNode.Id)
return fmt.Errorf("reached maximum sync iterations (%d)", maxIterations)
return modified, fmt.Errorf("reached maximum sync iterations (%d)", maxIterations)
}
return nil
return modified, nil
}
// checkBoth performs a sync between source and target volume replicas. If bi-directional mode is enabled, changes from target are also synced back into the source.
@ -628,7 +645,7 @@ func (vcd *volumeCheckDisk) copyVolumeIndexFile(collection string, volumeId uint
copyFileClient, err := volumeServerClient.CopyFile(context.Background(), &volume_server_pb.CopyFileRequest{
VolumeId: volumeId,
Ext: ".idx",
Ext: ext,
CompactionRevision: math.MaxUint32,
StopOffset: math.MaxInt64,
Collection: collection,
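Back to the syncTwoReplicas change above: the bounded loop plus the stall check is the heart of the fix, and the new modified return value lets callers tell "converged after copying data" apart from "already in sync". A schematic of the same control flow (stand-in checkBoth, not the shell package's types):

```go
package main

import "fmt"

// converge mirrors syncTwoReplicas's loop: at most 5 passes, abort if a
// pass leaves the exact same pending-changes state as the pass before,
// and report whether any pass actually changed data.
func converge(check func() (srcChanged, dstChanged bool, err error)) (modified bool, err error) {
	const maxIterations = 5
	src, dst := true, true
	for i := 1; (src || dst) && i <= maxIterations; i++ {
		prevSrc, prevDst := src, dst
		if src, dst, err = check(); err != nil {
			return modified, err
		}
		modified = modified || src || dst
		if i > 1 && prevSrc == src && prevDst == dst && (src || dst) {
			return modified, fmt.Errorf("sync not making progress after %d iterations", i)
		}
	}
	if src || dst {
		return modified, fmt.Errorf("reached maximum sync iterations (%d)", maxIterations)
	}
	return modified, nil
}

func main() {
	calls := 0
	modified, err := converge(func() (bool, bool, error) {
		calls++
		return calls < 2, false, nil // one pass with changes, then clean
	})
	fmt.Println(modified, err, calls) // true <nil> 2
}
```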

45
weed/shell/command_volume_server_evacuate.go

@ -4,7 +4,6 @@ import (
"flag"
"fmt"
"io"
"os"
"slices"
@ -159,6 +158,9 @@ func (c *commandVolumeServerEvacuate) evacuateNormalVolumes(commandEnv *CommandE
func (c *commandVolumeServerEvacuate) evacuateEcVolumes(commandEnv *CommandEnv, volumeServer string, skipNonMoveable, applyChange bool, writer io.Writer) error {
// Evacuate EC volumes for all disk types
// We need to handle each disk type separately because shards should be moved to nodes with the same disk type
// We collect topology once at the start and track capacity changes ourselves
// (via freeEcSlot decrement after each move) rather than repeatedly refreshing,
// which would give a false sense of correctness since topology could be stale.
diskTypes := []types.DiskType{types.HardDriveType, types.SsdType}
for _, diskType := range diskTypes {
@ -176,9 +178,9 @@ func (c *commandVolumeServerEvacuate) evacuateEcVolumes(commandEnv *CommandEnv,
continue
}
for _, ecShardInfo := range diskInfo.EcShardInfos {
hasMoved, err := c.moveAwayOneEcVolume(commandEnv, ecShardInfo, thisNode, otherNodes, applyChange, diskType)
hasMoved, err := c.moveAwayOneEcVolume(commandEnv, ecShardInfo, thisNode, otherNodes, applyChange, diskType, writer)
if err != nil {
fmt.Fprintf(writer, "move away volume %d from %s: %v", ecShardInfo.Id, volumeServer, err)
fmt.Fprintf(writer, "move away volume %d from %s: %v\n", ecShardInfo.Id, volumeServer, err)
}
if !hasMoved {
if skipNonMoveable {
@ -193,14 +195,31 @@ func (c *commandVolumeServerEvacuate) evacuateEcVolumes(commandEnv *CommandEnv,
return nil
}
func (c *commandVolumeServerEvacuate) moveAwayOneEcVolume(commandEnv *CommandEnv, ecShardInfo *master_pb.VolumeEcShardInformationMessage, thisNode *EcNode, otherNodes []*EcNode, applyChange bool, diskType types.DiskType) (hasMoved bool, err error) {
func (c *commandVolumeServerEvacuate) moveAwayOneEcVolume(commandEnv *CommandEnv, ecShardInfo *master_pb.VolumeEcShardInformationMessage, thisNode *EcNode, otherNodes []*EcNode, applyChange bool, diskType types.DiskType, writer io.Writer) (hasMoved bool, err error) {
for _, shardId := range erasure_coding.ShardBits(ecShardInfo.EcIndexBits).ShardIds() {
// Sort by: 1) fewest shards of this volume, 2) most free EC slots
// This ensures we prefer nodes with capacity and balanced shard distribution
slices.SortFunc(otherNodes, func(a, b *EcNode) int {
return a.localShardIdCount(ecShardInfo.Id) - b.localShardIdCount(ecShardInfo.Id)
aShards := a.localShardIdCount(ecShardInfo.Id)
bShards := b.localShardIdCount(ecShardInfo.Id)
if aShards != bShards {
return aShards - bShards // Prefer fewer shards
}
return b.freeEcSlot - a.freeEcSlot // Then prefer more free slots
})
shardMoved := false
skippedNodes := 0
for i := 0; i < len(otherNodes); i++ {
emptyNode := otherNodes[i]
// Skip nodes with no free EC slots
if emptyNode.freeEcSlot <= 0 {
skippedNodes++
continue
}
collectionPrefix := ""
if ecShardInfo.Collection != "" {
collectionPrefix = ecShardInfo.Collection + "_"
@ -209,19 +228,29 @@ func (c *commandVolumeServerEvacuate) moveAwayOneEcVolume(commandEnv *CommandEnv
// For evacuation, prefer same disk type but allow fallback to other types
destDiskId := pickBestDiskOnNode(emptyNode, vid, diskType, false)
if destDiskId > 0 {
fmt.Fprintf(os.Stdout, "moving ec volume %s%d.%d %s => %s (disk %d)\n", collectionPrefix, ecShardInfo.Id, shardId, thisNode.info.Id, emptyNode.info.Id, destDiskId)
fmt.Fprintf(writer, "moving ec volume %s%d.%d %s => %s (disk %d)\n", collectionPrefix, ecShardInfo.Id, shardId, thisNode.info.Id, emptyNode.info.Id, destDiskId)
} else {
fmt.Fprintf(os.Stdout, "moving ec volume %s%d.%d %s => %s\n", collectionPrefix, ecShardInfo.Id, shardId, thisNode.info.Id, emptyNode.info.Id)
fmt.Fprintf(writer, "moving ec volume %s%d.%d %s => %s\n", collectionPrefix, ecShardInfo.Id, shardId, thisNode.info.Id, emptyNode.info.Id)
}
err = moveMountedShardToEcNode(commandEnv, thisNode, ecShardInfo.Collection, vid, shardId, emptyNode, destDiskId, applyChange, diskType)
if err != nil {
hasMoved = false
return
} else {
hasMoved = true
shardMoved = true
// Update the node's free slot count after successful move
emptyNode.freeEcSlot--
break
}
}
if !hasMoved {
if !shardMoved {
if skippedNodes > 0 {
fmt.Fprintf(writer, "no available destination for ec shard %d.%d: %d nodes have no free slots\n",
ecShardInfo.Id, shardId, skippedNodes)
}
// Ensure partial moves are reported as failures to prevent data loss
hasMoved = false
return
}
}
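The two-key sort is easier to see on concrete data: fewest shards of this volume wins, and free EC slots break ties (simplified node type; the real EcNode carries full topology info):

```go
package main

import (
	"fmt"
	"slices"
)

type node struct {
	name       string
	shards     int // shards of this volume already on the node
	freeEcSlot int
}

func main() {
	nodes := []node{
		{"n1", 2, 50},
		{"n2", 0, 10},
		{"n3", 0, 90},
	}
	// Same ordering as the evacuation code: fewest shards first,
	// then most free EC slots.
	slices.SortFunc(nodes, func(a, b node) int {
		if a.shards != b.shards {
			return a.shards - b.shards
		}
		return b.freeEcSlot - a.freeEcSlot
	})
	fmt.Println(nodes) // [{n3 0 90} {n2 0 10} {n1 2 50}]
}
```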

6
weed/storage/needle/needle_parse_upload.go

@ -128,7 +128,7 @@ func parseUpload(r *http.Request, sizeLimit int64, pu *ParsedUpload) (e error) {
pu.FileName = part.FileName()
if pu.FileName != "" {
pu.FileName = path.Base(pu.FileName)
pu.FileName = util.CleanWindowsPathBase(pu.FileName)
}
dataSize, e = pu.bytesBuffer.ReadFrom(io.LimitReader(part, sizeLimit+1))
@ -169,7 +169,7 @@ func parseUpload(r *http.Request, sizeLimit int64, pu *ParsedUpload) (e error) {
// update
pu.Data = pu.bytesBuffer.Bytes()
pu.FileName = path.Base(fName)
pu.FileName = util.CleanWindowsPathBase(fName)
contentType = part.Header.Get("Content-Type")
part = part2
break
@ -207,7 +207,7 @@ func parseUpload(r *http.Request, sizeLimit int64, pu *ParsedUpload) (e error) {
}
if pu.FileName != "" {
pu.FileName = path.Base(pu.FileName)
pu.FileName = util.CleanWindowsPathBase(pu.FileName)
} else {
pu.FileName = path.Base(r.URL.Path)
}

12
weed/storage/store.go

@ -63,6 +63,7 @@ type Store struct {
Port int
GrpcPort int
PublicUrl string
Id string // volume server id, independent of ip:port for stable identification
Locations []*DiskLocation
dataCenter string // optional information, overwriting master setting if exists
rack string // optional information, overwriting master setting if exists
@ -76,13 +77,13 @@ type Store struct {
}
func (s *Store) String() (str string) {
str = fmt.Sprintf("Ip:%s, Port:%d, GrpcPort:%d PublicUrl:%s, dataCenter:%s, rack:%s, connected:%v, volumeSizeLimit:%d", s.Ip, s.Port, s.GrpcPort, s.PublicUrl, s.dataCenter, s.rack, s.connected, s.GetVolumeSizeLimit())
str = fmt.Sprintf("Id:%s, Ip:%s, Port:%d, GrpcPort:%d PublicUrl:%s, dataCenter:%s, rack:%s, connected:%v, volumeSizeLimit:%d", s.Id, s.Ip, s.Port, s.GrpcPort, s.PublicUrl, s.dataCenter, s.rack, s.connected, s.GetVolumeSizeLimit())
return
}
func NewStore(grpcDialOption grpc.DialOption, ip string, port int, grpcPort int, publicUrl string, dirnames []string, maxVolumeCounts []int32,
func NewStore(grpcDialOption grpc.DialOption, ip string, port int, grpcPort int, publicUrl string, id string, dirnames []string, maxVolumeCounts []int32,
minFreeSpaces []util.MinFreeSpace, idxFolder string, needleMapKind NeedleMapKind, diskTypes []DiskType, ldbTimeout int64) (s *Store) {
s = &Store{grpcDialOption: grpcDialOption, Port: port, Ip: ip, GrpcPort: grpcPort, PublicUrl: publicUrl, NeedleMapKind: needleMapKind}
s = &Store{grpcDialOption: grpcDialOption, Port: port, Ip: ip, GrpcPort: grpcPort, PublicUrl: publicUrl, Id: id, NeedleMapKind: needleMapKind}
s.Locations = make([]*DiskLocation, 0)
var wg sync.WaitGroup
@ -414,6 +415,7 @@ func (s *Store) CollectHeartbeat() *master_pb.Heartbeat {
Port: uint32(s.Port),
GrpcPort: uint32(s.GrpcPort),
PublicUrl: s.PublicUrl,
Id: s.Id,
MaxVolumeCounts: maxVolumeCounts,
MaxFileKey: NeedleIdToUint64(maxFileKey),
DataCenter: s.dataCenter,
@ -467,6 +469,10 @@ func (s *Store) SetStopping() {
}
}
func (s *Store) IsStopping() bool {
return s.isStopping
}
func (s *Store) LoadNewVolumes() {
for _, location := range s.Locations {
location.loadExistingVolumes(s.NeedleMapKind, 0)

17
weed/storage/store_ec_delete.go

@ -3,6 +3,7 @@ package storage
import (
"context"
"fmt"
"github.com/seaweedfs/seaweedfs/weed/pb"
"github.com/seaweedfs/seaweedfs/weed/glog"
@ -21,7 +22,8 @@ func (s *Store) DeleteEcShardNeedle(ecVolume *erasure_coding.EcVolume, n *needle
return 0, err
}
if cookie != n.Cookie {
// cookie == 0 indicates SkipCookieCheck was requested (e.g., orphan cleanup)
if cookie != 0 && cookie != n.Cookie {
return 0, fmt.Errorf("unexpected cookie %x", cookie)
}
@ -45,21 +47,16 @@ func (s *Store) doDeleteNeedleFromAtLeastOneRemoteEcShards(ecVolume *erasure_cod
shardId, _ := intervals[0].ToShardIdAndOffset(erasure_coding.ErasureCodingLargeBlockSize, erasure_coding.ErasureCodingSmallBlockSize)
hasDeletionSuccess := false
err = s.doDeleteNeedleFromRemoteEcShardServers(shardId, ecVolume, needleId)
if err == nil {
hasDeletionSuccess = true
return nil
}
for shardId = erasure_coding.DataShardsCount; shardId < erasure_coding.TotalShardsCount; shardId++ {
if parityDeletionError := s.doDeleteNeedleFromRemoteEcShardServers(shardId, ecVolume, needleId); parityDeletionError == nil {
hasDeletionSuccess = true
}
}
if hasDeletionSuccess {
return nil
}
}
return err
@ -77,11 +74,9 @@ func (s *Store) doDeleteNeedleFromRemoteEcShardServers(shardId erasure_coding.Sh
for _, sourceDataNode := range sourceDataNodes {
glog.V(4).Infof("delete from remote ec shard %d.%d from %s", ecVolume.VolumeId, shardId, sourceDataNode)
err := s.doDeleteNeedleFromRemoteEcShard(sourceDataNode, ecVolume.VolumeId, ecVolume.Collection, ecVolume.Version, needleId)
if err != nil {
if err := s.doDeleteNeedleFromRemoteEcShard(sourceDataNode, ecVolume.VolumeId, ecVolume.Collection, ecVolume.Version, needleId); err != nil {
return err
}
glog.V(1).Infof("delete from remote ec shard %d.%d from %s: %v", ecVolume.VolumeId, shardId, sourceDataNode, err)
}
return nil

2
weed/storage/store_load_balancing_test.go

@ -31,7 +31,7 @@ func newTestStore(t *testing.T, numDirs int) *Store {
diskTypes = append(diskTypes, types.HardDriveType)
}
store := NewStore(nil, "localhost", 8080, 18080, "http://localhost:8080",
store := NewStore(nil, "localhost", 8080, 18080, "http://localhost:8080", "",
dirs, maxCounts, minFreeSpaces, "", NeedleMapInMemory, diskTypes, 3)
// Consume channel messages to prevent blocking

1
weed/topology/data_node.go

@ -269,6 +269,7 @@ func (dn *DataNode) ToDataNodeInfo() *master_pb.DataNodeInfo {
Id: string(dn.Id()),
DiskInfos: make(map[string]*master_pb.DiskInfo),
GrpcPort: uint32(dn.GrpcPort),
Address: dn.Url(), // ip:port for connecting to the volume server
}
for _, c := range dn.Children() {
disk := c.(*Disk)

63
weed/topology/rack.go

@ -5,6 +5,7 @@ import (
"strings"
"time"
"github.com/seaweedfs/seaweedfs/weed/glog"
"github.com/seaweedfs/seaweedfs/weed/pb/master_pb"
"github.com/seaweedfs/seaweedfs/weed/storage/types"
"github.com/seaweedfs/seaweedfs/weed/util"
@ -34,17 +35,73 @@ func (r *Rack) FindDataNode(ip string, port int) *DataNode {
}
return nil
}
func (r *Rack) GetOrCreateDataNode(ip string, port int, grpcPort int, publicUrl string, maxVolumeCounts map[string]uint32) *DataNode {
// FindDataNodeById finds a DataNode by its ID using O(1) map lookup
func (r *Rack) FindDataNodeById(id string) *DataNode {
r.RLock()
defer r.RUnlock()
if c, ok := r.children[NodeId(id)]; ok {
return c.(*DataNode)
}
return nil
}
func (r *Rack) GetOrCreateDataNode(ip string, port int, grpcPort int, publicUrl string, id string, maxVolumeCounts map[string]uint32) *DataNode {
r.Lock()
defer r.Unlock()
for _, c := range r.children {
// Normalize the id parameter (trim whitespace)
id = strings.TrimSpace(id)
// Determine the node ID: use provided id, or fall back to ip:port for backward compatibility
nodeId := util.GetVolumeServerId(id, ip, port)
// First, try to find by node ID using O(1) map lookup (stable identity)
if c, ok := r.children[NodeId(nodeId)]; ok {
dn := c.(*DataNode)
// Log if IP or Port changed (e.g., pod rescheduled in K8s)
if dn.Ip != ip || dn.Port != port {
glog.V(0).Infof("DataNode %s address changed from %s:%d to %s:%d", nodeId, dn.Ip, dn.Port, ip, port)
}
// Update the IP/Port in case they changed
dn.Ip = ip
dn.Port = port
dn.GrpcPort = grpcPort
dn.PublicUrl = publicUrl
dn.LastSeen = time.Now().Unix()
return dn
}
// For backward compatibility: if explicit id was provided, also check by ip:port
// to handle transition from old (ip:port) to new (explicit id) behavior
ipPortId := util.JoinHostPort(ip, port)
if nodeId != ipPortId {
for oldId, c := range r.children {
dn := c.(*DataNode)
if dn.MatchLocation(ip, port) {
// Only transition if the oldId exactly matches ip:port (legacy identification).
// If oldId is different, this is a node with an explicit id that happens to
// reuse the same ip:port - don't incorrectly merge them.
if string(oldId) != ipPortId {
glog.Warningf("Volume server with id %s has ip:port %s which is used by node %s", nodeId, ipPortId, oldId)
continue
}
// Found a legacy node identified by ip:port, transition it to use the new explicit id
glog.V(0).Infof("Volume server %s transitioning id from %s to %s", dn.Url(), oldId, nodeId)
// Re-key the node in the children map with the new id
delete(r.children, oldId)
dn.id = NodeId(nodeId)
r.children[NodeId(nodeId)] = dn
// Update connection info in case they changed
dn.GrpcPort = grpcPort
dn.PublicUrl = publicUrl
dn.LastSeen = time.Now().Unix()
return dn
}
}
dn := NewDataNode(util.JoinHostPort(ip, port))
}
dn := NewDataNode(nodeId)
dn.Ip = ip
dn.Port = port
dn.GrpcPort = grpcPort

119
weed/topology/topology_test.go

@ -34,7 +34,7 @@ func TestHandlingVolumeServerHeartbeat(t *testing.T) {
maxVolumeCounts := make(map[string]uint32)
maxVolumeCounts[""] = 25
maxVolumeCounts["ssd"] = 12
dn := rack.GetOrCreateDataNode("127.0.0.1", 34534, 0, "127.0.0.1", maxVolumeCounts)
dn := rack.GetOrCreateDataNode("127.0.0.1", 34534, 0, "127.0.0.1", "", maxVolumeCounts)
{
volumeCount := 7
@ -180,7 +180,7 @@ func TestAddRemoveVolume(t *testing.T) {
maxVolumeCounts := make(map[string]uint32)
maxVolumeCounts[""] = 25
maxVolumeCounts["ssd"] = 12
dn := rack.GetOrCreateDataNode("127.0.0.1", 34534, 0, "127.0.0.1", maxVolumeCounts)
dn := rack.GetOrCreateDataNode("127.0.0.1", 34534, 0, "127.0.0.1", "", maxVolumeCounts)
v := storage.VolumeInfo{
Id: needle.VolumeId(1),
@ -218,7 +218,7 @@ func TestVolumeReadOnlyStatusChange(t *testing.T) {
rack := dc.GetOrCreateRack("rack1")
maxVolumeCounts := make(map[string]uint32)
maxVolumeCounts[""] = 25
dn := rack.GetOrCreateDataNode("127.0.0.1", 34534, 0, "127.0.0.1", maxVolumeCounts)
dn := rack.GetOrCreateDataNode("127.0.0.1", 34534, 0, "127.0.0.1", "", maxVolumeCounts)
// Create a writable volume
v := storage.VolumeInfo{
@ -267,7 +267,7 @@ func TestVolumeReadOnlyAndRemoteStatusChange(t *testing.T) {
rack := dc.GetOrCreateRack("rack1")
maxVolumeCounts := make(map[string]uint32)
maxVolumeCounts[""] = 25
dn := rack.GetOrCreateDataNode("127.0.0.1", 34534, 0, "127.0.0.1", maxVolumeCounts)
dn := rack.GetOrCreateDataNode("127.0.0.1", 34534, 0, "127.0.0.1", "", maxVolumeCounts)
// Create a writable, local volume
v := storage.VolumeInfo{
@ -331,7 +331,7 @@ func TestListCollections(t *testing.T) {
topo := NewTopology("weedfs", sequence.NewMemorySequencer(), 32*1024, 5, false)
dc := topo.GetOrCreateDataCenter("dc1")
rack := dc.GetOrCreateRack("rack1")
dn := rack.GetOrCreateDataNode("127.0.0.1", 34534, 0, "127.0.0.1", nil)
dn := rack.GetOrCreateDataNode("127.0.0.1", 34534, 0, "127.0.0.1", "", nil)
topo.RegisterVolumeLayout(storage.VolumeInfo{
Id: needle.VolumeId(1111),
@ -396,3 +396,112 @@ func TestListCollections(t *testing.T) {
})
}
}
func TestDataNodeIdBasedIdentification(t *testing.T) {
topo := NewTopology("weedfs", sequence.NewMemorySequencer(), 32*1024, 5, false)
dc := topo.GetOrCreateDataCenter("dc1")
rack := dc.GetOrCreateRack("rack1")
maxVolumeCounts := make(map[string]uint32)
maxVolumeCounts[""] = 10
// Test 1: Create a DataNode with explicit id
dn1 := rack.GetOrCreateDataNode("10.0.0.1", 8080, 18080, "10.0.0.1:8080", "node-1", maxVolumeCounts)
if string(dn1.Id()) != "node-1" {
t.Errorf("expected node id 'node-1', got '%s'", dn1.Id())
}
if dn1.Ip != "10.0.0.1" {
t.Errorf("expected ip '10.0.0.1', got '%s'", dn1.Ip)
}
// Test 2: Same id with different IP should return the same DataNode (K8s pod reschedule scenario)
dn2 := rack.GetOrCreateDataNode("10.0.0.2", 8080, 18080, "10.0.0.2:8080", "node-1", maxVolumeCounts)
if dn1 != dn2 {
t.Errorf("expected same DataNode for same id, got different nodes")
}
// IP should be updated to the new value
if dn2.Ip != "10.0.0.2" {
t.Errorf("expected ip to be updated to '10.0.0.2', got '%s'", dn2.Ip)
}
if dn2.PublicUrl != "10.0.0.2:8080" {
t.Errorf("expected publicUrl to be updated to '10.0.0.2:8080', got '%s'", dn2.PublicUrl)
}
// Test 3: Different id should create a new DataNode
dn3 := rack.GetOrCreateDataNode("10.0.0.3", 8080, 18080, "10.0.0.3:8080", "node-2", maxVolumeCounts)
if string(dn3.Id()) != "node-2" {
t.Errorf("expected node id 'node-2', got '%s'", dn3.Id())
}
if dn1 == dn3 {
t.Errorf("expected different DataNode for different id")
}
// Test 4: Empty id should fall back to ip:port (backward compatibility)
dn4 := rack.GetOrCreateDataNode("10.0.0.4", 8080, 18080, "10.0.0.4:8080", "", maxVolumeCounts)
if string(dn4.Id()) != "10.0.0.4:8080" {
t.Errorf("expected node id '10.0.0.4:8080' for empty id, got '%s'", dn4.Id())
}
// Test 5: Same ip:port with empty id should return the same DataNode
dn5 := rack.GetOrCreateDataNode("10.0.0.4", 8080, 18080, "10.0.0.4:8080", "", maxVolumeCounts)
if dn4 != dn5 {
t.Errorf("expected same DataNode for same ip:port with empty id")
}
// Verify we have 3 unique DataNodes total:
// - node-1 (dn1/dn2 share the same id)
// - node-2 (dn3)
// - 10.0.0.4:8080 (dn4/dn5 share the same ip:port)
children := rack.Children()
if len(children) != 3 {
t.Errorf("expected 3 DataNodes, got %d", len(children))
}
// Test 6: Transition from ip:port to explicit id
// First, the node exists with ip:port as id (dn4/dn5)
// Now the same volume server starts sending an explicit id
dn6 := rack.GetOrCreateDataNode("10.0.0.4", 8080, 18080, "10.0.0.4:8080", "node-4-explicit", maxVolumeCounts)
// Should return the same DataNode instance
if dn6 != dn4 {
t.Errorf("expected same DataNode instance during transition")
}
// But the id should now be updated to the explicit id
if string(dn6.Id()) != "node-4-explicit" {
t.Errorf("expected node id to transition to 'node-4-explicit', got '%s'", dn6.Id())
}
// The node should be re-keyed in the children map
if rack.FindDataNodeById("node-4-explicit") != dn6 {
t.Errorf("expected to find DataNode by new explicit id")
}
// Old ip:port key should no longer work
if rack.FindDataNodeById("10.0.0.4:8080") != nil {
t.Errorf("expected old ip:port id to be removed from children map")
}
// Still 3 unique DataNodes (node-1, node-2, node-4-explicit)
children = rack.Children()
if len(children) != 3 {
t.Errorf("expected 3 DataNodes after transition, got %d", len(children))
}
// Test 7: Prevent incorrect transition when a new node reuses ip:port of a node with explicit id
// Scenario: node-1 runs at 10.0.0.1:8080, dies, new node-99 starts at same ip:port
// The transition should NOT happen because node-1 already has an explicit id
dn7 := rack.GetOrCreateDataNode("10.0.0.1", 8080, 18080, "10.0.0.1:8080", "node-99", maxVolumeCounts)
// Should create a NEW DataNode, not reuse node-1
if dn7 == dn1 {
t.Errorf("expected new DataNode for node-99, got reused node-1")
}
if string(dn7.Id()) != "node-99" {
t.Errorf("expected node id 'node-99', got '%s'", dn7.Id())
}
// node-1 should still exist with its original id
if rack.FindDataNodeById("node-1") == nil {
t.Errorf("node-1 should still exist")
}
// Now we have 4 DataNodes
children = rack.Children()
if len(children) != 4 {
t.Errorf("expected 4 DataNodes, got %d", len(children))
}
}

13
weed/util/fullpath.go

@ -1,6 +1,7 @@
package util
import (
"path"
"path/filepath"
"strings"
)
@ -85,3 +86,15 @@ func StringSplit(separatedValues string, sep string) []string {
}
return strings.Split(separatedValues, sep)
}
// CleanWindowsPath normalizes Windows-style backslashes to forward slashes.
// This handles paths from Windows clients where paths use backslashes.
func CleanWindowsPath(p string) string {
return strings.ReplaceAll(p, "\\", "/")
}
// CleanWindowsPathBase normalizes Windows-style backslashes to forward slashes
// and returns the base name of the path.
func CleanWindowsPathBase(p string) string {
return path.Base(strings.ReplaceAll(p, "\\", "/"))
}
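Behavior check for the two helpers; the outputs follow directly from the definitions above:

```go
package main

import (
	"fmt"

	"github.com/seaweedfs/seaweedfs/weed/util"
)

func main() {
	fmt.Println(util.CleanWindowsPathBase(`C:\Users\alice\photo.jpg`)) // photo.jpg
	fmt.Println(util.CleanWindowsPathBase("docs/readme.txt"))          // readme.txt
	fmt.Println(util.CleanWindowsPath(`a\b\c.txt`))                    // a/b/c.txt
}
```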

108
weed/util/http/http_global_client_util.go

@ -487,6 +487,12 @@ func RetriedFetchChunkData(ctx context.Context, buffer []byte, urlStrings []stri
)
}
// For unencrypted, non-gzipped full chunks, use direct buffer read
// This avoids the 64KB intermediate buffer and callback overhead
if cipherKey == nil && !isGzipped && isFullChunk {
return retriedFetchChunkDataDirect(ctx, buffer, urlStrings, string(jwt))
}
var shouldRetry bool
for waitTime := time.Second; waitTime < util.RetryWaitTime; waitTime += waitTime / 2 {
@ -551,3 +557,105 @@ func RetriedFetchChunkData(ctx context.Context, buffer []byte, urlStrings []stri
return n, err
}
// retriedFetchChunkDataDirect reads chunk data directly into the buffer without
// intermediate buffering. This reduces memory copies and improves throughput
// for large chunk reads.
func retriedFetchChunkDataDirect(ctx context.Context, buffer []byte, urlStrings []string, jwt string) (n int, err error) {
var shouldRetry bool
for waitTime := time.Second; waitTime < util.RetryWaitTime; waitTime += waitTime / 2 {
select {
case <-ctx.Done():
return 0, ctx.Err()
default:
}
for _, urlString := range urlStrings {
select {
case <-ctx.Done():
return 0, ctx.Err()
default:
}
n, shouldRetry, err = readUrlDirectToBuffer(ctx, urlString+"?readDeleted=true", jwt, buffer)
if err == nil {
return n, nil
}
if !shouldRetry {
break
}
glog.V(0).InfofCtx(ctx, "read %s failed, err: %v", urlString, err)
}
if err != nil && shouldRetry {
glog.V(0).InfofCtx(ctx, "retry reading in %v", waitTime)
timer := time.NewTimer(waitTime)
select {
case <-ctx.Done():
timer.Stop()
return 0, ctx.Err()
case <-timer.C:
}
} else {
break
}
}
return n, err
}
// readUrlDirectToBuffer reads HTTP response directly into the provided buffer,
// avoiding intermediate buffer allocations and copies.
func readUrlDirectToBuffer(ctx context.Context, fileUrl, jwt string, buffer []byte) (n int, retryable bool, err error) {
req, err := http.NewRequestWithContext(ctx, http.MethodGet, fileUrl, nil)
if err != nil {
return 0, false, err
}
maybeAddAuth(req, jwt)
request_id.InjectToRequest(ctx, req)
r, err := GetGlobalHttpClient().Do(req)
if err != nil {
return 0, true, err
}
defer CloseResponse(r)
if r.StatusCode >= 400 {
if r.StatusCode == http.StatusNotFound {
return 0, true, fmt.Errorf("%s: %s: %w", fileUrl, r.Status, ErrNotFound)
}
if r.StatusCode == http.StatusTooManyRequests {
return 0, false, fmt.Errorf("%s: %s: %w", fileUrl, r.Status, ErrTooManyRequests)
}
retryable = r.StatusCode >= 499
return 0, retryable, fmt.Errorf("%s: %s", fileUrl, r.Status)
}
// Read directly into the buffer without intermediate copying
// This is significantly faster for large chunks (16MB+)
var totalRead int
for totalRead < len(buffer) {
select {
case <-ctx.Done():
return totalRead, false, ctx.Err()
default:
}
m, readErr := r.Body.Read(buffer[totalRead:])
totalRead += m
if readErr != nil {
if readErr == io.EOF {
// Return io.ErrUnexpectedEOF if we haven't filled the buffer
// This prevents silent data corruption from truncated responses
if totalRead < len(buffer) {
return totalRead, true, io.ErrUnexpectedEOF
}
return totalRead, false, nil
}
return totalRead, true, readErr
}
}
return totalRead, false, nil
}
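Minus the per-iteration context checks, the fill loop is io.ReadFull with truncation surfaced explicitly. A compact equivalent:

```go
package main

import (
	"fmt"
	"io"
	"strings"
)

// fillBuffer mirrors the core of readUrlDirectToBuffer's loop (without the
// per-iteration context checks): fill the buffer exactly, and surface a
// short read as io.ErrUnexpectedEOF so truncation is never silent.
func fillBuffer(body io.Reader, buffer []byte) (int, error) {
	n, err := io.ReadFull(body, buffer)
	if err == io.EOF {
		err = io.ErrUnexpectedEOF // zero bytes read also counts as truncation
	}
	return n, err
}

func main() {
	buf := make([]byte, 8)
	n, err := fillBuffer(strings.NewReader("short"), buf)
	fmt.Println(n, err) // 5 unexpected EOF
}
```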

105
weed/util/net_timeout.go

@@ -9,22 +9,11 @@ import (
 	"github.com/seaweedfs/seaweedfs/weed/stats"
 )
 
-const (
-	// minThroughputBytesPerSecond defines the minimum expected throughput (4KB/s)
-	// Used to calculate timeout scaling based on data transferred
-	minThroughputBytesPerSecond = 4000
-
-	// graceTimeCapMultiplier caps the grace period for slow clients at 3x base timeout
-	// This prevents indefinite connections while allowing time for server-side chunk fetches
-	graceTimeCapMultiplier = 3
-)
-
 // Listener wraps a net.Listener, and gives a place to store the timeout
 // parameters. On Accept, it will wrap the net.Conn with our own Conn for us.
 type Listener struct {
 	net.Listener
-	ReadTimeout  time.Duration
-	WriteTimeout time.Duration
+	Timeout time.Duration
 }
 
 func (l *Listener) Accept() (net.Conn, error) {
@@ -35,102 +24,49 @@ func (l *Listener) Accept() (net.Conn, error) {
 	stats.ConnectionOpen()
 	tc := &Conn{
 		Conn:    c,
-		ReadTimeout:  l.ReadTimeout,
-		WriteTimeout: l.WriteTimeout,
+		Timeout: l.Timeout,
 	}
 	return tc, nil
 }
 
-// Conn wraps a net.Conn, and sets a deadline for every read
-// and write operation.
+// Conn wraps a net.Conn and implements a "no activity timeout".
+// Any activity (read or write) resets the deadline, so the connection
+// only times out when there's no activity in either direction.
 type Conn struct {
 	net.Conn
-	ReadTimeout  time.Duration
-	WriteTimeout time.Duration
+	Timeout  time.Duration
 	isClosed bool
-	bytesRead    int64
-	bytesWritten int64
-	lastWrite    time.Time
 }
 
-// calculateBytesPerTimeout calculates the expected number of bytes that should
-// be transferred during one timeout period, based on the minimum throughput.
-// Returns at least 1 to prevent division by zero.
-func calculateBytesPerTimeout(timeout time.Duration) int64 {
-	bytesPerTimeout := int64(float64(minThroughputBytesPerSecond) * timeout.Seconds())
-	if bytesPerTimeout <= 0 {
-		return 1 // Prevent division by zero
-	}
-	return bytesPerTimeout
-}
+// extendDeadline extends the connection deadline from now.
+// This implements "no activity timeout" - any activity keeps the connection alive.
+func (c *Conn) extendDeadline() error {
+	if c.Timeout > 0 {
+		return c.Conn.SetDeadline(time.Now().Add(c.Timeout))
+	}
+	return nil
+}
 
 func (c *Conn) Read(b []byte) (count int, e error) {
-	if c.ReadTimeout != 0 {
-		// Calculate expected bytes per timeout period based on minimum throughput (4KB/s)
-		// Example: with ReadTimeout=30s, bytesPerTimeout = 4000 * 30 = 120KB
-		// After reading 1MB: multiplier = 1,000,000/120,000 + 1 ≈ 9, deadline = 30s * 9 = 270s
-		bytesPerTimeout := calculateBytesPerTimeout(c.ReadTimeout)
-		timeoutMultiplier := time.Duration(c.bytesRead/bytesPerTimeout + 1)
-		err := c.Conn.SetReadDeadline(time.Now().Add(c.ReadTimeout * timeoutMultiplier))
-		if err != nil {
-			return 0, err
-		}
+	// Extend deadline before reading - any activity keeps connection alive
+	if err := c.extendDeadline(); err != nil {
+		return 0, err
 	}
 	count, e = c.Conn.Read(b)
 	if e == nil {
 		stats.BytesIn(int64(count))
-		c.bytesRead += int64(count)
 	}
 	return
 }
 
 func (c *Conn) Write(b []byte) (count int, e error) {
-	if c.WriteTimeout != 0 {
-		now := time.Now()
-		// Calculate timeout with two components:
-		// 1. Base timeout scaled by cumulative data (minimum throughput of 4KB/s)
-		// 2. Additional grace period if there was a gap since last write (for chunk fetch delays)
-
-		// Calculate expected bytes per timeout period based on minimum throughput (4KB/s)
-		// Example: with WriteTimeout=30s, bytesPerTimeout = 4000 * 30 = 120KB
-		// After writing 1MB: multiplier = 1,000,000/120,000 + 1 ≈ 9, baseTimeout = 30s * 9 = 270s
-		bytesPerTimeout := calculateBytesPerTimeout(c.WriteTimeout)
-		timeoutMultiplier := time.Duration(c.bytesWritten/bytesPerTimeout + 1)
-		baseTimeout := c.WriteTimeout * timeoutMultiplier
-
-		// If it's been a while since last write, add grace time for server-side chunk fetches
-		// But cap it to avoid keeping slow clients connected indefinitely
-		//
-		// The comparison uses unscaled WriteTimeout intentionally: triggers grace when idle time
-		// exceeds base timeout, independent of throughput scaling.
-		if !c.lastWrite.IsZero() {
-			timeSinceLastWrite := now.Sub(c.lastWrite)
-			if timeSinceLastWrite > c.WriteTimeout {
-				// Add grace time capped at graceTimeCapMultiplier * scaled timeout.
-				// This allows total deadline up to 4x scaled timeout for server-side delays.
-				//
-				// Example: WriteTimeout=30s, 1MB written (multiplier≈9), baseTimeout=270s
-				// If 400s gap occurs fetching chunks: graceTime capped at 270s*3=810s
-				// Final deadline: 270s + 810s = 1080s (~18min) to accommodate slow storage
-				// But if only 50s gap: graceTime = 50s, final deadline = 270s + 50s = 320s
-				graceTime := timeSinceLastWrite
-				if graceTime > baseTimeout*graceTimeCapMultiplier {
-					graceTime = baseTimeout * graceTimeCapMultiplier
-				}
-				baseTimeout += graceTime
-			}
-		}
-		err := c.Conn.SetWriteDeadline(now.Add(baseTimeout))
-		if err != nil {
-			return 0, err
-		}
+	// Extend deadline before writing - any activity keeps connection alive
+	if err := c.extendDeadline(); err != nil {
+		return 0, err
 	}
 	count, e = c.Conn.Write(b)
 	if e == nil {
 		stats.BytesOut(int64(count))
-		c.bytesWritten += int64(count)
-		c.lastWrite = time.Now()
 	}
 	return
 }
@@ -154,8 +90,7 @@ func NewListener(addr string, timeout time.Duration) (ipListener net.Listener, e
 	ipListener = &Listener{
 		Listener:    listener,
-		ReadTimeout:  timeout,
-		WriteTimeout: timeout,
+		Timeout: timeout,
 	}
 
 	return
@@ -169,8 +104,7 @@ func NewIpAndLocalListeners(host string, port int, timeout time.Duration) (ipLis
 	ipListener = &Listener{
 		Listener:    listener,
-		ReadTimeout:  timeout,
-		WriteTimeout: timeout,
+		Timeout: timeout,
 	}
 
 	if host != "localhost" && host != "" && host != "0.0.0.0" && host != "127.0.0.1" && host != "[::]" && host != "[::1]" {
@@ -182,8 +116,7 @@ func NewIpAndLocalListeners(host string, port int, timeout time.Duration) (ipLis
 		localListener = &Listener{
 			Listener:    listener,
-			ReadTimeout:  timeout,
-			WriteTimeout: timeout,
+			Timeout: timeout,
 		}
 	}
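The practical effect of collapsing the per-direction deadlines into a single Timeout: every Read or Write slides the whole-connection deadline forward, so only a connection that is silent in both directions for Timeout gets closed. A small echo-server sketch of the behavior, assuming only the patched weed/util package above (the handler itself is illustrative, not part of this commit):

package main

import (
	"log"
	"time"

	"github.com/seaweedfs/seaweedfs/weed/util"
)

func main() {
	// NewListener wraps the net.Listener so every accepted net.Conn is a
	// *util.Conn with Timeout set to 30s.
	ln, err := util.NewListener(":9999", 30*time.Second)
	if err != nil {
		log.Fatal(err)
	}
	for {
		conn, err := ln.Accept()
		if err != nil {
			log.Printf("accept: %v", err)
			continue
		}
		go func() {
			defer conn.Close()
			buf := make([]byte, 4096)
			for {
				// Read calls extendDeadline first, so steady traffic in either
				// direction keeps the connection alive; 30s of total silence
				// trips the deadline and Read returns a timeout error.
				n, err := conn.Read(buf)
				if err != nil {
					return
				}
				if _, err := conn.Write(buf[:n]); err != nil { // Write also extends the deadline
					return
				}
			}
		}()
	}
}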

11
weed/util/network.go

@@ -64,3 +64,14 @@ func JoinHostPort(host string, port int) string {
 	}
 	return net.JoinHostPort(host, portStr)
 }
+
+// GetVolumeServerId returns the volume server ID.
+// If id is provided (non-empty after trimming), use it as the identifier.
+// Otherwise, fall back to ip:port for backward compatibility.
+func GetVolumeServerId(id, ip string, port int) string {
+	volumeServerId := strings.TrimSpace(id)
+	if volumeServerId == "" {
+		volumeServerId = JoinHostPort(ip, port)
+	}
+	return volumeServerId
+}
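Usage is straightforward; a hypothetical snippet (not from this commit) exercising both branches:

package main

import (
	"fmt"

	"github.com/seaweedfs/seaweedfs/weed/util"
)

func main() {
	// An explicit id wins (whitespace is trimmed first).
	fmt.Println(util.GetVolumeServerId(" vol-7 ", "10.0.0.5", 8080)) // vol-7
	// A blank id falls back to ip:port, so existing clusters keep their old identity.
	fmt.Println(util.GetVolumeServerId("", "10.0.0.5", 8080)) // 10.0.0.5:8080
}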