
Merge master into ec-disk-type-support

Merged changes from master, including:
- Comments about the topology collection strategy for EC evacuation
- Kept the multi-disk-type iteration for evacuation
- Function signature now includes both diskType and writer parameters (see the sketch below)
chrislusf committed 3 days ago, commit f1c1e3fb36
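
The last bullet notes that the evacuation helper now receives both a disk type and a writer, and that the loop over disk types was kept so EC shards on every storage tier are moved, not just the default one. The actual code in `weed/shell/command_volume_server_evacuate.go` is not included on this page, so the following is only a rough, self-contained sketch of that shape; every name in it is invented for illustration.

```go
package main

import (
	"fmt"
	"io"
	"os"
)

// diskType stands in for the real disk type identifier (e.g. "hdd", "ssd").
type diskType string

// topology is a stand-in for the collected topology info: EC shards per disk type.
type topology map[diskType][]string

// evacuateEcShards sketches the merged behavior: iterate every disk type and
// pass both the disk type and the progress writer to the per-disk-type step.
func evacuateEcShards(topo topology, volumeServer string, writer io.Writer) error {
	for dt, shards := range topo {
		if err := evacuateEcShardsForDiskType(shards, volumeServer, dt, writer); err != nil {
			return err
		}
	}
	return nil
}

// evacuateEcShardsForDiskType is a placeholder for the real per-disk-type move.
func evacuateEcShardsForDiskType(shards []string, volumeServer string, dt diskType, writer io.Writer) error {
	for _, shard := range shards {
		fmt.Fprintf(writer, "moving %s shard %s off %s\n", dt, shard, volumeServer)
	}
	return nil
}

func main() {
	topo := topology{"hdd": {"ec-1.3"}, "ssd": {"ec-7.0"}}
	_ = evacuateEcShards(topo, "volume-server-1:8080", os.Stdout)
}
```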
  1. .github/workflows/container_release_unified.yml (1)
  2. .github/workflows/helm_ci.yml (74)
  3. .github/workflows/s3tests.yml (10)
  4. .github/workflows/sftp-tests.yml (93)
  5. Makefile (4)
  6. README.md (65)
  7. docker/compose/local-s3tests-compose.yml (2)
  8. docker/compose/test-tarantool-filer.yml (2)
  9. docker/compose/test-ydb-filer.yml (2)
  10. k8s/charts/seaweedfs/Chart.yaml (2)
  11. k8s/charts/seaweedfs/templates/all-in-one/all-in-one-deployment.yaml (116)
  12. k8s/charts/seaweedfs/templates/all-in-one/all-in-one-pvc.yaml (25)
  13. k8s/charts/seaweedfs/templates/all-in-one/all-in-one-service.yml (18)
  14. k8s/charts/seaweedfs/templates/filer/filer-ingress.yaml (13)
  15. k8s/charts/seaweedfs/templates/filer/filer-statefulset.yaml (7)
  16. k8s/charts/seaweedfs/templates/master/master-statefulset.yaml (4)
  17. k8s/charts/seaweedfs/templates/s3/s3-deployment.yaml (7)
  18. k8s/charts/seaweedfs/templates/s3/s3-ingress.yaml (16)
  19. k8s/charts/seaweedfs/templates/shared/post-install-bucket-hook.yaml (71)
  20. k8s/charts/seaweedfs/templates/volume/volume-statefulset.yaml (4)
  21. k8s/charts/seaweedfs/values.yaml (106)
  22. test/foundationdb/docker-compose.arm64.yml (2)
  23. test/foundationdb/docker-compose.yml (2)
  24. test/postgres/docker-compose.yml (1)
  25. test/s3/cors/Makefile (3)
  26. test/s3/retention/Makefile (3)
  27. test/s3/retention/s3_object_lock_headers_test.go (2)
  28. test/s3/retention/s3_retention_test.go (13)
  29. test/s3/tagging/Makefile (3)
  30. test/s3/versioning/Makefile (7)
  31. test/sftp/Makefile (41)
  32. test/sftp/README.md (92)
  33. test/sftp/basic_test.go (652)
  34. test/sftp/framework.go (423)
  35. test/sftp/go.mod (17)
  36. test/sftp/go.sum (64)
  37. test/sftp/testdata/userstore.json (37)
  38. weed/admin/dash/admin_server.go (28)
  39. weed/admin/handlers/file_browser_handlers.go (240)
  40. weed/command/filer.go (4)
  41. weed/command/s3.go (6)
  42. weed/command/server.go (6)
  43. weed/command/volume.go (10)
  44. weed/credential/filer_etc/filer_etc_store.go (4)
  45. weed/filer/empty_folder_cleanup/cleanup_queue.go (207)
  46. weed/filer/empty_folder_cleanup/cleanup_queue_test.go (371)
  47. weed/filer/empty_folder_cleanup/empty_folder_cleaner.go (436)
  48. weed/filer/empty_folder_cleanup/empty_folder_cleaner_test.go (569)
  49. weed/filer/filer.go (8)
  50. weed/filer/filer_notify.go (39)
  51. weed/filer/filer_on_meta_event.go (39)
  52. weed/filer/filer_search.go (13)
  53. weed/filer/reader_at.go (138)
  54. weed/filer/reader_cache.go (86)
  55. weed/filer/reader_cache_test.go (505)
  56. weed/operation/upload_content.go (12)
  57. weed/pb/master.proto (2)
  58. weed/pb/master_pb/master.pb.go (26)
  59. weed/pb/server_address.go (12)
  60. weed/s3api/auth_signature_v4.go (19)
  61. weed/s3api/chunked_reader_v4.go (10)
  62. weed/s3api/chunked_reader_v4_test.go (144)
  63. weed/s3api/filer_multipart.go (5)
  64. weed/s3api/s3api_auth.go (16)
  65. weed/s3api/s3api_bucket_config.go (17)
  66. weed/s3api/s3api_bucket_handlers.go (78)
  67. weed/s3api/s3api_object_handlers.go (42)
  68. weed/s3api/s3api_object_handlers_delete.go (58)
  69. weed/s3api/s3api_object_handlers_list.go (10)
  70. weed/s3api/s3api_object_handlers_put.go (30)
  71. weed/s3api/s3api_object_retention.go (18)
  72. weed/s3api/s3api_server.go (1)
  73. weed/server/common.go (11)
  74. weed/server/master_grpc_server.go (4)
  75. weed/server/master_grpc_server_volume.go (2)
  76. weed/server/volume_grpc_copy.go (12)
  77. weed/server/volume_server.go (27)
  78. weed/server/volume_server_handlers_admin.go (31)
  79. weed/server/volume_server_handlers_read.go (2)
  80. weed/sftpd/sftp_file_writer.go (5)
  81. weed/sftpd/sftp_filer.go (82)
  82. weed/sftpd/sftp_server.go (24)
  83. weed/sftpd/sftp_server_test.go (103)
  84. weed/sftpd/sftp_service.go (4)
  85. weed/sftpd/user/filestore.go (5)
  86. weed/shell/command_volume_check_disk.go (55)
  87. weed/shell/command_volume_server_evacuate.go (45)
  88. weed/storage/needle/needle_parse_upload.go (6)
  89. weed/storage/store.go (12)
  90. weed/storage/store_ec_delete.go (17)
  91. weed/storage/store_load_balancing_test.go (2)
  92. weed/topology/data_node.go (1)
  93. weed/topology/rack.go (69)
  94. weed/topology/topology_test.go (119)
  95. weed/util/fullpath.go (13)
  96. weed/util/http/http_global_client_util.go (108)
  97. weed/util/net_timeout.go (119)
  98. weed/util/network.go (11)

1
.github/workflows/container_release_unified.yml

@@ -223,3 +223,4 @@ jobs:
echo "✓ Successfully copied ${{ matrix.variant }} to Docker Hub"

74
.github/workflows/helm_ci.yml

@@ -44,6 +44,80 @@ jobs:
- name: Run chart-testing (lint)
run: ct lint --target-branch ${{ github.event.repository.default_branch }} --all --validate-maintainers=false --chart-dirs k8s/charts
- name: Verify template rendering
run: |
set -e
CHART_DIR="k8s/charts/seaweedfs"
echo "=== Testing default configuration ==="
helm template test $CHART_DIR > /tmp/default.yaml
echo "✓ Default configuration renders successfully"
echo "=== Testing with S3 enabled ==="
helm template test $CHART_DIR --set s3.enabled=true > /tmp/s3.yaml
grep -q "kind: Deployment" /tmp/s3.yaml && grep -q "seaweedfs-s3" /tmp/s3.yaml
echo "✓ S3 deployment renders correctly"
echo "=== Testing with all-in-one mode ==="
helm template test $CHART_DIR --set allInOne.enabled=true > /tmp/allinone.yaml
grep -q "seaweedfs-all-in-one" /tmp/allinone.yaml
echo "✓ All-in-one deployment renders correctly"
echo "=== Testing with security enabled ==="
helm template test $CHART_DIR --set global.enableSecurity=true > /tmp/security.yaml
grep -q "security-config" /tmp/security.yaml
echo "✓ Security configuration renders correctly"
echo "=== Testing with monitoring enabled ==="
helm template test $CHART_DIR \
--set global.monitoring.enabled=true \
--set global.monitoring.gatewayHost=prometheus \
--set global.monitoring.gatewayPort=9091 > /tmp/monitoring.yaml
echo "✓ Monitoring configuration renders correctly"
echo "=== Testing with PVC storage ==="
helm template test $CHART_DIR \
--set master.data.type=persistentVolumeClaim \
--set master.data.size=10Gi \
--set master.data.storageClass=standard > /tmp/pvc.yaml
grep -q "PersistentVolumeClaim" /tmp/pvc.yaml
echo "✓ PVC configuration renders correctly"
echo "=== Testing with custom replicas ==="
helm template test $CHART_DIR \
--set master.replicas=3 \
--set filer.replicas=2 \
--set volume.replicas=3 > /tmp/replicas.yaml
echo "✓ Custom replicas configuration renders correctly"
echo "=== Testing filer with S3 gateway ==="
helm template test $CHART_DIR \
--set filer.s3.enabled=true \
--set filer.s3.enableAuth=true > /tmp/filer-s3.yaml
echo "✓ Filer S3 gateway renders correctly"
echo "=== Testing SFTP enabled ==="
helm template test $CHART_DIR --set sftp.enabled=true > /tmp/sftp.yaml
grep -q "seaweedfs-sftp" /tmp/sftp.yaml
echo "✓ SFTP deployment renders correctly"
echo "=== Testing ingress configurations ==="
helm template test $CHART_DIR \
--set master.ingress.enabled=true \
--set filer.ingress.enabled=true \
--set s3.enabled=true \
--set s3.ingress.enabled=true > /tmp/ingress.yaml
grep -q "kind: Ingress" /tmp/ingress.yaml
echo "✓ Ingress configurations render correctly"
echo "=== Testing COSI driver ==="
helm template test $CHART_DIR --set cosi.enabled=true > /tmp/cosi.yaml
grep -q "seaweedfs-cosi" /tmp/cosi.yaml
echo "✓ COSI driver renders correctly"
echo ""
echo "✅ All template rendering tests passed!"
- name: Create kind cluster
uses: helm/kind-action@v1.13.0

10
.github/workflows/s3tests.yml

@@ -64,7 +64,7 @@ jobs:
-master.raftHashicorp -master.electionTimeout 1s -master.volumeSizeLimitMB=100 \
-volume.max=100 -volume.preStopSeconds=1 \
-master.port=9333 -volume.port=8080 -filer.port=8888 -s3.port=8000 -metricsPort=9324 \
-s3.allowEmptyFolder=false -s3.allowDeleteBucketNotEmpty=true -s3.config="$GITHUB_WORKSPACE/docker/compose/s3.json" -master.peers=none &
-s3.allowDeleteBucketNotEmpty=true -s3.config="$GITHUB_WORKSPACE/docker/compose/s3.json" -master.peers=none &
pid=$!
# Wait for all SeaweedFS components to be ready
@@ -368,7 +368,7 @@ jobs:
-master.raftHashicorp -master.electionTimeout 1s -master.volumeSizeLimitMB=100 \
-volume.max=100 -volume.preStopSeconds=1 \
-master.port=9334 -volume.port=8081 -filer.port=8889 -s3.port=8001 -metricsPort=9325 \
-s3.allowEmptyFolder=false -s3.allowDeleteBucketNotEmpty=true -s3.config="$GITHUB_WORKSPACE/docker/compose/s3.json" -master.peers=none &
-s3.allowDeleteBucketNotEmpty=true -s3.config="$GITHUB_WORKSPACE/docker/compose/s3.json" -master.peers=none &
pid=$!
# Wait for all SeaweedFS components to be ready
@@ -526,7 +526,7 @@ jobs:
-master.raftHashicorp -master.electionTimeout 1s -master.volumeSizeLimitMB=100 \
-volume.max=100 -volume.preStopSeconds=1 \
-master.port=9335 -volume.port=8082 -filer.port=8890 -s3.port=8002 -metricsPort=9326 \
-s3.allowEmptyFolder=false -s3.allowDeleteBucketNotEmpty=true -s3.config="$GITHUB_WORKSPACE/docker/compose/s3.json" -master.peers=none &
-s3.allowDeleteBucketNotEmpty=true -s3.config="$GITHUB_WORKSPACE/docker/compose/s3.json" -master.peers=none &
pid=$!
# Wait for all SeaweedFS components to be ready
@@ -636,7 +636,7 @@ jobs:
-master.raftHashicorp -master.electionTimeout 1s -master.volumeSizeLimitMB=100 \
-volume.max=100 -volume.preStopSeconds=1 \
-master.port=9336 -volume.port=8083 -filer.port=8891 -s3.port=8003 -metricsPort=9327 \
-s3.allowEmptyFolder=false -s3.allowDeleteBucketNotEmpty=true -s3.config="$GITHUB_WORKSPACE/docker/compose/s3.json" -master.peers=none &
-s3.allowDeleteBucketNotEmpty=true -s3.config="$GITHUB_WORKSPACE/docker/compose/s3.json" -master.peers=none &
pid=$!
# Wait for all SeaweedFS components to be ready
@@ -817,7 +817,7 @@ jobs:
-master.raftHashicorp -master.electionTimeout 1s -master.volumeSizeLimitMB=100 \
-volume.max=100 -volume.preStopSeconds=1 \
-master.port=9337 -volume.port=8085 -filer.port=8892 -s3.port=8004 -metricsPort=9328 \
-s3.allowEmptyFolder=false -s3.allowDeleteBucketNotEmpty=true -s3.config="$GITHUB_WORKSPACE/docker/compose/s3.json" \
-s3.allowDeleteBucketNotEmpty=true -s3.config="$GITHUB_WORKSPACE/docker/compose/s3.json" \
-master.peers=none \
> /tmp/seaweedfs-sql-server.log 2>&1 &
pid=$!

93
.github/workflows/sftp-tests.yml

@@ -0,0 +1,93 @@
name: "SFTP Integration Tests"
on:
push:
branches: [ master, main ]
paths:
- 'weed/sftpd/**'
- 'weed/command/sftp.go'
- 'test/sftp/**'
- '.github/workflows/sftp-tests.yml'
pull_request:
branches: [ master, main ]
paths:
- 'weed/sftpd/**'
- 'weed/command/sftp.go'
- 'test/sftp/**'
- '.github/workflows/sftp-tests.yml'
concurrency:
group: ${{ github.head_ref }}/sftp-tests
cancel-in-progress: true
permissions:
contents: read
env:
GO_VERSION: '1.24'
TEST_TIMEOUT: '15m'
jobs:
sftp-integration:
name: SFTP Integration Testing
runs-on: ubuntu-22.04
timeout-minutes: 20
steps:
- name: Checkout code
uses: actions/checkout@v4
- name: Set up Go ${{ env.GO_VERSION }}
uses: actions/setup-go@v5
with:
go-version: ${{ env.GO_VERSION }}
- name: Install dependencies
run: |
sudo apt-get update
sudo apt-get install -y openssh-client
- name: Build SeaweedFS
run: |
cd weed
go build -o weed .
chmod +x weed
./weed version
- name: Run SFTP Integration Tests
run: |
cd test/sftp
echo "🧪 Running SFTP integration tests..."
echo "============================================"
# Install test dependencies
go mod download
# Run all SFTP tests
go test -v -timeout=${{ env.TEST_TIMEOUT }} ./...
echo "============================================"
echo "✅ SFTP integration tests completed"
- name: Test Summary
if: always()
run: |
echo "## 🔐 SFTP Integration Test Summary" >> $GITHUB_STEP_SUMMARY
echo "" >> $GITHUB_STEP_SUMMARY
echo "### Test Coverage" >> $GITHUB_STEP_SUMMARY
echo "- ✅ **HomeDir Path Translation**: User home directory mapping (fixes #7470)" >> $GITHUB_STEP_SUMMARY
echo "- ✅ **File Operations**: Upload, download, delete" >> $GITHUB_STEP_SUMMARY
echo "- ✅ **Directory Operations**: Create, list, remove" >> $GITHUB_STEP_SUMMARY
echo "- ✅ **Large File Handling**: 1MB+ file support" >> $GITHUB_STEP_SUMMARY
echo "- ✅ **Path Edge Cases**: Unicode, trailing slashes, .. paths" >> $GITHUB_STEP_SUMMARY
echo "- ✅ **Admin Access**: Root user verification" >> $GITHUB_STEP_SUMMARY
echo "" >> $GITHUB_STEP_SUMMARY
echo "### Test Configuration" >> $GITHUB_STEP_SUMMARY
echo "| User | HomeDir | Permissions |" >> $GITHUB_STEP_SUMMARY
echo "|------|---------|-------------|" >> $GITHUB_STEP_SUMMARY
echo "| admin | / | Full access |" >> $GITHUB_STEP_SUMMARY
echo "| testuser | /sftp/testuser | Home directory only |" >> $GITHUB_STEP_SUMMARY
echo "| readonly | /public | Read-only |" >> $GITHUB_STEP_SUMMARY

4
Makefile

@@ -18,12 +18,12 @@ full_install: admin-generate
cd weed; go install -tags "elastic gocdk sqlite ydb tarantool tikv rclone"
server: install
weed -v 0 server -s3 -filer -filer.maxMB=64 -volume.max=0 -master.volumeSizeLimitMB=100 -volume.preStopSeconds=1 -s3.port=8000 -s3.allowEmptyFolder=false -s3.allowDeleteBucketNotEmpty=true -s3.config=./docker/compose/s3.json -metricsPort=9324
weed -v 0 server -s3 -filer -filer.maxMB=64 -volume.max=0 -master.volumeSizeLimitMB=100 -volume.preStopSeconds=1 -s3.port=8000 -s3.allowDeleteBucketNotEmpty=true -s3.config=./docker/compose/s3.json -metricsPort=9324
benchmark: install warp_install
pkill weed || true
pkill warp || true
weed server -debug=$(debug) -s3 -filer -volume.max=0 -master.volumeSizeLimitMB=100 -volume.preStopSeconds=1 -s3.port=8000 -s3.allowEmptyFolder=false -s3.allowDeleteBucketNotEmpty=false -s3.config=./docker/compose/s3.json &
weed server -debug=$(debug) -s3 -filer -volume.max=0 -master.volumeSizeLimitMB=100 -volume.preStopSeconds=1 -s3.port=8000 -s3.allowDeleteBucketNotEmpty=false -s3.config=./docker/compose/s3.json &
warp client &
while ! nc -z localhost 8000 ; do sleep 1 ; done
warp mixed --host=127.0.0.1:8000 --access-key=some_access_key1 --secret-key=some_secret_key1 --autoterm

65
README.md

@@ -592,65 +592,22 @@ Percentage of the requests served within a certain time (ms)
```
make benchmark
warp: Benchmark data written to "warp-mixed-2023-10-16[102354]-l70a.csv.zst"
Mixed operations.
Operation: DELETE, 10%, Concurrency: 20, Ran 4m59s.
* Throughput: 6.19 obj/s
warp: Benchmark data written to "warp-mixed-2025-12-05[194844]-kBpU.csv.zst"
Operation: GET, 45%, Concurrency: 20, Ran 5m0s.
* Throughput: 279.85 MiB/s, 27.99 obj/s
Mixed operations.
Operation: DELETE, 10%, Concurrency: 20, Ran 42s.
* Throughput: 55.13 obj/s
Operation: PUT, 15%, Concurrency: 20, Ran 5m0s.
* Throughput: 89.86 MiB/s, 8.99 obj/s
Operation: GET, 45%, Concurrency: 20, Ran 42s.
* Throughput: 2477.45 MiB/s, 247.75 obj/s
Operation: STAT, 30%, Concurrency: 20, Ran 5m0s.
* Throughput: 18.63 obj/s
Operation: PUT, 15%, Concurrency: 20, Ran 42s.
* Throughput: 825.85 MiB/s, 82.59 obj/s
Cluster Total: 369.74 MiB/s, 61.79 obj/s, 0 errors over 5m0s.
```
Operation: STAT, 30%, Concurrency: 20, Ran 42s.
* Throughput: 165.27 obj/s
To see segmented request statistics, use the --analyze.v parameter.
```
warp analyze --analyze.v warp-mixed-2023-10-16[102354]-l70a.csv.zst
18642 operations loaded... Done!
Mixed operations.
----------------------------------------
Operation: DELETE - total: 1854, 10.0%, Concurrency: 20, Ran 5m0s, starting 2023-10-16 10:23:57.115 +0500 +05
* Throughput: 6.19 obj/s
Requests considered: 1855:
* Avg: 104ms, 50%: 30ms, 90%: 207ms, 99%: 1.355s, Fastest: 1ms, Slowest: 4.613s, StdDev: 320ms
----------------------------------------
Operation: GET - total: 8388, 45.3%, Size: 10485760 bytes. Concurrency: 20, Ran 5m0s, starting 2023-10-16 10:23:57.12 +0500 +05
* Throughput: 279.77 MiB/s, 27.98 obj/s
Requests considered: 8389:
* Avg: 221ms, 50%: 106ms, 90%: 492ms, 99%: 1.739s, Fastest: 8ms, Slowest: 8.633s, StdDev: 383ms
* TTFB: Avg: 81ms, Best: 2ms, 25th: 24ms, Median: 39ms, 75th: 65ms, 90th: 171ms, 99th: 669ms, Worst: 4.783s StdDev: 163ms
* First Access: Avg: 240ms, 50%: 105ms, 90%: 511ms, 99%: 2.08s, Fastest: 12ms, Slowest: 8.633s, StdDev: 480ms
* First Access TTFB: Avg: 88ms, Best: 2ms, 25th: 24ms, Median: 38ms, 75th: 64ms, 90th: 179ms, 99th: 919ms, Worst: 4.783s StdDev: 199ms
* Last Access: Avg: 219ms, 50%: 106ms, 90%: 463ms, 99%: 1.782s, Fastest: 9ms, Slowest: 8.633s, StdDev: 416ms
* Last Access TTFB: Avg: 81ms, Best: 2ms, 25th: 24ms, Median: 39ms, 75th: 65ms, 90th: 161ms, 99th: 657ms, Worst: 4.783s StdDev: 176ms
----------------------------------------
Operation: PUT - total: 2688, 14.5%, Size: 10485760 bytes. Concurrency: 20, Ran 5m0s, starting 2023-10-16 10:23:57.115 +0500 +05
* Throughput: 89.83 MiB/s, 8.98 obj/s
Requests considered: 2689:
* Avg: 1.165s, 50%: 878ms, 90%: 2.015s, 99%: 5.74s, Fastest: 99ms, Slowest: 8.264s, StdDev: 968ms
----------------------------------------
Operation: STAT - total: 5586, 30.2%, Concurrency: 20, Ran 5m0s, starting 2023-10-16 10:23:57.113 +0500 +05
* Throughput: 18.63 obj/s
Requests considered: 5587:
* Avg: 15ms, 50%: 11ms, 90%: 34ms, 99%: 80ms, Fastest: 0s, Slowest: 245ms, StdDev: 17ms
* First Access: Avg: 14ms, 50%: 10ms, 90%: 33ms, 99%: 69ms, Fastest: 0s, Slowest: 203ms, StdDev: 16ms
* Last Access: Avg: 15ms, 50%: 11ms, 90%: 34ms, 99%: 74ms, Fastest: 0s, Slowest: 203ms, StdDev: 17ms
Cluster Total: 369.64 MiB/s, 61.77 obj/s, 0 errors over 5m0s.
Total Errors:0.
Cluster Total: 3302.88 MiB/s, 550.51 obj/s over 43s.
```
[Back to TOC](#table-of-contents)

2
docker/compose/local-s3tests-compose.yml

@@ -24,7 +24,7 @@ services:
- 8888:8888
- 18888:18888
- 8000:8000
command: 'filer -master="master:9333" -s3 -s3.config=/etc/seaweedfs/s3.json -s3.port=8000 -s3.allowEmptyFolder=false -s3.allowDeleteBucketNotEmpty=false'
command: 'filer -master="master:9333" -s3 -s3.config=/etc/seaweedfs/s3.json -s3.port=8000 -s3.allowDeleteBucketNotEmpty=false'
volumes:
- ./s3.json:/etc/seaweedfs/s3.json
depends_on:

2
docker/compose/test-tarantool-filer.yml

@@ -15,7 +15,7 @@ services:
s3:
image: chrislusf/seaweedfs:local
command: "server -ip=127.0.0.1 -filer -master.volumeSizeLimitMB=16 -volume.max=0 -volume -volume.preStopSeconds=1 -s3 -s3.config=/etc/seaweedfs/s3.json -s3.port=8000 -s3.allowEmptyFolder=false -s3.allowDeleteBucketNotEmpty=false"
command: "server -ip=127.0.0.1 -filer -master.volumeSizeLimitMB=16 -volume.max=0 -volume -volume.preStopSeconds=1 -s3 -s3.config=/etc/seaweedfs/s3.json -s3.port=8000 -s3.allowDeleteBucketNotEmpty=false"
volumes:
- ./s3.json:/etc/seaweedfs/s3.json
environment:

2
docker/compose/test-ydb-filer.yml

@@ -20,7 +20,7 @@ services:
- 8888:8888
- 8000:8000
- 18888:18888
command: "server -ip=s3 -filer -master.volumeSizeLimitMB=16 -volume.max=0 -volume -volume.preStopSeconds=1 -s3 -s3.config=/etc/seaweedfs/s3.json -s3.port=8000 -s3.allowEmptyFolder=false -s3.allowDeleteBucketNotEmpty=false"
command: "server -ip=s3 -filer -master.volumeSizeLimitMB=16 -volume.max=0 -volume -volume.preStopSeconds=1 -s3 -s3.config=/etc/seaweedfs/s3.json -s3.port=8000 -s3.allowDeleteBucketNotEmpty=false"
volumes:
- ./s3.json:/etc/seaweedfs/s3.json
environment:

2
k8s/charts/seaweedfs/Chart.yaml

@@ -3,4 +3,4 @@ description: SeaweedFS
name: seaweedfs
appVersion: "4.01"
# Dev note: Trigger a helm chart release by `git tag -a helm-<version>`
version: 4.0.401
version: 4.0.401

116
k8s/charts/seaweedfs/templates/all-in-one/all-in-one-deployment.yaml

@@ -15,9 +15,9 @@ metadata:
{{- toYaml .Values.allInOne.annotations | nindent 4 }}
{{- end }}
spec:
replicas: 1
replicas: {{ .Values.allInOne.replicas | default 1 }}
strategy:
type: Recreate
type: {{ .Values.allInOne.updateStrategy.type | default "Recreate" }}
selector:
matchLabels:
app.kubernetes.io/name: {{ template "seaweedfs.name" . }}
@@ -130,12 +130,23 @@ spec:
value: {{ include "seaweedfs.cluster.masterAddress" . | quote }}
- name: {{ $clusterFilerKey }}
value: {{ include "seaweedfs.cluster.filerAddress" . | quote }}
{{- if .Values.allInOne.secretExtraEnvironmentVars }}
{{- range $key, $value := .Values.allInOne.secretExtraEnvironmentVars }}
- name: {{ $key }}
valueFrom:
{{ toYaml $value | nindent 16 }}
{{- end }}
{{- end }}
command:
- "/bin/sh"
- "-ec"
- |
/usr/bin/weed \
{{- if .Values.allInOne.loggingOverrideLevel }}
-v={{ .Values.allInOne.loggingOverrideLevel }} \
{{- else }}
-v={{ .Values.global.loggingLevel }} \
{{- end }}
server \
-dir=/data \
-master \
@@ -191,6 +202,9 @@ spec:
{{- else if .Values.master.metricsPort }}
-metricsPort={{ .Values.master.metricsPort }} \
{{- end }}
{{- if .Values.allInOne.metricsIp }}
-metricsIp={{ .Values.allInOne.metricsIp }} \
{{- end }}
-filer \
-filer.port={{ .Values.filer.port }} \
{{- if .Values.filer.disableDirListing }}
@@ -219,61 +233,75 @@ spec:
{{- end }}
{{- if .Values.allInOne.s3.enabled }}
-s3 \
-s3.port={{ .Values.s3.port }} \
{{- if .Values.s3.domainName }}
-s3.domainName={{ .Values.s3.domainName }} \
-s3.port={{ .Values.allInOne.s3.port | default .Values.s3.port }} \
{{- $domainName := .Values.allInOne.s3.domainName | default .Values.s3.domainName }}
{{- if $domainName }}
-s3.domainName={{ $domainName }} \
{{- end }}
{{- if .Values.global.enableSecurity }}
{{- if .Values.s3.httpsPort }}
-s3.port.https={{ .Values.s3.httpsPort }} \
{{- $httpsPort := .Values.allInOne.s3.httpsPort | default .Values.s3.httpsPort }}
{{- if $httpsPort }}
-s3.port.https={{ $httpsPort }} \
{{- end }}
-s3.cert.file=/usr/local/share/ca-certificates/client/tls.crt \
-s3.key.file=/usr/local/share/ca-certificates/client/tls.key \
{{- end }}
{{- if eq (typeOf .Values.s3.allowEmptyFolder) "bool" }}
-s3.allowEmptyFolder={{ .Values.s3.allowEmptyFolder }} \
{{- end }}
{{- if .Values.s3.enableAuth }}
{{- if or .Values.allInOne.s3.enableAuth .Values.s3.enableAuth .Values.filer.s3.enableAuth }}
-s3.config=/etc/sw/s3/seaweedfs_s3_config \
{{- end }}
{{- if .Values.s3.auditLogConfig }}
{{- $auditLogConfig := .Values.allInOne.s3.auditLogConfig | default .Values.s3.auditLogConfig }}
{{- if $auditLogConfig }}
-s3.auditLogConfig=/etc/sw/s3/s3_auditLogConfig.json \
{{- end }}
{{- end }}
{{- if .Values.allInOne.sftp.enabled }}
-sftp \
-sftp.port={{ .Values.sftp.port }} \
{{- if .Values.sftp.sshPrivateKey }}
-sftp.sshPrivateKey={{ .Values.sftp.sshPrivateKey }} \
-sftp.port={{ .Values.allInOne.sftp.port | default .Values.sftp.port }} \
{{- $sshPrivateKey := .Values.allInOne.sftp.sshPrivateKey | default .Values.sftp.sshPrivateKey }}
{{- if $sshPrivateKey }}
-sftp.sshPrivateKey={{ $sshPrivateKey }} \
{{- end }}
{{- if .Values.sftp.hostKeysFolder }}
-sftp.hostKeysFolder={{ .Values.sftp.hostKeysFolder }} \
{{- $hostKeysFolder := .Values.allInOne.sftp.hostKeysFolder | default .Values.sftp.hostKeysFolder }}
{{- if $hostKeysFolder }}
-sftp.hostKeysFolder={{ $hostKeysFolder }} \
{{- end }}
{{- if .Values.sftp.authMethods }}
-sftp.authMethods={{ .Values.sftp.authMethods }} \
{{- $authMethods := .Values.allInOne.sftp.authMethods | default .Values.sftp.authMethods }}
{{- if $authMethods }}
-sftp.authMethods={{ $authMethods }} \
{{- end }}
{{- if .Values.sftp.maxAuthTries }}
-sftp.maxAuthTries={{ .Values.sftp.maxAuthTries }} \
{{- $maxAuthTries := .Values.allInOne.sftp.maxAuthTries | default .Values.sftp.maxAuthTries }}
{{- if $maxAuthTries }}
-sftp.maxAuthTries={{ $maxAuthTries }} \
{{- end }}
{{- if .Values.sftp.bannerMessage }}
-sftp.bannerMessage="{{ .Values.sftp.bannerMessage }}" \
{{- $bannerMessage := .Values.allInOne.sftp.bannerMessage | default .Values.sftp.bannerMessage }}
{{- if $bannerMessage }}
-sftp.bannerMessage="{{ $bannerMessage }}" \
{{- end }}
{{- if .Values.sftp.loginGraceTime }}
-sftp.loginGraceTime={{ .Values.sftp.loginGraceTime }} \
{{- $loginGraceTime := .Values.allInOne.sftp.loginGraceTime | default .Values.sftp.loginGraceTime }}
{{- if $loginGraceTime }}
-sftp.loginGraceTime={{ $loginGraceTime }} \
{{- end }}
{{- if .Values.sftp.clientAliveInterval }}
-sftp.clientAliveInterval={{ .Values.sftp.clientAliveInterval }} \
{{- $clientAliveInterval := .Values.allInOne.sftp.clientAliveInterval | default .Values.sftp.clientAliveInterval }}
{{- if $clientAliveInterval }}
-sftp.clientAliveInterval={{ $clientAliveInterval }} \
{{- end }}
{{- if .Values.sftp.clientAliveCountMax }}
-sftp.clientAliveCountMax={{ .Values.sftp.clientAliveCountMax }} \
{{- $clientAliveCountMax := .Values.allInOne.sftp.clientAliveCountMax | default .Values.sftp.clientAliveCountMax }}
{{- if $clientAliveCountMax }}
-sftp.clientAliveCountMax={{ $clientAliveCountMax }} \
{{- end }}
{{- if or .Values.allInOne.sftp.enableAuth .Values.sftp.enableAuth }}
-sftp.userStoreFile=/etc/sw/sftp/seaweedfs_sftp_config \
{{- end }}
{{- end }}
{{- $extraArgsCount := len .Values.allInOne.extraArgs }}
{{- range $i, $arg := .Values.allInOne.extraArgs }}
{{ $arg | quote }}{{ if ne (add1 $i) $extraArgsCount }} \{{ end }}
{{- end }}
volumeMounts:
- name: data
mountPath: /data
{{- if and .Values.allInOne.s3.enabled (or .Values.s3.enableAuth .Values.filer.s3.enableAuth) }}
{{- if and .Values.allInOne.s3.enabled (or .Values.allInOne.s3.enableAuth .Values.s3.enableAuth .Values.filer.s3.enableAuth) }}
- name: config-s3-users
mountPath: /etc/sw/s3
readOnly: true
@@ -282,10 +310,12 @@ spec:
- name: config-ssh
mountPath: /etc/sw/ssh
readOnly: true
{{- if or .Values.allInOne.sftp.enableAuth .Values.sftp.enableAuth }}
- mountPath: /etc/sw/sftp
name: config-users
readOnly: true
{{- end }}
{{- end }}
{{- if .Values.filer.notificationConfig }}
- name: notification-config
mountPath: /etc/seaweedfs/notification.toml
@@ -332,15 +362,16 @@ spec:
- containerPort: {{ .Values.filer.grpcPort }}
name: swfs-fil-grpc
{{- if .Values.allInOne.s3.enabled }}
- containerPort: {{ .Values.s3.port }}
- containerPort: {{ .Values.allInOne.s3.port | default .Values.s3.port }}
name: swfs-s3
{{- if .Values.s3.httpsPort }}
- containerPort: {{ .Values.s3.httpsPort }}
{{- $httpsPort := .Values.allInOne.s3.httpsPort | default .Values.s3.httpsPort }}
{{- if $httpsPort }}
- containerPort: {{ $httpsPort }}
name: swfs-s3-tls
{{- end }}
{{- end }}
{{- if .Values.allInOne.sftp.enabled }}
- containerPort: {{ .Values.sftp.port }}
- containerPort: {{ .Values.allInOne.sftp.port | default .Values.sftp.port }}
name: swfs-sftp
{{- end }}
{{- if .Values.allInOne.metricsPort }}
@@ -352,7 +383,7 @@ spec:
httpGet:
path: {{ .Values.allInOne.readinessProbe.httpGet.path }}
port: {{ .Values.master.port }}
scheme: {{ .Values.allInOne.readinessProbe.scheme }}
scheme: {{ .Values.allInOne.readinessProbe.httpGet.scheme }}
initialDelaySeconds: {{ .Values.allInOne.readinessProbe.initialDelaySeconds }}
periodSeconds: {{ .Values.allInOne.readinessProbe.periodSeconds }}
successThreshold: {{ .Values.allInOne.readinessProbe.successThreshold }}
@@ -364,7 +395,7 @@ spec:
httpGet:
path: {{ .Values.allInOne.livenessProbe.httpGet.path }}
port: {{ .Values.master.port }}
scheme: {{ .Values.allInOne.livenessProbe.scheme }}
scheme: {{ .Values.allInOne.livenessProbe.httpGet.scheme }}
initialDelaySeconds: {{ .Values.allInOne.livenessProbe.initialDelaySeconds }}
periodSeconds: {{ .Values.allInOne.livenessProbe.periodSeconds }}
successThreshold: {{ .Values.allInOne.livenessProbe.successThreshold }}
@@ -389,26 +420,31 @@ spec:
path: {{ .Values.allInOne.data.hostPathPrefix }}/seaweedfs-all-in-one-data/
type: DirectoryOrCreate
{{- else if eq .Values.allInOne.data.type "persistentVolumeClaim" }}
persistentVolumeClaim:
claimName: {{ template "seaweedfs.name" . }}-all-in-one-data
{{- else if eq .Values.allInOne.data.type "existingClaim" }}
persistentVolumeClaim:
claimName: {{ .Values.allInOne.data.claimName }}
{{- else if eq .Values.allInOne.data.type "emptyDir" }}
emptyDir: {}
{{- end }}
{{- if and .Values.allInOne.s3.enabled (or .Values.s3.enableAuth .Values.filer.s3.enableAuth) }}
{{- if and .Values.allInOne.s3.enabled (or .Values.allInOne.s3.enableAuth .Values.s3.enableAuth .Values.filer.s3.enableAuth) }}
- name: config-s3-users
secret:
defaultMode: 420
secretName: {{ default (printf "%s-s3-secret" (include "seaweedfs.name" .)) (or .Values.s3.existingConfigSecret .Values.filer.s3.existingConfigSecret) }}
secretName: {{ default (printf "%s-s3-secret" (include "seaweedfs.name" .)) (or .Values.allInOne.s3.existingConfigSecret .Values.s3.existingConfigSecret .Values.filer.s3.existingConfigSecret) }}
{{- end }}
{{- if .Values.allInOne.sftp.enabled }}
- name: config-ssh
secret:
defaultMode: 420
secretName: {{ default (printf "%s-sftp-ssh-secret" (include "seaweedfs.name" .)) .Values.sftp.existingSshConfigSecret }}
secretName: {{ default (printf "%s-sftp-ssh-secret" (include "seaweedfs.name" .)) (or .Values.allInOne.sftp.existingSshConfigSecret .Values.sftp.existingSshConfigSecret) }}
{{- if or .Values.allInOne.sftp.enableAuth .Values.sftp.enableAuth }}
- name: config-users
secret:
defaultMode: 420
secretName: {{ default (printf "%s-sftp-secret" (include "seaweedfs.name" .)) .Values.sftp.existingConfigSecret }}
secretName: {{ default (printf "%s-sftp-secret" (include "seaweedfs.name" .)) (or .Values.allInOne.sftp.existingConfigSecret .Values.sftp.existingConfigSecret) }}
{{- end }}
{{- end }}
{{- if .Values.filer.notificationConfig }}
- name: notification-config

25
k8s/charts/seaweedfs/templates/all-in-one/all-in-one-pvc.yaml

@@ -1,21 +1,28 @@
{{- if and .Values.allInOne.enabled (eq .Values.allInOne.data.type "persistentVolumeClaim") }}
{{- if .Values.allInOne.enabled }}
{{- if eq .Values.allInOne.data.type "persistentVolumeClaim" }}
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: {{ .Values.allInOne.data.claimName }}
name: {{ template "seaweedfs.name" . }}-all-in-one-data
namespace: {{ .Release.Namespace }}
labels:
app.kubernetes.io/name: {{ template "seaweedfs.name" . }}
helm.sh/chart: {{ .Chart.Name }}-{{ .Chart.Version | replace "+" "_" }}
app.kubernetes.io/managed-by: {{ .Release.Service }}
app.kubernetes.io/instance: {{ .Release.Name }}
app.kubernetes.io/component: seaweedfs-all-in-one
{{- if .Values.allInOne.annotations }}
{{- with .Values.allInOne.data.annotations }}
annotations:
{{- toYaml .Values.allInOne.annotations | nindent 4 }}
{{- toYaml . | nindent 4 }}
{{- end }}
spec:
accessModes:
- ReadWriteOnce
resources:
requests:
storage: {{ .Values.allInOne.data.size }}
{{- toYaml (.Values.allInOne.data.accessModes | default (list "ReadWriteOnce")) | nindent 4 }}
{{- if .Values.allInOne.data.storageClass }}
storageClassName: {{ .Values.allInOne.data.storageClass }}
{{- end }}
{{- end }}
resources:
requests:
storage: {{ .Values.allInOne.data.size | default "10Gi" }}
{{- end }}
{{- end }}

18
k8s/charts/seaweedfs/templates/all-in-one/all-in-one-service.yml

@@ -15,6 +15,7 @@ metadata:
{{- toYaml .Values.allInOne.service.annotations | nindent 4 }}
{{- end }}
spec:
type: {{ .Values.allInOne.service.type | default "ClusterIP" }}
internalTrafficPolicy: {{ .Values.allInOne.service.internalTrafficPolicy | default "Cluster" }}
ports:
# Master ports
@@ -50,13 +51,14 @@ spec:
# S3 ports (if enabled)
{{- if .Values.allInOne.s3.enabled }}
- name: "swfs-s3"
port: {{ if .Values.allInOne.s3.enabled }}{{ .Values.s3.port }}{{ else }}{{ .Values.filer.s3.port }}{{ end }}
targetPort: {{ if .Values.allInOne.s3.enabled }}{{ .Values.s3.port }}{{ else }}{{ .Values.filer.s3.port }}{{ end }}
port: {{ .Values.allInOne.s3.port | default .Values.s3.port }}
targetPort: {{ .Values.allInOne.s3.port | default .Values.s3.port }}
protocol: TCP
{{- if and .Values.allInOne.s3.enabled .Values.s3.httpsPort }}
{{- $httpsPort := .Values.allInOne.s3.httpsPort | default .Values.s3.httpsPort }}
{{- if $httpsPort }}
- name: "swfs-s3-tls"
port: {{ .Values.s3.httpsPort }}
targetPort: {{ .Values.s3.httpsPort }}
port: {{ $httpsPort }}
targetPort: {{ $httpsPort }}
protocol: TCP
{{- end }}
{{- end }}
@@ -64,8 +66,8 @@ spec:
# SFTP ports (if enabled)
{{- if .Values.allInOne.sftp.enabled }}
- name: "swfs-sftp"
port: {{ .Values.sftp.port }}
targetPort: {{ .Values.sftp.port }}
port: {{ .Values.allInOne.sftp.port | default .Values.sftp.port }}
targetPort: {{ .Values.allInOne.sftp.port | default .Values.sftp.port }}
protocol: TCP
{{- end }}
@@ -80,4 +82,4 @@ spec:
selector:
app.kubernetes.io/name: {{ template "seaweedfs.name" . }}
app.kubernetes.io/component: seaweedfs-all-in-one
{{- end }}
{{- end }}

13
k8s/charts/seaweedfs/templates/filer/filer-ingress.yaml

@@ -1,5 +1,8 @@
{{- if .Values.filer.enabled }}
{{- if .Values.filer.ingress.enabled }}
{{- /* Filer ingress works for both normal mode (filer.enabled) and all-in-one mode (allInOne.enabled) */}}
{{- $filerEnabled := or .Values.filer.enabled .Values.allInOne.enabled }}
{{- if and $filerEnabled .Values.filer.ingress.enabled }}
{{- /* Determine service name based on deployment mode */}}
{{- $serviceName := ternary (printf "%s-all-in-one" (include "seaweedfs.name" .)) (printf "%s-filer" (include "seaweedfs.name" .)) .Values.allInOne.enabled }}
{{- if semverCompare ">=1.19-0" .Capabilities.KubeVersion.GitVersion }}
apiVersion: networking.k8s.io/v1
{{- else if semverCompare ">=1.14-0" .Capabilities.KubeVersion.GitVersion }}
@@ -33,16 +36,14 @@ spec:
backend:
{{- if semverCompare ">=1.19-0" .Capabilities.KubeVersion.GitVersion }}
service:
name: {{ template "seaweedfs.name" . }}-filer
name: {{ $serviceName }}
port:
number: {{ .Values.filer.port }}
#name:
{{- else }}
serviceName: {{ template "seaweedfs.name" . }}-filer
serviceName: {{ $serviceName }}
servicePort: {{ .Values.filer.port }}
{{- end }}
{{- if .Values.filer.ingress.host }}
host: {{ .Values.filer.ingress.host }}
{{- end }}
{{- end }}
{{- end }}

7
k8s/charts/seaweedfs/templates/filer/filer-statefulset.yaml

@@ -213,9 +213,6 @@ spec:
-s3.cert.file=/usr/local/share/ca-certificates/client/tls.crt \
-s3.key.file=/usr/local/share/ca-certificates/client/tls.key \
{{- end }}
{{- if eq (typeOf .Values.filer.s3.allowEmptyFolder) "bool" }}
-s3.allowEmptyFolder={{ .Values.filer.s3.allowEmptyFolder }} \
{{- end }}
{{- if .Values.filer.s3.enableAuth }}
-s3.config=/etc/sw/seaweedfs_s3_config \
{{- end }}
@@ -289,7 +286,7 @@ spec:
httpGet:
path: {{ .Values.filer.readinessProbe.httpGet.path }}
port: {{ .Values.filer.port }}
scheme: {{ .Values.filer.readinessProbe.scheme }}
scheme: {{ .Values.filer.readinessProbe.httpGet.scheme }}
initialDelaySeconds: {{ .Values.filer.readinessProbe.initialDelaySeconds }}
periodSeconds: {{ .Values.filer.readinessProbe.periodSeconds }}
successThreshold: {{ .Values.filer.readinessProbe.successThreshold }}
@@ -301,7 +298,7 @@ spec:
httpGet:
path: {{ .Values.filer.livenessProbe.httpGet.path }}
port: {{ .Values.filer.port }}
scheme: {{ .Values.filer.livenessProbe.scheme }}
scheme: {{ .Values.filer.livenessProbe.httpGet.scheme }}
initialDelaySeconds: {{ .Values.filer.livenessProbe.initialDelaySeconds }}
periodSeconds: {{ .Values.filer.livenessProbe.periodSeconds }}
successThreshold: {{ .Values.filer.livenessProbe.successThreshold }}

4
k8s/charts/seaweedfs/templates/master/master-statefulset.yaml

@@ -235,7 +235,7 @@ spec:
httpGet:
path: {{ .Values.master.readinessProbe.httpGet.path }}
port: {{ .Values.master.port }}
scheme: {{ .Values.master.readinessProbe.scheme }}
scheme: {{ .Values.master.readinessProbe.httpGet.scheme }}
initialDelaySeconds: {{ .Values.master.readinessProbe.initialDelaySeconds }}
periodSeconds: {{ .Values.master.readinessProbe.periodSeconds }}
successThreshold: {{ .Values.master.readinessProbe.successThreshold }}
@@ -247,7 +247,7 @@ spec:
httpGet:
path: {{ .Values.master.livenessProbe.httpGet.path }}
port: {{ .Values.master.port }}
scheme: {{ .Values.master.livenessProbe.scheme }}
scheme: {{ .Values.master.livenessProbe.httpGet.scheme }}
initialDelaySeconds: {{ .Values.master.livenessProbe.initialDelaySeconds }}
periodSeconds: {{ .Values.master.livenessProbe.periodSeconds }}
successThreshold: {{ .Values.master.livenessProbe.successThreshold }}

7
k8s/charts/seaweedfs/templates/s3/s3-deployment.yaml

@@ -143,9 +143,6 @@ spec:
{{- if .Values.s3.domainName }}
-domainName={{ .Values.s3.domainName }} \
{{- end }}
{{- if eq (typeOf .Values.s3.allowEmptyFolder) "bool" }}
-allowEmptyFolder={{ .Values.s3.allowEmptyFolder }} \
{{- end }}
{{- if .Values.s3.enableAuth }}
-config=/etc/sw/seaweedfs_s3_config \
{{- end }}
@@ -204,7 +201,7 @@ spec:
httpGet:
path: {{ .Values.s3.readinessProbe.httpGet.path }}
port: {{ .Values.s3.port }}
scheme: {{ .Values.s3.readinessProbe.scheme }}
scheme: {{ .Values.s3.readinessProbe.httpGet.scheme }}
initialDelaySeconds: {{ .Values.s3.readinessProbe.initialDelaySeconds }}
periodSeconds: {{ .Values.s3.readinessProbe.periodSeconds }}
successThreshold: {{ .Values.s3.readinessProbe.successThreshold }}
@@ -216,7 +213,7 @@ spec:
httpGet:
path: {{ .Values.s3.livenessProbe.httpGet.path }}
port: {{ .Values.s3.port }}
scheme: {{ .Values.s3.livenessProbe.scheme }}
scheme: {{ .Values.s3.livenessProbe.httpGet.scheme }}
initialDelaySeconds: {{ .Values.s3.livenessProbe.initialDelaySeconds }}
periodSeconds: {{ .Values.s3.livenessProbe.periodSeconds }}
successThreshold: {{ .Values.s3.livenessProbe.successThreshold }}

16
k8s/charts/seaweedfs/templates/s3/s3-ingress.yaml

@@ -1,4 +1,9 @@
{{- if .Values.s3.ingress.enabled }}
{{- /* S3 ingress works for standalone S3 gateway (s3.enabled), S3 on Filer (filer.s3.enabled), and all-in-one mode (allInOne.s3.enabled) */}}
{{- $s3Enabled := or .Values.s3.enabled (and .Values.filer.s3.enabled (not .Values.allInOne.enabled)) (and .Values.allInOne.enabled .Values.allInOne.s3.enabled) }}
{{- if and $s3Enabled .Values.s3.ingress.enabled }}
{{- /* Determine service name based on deployment mode */}}
{{- $serviceName := ternary (printf "%s-all-in-one" (include "seaweedfs.name" .)) (printf "%s-s3" (include "seaweedfs.name" .)) .Values.allInOne.enabled }}
{{- $s3Port := .Values.allInOne.s3.port | default .Values.s3.port }}
{{- if semverCompare ">=1.19-0" .Capabilities.KubeVersion.GitVersion }}
apiVersion: networking.k8s.io/v1
{{- else if semverCompare ">=1.14-0" .Capabilities.KubeVersion.GitVersion }}
@@ -32,13 +37,12 @@ spec:
backend:
{{- if semverCompare ">=1.19-0" .Capabilities.KubeVersion.GitVersion }}
service:
name: {{ template "seaweedfs.name" . }}-s3
name: {{ $serviceName }}
port:
number: {{ .Values.s3.port }}
#name:
number: {{ $s3Port }}
{{- else }}
serviceName: {{ template "seaweedfs.name" . }}-s3
servicePort: {{ .Values.s3.port }}
serviceName: {{ $serviceName }}
servicePort: {{ $s3Port }}
{{- end }}
{{- if .Values.s3.ingress.host }}
host: {{ .Values.s3.ingress.host | quote }}

71
k8s/charts/seaweedfs/templates/shared/post-install-bucket-hook.yaml

@@ -1,6 +1,32 @@
{{- if .Values.master.enabled }}
{{- if .Values.filer.s3.enabled }}
{{- if .Values.filer.s3.createBuckets }}
{{- /* Support bucket creation for both standalone filer.s3 and allInOne modes */}}
{{- $createBuckets := list }}
{{- $s3Enabled := false }}
{{- $enableAuth := false }}
{{- $existingConfigSecret := "" }}
{{- /* Check allInOne mode first */}}
{{- if .Values.allInOne.enabled }}
{{- if .Values.allInOne.s3.enabled }}
{{- $s3Enabled = true }}
{{- if .Values.allInOne.s3.createBuckets }}
{{- $createBuckets = .Values.allInOne.s3.createBuckets }}
{{- end }}
{{- $enableAuth = or .Values.allInOne.s3.enableAuth .Values.s3.enableAuth .Values.filer.s3.enableAuth }}
{{- $existingConfigSecret = or .Values.allInOne.s3.existingConfigSecret .Values.s3.existingConfigSecret .Values.filer.s3.existingConfigSecret }}
{{- end }}
{{- else if .Values.master.enabled }}
{{- /* Check standalone filer.s3 mode */}}
{{- if .Values.filer.s3.enabled }}
{{- $s3Enabled = true }}
{{- if .Values.filer.s3.createBuckets }}
{{- $createBuckets = .Values.filer.s3.createBuckets }}
{{- end }}
{{- $enableAuth = .Values.filer.s3.enableAuth }}
{{- $existingConfigSecret = .Values.filer.s3.existingConfigSecret }}
{{- end }}
{{- end }}
{{- if and $s3Enabled $createBuckets }}
---
apiVersion: batch/v1
kind: Job
@@ -32,9 +58,9 @@ spec:
- name: WEED_CLUSTER_DEFAULT
value: "sw"
- name: WEED_CLUSTER_SW_MASTER
value: "{{ template "seaweedfs.name" . }}-master.{{ .Release.Namespace }}:{{ .Values.master.port }}"
value: {{ include "seaweedfs.cluster.masterAddress" . | quote }}
- name: WEED_CLUSTER_SW_FILER
value: "{{ template "seaweedfs.name" . }}-filer-client.{{ .Release.Namespace }}:{{ .Values.filer.port }}"
value: {{ include "seaweedfs.cluster.filerAddress" . | quote }}
- name: POD_IP
valueFrom:
fieldRef:
@@ -71,24 +97,29 @@ spec:
echo "Service at $url failed to become ready within 5 minutes"
exit 1
}
{{- if .Values.allInOne.enabled }}
wait_for_service "http://$WEED_CLUSTER_SW_MASTER{{ .Values.allInOne.readinessProbe.httpGet.path }}"
wait_for_service "http://$WEED_CLUSTER_SW_FILER{{ .Values.filer.readinessProbe.httpGet.path }}"
{{- else }}
wait_for_service "http://$WEED_CLUSTER_SW_MASTER{{ .Values.master.readinessProbe.httpGet.path }}"
wait_for_service "http://$WEED_CLUSTER_SW_FILER{{ .Values.filer.readinessProbe.httpGet.path }}"
{{- range $reg, $props := $.Values.filer.s3.createBuckets }}
exec /bin/echo \
"s3.bucket.create --name {{ $props.name }}" |\
{{- end }}
{{- range $createBuckets }}
/bin/echo \
"s3.bucket.create --name {{ .name }}" |\
/usr/bin/weed shell
{{- end }}
{{- range $reg, $props := $.Values.filer.s3.createBuckets }}
{{- if $props.anonymousRead }}
exec /bin/echo \
{{- range $createBuckets }}
{{- if .anonymousRead }}
/bin/echo \
"s3.configure --user anonymous \
--buckets {{ $props.name }} \
--buckets {{ .name }} \
--actions Read \
--apply true" |\
/usr/bin/weed shell
{{- end }}
{{- end }}
{{- if .Values.filer.s3.enableAuth }}
{{- if $enableAuth }}
volumeMounts:
- name: config-users
mountPath: /etc/sw
@@ -106,17 +137,15 @@ spec:
{{- if .Values.filer.containerSecurityContext.enabled }}
securityContext: {{- omit .Values.filer.containerSecurityContext "enabled" | toYaml | nindent 12 }}
{{- end }}
{{- if .Values.filer.s3.enableAuth }}
{{- if $enableAuth }}
volumes:
- name: config-users
secret:
defaultMode: 420
{{- if not (empty .Values.filer.s3.existingConfigSecret) }}
secretName: {{ .Values.filer.s3.existingConfigSecret }}
{{- if $existingConfigSecret }}
secretName: {{ $existingConfigSecret }}
{{- else }}
secretName: seaweedfs-s3-secret
secretName: {{ template "seaweedfs.name" . }}-s3-secret
{{- end }}
{{- end }}{{/** if .Values.filer.s3.enableAuth **/}}
{{- end }}{{/** if .Values.master.enabled **/}}
{{- end }}{{/** if .Values.filer.s3.enabled **/}}
{{- end }}{{/** if .Values.filer.s3.createBuckets **/}}
{{- end }}
{{- end }}

4
k8s/charts/seaweedfs/templates/volume/volume-statefulset.yaml

@@ -251,7 +251,7 @@ spec:
httpGet:
path: {{ $volume.readinessProbe.httpGet.path }}
port: {{ $volume.port }}
scheme: {{ $volume.readinessProbe.scheme }}
scheme: {{ $volume.readinessProbe.httpGet.scheme }}
initialDelaySeconds: {{ $volume.readinessProbe.initialDelaySeconds }}
periodSeconds: {{ $volume.readinessProbe.periodSeconds }}
successThreshold: {{ $volume.readinessProbe.successThreshold }}
@@ -263,7 +263,7 @@ spec:
httpGet:
path: {{ $volume.livenessProbe.httpGet.path }}
port: {{ $volume.port }}
scheme: {{ $volume.livenessProbe.scheme }}
scheme: {{ $volume.livenessProbe.httpGet.scheme }}
initialDelaySeconds: {{ $volume.livenessProbe.initialDelaySeconds }}
periodSeconds: {{ $volume.livenessProbe.periodSeconds }}
successThreshold: {{ $volume.livenessProbe.successThreshold }}

106
k8s/charts/seaweedfs/values.yaml

@@ -856,8 +856,6 @@ filer:
port: 8333
# add additional https port
httpsPort: 0
# allow empty folders
allowEmptyFolder: false
# Suffix of the host name, {bucket}.{domainName}
domainName: ""
# enable user & permission to s3 (need to inject to all services)
@@ -885,8 +883,6 @@ s3:
httpsPort: 0
metricsPort: 9327
loggingOverrideLevel: null
# allow empty folders
allowEmptyFolder: true
# enable user & permission to s3 (need to inject to all services)
enableAuth: false
# set to the name of an existing kubernetes Secret with the s3 json config file
@@ -979,9 +975,9 @@ s3:
extraEnvironmentVars:
# Custom command line arguments to add to the s3 command
# Example to fix connection idle seconds:
extraArgs: ["-idleTimeout=30"]
# extraArgs: []
# Default idleTimeout is 120 seconds. Example to customize:
# extraArgs: ["-idleTimeout=300"]
extraArgs: []
# used to configure livenessProbe on s3 containers
#
@@ -1097,6 +1093,7 @@ allInOne:
enabled: false
imageOverride: null
restartPolicy: Always
replicas: 1 # Number of replicas (note: multiple replicas may require shared storage)
# Core configuration
idleTimeout: 30 # Connection idle seconds
@@ -1108,24 +1105,85 @@ allInOne:
metricsIp: "" # Metrics listen IP. If empty, defaults to bindAddress
loggingOverrideLevel: null # Override logging level
# Service configuration
# Custom command line arguments to add to the server command
# Example to fix IPv6 metrics connectivity issues:
# extraArgs: ["-metricsIp", "0.0.0.0"]
# Example with multiple args:
# extraArgs: ["-customFlag", "value", "-anotherFlag"]
extraArgs: []
# Update strategy configuration
# type: Recreate or RollingUpdate
# For single replica, Recreate is recommended to avoid data conflicts.
# For multiple replicas with RollingUpdate, you MUST use shared storage
# (e.g., data.type: persistentVolumeClaim with ReadWriteMany access mode)
# to avoid data loss or inconsistency between pods.
updateStrategy:
type: Recreate
# S3 gateway configuration
# Note: Most parameters below default to null, which means they inherit from
# the global s3.* settings. Set explicit values here to override for allInOne only.
s3:
enabled: false # Whether to enable S3 gateway
port: null # S3 gateway port (null inherits from s3.port)
httpsPort: null # S3 gateway HTTPS port (null inherits from s3.httpsPort)
domainName: null # Suffix of the host name (null inherits from s3.domainName)
enableAuth: false # Enable user & permission to S3
# Set to the name of an existing kubernetes Secret with the s3 json config file
# should have a secret key called seaweedfs_s3_config with an inline json config
existingConfigSecret: null
auditLogConfig: null # S3 audit log configuration (null inherits from s3.auditLogConfig)
# You may specify buckets to be created during the install process.
# Buckets may be exposed publicly by setting `anonymousRead` to `true`
# createBuckets:
# - name: bucket-a
# anonymousRead: true
# - name: bucket-b
# anonymousRead: false
# SFTP server configuration
# Note: Most parameters below default to null, which means they inherit from
# the global sftp.* settings. Set explicit values here to override for allInOne only.
sftp:
enabled: false # Whether to enable SFTP server
port: null # SFTP port (null inherits from sftp.port)
sshPrivateKey: null # Path to SSH private key (null inherits from sftp.sshPrivateKey)
hostKeysFolder: null # Path to SSH host keys folder (null inherits from sftp.hostKeysFolder)
authMethods: null # Comma-separated auth methods (null inherits from sftp.authMethods)
maxAuthTries: null # Maximum authentication attempts (null inherits from sftp.maxAuthTries)
bannerMessage: null # Banner message (null inherits from sftp.bannerMessage)
loginGraceTime: null # Login grace time (null inherits from sftp.loginGraceTime)
clientAliveInterval: null # Client keep-alive interval (null inherits from sftp.clientAliveInterval)
clientAliveCountMax: null # Maximum missed keep-alive messages (null inherits from sftp.clientAliveCountMax)
enableAuth: false # Enable SFTP authentication
# Set to the name of an existing kubernetes Secret with the sftp json config file
existingConfigSecret: null
# Set to the name of an existing kubernetes Secret with the SSH keys
existingSshConfigSecret: null
# Service settings
service:
annotations: {} # Annotations for the service
type: ClusterIP # Service type (ClusterIP, NodePort, LoadBalancer)
internalTrafficPolicy: Cluster # Internal traffic policy
# Note: For ingress in all-in-one mode, use the standard s3.ingress and
# filer.ingress settings. The templates automatically detect all-in-one mode
# and point to the correct service (seaweedfs-all-in-one instead of
# seaweedfs-s3 or seaweedfs-filer).
# Storage configuration
data:
type: "emptyDir" # Options: "hostPath", "persistentVolumeClaim", "emptyDir"
type: "emptyDir" # Options: "hostPath", "persistentVolumeClaim", "emptyDir", "existingClaim"
hostPathPrefix: /mnt/data # Path prefix for hostPath volumes
claimName: seaweedfs-data-pvc # Name of the PVC to use
size: "" # Size of the PVC
storageClass: "" # Storage class for the PVC
claimName: seaweedfs-data-pvc # Name of the PVC to use (for existingClaim type)
size: null # Size of the PVC (null defaults to 10Gi for persistentVolumeClaim type)
storageClass: null # Storage class for the PVC (null uses cluster default)
# accessModes for the PVC. Default is ["ReadWriteOnce"].
# For multi-replica deployments, use ["ReadWriteMany"] with a compatible storage class.
accessModes: []
annotations: {} # Annotations for the PVC
# Health checks
readinessProbe:
@@ -1133,7 +1191,7 @@ allInOne:
httpGet:
path: /cluster/status
port: 9333
scheme: HTTP
scheme: HTTP
initialDelaySeconds: 10
periodSeconds: 15
successThreshold: 1
@@ -1145,7 +1203,7 @@ allInOne:
httpGet:
path: /cluster/status
port: 9333
scheme: HTTP
scheme: HTTP
initialDelaySeconds: 20
periodSeconds: 30
successThreshold: 1
@@ -1154,6 +1212,18 @@ allInOne:
# Additional resources
extraEnvironmentVars: {} # Additional environment variables
# Secret environment variables (for database credentials, etc.)
# Example:
# secretExtraEnvironmentVars:
# WEED_POSTGRES_USERNAME:
# secretKeyRef:
# name: postgres-credentials
# key: username
# WEED_POSTGRES_PASSWORD:
# secretKeyRef:
# name: postgres-credentials
# key: password
secretExtraEnvironmentVars: {}
extraVolumeMounts: "" # Additional volume mounts
extraVolumes: "" # Additional volumes
initContainers: "" # Init containers
@@ -1173,7 +1243,7 @@ allInOne:
matchLabels:
app.kubernetes.io/name: {{ template "seaweedfs.name" . }}
app.kubernetes.io/instance: {{ .Release.Name }}
app.kubernetes.io/component: master
app.kubernetes.io/component: seaweedfs-all-in-one
topologyKey: kubernetes.io/hostname
# Topology Spread Constraints Settings
@@ -1181,16 +1251,16 @@ allInOne:
# for a PodSpec. By Default no constraints are set.
topologySpreadConstraints: ""
# Toleration Settings for master pods
# Toleration Settings for pods
# This should be a multi-line string matching the Toleration array
# in a PodSpec.
tolerations: ""
# nodeSelector labels for master pod assignment, formatted as a muli-line string.
# nodeSelector labels for pod assignment, formatted as a muli-line string.
# ref: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#nodeselector
nodeSelector: ""
# Used to assign priority to master pods
# Used to assign priority to pods
# ref: https://kubernetes.io/docs/concepts/configuration/pod-priority-preemption/
priorityClassName: ""

2
test/foundationdb/docker-compose.arm64.yml

@@ -147,7 +147,7 @@ services:
- "8888:8888"
- "8333:8333"
- "18888:18888"
command: "server -ip=seaweedfs -filer -master.volumeSizeLimitMB=16 -volume.max=0 -volume -volume.preStopSeconds=1 -s3 -s3.config=/etc/seaweedfs/s3.json -s3.port=8333 -s3.allowEmptyFolder=false -s3.allowDeleteBucketNotEmpty=false"
command: "server -ip=seaweedfs -filer -master.volumeSizeLimitMB=16 -volume.max=0 -volume -volume.preStopSeconds=1 -s3 -s3.config=/etc/seaweedfs/s3.json -s3.port=8333 -s3.allowDeleteBucketNotEmpty=false"
volumes:
- ./s3.json:/etc/seaweedfs/s3.json
- ./filer.toml:/etc/seaweedfs/filer.toml

2
test/foundationdb/docker-compose.yml

@@ -116,7 +116,7 @@ services:
- WEED_FOUNDATIONDB_MAX_RETRY_DELAY
- WEED_MASTER_VOLUME_GROWTH_COPY_1=1
- WEED_MASTER_VOLUME_GROWTH_COPY_OTHER=1
command: "weed server -ip=seaweedfs -filer -master.volumeSizeLimitMB=16 -volume.max=0 -volume -volume.preStopSeconds=1 -s3 -s3.config=/etc/seaweedfs/s3.json -s3.port=8333 -s3.allowEmptyFolder=false -s3.allowDeleteBucketNotEmpty=false"
command: "weed server -ip=seaweedfs -filer -master.volumeSizeLimitMB=16 -volume.max=0 -volume -volume.preStopSeconds=1 -s3 -s3.config=/etc/seaweedfs/s3.json -s3.port=8333 -s3.allowDeleteBucketNotEmpty=false"
configs:
fdb.cluster:

1
test/postgres/docker-compose.yml

@@ -30,7 +30,6 @@ services:
- -s3=true
- -s3.port=8333
- -webdav=false
- -s3.allowEmptyFolder=false
- -mq.broker=true
- -mq.agent=true
- -ip=seaweedfs

3
test/s3/cors/Makefile

@@ -79,12 +79,11 @@ start-server: check-deps
@echo "🔍 DEBUG: Creating volume directory..."
@mkdir -p ./test-volume-data
@echo "🔍 DEBUG: Launching SeaweedFS server in background..."
@echo "🔍 DEBUG: Command: $(WEED_BINARY) server -debug -s3 -s3.port=$(S3_PORT) -s3.allowEmptyFolder=false -s3.allowDeleteBucketNotEmpty=true -s3.config=../../../docker/compose/s3.json -filer -filer.maxMB=64 -master.volumeSizeLimitMB=50 -volume.max=100 -dir=./test-volume-data -volume.preStopSeconds=1 -metricsPort=9324"
@echo "🔍 DEBUG: Command: $(WEED_BINARY) server -debug -s3 -s3.port=$(S3_PORT) -s3.allowDeleteBucketNotEmpty=true -s3.config=../../../docker/compose/s3.json -filer -filer.maxMB=64 -master.volumeSizeLimitMB=50 -volume.max=100 -dir=./test-volume-data -volume.preStopSeconds=1 -metricsPort=9324"
@$(WEED_BINARY) server \
-debug \
-s3 \
-s3.port=$(S3_PORT) \
-s3.allowEmptyFolder=false \
-s3.allowDeleteBucketNotEmpty=true \
-s3.config=../../../docker/compose/s3.json \
-filer \

3
test/s3/retention/Makefile

@@ -81,12 +81,11 @@ start-server: check-deps
@echo "🔍 DEBUG: Creating volume directory..."
@mkdir -p ./test-volume-data
@echo "🔍 DEBUG: Launching SeaweedFS server in background..."
@echo "🔍 DEBUG: Command: $(WEED_BINARY) server -debug -s3 -s3.port=$(S3_PORT) -s3.allowEmptyFolder=false -s3.allowDeleteBucketNotEmpty=true -s3.config=../../../docker/compose/s3.json -filer -filer.maxMB=64 -master.volumeSizeLimitMB=50 -volume.max=100 -dir=./test-volume-data -volume.preStopSeconds=1 -metricsPort=9324"
@echo "🔍 DEBUG: Command: $(WEED_BINARY) server -debug -s3 -s3.port=$(S3_PORT) -s3.allowDeleteBucketNotEmpty=true -s3.config=../../../docker/compose/s3.json -filer -filer.maxMB=64 -master.volumeSizeLimitMB=50 -volume.max=100 -dir=./test-volume-data -volume.preStopSeconds=1 -metricsPort=9324"
@$(WEED_BINARY) server \
-debug \
-s3 \
-s3.port=$(S3_PORT) \
-s3.allowEmptyFolder=false \
-s3.allowDeleteBucketNotEmpty=true \
-s3.config=../../../docker/compose/s3.json \
-filer \

2
test/s3/retention/s3_object_lock_headers_test.go

@@ -236,7 +236,7 @@ func TestObjectLockHeadersNonVersionedBucket(t *testing.T) {
bucketName := getNewBucketName()
// Create regular bucket without object lock/versioning
createBucket(t, client, bucketName)
createBucketWithoutObjectLock(t, client, bucketName)
defer deleteBucket(t, client, bucketName)
key := "test-non-versioned"

13
test/s3/retention/s3_retention_test.go

@@ -69,8 +69,19 @@ func getNewBucketName() string {
return fmt.Sprintf("%s%d", defaultConfig.BucketPrefix, timestamp)
}
// createBucket creates a new bucket for testing
// createBucket creates a new bucket for testing with Object Lock enabled
// Object Lock is required for retention and legal hold functionality per AWS S3 specification
func createBucket(t *testing.T, client *s3.Client, bucketName string) {
_, err := client.CreateBucket(context.TODO(), &s3.CreateBucketInput{
Bucket: aws.String(bucketName),
ObjectLockEnabledForBucket: aws.Bool(true),
})
require.NoError(t, err)
}
// createBucketWithoutObjectLock creates a new bucket without Object Lock enabled
// Use this only for tests that specifically need to verify non-Object-Lock bucket behavior
func createBucketWithoutObjectLock(t *testing.T, client *s3.Client, bucketName string) {
_, err := client.CreateBucket(context.TODO(), &s3.CreateBucketInput{
Bucket: aws.String(bucketName),
})
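
The diff above switches `createBucket` to enable Object Lock at creation time, because S3 retention and legal-hold operations are only accepted on buckets created with Object Lock. As a rough illustration (not part of this change), a retention call with the same aws-sdk-go-v2 client might look like the sketch below; the bucket, key, and retention period are placeholders.

```go
package retention

import (
	"context"
	"strings"
	"time"

	"github.com/aws/aws-sdk-go-v2/aws"
	"github.com/aws/aws-sdk-go-v2/service/s3"
	"github.com/aws/aws-sdk-go-v2/service/s3/types"
)

// applyRetention uploads an object and places a governance-mode retention on it.
// On a bucket created without ObjectLockEnabledForBucket, the PutObjectRetention
// call is rejected, which is why the test helper enables Object Lock up front.
func applyRetention(ctx context.Context, client *s3.Client, bucket, key string) error {
	if _, err := client.PutObject(ctx, &s3.PutObjectInput{
		Bucket: aws.String(bucket),
		Key:    aws.String(key),
		Body:   strings.NewReader("payload"),
	}); err != nil {
		return err
	}
	_, err := client.PutObjectRetention(ctx, &s3.PutObjectRetentionInput{
		Bucket: aws.String(bucket),
		Key:    aws.String(key),
		Retention: &types.ObjectLockRetention{
			Mode:            types.ObjectLockRetentionModeGovernance,
			RetainUntilDate: aws.Time(time.Now().Add(24 * time.Hour)),
		},
	})
	return err
}
```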

3
test/s3/tagging/Makefile

@@ -77,7 +77,7 @@ start-server: check-deps
@echo "🔍 DEBUG: Creating volume directory..."
@mkdir -p ./test-volume-data
@echo "🔍 DEBUG: Launching SeaweedFS server in background..."
@echo "🔍 DEBUG: Command: $(WEED_BINARY) server -filer -filer.maxMB=64 -s3 -ip.bind 0.0.0.0 -dir=./test-volume-data -master.raftHashicorp -master.electionTimeout 1s -master.volumeSizeLimitMB=100 -volume.max=100 -volume.preStopSeconds=1 -master.port=$(MASTER_PORT) -volume.port=$(VOLUME_PORT) -filer.port=$(FILER_PORT) -s3.port=$(S3_PORT) -metricsPort=9329 -s3.allowEmptyFolder=false -s3.allowDeleteBucketNotEmpty=true -s3.config=../../../docker/compose/s3.json -master.peers=none"
@echo "🔍 DEBUG: Command: $(WEED_BINARY) server -filer -filer.maxMB=64 -s3 -ip.bind 0.0.0.0 -dir=./test-volume-data -master.raftHashicorp -master.electionTimeout 1s -master.volumeSizeLimitMB=100 -volume.max=100 -volume.preStopSeconds=1 -master.port=$(MASTER_PORT) -volume.port=$(VOLUME_PORT) -filer.port=$(FILER_PORT) -s3.port=$(S3_PORT) -metricsPort=9329 -s3.allowDeleteBucketNotEmpty=true -s3.config=../../../docker/compose/s3.json -master.peers=none"
@$(WEED_BINARY) server \
-filer \
-filer.maxMB=64 \
@ -94,7 +94,6 @@ start-server: check-deps
-filer.port=$(FILER_PORT) \
-s3.port=$(S3_PORT) \
-metricsPort=9329 \
-s3.allowEmptyFolder=false \
-s3.allowDeleteBucketNotEmpty=true \
-s3.config=../../../docker/compose/s3.json \
-master.peers=none \

7
test/s3/versioning/Makefile

@ -81,12 +81,11 @@ start-server: check-deps
@echo "🔍 DEBUG: Creating volume directory..."
@mkdir -p ./test-volume-data
@echo "🔍 DEBUG: Launching SeaweedFS server in background..."
@echo "🔍 DEBUG: Command: $(WEED_BINARY) server -debug -s3 -s3.port=$(S3_PORT) -s3.allowEmptyFolder=false -s3.allowDeleteBucketNotEmpty=true -s3.config=../../../docker/compose/s3.json -filer -filer.maxMB=64 -master.volumeSizeLimitMB=50 -volume.max=100 -dir=./test-volume-data -volume.preStopSeconds=1 -metricsPort=9324"
@echo "🔍 DEBUG: Command: $(WEED_BINARY) server -debug -s3 -s3.port=$(S3_PORT) -s3.allowDeleteBucketNotEmpty=true -s3.config=../../../docker/compose/s3.json -filer -filer.maxMB=64 -master.volumeSizeLimitMB=50 -volume.max=100 -dir=./test-volume-data -volume.preStopSeconds=1 -metricsPort=9324"
@$(WEED_BINARY) server \
-debug \
-s3 \
-s3.port=$(S3_PORT) \
-s3.allowEmptyFolder=false \
-s3.allowDeleteBucketNotEmpty=true \
-s3.config=../../../docker/compose/s3.json \
-filer \
@ -222,7 +221,7 @@ test-with-server: start-server
test-versioning-with-configs: check-deps
@echo "Testing with different S3 configurations..."
@echo "Testing with empty folder allowed..."
@$(WEED_BINARY) server -s3 -s3.port=$(S3_PORT) -s3.allowEmptyFolder=true -filer -master.volumeSizeLimitMB=100 -volume.max=100 > weed-test-config1.log 2>&1 & echo $$! > weed-config1.pid
@$(WEED_BINARY) server -s3 -s3.port=$(S3_PORT) -filer -master.volumeSizeLimitMB=100 -volume.max=100 > weed-test-config1.log 2>&1 & echo $$! > weed-config1.pid
@sleep 5
@go test -v -timeout=5m -run "TestVersioningBasicWorkflow" . || true
@if [ -f weed-config1.pid ]; then kill -TERM $$(cat weed-config1.pid) 2>/dev/null || true; rm -f weed-config1.pid; fi
@ -268,7 +267,6 @@ debug-server:
-debug \
-s3 \
-s3.port=$(S3_PORT) \
-s3.allowEmptyFolder=false \
-s3.allowDeleteBucketNotEmpty=true \
-s3.config=../../../docker/compose/s3.json \
-filer \
@ -317,7 +315,6 @@ start-server-simple: check-deps
-debug \
-s3 \
-s3.port=$(S3_PORT) \
-s3.allowEmptyFolder=false \
-s3.allowDeleteBucketNotEmpty=true \
-s3.config=../../../docker/compose/s3.json \
-filer \

41
test/sftp/Makefile

@ -0,0 +1,41 @@
.PHONY: all build test test-verbose test-short test-homedir test-debug clean deps tidy
all: build test
# Build the weed binary first
build:
cd ../../weed && go build -o weed .
# Install test dependencies
deps:
go mod download
# Run all tests
test: build deps
go test -timeout 5m ./...
# Run tests with verbose output
test-verbose: build deps
go test -v -timeout 5m ./...
# Run quick tests only (skip integration tests)
test-short: deps
go test -short -v ./...
# Run specific test
test-homedir: build deps
go test -v -timeout 5m -run TestHomeDirPathTranslation ./...
# Run tests with debug output from SeaweedFS
test-debug: build deps
go test -v -timeout 5m ./... 2>&1 | tee test.log
# Clean up test artifacts
clean:
rm -f test.log
go clean -testcache
# Update go.sum
tidy:
go mod tidy

92
test/sftp/README.md

@ -0,0 +1,92 @@
# SeaweedFS SFTP Integration Tests
This directory contains integration tests for the SeaweedFS SFTP server.
## Prerequisites
1. Build the SeaweedFS binary:
```bash
cd ../../weed
go build -o weed .
```
2. Ensure `ssh-keygen` is available (for generating test SSH host keys)
## Running Tests
### Run all tests
```bash
make test
```
### Run tests with verbose output
```bash
make test-verbose
```
### Run a specific test
```bash
go test -v -run TestHomeDirPathTranslation
```
### Skip long-running tests
```bash
go test -short ./...
```
## Test Structure
- `framework.go` - Test framework that starts a SeaweedFS cluster with an SFTP server
- `basic_test.go` - Basic SFTP operation tests, including:
- HomeDir path translation (fixes issue #7470)
- File upload/download
- Directory operations
- Large file handling
- Edge cases
## Test Configuration
Tests use `testdata/userstore.json`, which defines the following test users:
| Username | Password | HomeDir | Permissions |
|----------|----------|---------|-------------|
| admin | adminpassword | / | Full access |
| testuser | testuserpassword | /sftp/testuser | Full access to home |
| readonly | readonlypassword | /public | Read-only |
## Key Tests
### TestHomeDirPathTranslation
Tests the fix for [issue #7470](https://github.com/seaweedfs/seaweedfs/issues/7470), where
users with a non-root HomeDir (e.g., `/sftp/testuser`) could not upload files to `/`
because the path was not translated to their home directory.
The test verifies:
- Uploading to `/` correctly maps to the user's HomeDir
- Creating directories at `/` works
- Listing `/` shows the user's home directory contents
- All path operations respect the HomeDir translation
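For orientation, the heart of that scenario condenses to the sketch below. The helpers (`DefaultTestConfig`, `NewSftpTestFramework`, `ConnectSFTP`) come from `framework.go`; the test name here is illustrative, and the full version lives in `basic_test.go`:
```go
package sftp

import (
	"testing"

	"github.com/stretchr/testify/require"
)

// Illustrative condensation of TestHomeDirPathTranslation: testuser's
// HomeDir is /sftp/testuser, yet every path below is written against "/".
func TestHomeDirSketch(t *testing.T) {
	config := DefaultTestConfig()
	fw := NewSftpTestFramework(t, config)
	require.NoError(t, fw.Setup(config))
	defer fw.Cleanup()

	client, conn, err := fw.ConnectSFTP("testuser", "testuserpassword")
	require.NoError(t, err)
	defer conn.Close()
	defer client.Close()

	// Creating "/hello.txt" should land in /sftp/testuser/hello.txt on the filer.
	f, err := client.Create("/hello.txt")
	require.NoError(t, err)
	_, err = f.Write([]byte("hello"))
	require.NoError(t, err)
	require.NoError(t, f.Close())

	// The file is visible again through the same translated root.
	_, err = client.Stat("/hello.txt")
	require.NoError(t, err)
	require.NoError(t, client.Remove("/hello.txt"))
}
```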
## Debugging
To debug test failures:
1. Enable verbose output:
```bash
go test -v -run TestName
```
2. Keep test artifacts (don't cleanup):
```go
config := DefaultTestConfig()
config.SkipCleanup = true
```
3. Enable debug logging:
```go
config := DefaultTestConfig()
config.EnableDebug = true
```
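Putting those options together, a debugging session typically wires the framework up as sketched below (inside a test function, with the same imports as `basic_test.go`). Note that with `SkipCleanup` set, `Cleanup` still stops the SeaweedFS processes but leaves the temp directory on disk for inspection:
```go
config := DefaultTestConfig()
config.EnableDebug = true // stream SeaweedFS process output to stdout/stderr
config.SkipCleanup = true // keep the temp dir (data dirs, host key, userstore)

fw := NewSftpTestFramework(t, config)
require.NoError(t, fw.Setup(config), "failed to setup test framework")
defer fw.Cleanup()

client, conn, err := fw.ConnectSFTP("testuser", "testuserpassword")
require.NoError(t, err)
defer conn.Close()
defer client.Close()
// ...exercise the failing SFTP operations here...
```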

652
test/sftp/basic_test.go

@ -0,0 +1,652 @@
package sftp
import (
"bytes"
"io"
"path"
"testing"
"github.com/stretchr/testify/require"
)
// TestHomeDirPathTranslation tests that SFTP operations correctly translate
// paths relative to the user's HomeDir.
// This is the fix for https://github.com/seaweedfs/seaweedfs/issues/7470
func TestHomeDirPathTranslation(t *testing.T) {
if testing.Short() {
t.Skip("skipping integration test in short mode")
}
config := DefaultTestConfig()
config.EnableDebug = testing.Verbose()
fw := NewSftpTestFramework(t, config)
err := fw.Setup(config)
require.NoError(t, err, "failed to setup test framework")
defer fw.Cleanup()
// Test with user "testuser" who has HomeDir="/sftp/testuser"
// When they upload to "/", it should actually go to "/sftp/testuser"
sftpClient, sshConn, err := fw.ConnectSFTP("testuser", "testuserpassword")
require.NoError(t, err, "failed to connect as testuser")
defer sshConn.Close()
defer sftpClient.Close()
// Test 1: Upload file to "/" (should map to /sftp/testuser/)
t.Run("UploadToRoot", func(t *testing.T) {
testContent := []byte("Hello from SFTP test!")
filename := "test_upload.txt"
// Create file at "/" from user's perspective
file, err := sftpClient.Create("/" + filename)
require.NoError(t, err, "should be able to create file at /")
_, err = file.Write(testContent)
require.NoError(t, err, "should be able to write to file")
err = file.Close()
require.NoError(t, err, "should be able to close file")
// Verify file exists and has correct content
readFile, err := sftpClient.Open("/" + filename)
require.NoError(t, err, "should be able to open file")
defer readFile.Close()
content, err := io.ReadAll(readFile)
require.NoError(t, err, "should be able to read file")
require.Equal(t, testContent, content, "file content should match")
// Clean up
err = sftpClient.Remove("/" + filename)
require.NoError(t, err, "should be able to remove file")
})
// Test 2: Create directory at "/" (should map to /sftp/testuser/)
t.Run("CreateDirAtRoot", func(t *testing.T) {
dirname := "test_dir"
err := sftpClient.Mkdir("/" + dirname)
require.NoError(t, err, "should be able to create directory at /")
// Verify directory exists
info, err := sftpClient.Stat("/" + dirname)
require.NoError(t, err, "should be able to stat directory")
require.True(t, info.IsDir(), "should be a directory")
// Clean up
err = sftpClient.RemoveDirectory("/" + dirname)
require.NoError(t, err, "should be able to remove directory")
})
// Test 3: List directory at "/" (should list /sftp/testuser/)
t.Run("ListRoot", func(t *testing.T) {
// Create a test file first
testContent := []byte("list test content")
filename := "list_test.txt"
file, err := sftpClient.Create("/" + filename)
require.NoError(t, err)
_, err = file.Write(testContent)
require.NoError(t, err)
file.Close()
// List root directory
files, err := sftpClient.ReadDir("/")
require.NoError(t, err, "should be able to list root directory")
// Should find our test file
found := false
for _, f := range files {
if f.Name() == filename {
found = true
break
}
}
require.True(t, found, "should find test file in listing")
// Clean up
err = sftpClient.Remove("/" + filename)
require.NoError(t, err)
})
// Test 4: Nested directory operations
t.Run("NestedOperations", func(t *testing.T) {
// Create nested directory structure
err := sftpClient.MkdirAll("/nested/dir/structure")
require.NoError(t, err, "should be able to create nested directories")
// Create file in nested directory
testContent := []byte("nested file content")
file, err := sftpClient.Create("/nested/dir/structure/file.txt")
require.NoError(t, err)
_, err = file.Write(testContent)
require.NoError(t, err)
file.Close()
// Verify file exists
readFile, err := sftpClient.Open("/nested/dir/structure/file.txt")
require.NoError(t, err)
content, err := io.ReadAll(readFile)
require.NoError(t, err)
readFile.Close()
require.Equal(t, testContent, content)
// Clean up
err = sftpClient.Remove("/nested/dir/structure/file.txt")
require.NoError(t, err)
err = sftpClient.RemoveDirectory("/nested/dir/structure")
require.NoError(t, err)
err = sftpClient.RemoveDirectory("/nested/dir")
require.NoError(t, err)
err = sftpClient.RemoveDirectory("/nested")
require.NoError(t, err)
})
// Test 5: Rename operation
t.Run("RenameFile", func(t *testing.T) {
testContent := []byte("rename test content")
file, err := sftpClient.Create("/original.txt")
require.NoError(t, err)
_, err = file.Write(testContent)
require.NoError(t, err)
file.Close()
// Rename file
err = sftpClient.Rename("/original.txt", "/renamed.txt")
require.NoError(t, err, "should be able to rename file")
// Verify old file doesn't exist
_, err = sftpClient.Stat("/original.txt")
require.Error(t, err, "original file should not exist")
// Verify new file exists with correct content
readFile, err := sftpClient.Open("/renamed.txt")
require.NoError(t, err, "renamed file should exist")
content, err := io.ReadAll(readFile)
require.NoError(t, err)
readFile.Close()
require.Equal(t, testContent, content)
// Clean up
err = sftpClient.Remove("/renamed.txt")
require.NoError(t, err)
})
}
// TestAdminRootAccess tests that admin user with HomeDir="/" can access everything
func TestAdminRootAccess(t *testing.T) {
if testing.Short() {
t.Skip("skipping integration test in short mode")
}
config := DefaultTestConfig()
config.EnableDebug = testing.Verbose()
fw := NewSftpTestFramework(t, config)
err := fw.Setup(config)
require.NoError(t, err, "failed to setup test framework")
defer fw.Cleanup()
// Connect as admin with HomeDir="/"
sftpClient, sshConn, err := fw.ConnectSFTP("admin", "adminpassword")
require.NoError(t, err, "failed to connect as admin")
defer sshConn.Close()
defer sftpClient.Close()
// Admin should be able to create directories anywhere
t.Run("CreateAnyDirectory", func(t *testing.T) {
// Create the user's home directory structure
err := sftpClient.MkdirAll("/sftp/testuser")
require.NoError(t, err, "admin should be able to create any directory")
// Create file in that directory
testContent := []byte("admin created this")
file, err := sftpClient.Create("/sftp/testuser/admin_file.txt")
require.NoError(t, err)
_, err = file.Write(testContent)
require.NoError(t, err)
file.Close()
// Verify file exists
info, err := sftpClient.Stat("/sftp/testuser/admin_file.txt")
require.NoError(t, err)
require.False(t, info.IsDir())
// Clean up
err = sftpClient.Remove("/sftp/testuser/admin_file.txt")
require.NoError(t, err)
})
}
// TestLargeFileUpload tests uploading larger files through SFTP
func TestLargeFileUpload(t *testing.T) {
if testing.Short() {
t.Skip("skipping integration test in short mode")
}
config := DefaultTestConfig()
config.EnableDebug = testing.Verbose()
fw := NewSftpTestFramework(t, config)
err := fw.Setup(config)
require.NoError(t, err, "failed to setup test framework")
defer fw.Cleanup()
sftpClient, sshConn, err := fw.ConnectSFTP("testuser", "testuserpassword")
require.NoError(t, err, "failed to connect as testuser")
defer sshConn.Close()
defer sftpClient.Close()
// Create a 1MB file
t.Run("Upload1MB", func(t *testing.T) {
size := 1024 * 1024 // 1MB
testData := bytes.Repeat([]byte("A"), size)
file, err := sftpClient.Create("/large_file.bin")
require.NoError(t, err)
n, err := file.Write(testData)
require.NoError(t, err)
require.Equal(t, size, n)
file.Close()
// Verify file size
info, err := sftpClient.Stat("/large_file.bin")
require.NoError(t, err)
require.Equal(t, int64(size), info.Size())
// Verify content
readFile, err := sftpClient.Open("/large_file.bin")
require.NoError(t, err)
content, err := io.ReadAll(readFile)
require.NoError(t, err)
readFile.Close()
require.Equal(t, testData, content)
// Clean up
err = sftpClient.Remove("/large_file.bin")
require.NoError(t, err)
})
}
// TestStatOperations tests Stat and Lstat operations
func TestStatOperations(t *testing.T) {
if testing.Short() {
t.Skip("skipping integration test in short mode")
}
config := DefaultTestConfig()
config.EnableDebug = testing.Verbose()
fw := NewSftpTestFramework(t, config)
err := fw.Setup(config)
require.NoError(t, err, "failed to setup test framework")
defer fw.Cleanup()
sftpClient, sshConn, err := fw.ConnectSFTP("testuser", "testuserpassword")
require.NoError(t, err, "failed to connect as testuser")
defer sshConn.Close()
defer sftpClient.Close()
// Create a test file
testContent := []byte("stat test content")
file, err := sftpClient.Create("/stat_test.txt")
require.NoError(t, err)
_, err = file.Write(testContent)
require.NoError(t, err)
file.Close()
t.Run("StatFile", func(t *testing.T) {
info, err := sftpClient.Stat("/stat_test.txt")
require.NoError(t, err)
require.Equal(t, "stat_test.txt", info.Name())
require.Equal(t, int64(len(testContent)), info.Size())
require.False(t, info.IsDir())
})
t.Run("StatDirectory", func(t *testing.T) {
err := sftpClient.Mkdir("/stat_dir")
require.NoError(t, err)
info, err := sftpClient.Stat("/stat_dir")
require.NoError(t, err)
require.Equal(t, "stat_dir", info.Name())
require.True(t, info.IsDir())
// Clean up
err = sftpClient.RemoveDirectory("/stat_dir")
require.NoError(t, err)
})
t.Run("StatRoot", func(t *testing.T) {
// Should be able to stat "/" which maps to user's home directory
info, err := sftpClient.Stat("/")
require.NoError(t, err, "should be able to stat root (home) directory")
require.True(t, info.IsDir(), "root should be a directory")
})
// Clean up
err = sftpClient.Remove("/stat_test.txt")
require.NoError(t, err)
}
// TestWalk tests walking directory trees
func TestWalk(t *testing.T) {
if testing.Short() {
t.Skip("skipping integration test in short mode")
}
config := DefaultTestConfig()
config.EnableDebug = testing.Verbose()
fw := NewSftpTestFramework(t, config)
err := fw.Setup(config)
require.NoError(t, err, "failed to setup test framework")
defer fw.Cleanup()
sftpClient, sshConn, err := fw.ConnectSFTP("testuser", "testuserpassword")
require.NoError(t, err, "failed to connect as testuser")
defer sshConn.Close()
defer sftpClient.Close()
// Create directory structure
err = sftpClient.MkdirAll("/walk/a/b")
require.NoError(t, err)
err = sftpClient.MkdirAll("/walk/c")
require.NoError(t, err)
// Create files
for _, p := range []string{"/walk/file1.txt", "/walk/a/file2.txt", "/walk/a/b/file3.txt", "/walk/c/file4.txt"} {
file, err := sftpClient.Create(p)
require.NoError(t, err)
file.Write([]byte("test"))
file.Close()
}
t.Run("WalkEntireTree", func(t *testing.T) {
var paths []string
walker := sftpClient.Walk("/walk")
for walker.Step() {
if walker.Err() != nil {
continue
}
paths = append(paths, walker.Path())
}
// Should find all directories and files
require.Contains(t, paths, "/walk")
require.Contains(t, paths, "/walk/a")
require.Contains(t, paths, "/walk/a/b")
require.Contains(t, paths, "/walk/c")
})
// Clean up
for _, p := range []string{"/walk/file1.txt", "/walk/a/file2.txt", "/walk/a/b/file3.txt", "/walk/c/file4.txt"} {
require.NoError(t, sftpClient.Remove(p))
}
for _, p := range []string{"/walk/a/b", "/walk/a", "/walk/c", "/walk"} {
require.NoError(t, sftpClient.RemoveDirectory(p))
}
}
// TestCurrentWorkingDirectory tests that Getwd and Chdir work correctly
func TestCurrentWorkingDirectory(t *testing.T) {
if testing.Short() {
t.Skip("skipping integration test in short mode")
}
config := DefaultTestConfig()
config.EnableDebug = testing.Verbose()
fw := NewSftpTestFramework(t, config)
err := fw.Setup(config)
require.NoError(t, err, "failed to setup test framework")
defer fw.Cleanup()
sftpClient, sshConn, err := fw.ConnectSFTP("testuser", "testuserpassword")
require.NoError(t, err, "failed to connect as testuser")
defer sshConn.Close()
defer sftpClient.Close()
// Create test directory
err = sftpClient.Mkdir("/cwd_test")
require.NoError(t, err)
t.Run("GetCurrentDir", func(t *testing.T) {
cwd, err := sftpClient.Getwd()
require.NoError(t, err)
// The initial working directory should be the user's home directory
// which from the user's perspective is "/"
require.Equal(t, "/", cwd, "initial working directory should be the virtual root")
})
t.Run("ChangeAndCreate", func(t *testing.T) {
// Create file in subdirectory using relative path after chdir
// Note: pkg/sftp doesn't support Chdir, so we test using absolute paths
file, err := sftpClient.Create("/cwd_test/relative_file.txt")
require.NoError(t, err)
file.Write([]byte("test"))
file.Close()
// Verify using absolute path
_, err = sftpClient.Stat("/cwd_test/relative_file.txt")
require.NoError(t, err)
// Clean up
sftpClient.Remove("/cwd_test/relative_file.txt")
})
// Clean up
err = sftpClient.RemoveDirectory("/cwd_test")
require.NoError(t, err)
}
// TestPathEdgeCases tests various edge cases in path handling
func TestPathEdgeCases(t *testing.T) {
if testing.Short() {
t.Skip("skipping integration test in short mode")
}
config := DefaultTestConfig()
config.EnableDebug = testing.Verbose()
fw := NewSftpTestFramework(t, config)
err := fw.Setup(config)
require.NoError(t, err, "failed to setup test framework")
defer fw.Cleanup()
sftpClient, sshConn, err := fw.ConnectSFTP("testuser", "testuserpassword")
require.NoError(t, err, "failed to connect as testuser")
defer sshConn.Close()
defer sftpClient.Close()
t.Run("PathWithDotDot", func(t *testing.T) {
// Create directory structure
err := sftpClient.MkdirAll("/edge/subdir")
require.NoError(t, err)
// Create file using path with ..
file, err := sftpClient.Create("/edge/subdir/../file.txt")
require.NoError(t, err)
file.Write([]byte("test"))
file.Close()
// Verify file was created in /edge
_, err = sftpClient.Stat("/edge/file.txt")
require.NoError(t, err, "file should be created in parent directory")
// Clean up
sftpClient.Remove("/edge/file.txt")
sftpClient.RemoveDirectory("/edge/subdir")
sftpClient.RemoveDirectory("/edge")
})
t.Run("PathWithTrailingSlash", func(t *testing.T) {
err := sftpClient.Mkdir("/trailing")
require.NoError(t, err)
// Stat with trailing slash
info, err := sftpClient.Stat("/trailing/")
require.NoError(t, err)
require.True(t, info.IsDir())
// Clean up
sftpClient.RemoveDirectory("/trailing")
})
t.Run("CreateFileAtRootPath", func(t *testing.T) {
// This is the exact scenario from issue #7470
// User with HomeDir="/sftp/testuser" uploads to "/"
file, err := sftpClient.Create("/issue7470.txt")
require.NoError(t, err, "should be able to create file at / (issue #7470)")
file.Write([]byte("This tests the fix for issue #7470"))
file.Close()
// Verify
_, err = sftpClient.Stat("/issue7470.txt")
require.NoError(t, err)
// Clean up
sftpClient.Remove("/issue7470.txt")
})
// Security test: path traversal attacks should be blocked
t.Run("PathTraversalPrevention", func(t *testing.T) {
// User's HomeDir is "/sftp/testuser"
// Attempting to escape via "../.." should NOT create files outside home directory
// First, create a valid file to ensure we can write
validFile, err := sftpClient.Create("/valid.txt")
require.NoError(t, err)
validFile.Write([]byte("valid"))
validFile.Close()
// Try various path traversal attempts
// These should either:
// 1. Be blocked (error returned), OR
// 2. Be safely resolved to stay within home directory
traversalPaths := []string{
"/../escape.txt",
"/../../escape.txt",
"/../../../escape.txt",
"/subdir/../../escape.txt",
"/./../../escape.txt",
}
for _, traversalPath := range traversalPaths {
t.Run(traversalPath, func(t *testing.T) {
// Note: The pkg/sftp client sanitizes paths locally before sending them to the server.
// So "/../escape.txt" becomes "/escape.txt" on the wire.
// Therefore, we cannot trigger the server-side path traversal block with this client.
// Instead, we verify that the file is created successfully within the jail (contained).
// The server-side protection logic is verified in unit tests (sftpd/sftp_server_test.go).
file, err := sftpClient.Create(traversalPath)
require.NoError(t, err, "creation should succeed because client sanitizes path")
file.Close()
// Clean up
err = sftpClient.Remove(traversalPath)
require.NoError(t, err)
})
}
// Clean up
sftpClient.Remove("/valid.txt")
})
}
// TestFileContent tests reading and writing file content correctly
func TestFileContent(t *testing.T) {
if testing.Short() {
t.Skip("skipping integration test in short mode")
}
config := DefaultTestConfig()
config.EnableDebug = testing.Verbose()
fw := NewSftpTestFramework(t, config)
err := fw.Setup(config)
require.NoError(t, err, "failed to setup test framework")
defer fw.Cleanup()
sftpClient, sshConn, err := fw.ConnectSFTP("testuser", "testuserpassword")
require.NoError(t, err, "failed to connect as testuser")
defer sshConn.Close()
defer sftpClient.Close()
t.Run("BinaryContent", func(t *testing.T) {
// Create binary data with all byte values
data := make([]byte, 256)
for i := 0; i < 256; i++ {
data[i] = byte(i)
}
file, err := sftpClient.Create("/binary.bin")
require.NoError(t, err)
n, err := file.Write(data)
require.NoError(t, err)
require.Equal(t, 256, n)
file.Close()
// Read back
readFile, err := sftpClient.Open("/binary.bin")
require.NoError(t, err)
content, err := io.ReadAll(readFile)
require.NoError(t, err)
readFile.Close()
require.Equal(t, data, content, "binary content should match")
// Clean up
sftpClient.Remove("/binary.bin")
})
t.Run("EmptyFile", func(t *testing.T) {
file, err := sftpClient.Create("/empty.txt")
require.NoError(t, err)
file.Close()
info, err := sftpClient.Stat("/empty.txt")
require.NoError(t, err)
require.Equal(t, int64(0), info.Size())
// Clean up
sftpClient.Remove("/empty.txt")
})
t.Run("UnicodeFilename", func(t *testing.T) {
filename := "/文件名.txt"
content := []byte("Unicode content: 你好世界")
file, err := sftpClient.Create(filename)
require.NoError(t, err)
file.Write(content)
file.Close()
// Read back
readFile, err := sftpClient.Open(filename)
require.NoError(t, err)
readContent, err := io.ReadAll(readFile)
require.NoError(t, err)
readFile.Close()
require.Equal(t, content, readContent)
// Verify in listing
files, err := sftpClient.ReadDir("/")
require.NoError(t, err)
found := false
for _, f := range files {
if f.Name() == path.Base(filename) {
found = true
break
}
}
require.True(t, found, "should find unicode filename in listing")
// Clean up
sftpClient.Remove(filename)
})
}

423
test/sftp/framework.go

@ -0,0 +1,423 @@
package sftp
import (
"fmt"
"net"
"os"
"os/exec"
"path/filepath"
"runtime"
"syscall"
"testing"
"time"
"github.com/pkg/sftp"
"github.com/stretchr/testify/require"
"golang.org/x/crypto/ssh"
)
// SftpTestFramework provides utilities for SFTP integration testing
type SftpTestFramework struct {
t *testing.T
tempDir string
dataDir string
masterProcess *os.Process
volumeProcess *os.Process
filerProcess *os.Process
sftpProcess *os.Process
masterAddr string
volumeAddr string
filerAddr string
sftpAddr string
weedBinary string
userStoreFile string
hostKeyFile string
isSetup bool
skipCleanup bool
}
// TestConfig holds configuration for SFTP tests
type TestConfig struct {
NumVolumes int
EnableDebug bool
SkipCleanup bool // for debugging failed tests
UserStoreFile string
}
// DefaultTestConfig returns a default configuration for SFTP tests
func DefaultTestConfig() *TestConfig {
return &TestConfig{
NumVolumes: 3,
EnableDebug: false,
SkipCleanup: false,
UserStoreFile: "",
}
}
// NewSftpTestFramework creates a new SFTP testing framework
func NewSftpTestFramework(t *testing.T, config *TestConfig) *SftpTestFramework {
if config == nil {
config = DefaultTestConfig()
}
tempDir, err := os.MkdirTemp("", "seaweedfs_sftp_test_")
require.NoError(t, err)
// Generate SSH host key for SFTP server
hostKeyFile := filepath.Join(tempDir, "ssh_host_key")
cmd := exec.Command("ssh-keygen", "-t", "ed25519", "-f", hostKeyFile, "-N", "")
err = cmd.Run()
require.NoError(t, err, "failed to generate SSH host key")
// Use provided userstore or copy the test one
userStoreFile := config.UserStoreFile
if userStoreFile == "" {
// Copy test userstore to temp dir
userStoreFile = filepath.Join(tempDir, "userstore.json")
testDataPath := findTestDataPath()
input, err := os.ReadFile(filepath.Join(testDataPath, "userstore.json"))
require.NoError(t, err, "failed to read test userstore.json")
err = os.WriteFile(userStoreFile, input, 0644)
require.NoError(t, err, "failed to write userstore.json")
}
return &SftpTestFramework{
t: t,
tempDir: tempDir,
dataDir: filepath.Join(tempDir, "data"),
masterAddr: "127.0.0.1:19333",
volumeAddr: "127.0.0.1:18080",
filerAddr: "127.0.0.1:18888",
sftpAddr: "127.0.0.1:12022",
weedBinary: findWeedBinary(),
userStoreFile: userStoreFile,
hostKeyFile: hostKeyFile,
isSetup: false,
}
}
// Setup starts SeaweedFS cluster with SFTP server
func (f *SftpTestFramework) Setup(config *TestConfig) error {
if f.isSetup {
return fmt.Errorf("framework already setup")
}
// Create all data directories
dirs := []string{
f.dataDir,
filepath.Join(f.dataDir, "master"),
filepath.Join(f.dataDir, "volume"),
}
for _, dir := range dirs {
if err := os.MkdirAll(dir, 0755); err != nil {
return fmt.Errorf("failed to create directory %s: %v", dir, err)
}
}
// Start master
if err := f.startMaster(config); err != nil {
return fmt.Errorf("failed to start master: %v", err)
}
// Wait for master to be ready
if err := f.waitForService(f.masterAddr, 30*time.Second); err != nil {
return fmt.Errorf("master not ready: %v", err)
}
// Start volume server
if err := f.startVolumeServer(config); err != nil {
return fmt.Errorf("failed to start volume server: %v", err)
}
// Wait for volume server to be ready
if err := f.waitForService(f.volumeAddr, 30*time.Second); err != nil {
return fmt.Errorf("volume server not ready: %v", err)
}
// Start filer
if err := f.startFiler(config); err != nil {
return fmt.Errorf("failed to start filer: %v", err)
}
// Wait for filer to be ready
if err := f.waitForService(f.filerAddr, 30*time.Second); err != nil {
return fmt.Errorf("filer not ready: %v", err)
}
// Start SFTP server
if err := f.startSftpServer(config); err != nil {
return fmt.Errorf("failed to start SFTP server: %v", err)
}
// Wait for SFTP server to be ready
if err := f.waitForService(f.sftpAddr, 30*time.Second); err != nil {
return fmt.Errorf("SFTP server not ready: %v", err)
}
// Additional wait for all services to stabilize (gRPC endpoints)
time.Sleep(500 * time.Millisecond)
f.skipCleanup = config.SkipCleanup
f.isSetup = true
return nil
}
// Cleanup stops all processes and removes temporary files
func (f *SftpTestFramework) Cleanup() {
// Stop processes in reverse order
processes := []*os.Process{f.sftpProcess, f.filerProcess, f.volumeProcess, f.masterProcess}
for _, proc := range processes {
if proc != nil {
proc.Signal(syscall.SIGTERM)
proc.Wait()
}
}
// Remove temp directory
if !f.skipCleanup {
os.RemoveAll(f.tempDir)
}
}
// GetSftpAddr returns the SFTP server address
func (f *SftpTestFramework) GetSftpAddr() string {
return f.sftpAddr
}
// GetFilerAddr returns the filer address
func (f *SftpTestFramework) GetFilerAddr() string {
return f.filerAddr
}
// ConnectSFTP creates an SFTP client connection with the given credentials
func (f *SftpTestFramework) ConnectSFTP(username, password string) (*sftp.Client, *ssh.Client, error) {
// Load the known host public key for verification
hostKeyCallback, err := f.getHostKeyCallback()
if err != nil {
return nil, nil, fmt.Errorf("failed to get host key callback: %v", err)
}
config := &ssh.ClientConfig{
User: username,
Auth: []ssh.AuthMethod{
ssh.Password(password),
},
HostKeyCallback: hostKeyCallback,
Timeout: 5 * time.Second,
}
sshConn, err := ssh.Dial("tcp", f.sftpAddr, config)
if err != nil {
return nil, nil, fmt.Errorf("failed to connect SSH: %v", err)
}
sftpClient, err := sftp.NewClient(sshConn)
if err != nil {
sshConn.Close()
return nil, nil, fmt.Errorf("failed to create SFTP client: %v", err)
}
return sftpClient, sshConn, nil
}
// getHostKeyCallback returns a callback that verifies the server's host key
// matches the known test server key we generated
func (f *SftpTestFramework) getHostKeyCallback() (ssh.HostKeyCallback, error) {
// Read the public key file generated alongside the private key
pubKeyFile := f.hostKeyFile + ".pub"
pubKeyBytes, err := os.ReadFile(pubKeyFile)
if err != nil {
return nil, fmt.Errorf("failed to read host public key: %v", err)
}
// Parse the public key
pubKey, _, _, _, err := ssh.ParseAuthorizedKey(pubKeyBytes)
if err != nil {
return nil, fmt.Errorf("failed to parse host public key: %v", err)
}
// Return a callback that verifies the server key matches our known key
return ssh.FixedHostKey(pubKey), nil
}
// startMaster starts the SeaweedFS master server
func (f *SftpTestFramework) startMaster(config *TestConfig) error {
args := []string{
"master",
"-ip=127.0.0.1",
"-port=19333",
"-mdir=" + filepath.Join(f.dataDir, "master"),
"-raftBootstrap",
"-peers=none",
}
cmd := exec.Command(f.weedBinary, args...)
cmd.Dir = f.tempDir
if config.EnableDebug {
cmd.Stdout = os.Stdout
cmd.Stderr = os.Stderr
}
if err := cmd.Start(); err != nil {
return err
}
f.masterProcess = cmd.Process
return nil
}
// startVolumeServer starts SeaweedFS volume server
func (f *SftpTestFramework) startVolumeServer(config *TestConfig) error {
args := []string{
"volume",
"-mserver=" + f.masterAddr,
"-ip=127.0.0.1",
"-port=18080",
"-dir=" + filepath.Join(f.dataDir, "volume"),
fmt.Sprintf("-max=%d", config.NumVolumes),
}
cmd := exec.Command(f.weedBinary, args...)
cmd.Dir = f.tempDir
if config.EnableDebug {
cmd.Stdout = os.Stdout
cmd.Stderr = os.Stderr
}
if err := cmd.Start(); err != nil {
return err
}
f.volumeProcess = cmd.Process
return nil
}
// startFiler starts the SeaweedFS filer server
func (f *SftpTestFramework) startFiler(config *TestConfig) error {
args := []string{
"filer",
"-master=" + f.masterAddr,
"-ip=127.0.0.1",
"-port=18888",
}
cmd := exec.Command(f.weedBinary, args...)
cmd.Dir = f.tempDir
if config.EnableDebug {
cmd.Stdout = os.Stdout
cmd.Stderr = os.Stderr
}
if err := cmd.Start(); err != nil {
return err
}
f.filerProcess = cmd.Process
return nil
}
// startSftpServer starts the SeaweedFS SFTP server
func (f *SftpTestFramework) startSftpServer(config *TestConfig) error {
args := []string{
"sftp",
"-filer=" + f.filerAddr,
"-ip.bind=127.0.0.1",
"-port=12022",
"-sshPrivateKey=" + f.hostKeyFile,
"-userStoreFile=" + f.userStoreFile,
}
cmd := exec.Command(f.weedBinary, args...)
cmd.Dir = f.tempDir
if config.EnableDebug {
cmd.Stdout = os.Stdout
cmd.Stderr = os.Stderr
}
if err := cmd.Start(); err != nil {
return err
}
f.sftpProcess = cmd.Process
return nil
}
// waitForService waits for a service to be available
func (f *SftpTestFramework) waitForService(addr string, timeout time.Duration) error {
deadline := time.Now().Add(timeout)
for time.Now().Before(deadline) {
conn, err := net.DialTimeout("tcp", addr, 1*time.Second)
if err == nil {
conn.Close()
return nil
}
time.Sleep(100 * time.Millisecond)
}
return fmt.Errorf("service at %s not ready within timeout", addr)
}
// findWeedBinary locates the weed binary
// Prefers local build over system-installed weed to ensure we test the latest code
func findWeedBinary() string {
// Get the directory where this source file is located
// This ensures we find the locally built weed binary first
_, thisFile, _, ok := runtime.Caller(0)
if ok {
thisDir := filepath.Dir(thisFile)
// From test/sftp/, the weed binary should be at ../../weed/weed
candidates := []string{
filepath.Join(thisDir, "../../weed/weed"),
filepath.Join(thisDir, "../weed/weed"),
}
for _, candidate := range candidates {
if _, err := os.Stat(candidate); err == nil {
abs, _ := filepath.Abs(candidate)
return abs
}
}
}
// Try relative paths from current working directory
cwd, _ := os.Getwd()
candidates := []string{
filepath.Join(cwd, "../../weed/weed"),
filepath.Join(cwd, "../weed/weed"),
filepath.Join(cwd, "./weed"),
}
for _, candidate := range candidates {
if _, err := os.Stat(candidate); err == nil {
abs, _ := filepath.Abs(candidate)
return abs
}
}
// Fallback to PATH only if local build not found
if path, err := exec.LookPath("weed"); err == nil {
return path
}
// Default fallback
return "weed"
}
// findTestDataPath locates the testdata directory
func findTestDataPath() string {
// Get the directory where this source file is located
_, thisFile, _, ok := runtime.Caller(0)
if ok {
thisDir := filepath.Dir(thisFile)
testDataPath := filepath.Join(thisDir, "testdata")
if _, err := os.Stat(testDataPath); err == nil {
return testDataPath
}
}
// Try relative paths from current working directory
cwd, _ := os.Getwd()
candidates := []string{
filepath.Join(cwd, "testdata"),
filepath.Join(cwd, "../sftp/testdata"),
filepath.Join(cwd, "test/sftp/testdata"),
}
for _, candidate := range candidates {
if _, err := os.Stat(candidate); err == nil {
return candidate
}
}
return "./testdata"
}

17
test/sftp/go.mod

@ -0,0 +1,17 @@
module seaweedfs-sftp-tests
go 1.24.0
require (
github.com/pkg/sftp v1.13.7
github.com/stretchr/testify v1.10.0
golang.org/x/crypto v0.45.0
)
require (
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/kr/fs v0.1.0 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
golang.org/x/sys v0.38.0 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
)

64
test/sftp/go.sum

@ -0,0 +1,64 @@
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/kr/fs v0.1.0 h1:Jskdu9ieNAYnjxsi0LbQp1ulIKZV1LAFgK1tWhpZgl8=
github.com/kr/fs v0.1.0/go.mod h1:FFnZGqtBN9Gxj7eW1uZ42v5BccTP0vu6NEaFoC2HwRg=
github.com/pkg/sftp v1.13.7 h1:uv+I3nNJvlKZIQGSr8JVQLNHFU9YhhNpvC14Y6KgmSM=
github.com/pkg/sftp v1.13.7/go.mod h1:KMKI0t3T6hfA+lTR/ssZdunHo+uwq7ghoN09/FSu3DY=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA=
github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
golang.org/x/crypto v0.17.0/go.mod h1:gCAAfMLgwOJRpTjQ2zCCt2OcSfYMTeZVSRtQlPC7Nq4=
golang.org/x/crypto v0.45.0 h1:jMBrvKuj23MTlT0bQEOBcAE0mjg8mK9RXFhRH6nyF3Q=
golang.org/x/crypto v0.45.0/go.mod h1:XTGrrkGJve7CYK7J8PEww4aY7gM3qMCElcJQ8n8JdX4=
golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs=
golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg=
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.15.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/sys v0.38.0 h1:3yZWxaJjBmCWXqhN1qh02AkOnCQ1poK6oF+a7xWL6Gc=
golang.org/x/sys v0.38.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k=
golang.org/x/term v0.8.0/go.mod h1:xPskH00ivmX89bAKVGSKKtLOWNx2+17Eiy94tnKShWo=
golang.org/x/term v0.15.0/go.mod h1:BDl952bC7+uMoWR75FIrCDx79TPU9oHkTZ9yRbYOrX0=
golang.org/x/term v0.37.0 h1:8EGAD0qCmHYZg6J17DvsMy9/wJ7/D/4pV/wfnld5lTU=
golang.org/x/term v0.37.0/go.mod h1:5pB4lxRNYYVZuTLmy8oR2BH8dflOR+IbTYFD8fi3254=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8=
golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=

37
test/sftp/testdata/userstore.json

@ -0,0 +1,37 @@
[
{
"Username": "admin",
"Password": "adminpassword",
"PublicKeys": [],
"HomeDir": "/",
"Permissions": {
"/": ["*"]
},
"Uid": 0,
"Gid": 0
},
{
"Username": "testuser",
"Password": "testuserpassword",
"PublicKeys": [],
"HomeDir": "/sftp/testuser",
"Permissions": {
"/sftp/testuser": ["*"]
},
"Uid": 1001,
"Gid": 1001
},
{
"Username": "readonly",
"Password": "readonlypassword",
"PublicKeys": [],
"HomeDir": "/public",
"Permissions": {
"/public": ["read", "list"]
},
"Uid": 1002,
"Gid": 1002
}
]

28
weed/admin/dash/admin_server.go

@ -99,28 +99,22 @@ func NewAdminServer(masters string, templateFS http.FileSystem, dataDir string)
// Continue without credential manager - will fall back to legacy approach
} else {
server.credentialManager = credentialManager
glog.V(0).Infof("Credential manager initialized with store type: %s", credentialManager.GetStore().GetName())
// For stores that need filer address function, set them
// For stores that need filer address function, configure them
if store := credentialManager.GetStore(); store != nil {
if filerFuncSetter, ok := store.(interface {
SetFilerAddressFunc(func() pb.ServerAddress, grpc.DialOption)
}); ok {
// Set up a goroutine to configure filer address function once we discover filers
go func() {
for {
filerAddr := server.GetFilerAddress()
if filerAddr != "" {
// Configure the function to dynamically return the current active filer (HA-aware)
filerFuncSetter.SetFilerAddressFunc(func() pb.ServerAddress {
return pb.ServerAddress(server.GetFilerAddress())
}, server.grpcDialOption)
glog.V(1).Infof("Set filer address function for credential manager: %s", filerAddr)
break
}
glog.V(1).Infof("Waiting for filer discovery for credential manager...")
time.Sleep(5 * time.Second)
}
}()
// Configure the filer address function to dynamically return the current active filer
// This function will be called each time credentials need to be loaded/saved,
// so it will automatically use whatever filer is currently available (HA-aware)
filerFuncSetter.SetFilerAddressFunc(func() pb.ServerAddress {
return pb.ServerAddress(server.GetFilerAddress())
}, server.grpcDialOption)
glog.V(0).Infof("Credential store configured with dynamic filer address function")
} else {
glog.V(0).Infof("Credential store %s does not support filer address function", store.GetName())
}
}
}

240
weed/admin/handlers/file_browser_handlers.go

@ -5,10 +5,12 @@ import (
"context"
"fmt"
"io"
"mime"
"mime/multipart"
"net"
"net/http"
"os"
"path"
"path/filepath"
"strconv"
"strings"
@ -20,15 +22,37 @@ import (
"github.com/seaweedfs/seaweedfs/weed/admin/view/layout"
"github.com/seaweedfs/seaweedfs/weed/glog"
"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
"github.com/seaweedfs/seaweedfs/weed/util"
"github.com/seaweedfs/seaweedfs/weed/util/http/client"
)
type FileBrowserHandlers struct {
adminServer *dash.AdminServer
httpClient *client.HTTPClient
}
func NewFileBrowserHandlers(adminServer *dash.AdminServer) *FileBrowserHandlers {
// Create HTTP client with TLS support from https.client configuration
// The client is created without a timeout - each operation will set its own timeout
// If TLS is enabled but misconfigured, fail fast to alert the operator immediately
// rather than silently falling back to HTTP and causing confusing runtime errors
httpClient, err := client.NewHttpClient(client.Client)
if err != nil {
glog.Fatalf("Failed to create HTTPS client for file browser: %v", err)
}
return &FileBrowserHandlers{
adminServer: adminServer,
httpClient: httpClient,
}
}
// newClientWithTimeout creates a temporary http.Client with the specified timeout,
// reusing the TLS transport from the shared httpClient.
func (h *FileBrowserHandlers) newClientWithTimeout(timeout time.Duration) http.Client {
return http.Client{
Transport: h.httpClient.Client.Transport,
Timeout: timeout,
}
}
@ -245,8 +269,12 @@ func (h *FileBrowserHandlers) UploadFile(c *gin.Context) {
continue
}
// Create full path for the file
fullPath := filepath.Join(currentPath, fileName)
// Normalize Windows-style backslashes to forward slashes
fileName = util.CleanWindowsPath(fileName)
// Create full path for the file using path.Join for URL path semantics
// path.Join handles double slashes and is not OS-specific like filepath.Join
fullPath := path.Join(currentPath, fileName)
if !strings.HasPrefix(fullPath, "/") {
fullPath = "/" + fullPath
}
@ -327,8 +355,10 @@ func (h *FileBrowserHandlers) uploadFileToFiler(filePath string, fileHeader *mul
var body bytes.Buffer
writer := multipart.NewWriter(&body)
// Create form file field
part, err := writer.CreateFormFile("file", fileHeader.Filename)
// Create form file field with normalized base filename
// Use path.Base (not filepath.Base) since cleanFilePath uses URL path semantics
baseFileName := path.Base(cleanFilePath)
part, err := writer.CreateFormFile("file", baseFileName)
if err != nil {
return fmt.Errorf("failed to create form file: %w", err)
}
@ -345,8 +375,15 @@ func (h *FileBrowserHandlers) uploadFileToFiler(filePath string, fileHeader *mul
return fmt.Errorf("failed to close multipart writer: %w", err)
}
// Create the upload URL with validated components
uploadURL := fmt.Sprintf("http://%s%s", filerAddress, cleanFilePath)
// Create the upload URL - the httpClient will normalize to the correct scheme (http/https)
// based on the https.client configuration in security.toml
uploadURL := fmt.Sprintf("%s%s", filerAddress, cleanFilePath)
// Normalize the URL scheme based on TLS configuration
uploadURL, err = h.httpClient.NormalizeHttpScheme(uploadURL)
if err != nil {
return fmt.Errorf("failed to normalize URL scheme: %w", err)
}
// Create HTTP request
req, err := http.NewRequest("POST", uploadURL, &body)
@ -357,11 +394,11 @@ func (h *FileBrowserHandlers) uploadFileToFiler(filePath string, fileHeader *mul
// Set content type with boundary
req.Header.Set("Content-Type", writer.FormDataContentType())
// Send request
client := &http.Client{Timeout: 60 * time.Second} // Increased timeout for larger files
// Send request using TLS-aware HTTP client with 60s timeout for large file uploads
// lgtm[go/ssrf]
// Safe: filerAddress validated by validateFilerAddress() to match configured filer
// Safe: cleanFilePath validated and cleaned by validateAndCleanFilePath() to prevent path traversal
client := h.newClientWithTimeout(60 * time.Second)
resp, err := client.Do(req)
if err != nil {
return fmt.Errorf("failed to upload file: %w", err)
@ -423,8 +460,12 @@ func (h *FileBrowserHandlers) validateAndCleanFilePath(filePath string) (string,
return "", fmt.Errorf("file path cannot be empty")
}
// Normalize Windows-style backslashes to forward slashes
filePath = util.CleanWindowsPath(filePath)
// Clean the path to remove any .. or . components
cleanPath := filepath.Clean(filePath)
// Use path.Clean (not filepath.Clean) since this is a URL path
cleanPath := path.Clean(filePath)
// Ensure the path starts with /
if !strings.HasPrefix(cleanPath, "/") {
@ -444,7 +485,57 @@ func (h *FileBrowserHandlers) validateAndCleanFilePath(filePath string) (string,
return cleanPath, nil
}
// DownloadFile handles file download requests
// fetchFileContent fetches file content from the filer and returns the content or an error.
func (h *FileBrowserHandlers) fetchFileContent(filePath string, timeout time.Duration) (string, error) {
filerAddress := h.adminServer.GetFilerAddress()
if filerAddress == "" {
return "", fmt.Errorf("filer address not configured")
}
if err := h.validateFilerAddress(filerAddress); err != nil {
return "", fmt.Errorf("invalid filer address configuration: %w", err)
}
cleanFilePath, err := h.validateAndCleanFilePath(filePath)
if err != nil {
return "", err
}
// Create the file URL with proper scheme based on TLS configuration
fileURL := fmt.Sprintf("%s%s", filerAddress, cleanFilePath)
fileURL, err = h.httpClient.NormalizeHttpScheme(fileURL)
if err != nil {
return "", fmt.Errorf("failed to construct file URL: %w", err)
}
// lgtm[go/ssrf]
// Safe: filerAddress validated by validateFilerAddress() to match configured filer
// Safe: cleanFilePath validated and cleaned by validateAndCleanFilePath() to prevent path traversal
client := h.newClientWithTimeout(timeout)
resp, err := client.Get(fileURL)
if err != nil {
return "", fmt.Errorf("failed to fetch file from filer: %w", err)
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
body, err := io.ReadAll(resp.Body)
if err != nil {
return "", fmt.Errorf("filer returned status %d but failed to read response body: %w", resp.StatusCode, err)
}
return "", fmt.Errorf("filer returned status %d: %s", resp.StatusCode, string(body))
}
contentBytes, err := io.ReadAll(resp.Body)
if err != nil {
return "", fmt.Errorf("failed to read file content: %w", err)
}
return string(contentBytes), nil
}
// DownloadFile handles file download requests by proxying through the Admin UI server
// This ensures mTLS works correctly since the Admin UI server has the client certificates
func (h *FileBrowserHandlers) DownloadFile(c *gin.Context) {
filePath := c.Query("path")
if filePath == "" {
@ -459,6 +550,12 @@ func (h *FileBrowserHandlers) DownloadFile(c *gin.Context) {
return
}
// Validate filer address to prevent SSRF
if err := h.validateFilerAddress(filerAddress); err != nil {
c.JSON(http.StatusInternalServerError, gin.H{"error": "Invalid filer address configuration"})
return
}
// Validate and sanitize the file path
cleanFilePath, err := h.validateAndCleanFilePath(filePath)
if err != nil {
@ -466,16 +563,66 @@ func (h *FileBrowserHandlers) DownloadFile(c *gin.Context) {
return
}
// Create the download URL
downloadURL := fmt.Sprintf("http://%s%s", filerAddress, cleanFilePath)
// Create the download URL with proper scheme based on TLS configuration
downloadURL := fmt.Sprintf("%s%s", filerAddress, cleanFilePath)
downloadURL, err = h.httpClient.NormalizeHttpScheme(downloadURL)
if err != nil {
c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to construct download URL: " + err.Error()})
return
}
// Proxy the download through the Admin UI server to support mTLS
// lgtm[go/ssrf]
// Safe: filerAddress validated by validateFilerAddress() to match configured filer
// Safe: cleanFilePath validated and cleaned by validateAndCleanFilePath() to prevent path traversal
// Use request context so download is cancelled when client disconnects
req, err := http.NewRequestWithContext(c.Request.Context(), "GET", downloadURL, nil)
if err != nil {
c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to create request: " + err.Error()})
return
}
client := h.newClientWithTimeout(5 * time.Minute) // Longer timeout for large file downloads
resp, err := client.Do(req)
if err != nil {
c.JSON(http.StatusBadGateway, gin.H{"error": "Failed to fetch file from filer: " + err.Error()})
return
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
body, err := io.ReadAll(resp.Body)
if err != nil {
c.JSON(resp.StatusCode, gin.H{"error": fmt.Sprintf("Filer returned status %d but failed to read response body: %v", resp.StatusCode, err)})
return
}
c.JSON(resp.StatusCode, gin.H{"error": fmt.Sprintf("Filer returned status %d: %s", resp.StatusCode, string(body))})
return
}
// Set headers for file download
fileName := filepath.Base(cleanFilePath)
c.Header("Content-Disposition", fmt.Sprintf("attachment; filename=\"%s\"", fileName))
c.Header("Content-Type", "application/octet-stream")
// Use mime.FormatMediaType for RFC 6266 compliant Content-Disposition,
// properly handling non-ASCII characters and special characters
c.Header("Content-Disposition", mime.FormatMediaType("attachment", map[string]string{"filename": fileName}))
// Use content type from filer response, or default to octet-stream
contentType := resp.Header.Get("Content-Type")
if contentType == "" {
contentType = "application/octet-stream"
}
c.Header("Content-Type", contentType)
// Set content length if available
if resp.ContentLength > 0 {
c.Header("Content-Length", fmt.Sprintf("%d", resp.ContentLength))
}
// Proxy the request to filer
c.Redirect(http.StatusFound, downloadURL)
// Stream the response body to the client
c.Status(http.StatusOK)
_, err = io.Copy(c.Writer, resp.Body)
if err != nil {
glog.Errorf("Error streaming file download: %v", err)
}
}
// ViewFile handles file viewing requests (for text files, images, etc.)
@ -559,46 +706,13 @@ func (h *FileBrowserHandlers) ViewFile(c *gin.Context) {
viewable = false
reason = "File too large for viewing (>1MB)"
} else {
// Get file content from filer
filerAddress := h.adminServer.GetFilerAddress()
if filerAddress != "" {
// Validate filer address to prevent SSRF
if err := h.validateFilerAddress(filerAddress); err != nil {
viewable = false
reason = "Invalid filer address configuration"
} else {
cleanFilePath, err := h.validateAndCleanFilePath(filePath)
if err == nil {
fileURL := fmt.Sprintf("http://%s%s", filerAddress, cleanFilePath)
client := &http.Client{Timeout: 30 * time.Second}
// lgtm[go/ssrf]
// Safe: filerAddress validated by validateFilerAddress() to match configured filer
// Safe: cleanFilePath validated and cleaned by validateAndCleanFilePath() to prevent path traversal
resp, err := client.Get(fileURL)
if err == nil && resp.StatusCode == http.StatusOK {
defer resp.Body.Close()
contentBytes, err := io.ReadAll(resp.Body)
if err == nil {
content = string(contentBytes)
viewable = true
} else {
viewable = false
reason = "Failed to read file content"
}
} else {
viewable = false
reason = "Failed to fetch file from filer"
}
} else {
viewable = false
reason = "Invalid file path"
}
}
} else {
viewable = false
reason = "Filer address not configured"
// Fetch file content from filer
var err error
content, err = h.fetchFileContent(filePath, 30*time.Second)
if err != nil {
reason = err.Error()
}
viewable = (err == nil)
}
} else {
// Not a text file, but might be viewable as image or PDF
@ -893,18 +1007,28 @@ func (h *FileBrowserHandlers) isLikelyTextFile(filePath string, maxCheckSize int
return false
}
fileURL := fmt.Sprintf("http://%s%s", filerAddress, cleanFilePath)
// Create the file URL with proper scheme based on TLS configuration
fileURL := fmt.Sprintf("%s%s", filerAddress, cleanFilePath)
fileURL, err = h.httpClient.NormalizeHttpScheme(fileURL)
if err != nil {
glog.Errorf("Failed to normalize URL scheme: %v", err)
return false
}
client := &http.Client{Timeout: 10 * time.Second}
// lgtm[go/ssrf]
// Safe: filerAddress validated by validateFilerAddress() to match configured filer
// Safe: cleanFilePath validated and cleaned by validateAndCleanFilePath() to prevent path traversal
client := h.newClientWithTimeout(10 * time.Second)
resp, err := client.Get(fileURL)
if err != nil || resp.StatusCode != http.StatusOK {
if err != nil {
return false
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
return false
}
// Read first few bytes to check if it's text
buffer := make([]byte, min(maxCheckSize, 512))
n, err := resp.Body.Read(buffer)

4
weed/command/filer.go

@ -122,13 +122,13 @@ func init() {
filerS3Options.tlsCertificate = cmdFiler.Flag.String("s3.cert.file", "", "path to the TLS certificate file")
filerS3Options.config = cmdFiler.Flag.String("s3.config", "", "path to the config file")
filerS3Options.auditLogConfig = cmdFiler.Flag.String("s3.auditLogConfig", "", "path to the audit log config file")
filerS3Options.allowEmptyFolder = cmdFiler.Flag.Bool("s3.allowEmptyFolder", true, "allow empty folders")
cmdFiler.Flag.Bool("s3.allowEmptyFolder", true, "deprecated, ignored. Empty folder cleanup is now automatic.")
filerS3Options.allowDeleteBucketNotEmpty = cmdFiler.Flag.Bool("s3.allowDeleteBucketNotEmpty", true, "allow recursive deleting all entries along with bucket")
filerS3Options.localSocket = cmdFiler.Flag.String("s3.localSocket", "", "default to /tmp/seaweedfs-s3-<port>.sock")
filerS3Options.tlsCACertificate = cmdFiler.Flag.String("s3.cacert.file", "", "path to the TLS CA certificate file")
filerS3Options.tlsVerifyClientCert = cmdFiler.Flag.Bool("s3.tlsVerifyClientCert", false, "whether to verify the client's certificate")
filerS3Options.bindIp = cmdFiler.Flag.String("s3.ip.bind", "", "ip address to bind to. If empty, default to same as -ip.bind option.")
filerS3Options.idleTimeout = cmdFiler.Flag.Int("s3.idleTimeout", 10, "connection idle seconds")
filerS3Options.idleTimeout = cmdFiler.Flag.Int("s3.idleTimeout", 120, "connection idle seconds")
filerS3Options.concurrentUploadLimitMB = cmdFiler.Flag.Int("s3.concurrentUploadLimitMB", 128, "limit total concurrent upload size for S3")
filerS3Options.concurrentFileUploadLimit = cmdFiler.Flag.Int("s3.concurrentFileUploadLimit", 0, "limit number of concurrent file uploads for S3, 0 means unlimited")

6
weed/command/s3.go

@ -49,7 +49,6 @@ type S3Options struct {
tlsVerifyClientCert *bool
metricsHttpPort *int
metricsHttpIp *string
allowEmptyFolder *bool
allowDeleteBucketNotEmpty *bool
auditLogConfig *string
localFilerSocket *string
@ -80,11 +79,11 @@ func init() {
s3StandaloneOptions.tlsVerifyClientCert = cmdS3.Flag.Bool("tlsVerifyClientCert", false, "whether to verify the client's certificate")
s3StandaloneOptions.metricsHttpPort = cmdS3.Flag.Int("metricsPort", 0, "Prometheus metrics listen port")
s3StandaloneOptions.metricsHttpIp = cmdS3.Flag.String("metricsIp", "", "metrics listen ip. If empty, default to same as -ip.bind option.")
s3StandaloneOptions.allowEmptyFolder = cmdS3.Flag.Bool("allowEmptyFolder", true, "allow empty folders")
cmdS3.Flag.Bool("allowEmptyFolder", true, "deprecated, ignored. Empty folder cleanup is now automatic.")
s3StandaloneOptions.allowDeleteBucketNotEmpty = cmdS3.Flag.Bool("allowDeleteBucketNotEmpty", true, "allow recursive deleting all entries along with bucket")
s3StandaloneOptions.localFilerSocket = cmdS3.Flag.String("localFilerSocket", "", "local filer socket path")
s3StandaloneOptions.localSocket = cmdS3.Flag.String("localSocket", "", "default to /tmp/seaweedfs-s3-<port>.sock")
s3StandaloneOptions.idleTimeout = cmdS3.Flag.Int("idleTimeout", 10, "connection idle seconds")
s3StandaloneOptions.idleTimeout = cmdS3.Flag.Int("idleTimeout", 120, "connection idle seconds")
s3StandaloneOptions.concurrentUploadLimitMB = cmdS3.Flag.Int("concurrentUploadLimitMB", 128, "limit total concurrent upload size")
s3StandaloneOptions.concurrentFileUploadLimit = cmdS3.Flag.Int("concurrentFileUploadLimit", 0, "limit number of concurrent file uploads, 0 means unlimited")
}
@ -273,7 +272,6 @@ func (s3opt *S3Options) startS3Server() bool {
AllowedOrigins: strings.Split(*s3opt.allowedOrigins, ","),
BucketsPath: filerBucketsPath,
GrpcDialOption: grpcDialOption,
AllowEmptyFolder: *s3opt.allowEmptyFolder,
AllowDeleteBucketNotEmpty: *s3opt.allowDeleteBucketNotEmpty,
LocalFilerSocket: localFilerSocket,
DataCenter: *s3opt.dataCenter,

6
weed/command/server.go

@ -133,11 +133,13 @@ func init() {
serverOptions.v.port = cmdServer.Flag.Int("volume.port", 8080, "volume server http listen port")
serverOptions.v.portGrpc = cmdServer.Flag.Int("volume.port.grpc", 0, "volume server grpc listen port")
serverOptions.v.publicPort = cmdServer.Flag.Int("volume.port.public", 0, "volume server public port")
serverOptions.v.id = cmdServer.Flag.String("volume.id", "", "volume server id. If empty, default to ip:port")
serverOptions.v.indexType = cmdServer.Flag.String("volume.index", "memory", "Choose [memory|leveldb|leveldbMedium|leveldbLarge] mode for memory~performance balance.")
serverOptions.v.diskType = cmdServer.Flag.String("volume.disk", "", "[hdd|ssd|<tag>] hard drive or solid state drive or any tag")
serverOptions.v.fixJpgOrientation = cmdServer.Flag.Bool("volume.images.fix.orientation", false, "Adjust jpg orientation when uploading.")
serverOptions.v.readMode = cmdServer.Flag.String("volume.readMode", "proxy", "[local|proxy|redirect] how to deal with non-local volume: 'not found|read in remote node|redirect volume location'.")
serverOptions.v.compactionMBPerSecond = cmdServer.Flag.Int("volume.compactionMBps", 0, "limit compaction speed in mega bytes per second")
serverOptions.v.maintenanceMBPerSecond = cmdServer.Flag.Int("volume.maintenanceMBps", 0, "limit maintenance (replication / balance) IO rate in MB/s. Default is 0 (no limit).")
serverOptions.v.fileSizeLimitMB = cmdServer.Flag.Int("volume.fileSizeLimitMB", 256, "limit file size to avoid out of memory")
serverOptions.v.ldbTimeout = cmdServer.Flag.Int64("volume.index.leveldbTimeout", 0, "alive time for leveldb (default to 0). If leveldb of volume is not accessed in ldbTimeout hours, it will be offloaded to reduce opened files and memory consumption.")
serverOptions.v.concurrentUploadLimitMB = cmdServer.Flag.Int("volume.concurrentUploadLimitMB", 64, "limit total concurrent upload size")
@ -164,11 +166,11 @@ func init() {
s3Options.config = cmdServer.Flag.String("s3.config", "", "path to the config file")
s3Options.iamConfig = cmdServer.Flag.String("s3.iam.config", "", "path to the advanced IAM config file for S3. Overrides -iam.config if both are provided.")
s3Options.auditLogConfig = cmdServer.Flag.String("s3.auditLogConfig", "", "path to the audit log config file")
s3Options.allowEmptyFolder = cmdServer.Flag.Bool("s3.allowEmptyFolder", true, "allow empty folders")
cmdServer.Flag.Bool("s3.allowEmptyFolder", true, "deprecated, ignored. Empty folder cleanup is now automatic.")
s3Options.allowDeleteBucketNotEmpty = cmdServer.Flag.Bool("s3.allowDeleteBucketNotEmpty", true, "allow recursive deleting all entries along with bucket")
s3Options.localSocket = cmdServer.Flag.String("s3.localSocket", "", "default to /tmp/seaweedfs-s3-<port>.sock")
s3Options.bindIp = cmdServer.Flag.String("s3.ip.bind", "", "ip address to bind to. If empty, default to same as -ip.bind option.")
s3Options.idleTimeout = cmdServer.Flag.Int("s3.idleTimeout", 10, "connection idle seconds")
s3Options.idleTimeout = cmdServer.Flag.Int("s3.idleTimeout", 120, "connection idle seconds")
s3Options.concurrentUploadLimitMB = cmdServer.Flag.Int("s3.concurrentUploadLimitMB", 128, "limit total concurrent upload size for S3")
s3Options.concurrentFileUploadLimit = cmdServer.Flag.Int("s3.concurrentFileUploadLimit", 0, "limit number of concurrent file uploads for S3, 0 means unlimited")

10
weed/command/volume.go

@ -41,6 +41,7 @@ type VolumeServerOptions struct {
folderMaxLimits []int32
idxFolder *string
ip *string
id *string
publicUrl *string
bindIp *string
mastersString *string
@ -57,6 +58,7 @@ type VolumeServerOptions struct {
cpuProfile *string
memProfile *string
compactionMBPerSecond *int
maintenanceMBPerSecond *int
fileSizeLimitMB *int
concurrentUploadLimitMB *int
concurrentDownloadLimitMB *int
@ -78,6 +80,7 @@ func init() {
v.portGrpc = cmdVolume.Flag.Int("port.grpc", 0, "grpc listen port")
v.publicPort = cmdVolume.Flag.Int("port.public", 0, "port opened to public")
v.ip = cmdVolume.Flag.String("ip", util.DetectedHostAddress(), "ip or server name, also used as identifier")
v.id = cmdVolume.Flag.String("id", "", "volume server id. If empty, default to ip:port")
v.publicUrl = cmdVolume.Flag.String("publicUrl", "", "Publicly accessible address")
v.bindIp = cmdVolume.Flag.String("ip.bind", "", "ip address to bind to. If empty, default to same as -ip option.")
v.mastersString = cmdVolume.Flag.String("master", "localhost:9333", "comma-separated master servers")
@ -94,6 +97,7 @@ func init() {
v.cpuProfile = cmdVolume.Flag.String("cpuprofile", "", "cpu profile output file")
v.memProfile = cmdVolume.Flag.String("memprofile", "", "memory profile output file")
v.compactionMBPerSecond = cmdVolume.Flag.Int("compactionMBps", 0, "limit background compaction or copying speed in mega bytes per second")
v.maintenanceMBPerSecond = cmdVolume.Flag.Int("maintenanceMBps", 0, "limit maintenance (replication / balance) IO rate in MB/s. Default is 0 (no limit).")
v.fileSizeLimitMB = cmdVolume.Flag.Int("fileSizeLimitMB", 256, "limit file size to avoid out of memory")
v.ldbTimeout = cmdVolume.Flag.Int64("index.leveldbTimeout", 0, "alive time for leveldb (default to 0). If leveldb of volume is not accessed in ldbTimeout hours, it will be offloaded to reduce opened files and memory consumption.")
v.concurrentUploadLimitMB = cmdVolume.Flag.Int("concurrentUploadLimitMB", 256, "limit total concurrent upload size")
@ -253,8 +257,11 @@ func (v VolumeServerOptions) startVolumeServer(volumeFolders, maxVolumeCounts, v
volumeNeedleMapKind = storage.NeedleMapLevelDbLarge
}
// Determine volume server ID: if not specified, use ip:port
volumeServerId := util.GetVolumeServerId(*v.id, *v.ip, *v.port)
volumeServer := weed_server.NewVolumeServer(volumeMux, publicVolumeMux,
*v.ip, *v.port, *v.portGrpc, *v.publicUrl,
*v.ip, *v.port, *v.portGrpc, *v.publicUrl, volumeServerId,
v.folders, v.folderMaxLimits, minFreeSpaces, diskTypes,
*v.idxFolder,
volumeNeedleMapKind,
@ -262,6 +269,7 @@ func (v VolumeServerOptions) startVolumeServer(volumeFolders, maxVolumeCounts, v
v.whiteList,
*v.fixJpgOrientation, *v.readMode,
*v.compactionMBPerSecond,
*v.maintenanceMBPerSecond,
*v.fileSizeLimitMB,
int64(*v.concurrentUploadLimitMB)*1024*1024,
int64(*v.concurrentDownloadLimitMB)*1024*1024,

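The new -id flag gives a volume server a stable identity that does not change when its address changes; when the flag is left empty, the historical ip:port identity is kept, so existing deployments behave as before. A minimal sketch of the defaulting that util.GetVolumeServerId presumably performs (the helper itself is not shown in this diff, and volumeServerId below is only an illustrative stand-in):

package main

import (
	"fmt"
	"net"
	"strconv"
)

// volumeServerId mirrors the assumed behavior: prefer the explicit -id value,
// otherwise fall back to the ip:port pair that was used before this change.
func volumeServerId(id, ip string, port int) string {
	if id != "" {
		return id
	}
	return net.JoinHostPort(ip, strconv.Itoa(port))
}

func main() {
	fmt.Println(volumeServerId("", "10.0.0.5", 8080))      // "10.0.0.5:8080"
	fmt.Println(volumeServerId("vol-a", "10.0.0.5", 8080)) // "vol-a"
}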
4
weed/credential/filer_etc/filer_etc_store.go

@ -58,7 +58,7 @@ func (store *FilerEtcStore) withFilerClient(fn func(client filer_pb.SeaweedFiler
store.mu.RLock()
if store.filerAddressFunc == nil {
store.mu.RUnlock()
return fmt.Errorf("filer_etc: filer address function not configured")
return fmt.Errorf("filer_etc: filer not yet available - please wait for filer discovery to complete and try again")
}
filerAddress := store.filerAddressFunc()
@ -66,7 +66,7 @@ func (store *FilerEtcStore) withFilerClient(fn func(client filer_pb.SeaweedFiler
store.mu.RUnlock()
if filerAddress == "" {
return fmt.Errorf("filer_etc: filer address is empty")
return fmt.Errorf("filer_etc: no filer discovered yet - please ensure a filer is running and accessible")
}
// Use the pb.WithGrpcFilerClient helper similar to existing code

207
weed/filer/empty_folder_cleanup/cleanup_queue.go

@ -0,0 +1,207 @@
package empty_folder_cleanup
import (
"container/list"
"sync"
"time"
)
// CleanupQueue manages a deduplicated queue of folders pending cleanup.
// It uses a doubly-linked list ordered by event time (oldest at front) and a map for O(1) deduplication.
// Processing is triggered when:
// - Queue size reaches maxSize, OR
// - Oldest item exceeds maxAge
type CleanupQueue struct {
mu sync.Mutex
items *list.List // Linked list of *queueItem ordered by time (front = oldest)
itemsMap map[string]*list.Element // folder -> list element for O(1) lookup
maxSize int // Max queue size before triggering cleanup
maxAge time.Duration // Max age before triggering cleanup
}
// queueItem represents an item in the cleanup queue
type queueItem struct {
folder string
queueTime time.Time
}
// NewCleanupQueue creates a new CleanupQueue with the specified limits
func NewCleanupQueue(maxSize int, maxAge time.Duration) *CleanupQueue {
return &CleanupQueue{
items: list.New(),
itemsMap: make(map[string]*list.Element),
maxSize: maxSize,
maxAge: maxAge,
}
}
// Add adds a folder to the queue with the specified event time.
// The item is inserted in time-sorted order (oldest at front) to handle out-of-order events.
// If folder already exists with an older time, the time is updated and position adjusted.
// Returns true if the folder was newly added, false if it was updated.
func (q *CleanupQueue) Add(folder string, eventTime time.Time) bool {
q.mu.Lock()
defer q.mu.Unlock()
// Check if folder already exists
if elem, exists := q.itemsMap[folder]; exists {
existingItem := elem.Value.(*queueItem)
// Only update if new event is later
if eventTime.After(existingItem.queueTime) {
// Remove from current position
q.items.Remove(elem)
// Re-insert with new time in sorted position
newElem := q.insertSorted(folder, eventTime)
q.itemsMap[folder] = newElem
}
return false
}
// Insert new folder in sorted position
elem := q.insertSorted(folder, eventTime)
q.itemsMap[folder] = elem
return true
}
// insertSorted inserts an item in the correct position to maintain time ordering (oldest at front)
func (q *CleanupQueue) insertSorted(folder string, eventTime time.Time) *list.Element {
item := &queueItem{
folder: folder,
queueTime: eventTime,
}
// Find the correct position (insert before the first item with a later time)
for elem := q.items.Back(); elem != nil; elem = elem.Prev() {
existingItem := elem.Value.(*queueItem)
if !eventTime.Before(existingItem.queueTime) {
// Insert after this element
return q.items.InsertAfter(item, elem)
}
}
// This item is the oldest, insert at front
return q.items.PushFront(item)
}
// Remove removes a specific folder from the queue (e.g., when a file is created).
// Returns true if the folder was found and removed.
func (q *CleanupQueue) Remove(folder string) bool {
q.mu.Lock()
defer q.mu.Unlock()
elem, exists := q.itemsMap[folder]
if !exists {
return false
}
q.items.Remove(elem)
delete(q.itemsMap, folder)
return true
}
// ShouldProcess returns true if the queue should be processed.
// This is true when:
// - Queue size >= maxSize, OR
// - Oldest item age > maxAge
func (q *CleanupQueue) ShouldProcess() bool {
q.mu.Lock()
defer q.mu.Unlock()
return q.shouldProcessLocked()
}
// shouldProcessLocked checks if processing is needed (caller must hold lock)
func (q *CleanupQueue) shouldProcessLocked() bool {
if q.items.Len() == 0 {
return false
}
// Check if queue is full
if q.items.Len() >= q.maxSize {
return true
}
// Check if oldest item exceeds max age
front := q.items.Front()
if front != nil {
item := front.Value.(*queueItem)
if time.Since(item.queueTime) > q.maxAge {
return true
}
}
return false
}
// Pop removes and returns the oldest folder from the queue.
// Returns the folder and true if an item was available, or empty string and false if queue is empty.
func (q *CleanupQueue) Pop() (string, bool) {
q.mu.Lock()
defer q.mu.Unlock()
front := q.items.Front()
if front == nil {
return "", false
}
item := front.Value.(*queueItem)
q.items.Remove(front)
delete(q.itemsMap, item.folder)
return item.folder, true
}
// Peek returns the oldest folder without removing it.
// Returns the folder and queue time if available, or empty values if queue is empty.
func (q *CleanupQueue) Peek() (folder string, queueTime time.Time, ok bool) {
q.mu.Lock()
defer q.mu.Unlock()
front := q.items.Front()
if front == nil {
return "", time.Time{}, false
}
item := front.Value.(*queueItem)
return item.folder, item.queueTime, true
}
// Len returns the current queue size.
func (q *CleanupQueue) Len() int {
q.mu.Lock()
defer q.mu.Unlock()
return q.items.Len()
}
// Contains checks if a folder is in the queue.
func (q *CleanupQueue) Contains(folder string) bool {
q.mu.Lock()
defer q.mu.Unlock()
_, exists := q.itemsMap[folder]
return exists
}
// Clear removes all items from the queue.
func (q *CleanupQueue) Clear() {
q.mu.Lock()
defer q.mu.Unlock()
q.items.Init()
q.itemsMap = make(map[string]*list.Element)
}
// OldestAge returns the age of the oldest item in the queue, or 0 if empty.
func (q *CleanupQueue) OldestAge() time.Duration {
q.mu.Lock()
defer q.mu.Unlock()
front := q.items.Front()
if front == nil {
return 0
}
item := front.Value.(*queueItem)
return time.Since(item.queueTime)
}

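Taken together, CleanupQueue is a deduplicated buffer ordered by event time with two flush triggers, queue size and entry age. The following sketch is illustrative only and not part of the diff; it assumes the exported API exactly as introduced above and shows deduplication, re-sorting of out-of-order event times, and the size-based trigger:

package main

import (
	"fmt"
	"time"

	"github.com/seaweedfs/seaweedfs/weed/filer/empty_folder_cleanup"
)

func main() {
	// maxSize 2, maxAge 10m: ShouldProcess becomes true on either condition.
	q := empty_folder_cleanup.NewCleanupQueue(2, 10*time.Minute)

	now := time.Now()
	q.Add("/buckets/b1/photos", now.Add(2*time.Second)) // arrives first, but with a newer event time
	q.Add("/buckets/b1/logs", now)                      // older event time, sorted to the front
	q.Add("/buckets/b1/photos", now.Add(5*time.Second)) // duplicate: time updated in place, Len() stays 2

	fmt.Println(q.Len(), q.ShouldProcess()) // 2 true (size trigger: Len() >= maxSize)

	// Pop drains in event-time order: logs first, then photos.
	for {
		folder, ok := q.Pop()
		if !ok {
			break
		}
		fmt.Println(folder)
	}
}

The background processor in empty_folder_cleaner.go relies on the same triggers: it drains the queue only while ShouldProcess keeps returning true.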
371
weed/filer/empty_folder_cleanup/cleanup_queue_test.go

@ -0,0 +1,371 @@
package empty_folder_cleanup
import (
"testing"
"time"
)
func TestCleanupQueue_Add(t *testing.T) {
q := NewCleanupQueue(100, 10*time.Minute)
now := time.Now()
// Add first item
if !q.Add("/buckets/b1/folder1", now) {
t.Error("expected Add to return true for new item")
}
if q.Len() != 1 {
t.Errorf("expected len 1, got %d", q.Len())
}
// Add second item with later time
if !q.Add("/buckets/b1/folder2", now.Add(1*time.Second)) {
t.Error("expected Add to return true for new item")
}
if q.Len() != 2 {
t.Errorf("expected len 2, got %d", q.Len())
}
// Add duplicate with newer time - should update and reposition
if q.Add("/buckets/b1/folder1", now.Add(2*time.Second)) {
t.Error("expected Add to return false for existing item")
}
if q.Len() != 2 {
t.Errorf("expected len 2 after duplicate, got %d", q.Len())
}
// folder1 should now be at the back (newer time) - verify by popping
folder1, _ := q.Pop()
folder2, _ := q.Pop()
if folder1 != "/buckets/b1/folder2" || folder2 != "/buckets/b1/folder1" {
t.Errorf("expected folder1 to be moved to back, got %s, %s", folder1, folder2)
}
}
func TestCleanupQueue_Add_OutOfOrder(t *testing.T) {
q := NewCleanupQueue(100, 10*time.Minute)
baseTime := time.Now()
// Add items out of order
q.Add("/buckets/b1/folder3", baseTime.Add(3*time.Second))
q.Add("/buckets/b1/folder1", baseTime.Add(1*time.Second))
q.Add("/buckets/b1/folder2", baseTime.Add(2*time.Second))
// Items should be in time order (oldest first) - verify by popping
expected := []string{"/buckets/b1/folder1", "/buckets/b1/folder2", "/buckets/b1/folder3"}
for i, exp := range expected {
folder, ok := q.Pop()
if !ok || folder != exp {
t.Errorf("at index %d: expected %s, got %s", i, exp, folder)
}
}
}
func TestCleanupQueue_Add_DuplicateWithOlderTime(t *testing.T) {
q := NewCleanupQueue(100, 10*time.Minute)
baseTime := time.Now()
// Add folder at t=5
q.Add("/buckets/b1/folder1", baseTime.Add(5*time.Second))
// Try to add same folder with older time - should NOT update
q.Add("/buckets/b1/folder1", baseTime.Add(2*time.Second))
// Time should remain at t=5
_, queueTime, _ := q.Peek()
if queueTime != baseTime.Add(5*time.Second) {
t.Errorf("expected time to remain unchanged, got %v", queueTime)
}
}
func TestCleanupQueue_Remove(t *testing.T) {
q := NewCleanupQueue(100, 10*time.Minute)
now := time.Now()
q.Add("/buckets/b1/folder1", now)
q.Add("/buckets/b1/folder2", now.Add(1*time.Second))
q.Add("/buckets/b1/folder3", now.Add(2*time.Second))
// Remove middle item
if !q.Remove("/buckets/b1/folder2") {
t.Error("expected Remove to return true for existing item")
}
if q.Len() != 2 {
t.Errorf("expected len 2, got %d", q.Len())
}
if q.Contains("/buckets/b1/folder2") {
t.Error("removed item should not be in queue")
}
// Remove non-existent item
if q.Remove("/buckets/b1/nonexistent") {
t.Error("expected Remove to return false for non-existent item")
}
// Verify order is preserved by popping
folder1, _ := q.Pop()
folder3, _ := q.Pop()
if folder1 != "/buckets/b1/folder1" || folder3 != "/buckets/b1/folder3" {
t.Errorf("unexpected order: %s, %s", folder1, folder3)
}
}
func TestCleanupQueue_Pop(t *testing.T) {
q := NewCleanupQueue(100, 10*time.Minute)
now := time.Now()
// Pop from empty queue
folder, ok := q.Pop()
if ok {
t.Error("expected Pop to return false for empty queue")
}
if folder != "" {
t.Errorf("expected empty folder, got %s", folder)
}
// Add items and pop in order
q.Add("/buckets/b1/folder1", now)
q.Add("/buckets/b1/folder2", now.Add(1*time.Second))
q.Add("/buckets/b1/folder3", now.Add(2*time.Second))
folder, ok = q.Pop()
if !ok || folder != "/buckets/b1/folder1" {
t.Errorf("expected folder1, got %s (ok=%v)", folder, ok)
}
folder, ok = q.Pop()
if !ok || folder != "/buckets/b1/folder2" {
t.Errorf("expected folder2, got %s (ok=%v)", folder, ok)
}
folder, ok = q.Pop()
if !ok || folder != "/buckets/b1/folder3" {
t.Errorf("expected folder3, got %s (ok=%v)", folder, ok)
}
// Queue should be empty now
if q.Len() != 0 {
t.Errorf("expected empty queue, got len %d", q.Len())
}
}
func TestCleanupQueue_Peek(t *testing.T) {
q := NewCleanupQueue(100, 10*time.Minute)
now := time.Now()
// Peek empty queue
folder, _, ok := q.Peek()
if ok {
t.Error("expected Peek to return false for empty queue")
}
// Add item and peek
q.Add("/buckets/b1/folder1", now)
folder, queueTime, ok := q.Peek()
if !ok || folder != "/buckets/b1/folder1" {
t.Errorf("expected folder1, got %s (ok=%v)", folder, ok)
}
if queueTime != now {
t.Errorf("expected queue time %v, got %v", now, queueTime)
}
// Peek should not remove item
if q.Len() != 1 {
t.Errorf("Peek should not remove item, len=%d", q.Len())
}
}
func TestCleanupQueue_Contains(t *testing.T) {
q := NewCleanupQueue(100, 10*time.Minute)
now := time.Now()
q.Add("/buckets/b1/folder1", now)
if !q.Contains("/buckets/b1/folder1") {
t.Error("expected Contains to return true")
}
if q.Contains("/buckets/b1/folder2") {
t.Error("expected Contains to return false for non-existent")
}
}
func TestCleanupQueue_ShouldProcess_MaxSize(t *testing.T) {
q := NewCleanupQueue(3, 10*time.Minute)
now := time.Now()
// Empty queue
if q.ShouldProcess() {
t.Error("empty queue should not need processing")
}
// Add items below max
q.Add("/buckets/b1/folder1", now)
q.Add("/buckets/b1/folder2", now.Add(1*time.Second))
if q.ShouldProcess() {
t.Error("queue below max should not need processing")
}
// Add item to reach max
q.Add("/buckets/b1/folder3", now.Add(2*time.Second))
if !q.ShouldProcess() {
t.Error("queue at max should need processing")
}
}
func TestCleanupQueue_ShouldProcess_MaxAge(t *testing.T) {
q := NewCleanupQueue(100, 100*time.Millisecond) // Short max age for testing
// Add item with old event time
oldTime := time.Now().Add(-1 * time.Second) // 1 second ago
q.Add("/buckets/b1/folder1", oldTime)
// Item is older than maxAge, should need processing
if !q.ShouldProcess() {
t.Error("old item should trigger processing")
}
// Clear and add fresh item
q.Clear()
q.Add("/buckets/b1/folder2", time.Now())
// Fresh item should not trigger processing
if q.ShouldProcess() {
t.Error("fresh item should not trigger processing")
}
}
func TestCleanupQueue_Clear(t *testing.T) {
q := NewCleanupQueue(100, 10*time.Minute)
now := time.Now()
q.Add("/buckets/b1/folder1", now)
q.Add("/buckets/b1/folder2", now.Add(1*time.Second))
q.Add("/buckets/b1/folder3", now.Add(2*time.Second))
q.Clear()
if q.Len() != 0 {
t.Errorf("expected empty queue after Clear, got len %d", q.Len())
}
if q.Contains("/buckets/b1/folder1") {
t.Error("queue should not contain items after Clear")
}
}
func TestCleanupQueue_OldestAge(t *testing.T) {
q := NewCleanupQueue(100, 10*time.Minute)
// Empty queue
if q.OldestAge() != 0 {
t.Error("empty queue should have zero oldest age")
}
// Add item with time in the past
oldTime := time.Now().Add(-5 * time.Minute)
q.Add("/buckets/b1/folder1", oldTime)
// Age should be approximately 5 minutes
age := q.OldestAge()
if age < 4*time.Minute || age > 6*time.Minute {
t.Errorf("expected ~5m age, got %v", age)
}
}
func TestCleanupQueue_TimeOrder(t *testing.T) {
q := NewCleanupQueue(100, 10*time.Minute)
baseTime := time.Now()
// Add items in order
items := []string{
"/buckets/b1/a",
"/buckets/b1/b",
"/buckets/b1/c",
"/buckets/b1/d",
"/buckets/b1/e",
}
for i, item := range items {
q.Add(item, baseTime.Add(time.Duration(i)*time.Second))
}
// Pop should return in time order
for i, expected := range items {
got, ok := q.Pop()
if !ok {
t.Errorf("Pop %d: expected item, got empty", i)
}
if got != expected {
t.Errorf("Pop %d: expected %s, got %s", i, expected, got)
}
}
}
func TestCleanupQueue_DuplicateWithNewerTime(t *testing.T) {
q := NewCleanupQueue(100, 10*time.Minute)
baseTime := time.Now()
// Add items
q.Add("/buckets/b1/folder1", baseTime)
q.Add("/buckets/b1/folder2", baseTime.Add(1*time.Second))
q.Add("/buckets/b1/folder3", baseTime.Add(2*time.Second))
// Add duplicate with newer time - should update and reposition
q.Add("/buckets/b1/folder1", baseTime.Add(3*time.Second))
// folder1 should now be at the back (newest time) - verify by popping
expected := []string{"/buckets/b1/folder2", "/buckets/b1/folder3", "/buckets/b1/folder1"}
for i, exp := range expected {
folder, ok := q.Pop()
if !ok || folder != exp {
t.Errorf("at index %d: expected %s, got %s", i, exp, folder)
}
}
}
func TestCleanupQueue_Concurrent(t *testing.T) {
q := NewCleanupQueue(1000, 10*time.Minute)
done := make(chan bool)
now := time.Now()
// Concurrent adds
go func() {
for i := 0; i < 100; i++ {
q.Add("/buckets/b1/folder"+string(rune('A'+i%26)), now.Add(time.Duration(i)*time.Millisecond))
}
done <- true
}()
// Concurrent removes
go func() {
for i := 0; i < 50; i++ {
q.Remove("/buckets/b1/folder" + string(rune('A'+i%26)))
}
done <- true
}()
// Concurrent pops
go func() {
for i := 0; i < 30; i++ {
q.Pop()
}
done <- true
}()
// Concurrent reads
go func() {
for i := 0; i < 100; i++ {
q.Len()
q.Contains("/buckets/b1/folderA")
q.ShouldProcess()
}
done <- true
}()
// Wait for all goroutines
for i := 0; i < 4; i++ {
<-done
}
// Just verify no panic occurred and queue is in consistent state
_ = q.Len()
}

436
weed/filer/empty_folder_cleanup/empty_folder_cleaner.go

@ -0,0 +1,436 @@
package empty_folder_cleanup
import (
"context"
"strings"
"sync"
"time"
"github.com/seaweedfs/seaweedfs/weed/cluster/lock_manager"
"github.com/seaweedfs/seaweedfs/weed/glog"
"github.com/seaweedfs/seaweedfs/weed/pb"
"github.com/seaweedfs/seaweedfs/weed/util"
)
const (
DefaultMaxCountCheck = 1000
DefaultCacheExpiry = 5 * time.Minute
DefaultQueueMaxSize = 1000
DefaultQueueMaxAge = 10 * time.Minute
DefaultProcessorSleep = 10 * time.Second // How often to check queue
)
// FilerOperations defines the filer operations needed by EmptyFolderCleaner
type FilerOperations interface {
CountDirectoryEntries(ctx context.Context, dirPath util.FullPath, limit int) (count int, err error)
DeleteEntryMetaAndData(ctx context.Context, p util.FullPath, isRecursive, ignoreRecursiveError, shouldDeleteChunks, isFromOtherCluster bool, signatures []int32, ifNotModifiedAfter int64) error
}
// folderState tracks the state of a folder for empty folder cleanup
type folderState struct {
roughCount int // Cached rough count (up to maxCountCheck)
lastAddTime time.Time // Last time an item was added
lastDelTime time.Time // Last time an item was deleted
lastCheck time.Time // Last time we checked the actual count
}
// EmptyFolderCleaner handles asynchronous cleanup of empty folders
// Each filer owns specific folders via consistent hashing based on the peer filer list
type EmptyFolderCleaner struct {
filer FilerOperations
lockRing *lock_manager.LockRing
host pb.ServerAddress
// Folder state tracking
mu sync.RWMutex
folderCounts map[string]*folderState // Rough count cache
// Cleanup queue (thread-safe, has its own lock)
cleanupQueue *CleanupQueue
// Configuration
maxCountCheck int // Max items to count (1000)
cacheExpiry time.Duration // How long to keep cache entries
processorSleep time.Duration // How often processor checks queue
bucketPath string // e.g., "/buckets"
// Control
enabled bool
stopCh chan struct{}
}
// NewEmptyFolderCleaner creates a new EmptyFolderCleaner
func NewEmptyFolderCleaner(filer FilerOperations, lockRing *lock_manager.LockRing, host pb.ServerAddress, bucketPath string) *EmptyFolderCleaner {
efc := &EmptyFolderCleaner{
filer: filer,
lockRing: lockRing,
host: host,
folderCounts: make(map[string]*folderState),
cleanupQueue: NewCleanupQueue(DefaultQueueMaxSize, DefaultQueueMaxAge),
maxCountCheck: DefaultMaxCountCheck,
cacheExpiry: DefaultCacheExpiry,
processorSleep: DefaultProcessorSleep,
bucketPath: bucketPath,
enabled: true,
stopCh: make(chan struct{}),
}
go efc.cacheEvictionLoop()
go efc.cleanupProcessor()
return efc
}
// SetEnabled enables or disables the cleaner
func (efc *EmptyFolderCleaner) SetEnabled(enabled bool) {
efc.mu.Lock()
defer efc.mu.Unlock()
efc.enabled = enabled
}
// IsEnabled returns whether the cleaner is enabled
func (efc *EmptyFolderCleaner) IsEnabled() bool {
efc.mu.RLock()
defer efc.mu.RUnlock()
return efc.enabled
}
// ownsFolder checks if this filer owns the folder via consistent hashing
func (efc *EmptyFolderCleaner) ownsFolder(folder string) bool {
servers := efc.lockRing.GetSnapshot()
if len(servers) <= 1 {
return true // Single filer case
}
return efc.hashKeyToServer(folder, servers) == efc.host
}
// hashKeyToServer uses consistent hashing to map a folder to a server
func (efc *EmptyFolderCleaner) hashKeyToServer(key string, servers []pb.ServerAddress) pb.ServerAddress {
if len(servers) == 0 {
return ""
}
x := util.HashStringToLong(key)
if x < 0 {
x = -x
}
x = x % int64(len(servers))
return servers[x]
}
// OnDeleteEvent is called when a file or directory is deleted
// Both file and directory deletions count towards making the parent folder empty
// eventTime is the time when the delete event occurred (for proper ordering)
func (efc *EmptyFolderCleaner) OnDeleteEvent(directory string, entryName string, isDirectory bool, eventTime time.Time) {
// Skip if not under bucket path (must be at least /buckets/<bucket>/...)
if efc.bucketPath != "" && !isUnderBucketPath(directory, efc.bucketPath) {
return
}
// Check if we own this folder
if !efc.ownsFolder(directory) {
glog.V(4).Infof("EmptyFolderCleaner: not owner of %s, skipping", directory)
return
}
efc.mu.Lock()
defer efc.mu.Unlock()
// Check enabled inside lock to avoid race with Stop()
if !efc.enabled {
return
}
glog.V(3).Infof("EmptyFolderCleaner: delete event in %s/%s (isDir=%v)", directory, entryName, isDirectory)
// Update cached count (create entry if needed)
state, exists := efc.folderCounts[directory]
if !exists {
state = &folderState{}
efc.folderCounts[directory] = state
}
if state.roughCount > 0 {
state.roughCount--
}
state.lastDelTime = eventTime
// Only add to cleanup queue if roughCount suggests folder might be empty
if state.roughCount > 0 {
glog.V(3).Infof("EmptyFolderCleaner: skipping queue for %s, roughCount=%d", directory, state.roughCount)
return
}
// Add to cleanup queue with event time (handles out-of-order events)
if efc.cleanupQueue.Add(directory, eventTime) {
glog.V(3).Infof("EmptyFolderCleaner: queued %s for cleanup", directory)
}
}
// OnCreateEvent is called when a file or directory is created
// Both file and directory creations cancel pending cleanup for the parent folder
func (efc *EmptyFolderCleaner) OnCreateEvent(directory string, entryName string, isDirectory bool) {
// Skip if not under bucket path (must be at least /buckets/<bucket>/...)
if efc.bucketPath != "" && !isUnderBucketPath(directory, efc.bucketPath) {
return
}
efc.mu.Lock()
defer efc.mu.Unlock()
// Check enabled inside lock to avoid race with Stop()
if !efc.enabled {
return
}
// Update cached count only if already tracked (no need to track new folders)
if state, exists := efc.folderCounts[directory]; exists {
state.roughCount++
state.lastAddTime = time.Now()
}
// Remove from cleanup queue (cancel pending cleanup)
if efc.cleanupQueue.Remove(directory) {
glog.V(3).Infof("EmptyFolderCleaner: cancelled cleanup for %s due to new entry", directory)
}
}
// cleanupProcessor runs in background and processes the cleanup queue
func (efc *EmptyFolderCleaner) cleanupProcessor() {
ticker := time.NewTicker(efc.processorSleep)
defer ticker.Stop()
for {
select {
case <-efc.stopCh:
return
case <-ticker.C:
efc.processCleanupQueue()
}
}
}
// processCleanupQueue processes items from the cleanup queue
func (efc *EmptyFolderCleaner) processCleanupQueue() {
// Check if we should process
if !efc.cleanupQueue.ShouldProcess() {
return
}
glog.V(3).Infof("EmptyFolderCleaner: processing cleanup queue (len=%d, age=%v)",
efc.cleanupQueue.Len(), efc.cleanupQueue.OldestAge())
// Process all items that are ready
for efc.cleanupQueue.Len() > 0 {
// Check if still enabled
if !efc.IsEnabled() {
return
}
// Pop the oldest item
folder, ok := efc.cleanupQueue.Pop()
if !ok {
break
}
// Execute cleanup for this folder
efc.executeCleanup(folder)
// If queue is no longer full and oldest item is not old enough, stop processing
if !efc.cleanupQueue.ShouldProcess() {
break
}
}
}
// executeCleanup performs the actual cleanup of an empty folder
func (efc *EmptyFolderCleaner) executeCleanup(folder string) {
efc.mu.Lock()
// Quick check: if we have cached count and it's > 0, skip
if state, exists := efc.folderCounts[folder]; exists {
if state.roughCount > 0 {
glog.V(3).Infof("EmptyFolderCleaner: skipping %s, cached count=%d", folder, state.roughCount)
efc.mu.Unlock()
return
}
// If there was an add after our delete, skip
if !state.lastAddTime.IsZero() && state.lastAddTime.After(state.lastDelTime) {
glog.V(3).Infof("EmptyFolderCleaner: skipping %s, add happened after delete", folder)
efc.mu.Unlock()
return
}
}
efc.mu.Unlock()
// Re-check ownership (topology might have changed)
if !efc.ownsFolder(folder) {
glog.V(3).Infof("EmptyFolderCleaner: no longer owner of %s, skipping", folder)
return
}
// Check if folder is actually empty (count up to maxCountCheck)
ctx := context.Background()
count, err := efc.countItems(ctx, folder)
if err != nil {
glog.V(2).Infof("EmptyFolderCleaner: error counting items in %s: %v", folder, err)
return
}
efc.mu.Lock()
// Update cache
if _, exists := efc.folderCounts[folder]; !exists {
efc.folderCounts[folder] = &folderState{}
}
efc.folderCounts[folder].roughCount = count
efc.folderCounts[folder].lastCheck = time.Now()
efc.mu.Unlock()
if count > 0 {
glog.V(3).Infof("EmptyFolderCleaner: folder %s has %d items, not empty", folder, count)
return
}
// Delete the empty folder
glog.V(2).Infof("EmptyFolderCleaner: deleting empty folder %s", folder)
if err := efc.deleteFolder(ctx, folder); err != nil {
glog.V(2).Infof("EmptyFolderCleaner: failed to delete empty folder %s: %v", folder, err)
return
}
// Clean up cache entry
efc.mu.Lock()
delete(efc.folderCounts, folder)
efc.mu.Unlock()
// Note: No need to recursively check parent folder here.
// The deletion of this folder will generate a metadata event,
// which will trigger OnDeleteEvent for the parent folder.
}
// countItems counts items in a folder (up to maxCountCheck)
func (efc *EmptyFolderCleaner) countItems(ctx context.Context, folder string) (int, error) {
return efc.filer.CountDirectoryEntries(ctx, util.FullPath(folder), efc.maxCountCheck)
}
// deleteFolder deletes an empty folder
func (efc *EmptyFolderCleaner) deleteFolder(ctx context.Context, folder string) error {
return efc.filer.DeleteEntryMetaAndData(ctx, util.FullPath(folder), false, false, false, false, nil, 0)
}
// isUnderPath checks if child is under parent path
func isUnderPath(child, parent string) bool {
if parent == "" || parent == "/" {
return true
}
// Ensure parent ends without slash for proper prefix matching
if len(parent) > 0 && parent[len(parent)-1] == '/' {
parent = parent[:len(parent)-1]
}
// Child must start with parent and then have a / or be exactly parent
if len(child) < len(parent) {
return false
}
if child[:len(parent)] != parent {
return false
}
if len(child) == len(parent) {
return true
}
return child[len(parent)] == '/'
}
// isUnderBucketPath checks if directory is inside a bucket (under /buckets/<bucket>/...)
// This ensures we only clean up folders inside buckets, not the buckets themselves
func isUnderBucketPath(directory, bucketPath string) bool {
if bucketPath == "" {
return true
}
// Ensure bucketPath ends without slash
if len(bucketPath) > 0 && bucketPath[len(bucketPath)-1] == '/' {
bucketPath = bucketPath[:len(bucketPath)-1]
}
// Directory must be under bucketPath
if !isUnderPath(directory, bucketPath) {
return false
}
// Directory must be at least /buckets/<bucket>/<something>
// i.e., depth must be at least bucketPath depth + 2
// For /buckets (depth 1), we need at least /buckets/mybucket/folder (depth 3)
bucketPathDepth := strings.Count(bucketPath, "/")
directoryDepth := strings.Count(directory, "/")
return directoryDepth >= bucketPathDepth+2
}
// cacheEvictionLoop periodically removes stale entries from folderCounts
func (efc *EmptyFolderCleaner) cacheEvictionLoop() {
ticker := time.NewTicker(efc.cacheExpiry)
defer ticker.Stop()
for {
select {
case <-efc.stopCh:
return
case <-ticker.C:
efc.evictStaleCacheEntries()
}
}
}
// evictStaleCacheEntries removes cache entries that haven't been accessed recently
func (efc *EmptyFolderCleaner) evictStaleCacheEntries() {
efc.mu.Lock()
defer efc.mu.Unlock()
now := time.Now()
expiredCount := 0
for folder, state := range efc.folderCounts {
// Skip if folder is in cleanup queue
if efc.cleanupQueue.Contains(folder) {
continue
}
// Find the most recent activity time for this folder
lastActivity := state.lastCheck
if state.lastAddTime.After(lastActivity) {
lastActivity = state.lastAddTime
}
if state.lastDelTime.After(lastActivity) {
lastActivity = state.lastDelTime
}
// Evict if no activity within cache expiry period
if now.Sub(lastActivity) > efc.cacheExpiry {
delete(efc.folderCounts, folder)
expiredCount++
}
}
if expiredCount > 0 {
glog.V(3).Infof("EmptyFolderCleaner: evicted %d stale cache entries", expiredCount)
}
}
// Stop stops the cleaner and cancels all pending tasks
func (efc *EmptyFolderCleaner) Stop() {
close(efc.stopCh)
efc.mu.Lock()
defer efc.mu.Unlock()
efc.enabled = false
efc.cleanupQueue.Clear()
efc.folderCounts = make(map[string]*folderState) // Clear cache on stop
}
// GetPendingCleanupCount returns the number of pending cleanup tasks (for testing)
func (efc *EmptyFolderCleaner) GetPendingCleanupCount() int {
return efc.cleanupQueue.Len()
}
// GetCachedFolderCount returns the cached count for a folder (for testing)
func (efc *EmptyFolderCleaner) GetCachedFolderCount(folder string) (int, bool) {
efc.mu.RLock()
defer efc.mu.RUnlock()
if state, exists := efc.folderCounts[folder]; exists {
return state.roughCount, true
}
return 0, false
}

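Because the cleaner reaches the filer only through the small FilerOperations interface, it can be wired against any implementation. The sketch below is illustrative and not part of the diff: countingFiler is an invented in-memory stand-in, and with a single filer in the LockRing the cleaner owns every folder, so a delete event for the last entry queues the folder for the background check.

package main

import (
	"context"
	"time"

	"github.com/seaweedfs/seaweedfs/weed/cluster/lock_manager"
	"github.com/seaweedfs/seaweedfs/weed/filer/empty_folder_cleanup"
	"github.com/seaweedfs/seaweedfs/weed/pb"
	"github.com/seaweedfs/seaweedfs/weed/util"
)

// countingFiler is a hypothetical in-memory FilerOperations implementation.
type countingFiler struct {
	entries map[string]int // folder -> number of entries
}

func (cf *countingFiler) CountDirectoryEntries(ctx context.Context, dirPath util.FullPath, limit int) (int, error) {
	n := cf.entries[string(dirPath)]
	if n > limit {
		n = limit
	}
	return n, nil
}

func (cf *countingFiler) DeleteEntryMetaAndData(ctx context.Context, p util.FullPath, isRecursive, ignoreRecursiveError, shouldDeleteChunks, isFromOtherCluster bool, signatures []int32, ifNotModifiedAfter int64) error {
	delete(cf.entries, string(p))
	return nil
}

func main() {
	// Single filer in the ring: ownsFolder is true for every folder.
	lockRing := lock_manager.NewLockRing(5 * time.Second)
	lockRing.SetSnapshot([]pb.ServerAddress{"filer1:8888"})

	store := &countingFiler{entries: map[string]int{"/buckets/b1/photos/2024": 0}}
	cleaner := empty_folder_cleanup.NewEmptyFolderCleaner(store, lockRing, "filer1:8888", "/buckets")
	defer cleaner.Stop()

	// The last file in the folder was deleted: the folder is queued, and the
	// background processor will later verify the count and delete it if still empty.
	cleaner.OnDeleteEvent("/buckets/b1/photos/2024", "img.jpg", false, time.Now())
}

In a multi-filer cluster every peer may observe the same event, but only the filer that the consistent hash maps the folder to will queue it, which keeps the cleanup work partitioned without extra coordination.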
569
weed/filer/empty_folder_cleanup/empty_folder_cleaner_test.go

@ -0,0 +1,569 @@
package empty_folder_cleanup
import (
"testing"
"time"
"github.com/seaweedfs/seaweedfs/weed/cluster/lock_manager"
"github.com/seaweedfs/seaweedfs/weed/pb"
)
func Test_isUnderPath(t *testing.T) {
tests := []struct {
name string
child string
parent string
expected bool
}{
{"child under parent", "/buckets/mybucket/folder/file.txt", "/buckets", true},
{"child is parent", "/buckets", "/buckets", true},
{"child not under parent", "/other/path", "/buckets", false},
{"empty parent", "/any/path", "", true},
{"root parent", "/any/path", "/", true},
{"parent with trailing slash", "/buckets/mybucket", "/buckets/", true},
{"similar prefix but not under", "/buckets-other/file", "/buckets", false},
{"deeply nested", "/buckets/a/b/c/d/e/f", "/buckets/a/b", true},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
result := isUnderPath(tt.child, tt.parent)
if result != tt.expected {
t.Errorf("isUnderPath(%q, %q) = %v, want %v", tt.child, tt.parent, result, tt.expected)
}
})
}
}
func Test_isUnderBucketPath(t *testing.T) {
tests := []struct {
name string
directory string
bucketPath string
expected bool
}{
// Should NOT process - bucket path itself
{"bucket path itself", "/buckets", "/buckets", false},
// Should NOT process - bucket directory (immediate child)
{"bucket directory", "/buckets/mybucket", "/buckets", false},
// Should process - folder inside bucket
{"folder in bucket", "/buckets/mybucket/folder", "/buckets", true},
// Should process - nested folder
{"nested folder", "/buckets/mybucket/a/b/c", "/buckets", true},
// Should NOT process - outside buckets
{"outside buckets", "/other/path", "/buckets", false},
// Empty bucket path allows all
{"empty bucket path", "/any/path", "", true},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
result := isUnderBucketPath(tt.directory, tt.bucketPath)
if result != tt.expected {
t.Errorf("isUnderBucketPath(%q, %q) = %v, want %v", tt.directory, tt.bucketPath, result, tt.expected)
}
})
}
}
func TestEmptyFolderCleaner_ownsFolder(t *testing.T) {
// Create a LockRing with multiple servers
lockRing := lock_manager.NewLockRing(5 * time.Second)
servers := []pb.ServerAddress{
"filer1:8888",
"filer2:8888",
"filer3:8888",
}
lockRing.SetSnapshot(servers)
// Create cleaner for filer1
cleaner1 := &EmptyFolderCleaner{
lockRing: lockRing,
host: "filer1:8888",
}
// Create cleaner for filer2
cleaner2 := &EmptyFolderCleaner{
lockRing: lockRing,
host: "filer2:8888",
}
// Create cleaner for filer3
cleaner3 := &EmptyFolderCleaner{
lockRing: lockRing,
host: "filer3:8888",
}
// Test that exactly one filer owns each folder
testFolders := []string{
"/buckets/mybucket/folder1",
"/buckets/mybucket/folder2",
"/buckets/mybucket/folder3",
"/buckets/mybucket/a/b/c",
"/buckets/otherbucket/x",
}
for _, folder := range testFolders {
ownCount := 0
if cleaner1.ownsFolder(folder) {
ownCount++
}
if cleaner2.ownsFolder(folder) {
ownCount++
}
if cleaner3.ownsFolder(folder) {
ownCount++
}
if ownCount != 1 {
t.Errorf("folder %q owned by %d filers, expected exactly 1", folder, ownCount)
}
}
}
func TestEmptyFolderCleaner_ownsFolder_singleServer(t *testing.T) {
// Create a LockRing with a single server
lockRing := lock_manager.NewLockRing(5 * time.Second)
lockRing.SetSnapshot([]pb.ServerAddress{"filer1:8888"})
cleaner := &EmptyFolderCleaner{
lockRing: lockRing,
host: "filer1:8888",
}
// Single filer should own all folders
testFolders := []string{
"/buckets/mybucket/folder1",
"/buckets/mybucket/folder2",
"/buckets/otherbucket/x",
}
for _, folder := range testFolders {
if !cleaner.ownsFolder(folder) {
t.Errorf("single filer should own folder %q", folder)
}
}
}
func TestEmptyFolderCleaner_ownsFolder_emptyRing(t *testing.T) {
// Create an empty LockRing
lockRing := lock_manager.NewLockRing(5 * time.Second)
cleaner := &EmptyFolderCleaner{
lockRing: lockRing,
host: "filer1:8888",
}
// With empty ring, should own all folders
if !cleaner.ownsFolder("/buckets/mybucket/folder") {
t.Error("should own folder with empty ring")
}
}
func TestEmptyFolderCleaner_OnCreateEvent_cancelsCleanup(t *testing.T) {
lockRing := lock_manager.NewLockRing(5 * time.Second)
lockRing.SetSnapshot([]pb.ServerAddress{"filer1:8888"})
cleaner := &EmptyFolderCleaner{
lockRing: lockRing,
host: "filer1:8888",
bucketPath: "/buckets",
enabled: true,
folderCounts: make(map[string]*folderState),
cleanupQueue: NewCleanupQueue(1000, 10*time.Minute),
stopCh: make(chan struct{}),
}
folder := "/buckets/mybucket/testfolder"
now := time.Now()
// Simulate delete event
cleaner.OnDeleteEvent(folder, "file.txt", false, now)
// Check that cleanup is queued
if cleaner.GetPendingCleanupCount() != 1 {
t.Errorf("expected 1 pending cleanup, got %d", cleaner.GetPendingCleanupCount())
}
// Simulate create event
cleaner.OnCreateEvent(folder, "newfile.txt", false)
// Check that cleanup is cancelled
if cleaner.GetPendingCleanupCount() != 0 {
t.Errorf("expected 0 pending cleanups after create, got %d", cleaner.GetPendingCleanupCount())
}
cleaner.Stop()
}
func TestEmptyFolderCleaner_OnDeleteEvent_deduplication(t *testing.T) {
lockRing := lock_manager.NewLockRing(5 * time.Second)
lockRing.SetSnapshot([]pb.ServerAddress{"filer1:8888"})
cleaner := &EmptyFolderCleaner{
lockRing: lockRing,
host: "filer1:8888",
bucketPath: "/buckets",
enabled: true,
folderCounts: make(map[string]*folderState),
cleanupQueue: NewCleanupQueue(1000, 10*time.Minute),
stopCh: make(chan struct{}),
}
folder := "/buckets/mybucket/testfolder"
now := time.Now()
// Simulate multiple delete events for same folder
for i := 0; i < 5; i++ {
cleaner.OnDeleteEvent(folder, "file"+string(rune('0'+i))+".txt", false, now.Add(time.Duration(i)*time.Second))
}
// Check that only 1 cleanup is queued (deduplicated)
if cleaner.GetPendingCleanupCount() != 1 {
t.Errorf("expected 1 pending cleanup after deduplication, got %d", cleaner.GetPendingCleanupCount())
}
cleaner.Stop()
}
func TestEmptyFolderCleaner_OnDeleteEvent_multipleFolders(t *testing.T) {
lockRing := lock_manager.NewLockRing(5 * time.Second)
lockRing.SetSnapshot([]pb.ServerAddress{"filer1:8888"})
cleaner := &EmptyFolderCleaner{
lockRing: lockRing,
host: "filer1:8888",
bucketPath: "/buckets",
enabled: true,
folderCounts: make(map[string]*folderState),
cleanupQueue: NewCleanupQueue(1000, 10*time.Minute),
stopCh: make(chan struct{}),
}
now := time.Now()
// Delete files in different folders
cleaner.OnDeleteEvent("/buckets/mybucket/folder1", "file.txt", false, now)
cleaner.OnDeleteEvent("/buckets/mybucket/folder2", "file.txt", false, now.Add(1*time.Second))
cleaner.OnDeleteEvent("/buckets/mybucket/folder3", "file.txt", false, now.Add(2*time.Second))
// Each folder should be queued
if cleaner.GetPendingCleanupCount() != 3 {
t.Errorf("expected 3 pending cleanups, got %d", cleaner.GetPendingCleanupCount())
}
cleaner.Stop()
}
func TestEmptyFolderCleaner_OnDeleteEvent_notOwner(t *testing.T) {
lockRing := lock_manager.NewLockRing(5 * time.Second)
lockRing.SetSnapshot([]pb.ServerAddress{"filer1:8888", "filer2:8888"})
// Create cleaner for filer that doesn't own the folder
cleaner := &EmptyFolderCleaner{
lockRing: lockRing,
host: "filer1:8888",
bucketPath: "/buckets",
enabled: true,
folderCounts: make(map[string]*folderState),
cleanupQueue: NewCleanupQueue(1000, 10*time.Minute),
stopCh: make(chan struct{}),
}
now := time.Now()
// Try many folders, looking for one that filer1 doesn't own
foundNonOwned := false
for i := 0; i < 100; i++ {
folder := "/buckets/mybucket/folder" + string(rune('0'+i%10)) + string(rune('0'+i/10))
if !cleaner.ownsFolder(folder) {
// This folder is not owned by filer1
cleaner.OnDeleteEvent(folder, "file.txt", false, now)
if cleaner.GetPendingCleanupCount() != 0 {
t.Errorf("non-owner should not queue cleanup for folder %s", folder)
}
foundNonOwned = true
break
}
}
if !foundNonOwned {
t.Skip("could not find a folder not owned by filer1")
}
cleaner.Stop()
}
func TestEmptyFolderCleaner_OnDeleteEvent_disabled(t *testing.T) {
lockRing := lock_manager.NewLockRing(5 * time.Second)
lockRing.SetSnapshot([]pb.ServerAddress{"filer1:8888"})
cleaner := &EmptyFolderCleaner{
lockRing: lockRing,
host: "filer1:8888",
bucketPath: "/buckets",
enabled: false, // Disabled
folderCounts: make(map[string]*folderState),
cleanupQueue: NewCleanupQueue(1000, 10*time.Minute),
stopCh: make(chan struct{}),
}
folder := "/buckets/mybucket/testfolder"
now := time.Now()
// Simulate delete event
cleaner.OnDeleteEvent(folder, "file.txt", false, now)
// Check that no cleanup is queued when disabled
if cleaner.GetPendingCleanupCount() != 0 {
t.Errorf("disabled cleaner should not queue cleanup, got %d", cleaner.GetPendingCleanupCount())
}
cleaner.Stop()
}
func TestEmptyFolderCleaner_OnDeleteEvent_directoryDeletion(t *testing.T) {
lockRing := lock_manager.NewLockRing(5 * time.Second)
lockRing.SetSnapshot([]pb.ServerAddress{"filer1:8888"})
cleaner := &EmptyFolderCleaner{
lockRing: lockRing,
host: "filer1:8888",
bucketPath: "/buckets",
enabled: true,
folderCounts: make(map[string]*folderState),
cleanupQueue: NewCleanupQueue(1000, 10*time.Minute),
stopCh: make(chan struct{}),
}
folder := "/buckets/mybucket/testfolder"
now := time.Now()
// Simulate directory delete event - should trigger cleanup
// because subdirectory deletion also makes parent potentially empty
cleaner.OnDeleteEvent(folder, "subdir", true, now)
// Check that cleanup IS queued for directory deletion
if cleaner.GetPendingCleanupCount() != 1 {
t.Errorf("directory deletion should trigger cleanup, got %d", cleaner.GetPendingCleanupCount())
}
cleaner.Stop()
}
func TestEmptyFolderCleaner_cachedCounts(t *testing.T) {
lockRing := lock_manager.NewLockRing(5 * time.Second)
lockRing.SetSnapshot([]pb.ServerAddress{"filer1:8888"})
cleaner := &EmptyFolderCleaner{
lockRing: lockRing,
host: "filer1:8888",
bucketPath: "/buckets",
enabled: true,
folderCounts: make(map[string]*folderState),
cleanupQueue: NewCleanupQueue(1000, 10*time.Minute),
stopCh: make(chan struct{}),
}
folder := "/buckets/mybucket/testfolder"
// Initialize cached count
cleaner.folderCounts[folder] = &folderState{roughCount: 5}
// Simulate create events
cleaner.OnCreateEvent(folder, "newfile1.txt", false)
cleaner.OnCreateEvent(folder, "newfile2.txt", false)
// Check cached count increased
count, exists := cleaner.GetCachedFolderCount(folder)
if !exists {
t.Error("cached folder count should exist")
}
if count != 7 {
t.Errorf("expected cached count 7, got %d", count)
}
// Simulate delete events
now := time.Now()
cleaner.OnDeleteEvent(folder, "file1.txt", false, now)
cleaner.OnDeleteEvent(folder, "file2.txt", false, now.Add(1*time.Second))
// Check cached count decreased
count, exists = cleaner.GetCachedFolderCount(folder)
if !exists {
t.Error("cached folder count should exist")
}
if count != 5 {
t.Errorf("expected cached count 5, got %d", count)
}
cleaner.Stop()
}
func TestEmptyFolderCleaner_Stop(t *testing.T) {
lockRing := lock_manager.NewLockRing(5 * time.Second)
lockRing.SetSnapshot([]pb.ServerAddress{"filer1:8888"})
cleaner := &EmptyFolderCleaner{
lockRing: lockRing,
host: "filer1:8888",
bucketPath: "/buckets",
enabled: true,
folderCounts: make(map[string]*folderState),
cleanupQueue: NewCleanupQueue(1000, 10*time.Minute),
stopCh: make(chan struct{}),
}
now := time.Now()
// Queue some cleanups
cleaner.OnDeleteEvent("/buckets/mybucket/folder1", "file1.txt", false, now)
cleaner.OnDeleteEvent("/buckets/mybucket/folder2", "file2.txt", false, now.Add(1*time.Second))
cleaner.OnDeleteEvent("/buckets/mybucket/folder3", "file3.txt", false, now.Add(2*time.Second))
// Verify cleanups are queued
if cleaner.GetPendingCleanupCount() < 1 {
t.Error("expected at least 1 pending cleanup before stop")
}
// Stop the cleaner
cleaner.Stop()
// Verify all cleanups are cancelled
if cleaner.GetPendingCleanupCount() != 0 {
t.Errorf("expected 0 pending cleanups after stop, got %d", cleaner.GetPendingCleanupCount())
}
}
func TestEmptyFolderCleaner_cacheEviction(t *testing.T) {
lockRing := lock_manager.NewLockRing(5 * time.Second)
lockRing.SetSnapshot([]pb.ServerAddress{"filer1:8888"})
cleaner := &EmptyFolderCleaner{
lockRing: lockRing,
host: "filer1:8888",
bucketPath: "/buckets",
enabled: true,
folderCounts: make(map[string]*folderState),
cleanupQueue: NewCleanupQueue(1000, 10*time.Minute),
cacheExpiry: 100 * time.Millisecond, // Short expiry for testing
stopCh: make(chan struct{}),
}
folder1 := "/buckets/mybucket/folder1"
folder2 := "/buckets/mybucket/folder2"
folder3 := "/buckets/mybucket/folder3"
// Add some cache entries with old timestamps
oldTime := time.Now().Add(-1 * time.Hour)
cleaner.folderCounts[folder1] = &folderState{roughCount: 5, lastCheck: oldTime}
cleaner.folderCounts[folder2] = &folderState{roughCount: 3, lastCheck: oldTime}
// folder3 has recent activity
cleaner.folderCounts[folder3] = &folderState{roughCount: 2, lastCheck: time.Now()}
// Verify all entries exist
if len(cleaner.folderCounts) != 3 {
t.Errorf("expected 3 cache entries, got %d", len(cleaner.folderCounts))
}
// Run eviction
cleaner.evictStaleCacheEntries()
// Verify stale entries are evicted
if len(cleaner.folderCounts) != 1 {
t.Errorf("expected 1 cache entry after eviction, got %d", len(cleaner.folderCounts))
}
// Verify the recent entry still exists
if _, exists := cleaner.folderCounts[folder3]; !exists {
t.Error("expected folder3 to still exist in cache")
}
// Verify stale entries are removed
if _, exists := cleaner.folderCounts[folder1]; exists {
t.Error("expected folder1 to be evicted")
}
if _, exists := cleaner.folderCounts[folder2]; exists {
t.Error("expected folder2 to be evicted")
}
cleaner.Stop()
}
func TestEmptyFolderCleaner_cacheEviction_skipsEntriesInQueue(t *testing.T) {
lockRing := lock_manager.NewLockRing(5 * time.Second)
lockRing.SetSnapshot([]pb.ServerAddress{"filer1:8888"})
cleaner := &EmptyFolderCleaner{
lockRing: lockRing,
host: "filer1:8888",
bucketPath: "/buckets",
enabled: true,
folderCounts: make(map[string]*folderState),
cleanupQueue: NewCleanupQueue(1000, 10*time.Minute),
cacheExpiry: 100 * time.Millisecond,
stopCh: make(chan struct{}),
}
folder := "/buckets/mybucket/folder"
oldTime := time.Now().Add(-1 * time.Hour)
// Add a stale cache entry
cleaner.folderCounts[folder] = &folderState{roughCount: 0, lastCheck: oldTime}
// Also add to cleanup queue
cleaner.cleanupQueue.Add(folder, time.Now())
// Run eviction
cleaner.evictStaleCacheEntries()
// Verify entry is NOT evicted because it's in cleanup queue
if _, exists := cleaner.folderCounts[folder]; !exists {
t.Error("expected folder to still exist in cache (is in cleanup queue)")
}
cleaner.Stop()
}
func TestEmptyFolderCleaner_queueFIFOOrder(t *testing.T) {
lockRing := lock_manager.NewLockRing(5 * time.Second)
lockRing.SetSnapshot([]pb.ServerAddress{"filer1:8888"})
cleaner := &EmptyFolderCleaner{
lockRing: lockRing,
host: "filer1:8888",
bucketPath: "/buckets",
enabled: true,
folderCounts: make(map[string]*folderState),
cleanupQueue: NewCleanupQueue(1000, 10*time.Minute),
stopCh: make(chan struct{}),
}
now := time.Now()
// Add folders in order
folders := []string{
"/buckets/mybucket/folder1",
"/buckets/mybucket/folder2",
"/buckets/mybucket/folder3",
}
for i, folder := range folders {
cleaner.OnDeleteEvent(folder, "file.txt", false, now.Add(time.Duration(i)*time.Second))
}
// Verify queue length
if cleaner.GetPendingCleanupCount() != 3 {
t.Errorf("expected 3 queued folders, got %d", cleaner.GetPendingCleanupCount())
}
// Verify time-sorted order by popping
for i, expected := range folders {
folder, ok := cleaner.cleanupQueue.Pop()
if !ok || folder != expected {
t.Errorf("expected folder %s at index %d, got %s", expected, i, folder)
}
}
cleaner.Stop()
}

8
weed/filer/filer.go

@ -11,6 +11,7 @@ import (
"github.com/seaweedfs/seaweedfs/weed/s3api/s3bucket"
"github.com/seaweedfs/seaweedfs/weed/cluster/lock_manager"
"github.com/seaweedfs/seaweedfs/weed/filer/empty_folder_cleanup"
"github.com/seaweedfs/seaweedfs/weed/cluster"
"github.com/seaweedfs/seaweedfs/weed/pb"
@ -56,6 +57,7 @@ type Filer struct {
MaxFilenameLength uint32
deletionQuit chan struct{}
DeletionRetryQueue *DeletionRetryQueue
EmptyFolderCleaner *empty_folder_cleanup.EmptyFolderCleaner
}
func NewFiler(masters pb.ServerDiscovery, grpcDialOption grpc.DialOption, filerHost pb.ServerAddress, filerGroup string, collection string, replication string, dataCenter string, maxFilenameLength uint32, notifyFn func()) *Filer {
@ -116,6 +118,9 @@ func (f *Filer) AggregateFromPeers(self pb.ServerAddress, existingNodes []*maste
f.Dlm.LockRing.SetSnapshot(snapshot)
glog.V(0).Infof("%s aggregate from peers %+v", self, snapshot)
// Initialize the empty folder cleaner using the same LockRing as Dlm for consistent hashing
f.EmptyFolderCleaner = empty_folder_cleanup.NewEmptyFolderCleaner(f, f.Dlm.LockRing, self, f.DirBucketsPath)
f.MetaAggregator = NewMetaAggregator(f, self, f.GrpcDialOption)
f.MasterClient.SetOnPeerUpdateFn(func(update *master_pb.ClusterNodeUpdate, startFrom time.Time) {
if update.NodeType != cluster.FilerType {
@ -506,6 +511,9 @@ func (f *Filer) IsDirectoryEmpty(ctx context.Context, dirPath util.FullPath) (bo
func (f *Filer) Shutdown() {
close(f.deletionQuit)
if f.EmptyFolderCleaner != nil {
f.EmptyFolderCleaner.Stop()
}
f.LocalMetaLogBuffer.ShutdownLogBuffer()
f.Store.Shutdown()
}

39
weed/filer/filer_notify.go

@ -66,6 +66,10 @@ func (f *Filer) NotifyUpdateEvent(ctx context.Context, oldEntry, newEntry *Entry
f.logMetaEvent(ctx, fullpath, eventNotification)
// Trigger empty folder cleanup for local events
// Remote events are handled via MetaAggregator.onMetadataChangeEvent
f.triggerLocalEmptyFolderCleanup(oldEntry, newEntry)
}
func (f *Filer) logMetaEvent(ctx context.Context, fullpath string, eventNotification *filer_pb.EventNotification) {
@ -89,6 +93,41 @@ func (f *Filer) logMetaEvent(ctx context.Context, fullpath string, eventNotifica
}
// triggerLocalEmptyFolderCleanup triggers empty folder cleanup for local events
// This is needed because onMetadataChangeEvent is only called for remote peer events
func (f *Filer) triggerLocalEmptyFolderCleanup(oldEntry, newEntry *Entry) {
if f.EmptyFolderCleaner == nil || !f.EmptyFolderCleaner.IsEnabled() {
return
}
eventTime := time.Now()
// Handle delete events (oldEntry exists, newEntry is nil)
if oldEntry != nil && newEntry == nil {
dir, name := oldEntry.FullPath.DirAndName()
f.EmptyFolderCleaner.OnDeleteEvent(dir, name, oldEntry.IsDirectory(), eventTime)
}
// Handle create events (oldEntry is nil, newEntry exists)
if oldEntry == nil && newEntry != nil {
dir, name := newEntry.FullPath.DirAndName()
f.EmptyFolderCleaner.OnCreateEvent(dir, name, newEntry.IsDirectory())
}
// Handle rename/move events (both exist but paths differ)
if oldEntry != nil && newEntry != nil {
oldDir, oldName := oldEntry.FullPath.DirAndName()
newDir, newName := newEntry.FullPath.DirAndName()
if oldDir != newDir || oldName != newName {
// Treat old location as delete
f.EmptyFolderCleaner.OnDeleteEvent(oldDir, oldName, oldEntry.IsDirectory(), eventTime)
// Treat new location as create
f.EmptyFolderCleaner.OnCreateEvent(newDir, newName, newEntry.IsDirectory())
}
}
}
func (f *Filer) logFlushFunc(logBuffer *log_buffer.LogBuffer, startTime, stopTime time.Time, buf []byte, minOffset, maxOffset int64) {
if len(buf) == 0 {

39
weed/filer/filer_on_meta_event.go

@ -2,6 +2,7 @@ package filer
import (
"bytes"
"time"
"github.com/seaweedfs/seaweedfs/weed/glog"
"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
@ -13,6 +14,7 @@ func (f *Filer) onMetadataChangeEvent(event *filer_pb.SubscribeMetadataResponse)
f.maybeReloadFilerConfiguration(event)
f.maybeReloadRemoteStorageConfigurationAndMapping(event)
f.onBucketEvents(event)
f.onEmptyFolderCleanupEvents(event)
}
func (f *Filer) onBucketEvents(event *filer_pb.SubscribeMetadataResponse) {
@ -32,6 +34,43 @@ func (f *Filer) onBucketEvents(event *filer_pb.SubscribeMetadataResponse) {
}
}
// onEmptyFolderCleanupEvents handles create/delete events for empty folder cleanup
func (f *Filer) onEmptyFolderCleanupEvents(event *filer_pb.SubscribeMetadataResponse) {
if f.EmptyFolderCleaner == nil || !f.EmptyFolderCleaner.IsEnabled() {
return
}
message := event.EventNotification
directory := event.Directory
eventTime := time.Unix(0, event.TsNs)
// Handle delete events - trigger folder cleanup check
if filer_pb.IsDelete(event) && message.OldEntry != nil {
f.EmptyFolderCleaner.OnDeleteEvent(directory, message.OldEntry.Name, message.OldEntry.IsDirectory, eventTime)
}
// Handle create events - cancel pending cleanup for the folder
if filer_pb.IsCreate(event) && message.NewEntry != nil {
f.EmptyFolderCleaner.OnCreateEvent(directory, message.NewEntry.Name, message.NewEntry.IsDirectory)
}
// Handle rename/move events
if filer_pb.IsRename(event) {
// Treat the old location as a delete
if message.OldEntry != nil {
f.EmptyFolderCleaner.OnDeleteEvent(directory, message.OldEntry.Name, message.OldEntry.IsDirectory, eventTime)
}
// Treat the new location as a create
if message.NewEntry != nil {
newDir := message.NewParentPath
if newDir == "" {
newDir = directory
}
f.EmptyFolderCleaner.OnCreateEvent(newDir, message.NewEntry.Name, message.NewEntry.IsDirectory)
}
}
}
func (f *Filer) maybeReloadFilerConfiguration(event *filer_pb.SubscribeMetadataResponse) {
if DirectoryEtcSeaweedFS != event.Directory {
if DirectoryEtcSeaweedFS != event.EventNotification.NewParentPath {

13
weed/filer/filer_search.go

@ -41,6 +41,19 @@ func (f *Filer) ListDirectoryEntries(ctx context.Context, p util.FullPath, start
return entries, hasMore, err
}
// CountDirectoryEntries counts entries in a directory up to limit
func (f *Filer) CountDirectoryEntries(ctx context.Context, p util.FullPath, limit int) (count int, err error) {
entries, hasMore, err := f.ListDirectoryEntries(ctx, p, "", false, int64(limit), "", "", "")
if err != nil {
return 0, err
}
count = len(entries)
if hasMore {
count = limit // At least this many
}
return count, nil
}
// For now, prefix and namePattern are mutually exclusive
func (f *Filer) StreamListDirectoryEntries(ctx context.Context, p util.FullPath, startFileName string, inclusive bool, limit int64, prefix string, namePattern string, namePatternExclude string, eachEntryFunc ListEachEntryFunc) (lastFileName string, err error) {
if strings.HasSuffix(string(p), "/") && len(p) > 1 {

138
weed/filer/reader_at.go

@ -7,6 +7,8 @@ import (
"math/rand"
"sync"
"golang.org/x/sync/errgroup"
"github.com/seaweedfs/seaweedfs/weed/glog"
"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
"github.com/seaweedfs/seaweedfs/weed/util"
@ -19,6 +21,11 @@ import (
// the prefetch count is derived from the -concurrentReaders option.
const DefaultPrefetchCount = 4
// minReadConcurrency is the minimum number of parallel chunk fetches.
// This ensures at least some parallelism even when prefetchCount is low,
// improving throughput for reads spanning multiple chunks.
const minReadConcurrency = 4
type ChunkReadAt struct {
masterClient *wdclient.MasterClient
chunkViews *IntervalList[*ChunkView]
@ -175,67 +182,139 @@ func (c *ChunkReadAt) ReadAtWithTime(ctx context.Context, p []byte, offset int64
return c.doReadAt(ctx, p, offset)
}
// chunkReadTask represents a single chunk read operation for parallel processing
type chunkReadTask struct {
chunk *ChunkView
bufferStart int64 // start position in the output buffer
bufferEnd int64 // end position in the output buffer
chunkOffset uint64 // offset within the chunk to read from
bytesRead int
modifiedTsNs int64
}
func (c *ChunkReadAt) doReadAt(ctx context.Context, p []byte, offset int64) (n int, ts int64, err error) {
// Collect all chunk read tasks
var tasks []*chunkReadTask
var gaps []struct{ start, length int64 } // gaps that need zero-filling
startOffset, remaining := offset, int64(len(p))
var nextChunks *Interval[*ChunkView]
var lastChunk *Interval[*ChunkView]
for x := c.chunkViews.Front(); x != nil; x = x.Next {
chunk := x.Value
if remaining <= 0 {
break
}
if x.Next != nil {
nextChunks = x.Next
}
lastChunk = x
// Handle gap before this chunk
if startOffset < chunk.ViewOffset {
gap := chunk.ViewOffset - startOffset
glog.V(4).Infof("zero [%d,%d)", startOffset, chunk.ViewOffset)
n += zero(p, startOffset-offset, gap)
gaps = append(gaps, struct{ start, length int64 }{startOffset - offset, gap})
startOffset, remaining = chunk.ViewOffset, remaining-gap
if remaining <= 0 {
break
}
}
// fmt.Printf(">>> doReadAt [%d,%d), chunk[%d,%d)\n", offset, offset+int64(len(p)), chunk.ViewOffset, chunk.ViewOffset+int64(chunk.ViewSize))
chunkStart, chunkStop := max(chunk.ViewOffset, startOffset), min(chunk.ViewOffset+int64(chunk.ViewSize), startOffset+remaining)
if chunkStart >= chunkStop {
continue
}
// glog.V(4).Infof("read [%d,%d), %d/%d chunk %s [%d,%d)", chunkStart, chunkStop, i, len(c.chunkViews), chunk.FileId, chunk.ViewOffset-chunk.Offset, chunk.ViewOffset-chunk.Offset+int64(chunk.ViewSize))
bufferOffset := chunkStart - chunk.ViewOffset + chunk.OffsetInChunk
ts = chunk.ModifiedTsNs
copied, err := c.readChunkSliceAt(ctx, p[startOffset-offset:chunkStop-chunkStart+startOffset-offset], chunk, nextChunks, uint64(bufferOffset))
if err != nil {
glog.Errorf("fetching chunk %+v: %v\n", chunk, err)
return copied, ts, err
tasks = append(tasks, &chunkReadTask{
chunk: chunk,
bufferStart: startOffset - offset,
bufferEnd: chunkStop - chunkStart + startOffset - offset,
chunkOffset: uint64(bufferOffset),
})
startOffset, remaining = chunkStop, remaining-(chunkStop-chunkStart)
}
// Zero-fill gaps
for _, gap := range gaps {
glog.V(4).Infof("zero [%d,%d)", offset+gap.start, offset+gap.start+gap.length)
n += zero(p, gap.start, gap.length)
}
// If only one chunk or random access mode, use sequential reading
if len(tasks) <= 1 || c.readerPattern.IsRandomMode() {
for _, task := range tasks {
copied, readErr := c.readChunkSliceAt(ctx, p[task.bufferStart:task.bufferEnd], task.chunk, nil, task.chunkOffset)
ts = max(ts, task.chunk.ModifiedTsNs)
if readErr != nil {
glog.Errorf("fetching chunk %+v: %v\n", task.chunk, readErr)
return n + copied, ts, readErr
}
n += copied
}
} else {
// Parallel chunk fetching for multiple chunks
// This significantly improves throughput when chunks are on different volume servers
g, gCtx := errgroup.WithContext(ctx)
// Limit concurrency to avoid overwhelming the system
concurrency := c.prefetchCount
if concurrency < minReadConcurrency {
concurrency = minReadConcurrency
}
if concurrency > len(tasks) {
concurrency = len(tasks)
}
g.SetLimit(concurrency)
for _, task := range tasks {
g.Go(func() error {
// Read directly into the correct position in the output buffer
copied, readErr := c.readChunkSliceAtForParallel(gCtx, p[task.bufferStart:task.bufferEnd], task.chunk, task.chunkOffset)
task.bytesRead = copied
task.modifiedTsNs = task.chunk.ModifiedTsNs
return readErr
})
}
n += copied
startOffset, remaining = startOffset+int64(copied), remaining-int64(copied)
// Wait for all chunk reads to complete
if waitErr := g.Wait(); waitErr != nil {
err = waitErr
}
// Aggregate results (order is preserved since we read directly into buffer positions)
for _, task := range tasks {
n += task.bytesRead
ts = max(ts, task.modifiedTsNs)
}
if err != nil {
return n, ts, err
}
}
// glog.V(4).Infof("doReadAt [%d,%d), n:%v, err:%v", offset, offset+int64(len(p)), n, err)
// Trigger prefetch for sequential reads
if lastChunk != nil && lastChunk.Next != nil && c.prefetchCount > 0 && !c.readerPattern.IsRandomMode() {
c.readerCache.MaybeCache(lastChunk.Next, c.prefetchCount)
}
// zero the remaining bytes if a gap exists at the end of the last chunk (or a fully sparse file)
if err == nil && remaining > 0 {
// Zero the remaining bytes if a gap exists at the end
if remaining > 0 {
var delta int64
if c.fileSize >= startOffset {
delta = min(remaining, c.fileSize-startOffset)
startOffset -= offset
}
if delta > 0 {
glog.V(4).Infof("zero2 [%d,%d) of file size %d bytes", startOffset, startOffset+delta, c.fileSize)
n += zero(p, startOffset, delta)
bufStart := startOffset - offset
if delta > 0 {
glog.V(4).Infof("zero2 [%d,%d) of file size %d bytes", startOffset, startOffset+delta, c.fileSize)
n += zero(p, bufStart, delta)
}
}
}
if err == nil && offset+int64(len(p)) >= c.fileSize {
err = io.EOF
}
// fmt.Printf("~~~ filled %d, err: %v\n\n", n, err)
return
}
func (c *ChunkReadAt) readChunkSliceAt(ctx context.Context, buffer []byte, chunkView *ChunkView, nextChunkViews *Interval[*ChunkView], offset uint64) (n int, err error) {
@ -249,7 +328,7 @@ func (c *ChunkReadAt) readChunkSliceAt(ctx context.Context, buffer []byte, chunk
}
shouldCache := (uint64(chunkView.ViewOffset) + chunkView.ChunkSize) <= c.readerCache.chunkCache.GetMaxFilePartSizeInCache()
n, err = c.readerCache.ReadChunkAt(buffer, chunkView.FileId, chunkView.CipherKey, chunkView.IsGzipped, int64(offset), int(chunkView.ChunkSize), shouldCache)
n, err = c.readerCache.ReadChunkAt(ctx, buffer, chunkView.FileId, chunkView.CipherKey, chunkView.IsGzipped, int64(offset), int(chunkView.ChunkSize), shouldCache)
if c.lastChunkFid != chunkView.FileId {
if chunkView.OffsetInChunk == 0 { // start of a new chunk
if c.lastChunkFid != "" {
@ -266,6 +345,13 @@ func (c *ChunkReadAt) readChunkSliceAt(ctx context.Context, buffer []byte, chunk
return
}
// readChunkSliceAtForParallel is a simplified version for parallel chunk fetching
// It doesn't update lastChunkFid or trigger prefetch (handled by the caller)
func (c *ChunkReadAt) readChunkSliceAtForParallel(ctx context.Context, buffer []byte, chunkView *ChunkView, offset uint64) (n int, err error) {
shouldCache := (uint64(chunkView.ViewOffset) + chunkView.ChunkSize) <= c.readerCache.chunkCache.GetMaxFilePartSizeInCache()
return c.readerCache.ReadChunkAt(ctx, buffer, chunkView.FileId, chunkView.CipherKey, chunkView.IsGzipped, int64(offset), int(chunkView.ChunkSize), shouldCache)
}
func zero(buffer []byte, start, length int64) int {
if length <= 0 {
return 0
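The parallel path above bounds in-flight fetches with errgroup.SetLimit and lets each task write into its own disjoint slice of the output buffer, so no extra synchronization is needed. Below is a minimal, self-contained sketch of that pattern; the task and fetch names are illustrative, not from this change.

package main

import (
    "context"
    "fmt"

    "golang.org/x/sync/errgroup"
)

type task struct{ start, end int }

// fetch stands in for a chunk read that fills its own slice of the buffer.
func fetch(ctx context.Context, dst []byte, id int) error {
    select {
    case <-ctx.Done():
        return ctx.Err()
    default:
    }
    for i := range dst {
        dst[i] = byte(id)
    }
    return nil
}

func main() {
    buf := make([]byte, 12)
    tasks := []task{{0, 4}, {4, 8}, {8, 12}}

    g, ctx := errgroup.WithContext(context.Background())
    g.SetLimit(2) // cap in-flight fetches, as doReadAt caps by prefetchCount

    for i, t := range tasks {
        i, t := i, t // safe capture on older Go versions
        g.Go(func() error { return fetch(ctx, buf[t.start:t.end], i) })
    }
    if err := g.Wait(); err != nil {
        fmt.Println("read failed:", err)
    }
    fmt.Println(buf)
}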

86
weed/filer/reader_cache.go

@ -35,6 +35,7 @@ type SingleChunkCacher struct {
shouldCache bool
wg sync.WaitGroup
cacheStartedCh chan struct{}
done chan struct{} // signals when download is complete
}
func NewReaderCache(limit int, chunkCache chunk_cache.ChunkCache, lookupFileIdFn wdclient.LookupFileIdFunctionType) *ReaderCache {
@ -93,14 +94,18 @@ func (rc *ReaderCache) MaybeCache(chunkViews *Interval[*ChunkView], count int) {
return
}
func (rc *ReaderCache) ReadChunkAt(buffer []byte, fileId string, cipherKey []byte, isGzipped bool, offset int64, chunkSize int, shouldCache bool) (int, error) {
func (rc *ReaderCache) ReadChunkAt(ctx context.Context, buffer []byte, fileId string, cipherKey []byte, isGzipped bool, offset int64, chunkSize int, shouldCache bool) (int, error) {
rc.Lock()
if cacher, found := rc.downloaders[fileId]; found {
if n, err := cacher.readChunkAt(buffer, offset); n != 0 && err == nil {
rc.Unlock()
rc.Unlock()
n, err := cacher.readChunkAt(ctx, buffer, offset)
if n > 0 || err != nil {
return n, err
}
// If n=0 and err=nil, the cacher couldn't provide data for this offset.
// Fall through to try chunkCache.
rc.Lock()
}
if shouldCache || rc.lookupFileIdFn == nil {
n, err := rc.chunkCache.ReadChunkAt(buffer, fileId, uint64(offset))
@ -134,7 +139,7 @@ func (rc *ReaderCache) ReadChunkAt(buffer []byte, fileId string, cipherKey []byt
rc.downloaders[fileId] = cacher
rc.Unlock()
return cacher.readChunkAt(buffer, offset)
return cacher.readChunkAt(ctx, buffer, offset)
}
func (rc *ReaderCache) UnCache(fileId string) {
@ -166,38 +171,53 @@ func newSingleChunkCacher(parent *ReaderCache, fileId string, cipherKey []byte,
chunkSize: chunkSize,
shouldCache: shouldCache,
cacheStartedCh: make(chan struct{}),
done: make(chan struct{}),
}
}
// startCaching downloads the chunk data in the background.
// It does NOT hold the lock during the HTTP download to allow concurrent readers
// to wait efficiently using the done channel.
func (s *SingleChunkCacher) startCaching() {
s.wg.Add(1)
defer s.wg.Done()
s.Lock()
defer s.Unlock()
defer close(s.done) // guarantee completion signal even on panic
s.cacheStartedCh <- struct{}{} // means this has been started
s.cacheStartedCh <- struct{}{} // signal that we've started
// Note: We intentionally use context.Background() here, NOT a request-specific context.
// The downloaded chunk is a shared resource - multiple concurrent readers may be waiting
// for this same download to complete. If we used a request context and that request was
// cancelled, it would abort the download and cause errors for all other waiting readers.
// The download should always complete once started to serve all potential consumers.
// Lookup file ID without holding the lock
urlStrings, err := s.parent.lookupFileIdFn(context.Background(), s.chunkFileId)
if err != nil {
s.Lock()
s.err = fmt.Errorf("operation LookupFileId %s failed, err: %v", s.chunkFileId, err)
s.Unlock()
return
}
s.data = mem.Allocate(s.chunkSize)
_, s.err = util_http.RetriedFetchChunkData(context.Background(), s.data, urlStrings, s.cipherKey, s.isGzipped, true, 0, s.chunkFileId)
if s.err != nil {
mem.Free(s.data)
s.data = nil
return
}
// Allocate buffer and download without holding the lock
// This allows multiple downloads to proceed in parallel
data := mem.Allocate(s.chunkSize)
_, fetchErr := util_http.RetriedFetchChunkData(context.Background(), data, urlStrings, s.cipherKey, s.isGzipped, true, 0, s.chunkFileId)
if s.shouldCache {
s.parent.chunkCache.SetChunk(s.chunkFileId, s.data)
// Now acquire lock to update state
s.Lock()
if fetchErr != nil {
mem.Free(data)
s.err = fetchErr
} else {
s.data = data
if s.shouldCache {
s.parent.chunkCache.SetChunk(s.chunkFileId, s.data)
}
atomic.StoreInt64(&s.completedTimeNew, time.Now().UnixNano())
}
atomic.StoreInt64(&s.completedTimeNew, time.Now().UnixNano())
return
s.Unlock()
}
func (s *SingleChunkCacher) destroy() {
@ -209,13 +229,34 @@ func (s *SingleChunkCacher) destroy() {
if s.data != nil {
mem.Free(s.data)
s.data = nil
close(s.cacheStartedCh)
}
}
func (s *SingleChunkCacher) readChunkAt(buf []byte, offset int64) (int, error) {
// readChunkAt reads data from the cached chunk.
// It waits for the download to complete if it's still in progress.
// The ctx parameter allows the reader to cancel its wait (but the download continues
// for other readers - see comment in startCaching about shared resource semantics).
func (s *SingleChunkCacher) readChunkAt(ctx context.Context, buf []byte, offset int64) (int, error) {
s.wg.Add(1)
defer s.wg.Done()
// Wait for download to complete, but allow reader cancellation.
// Prioritize checking done first - if data is already available,
// return it even if context is also cancelled.
select {
case <-s.done:
// Download already completed, proceed immediately
default:
// Download not complete, wait for it or context cancellation
select {
case <-s.done:
// Download completed
case <-ctx.Done():
// Reader cancelled while waiting - download continues for other readers
return 0, ctx.Err()
}
}
s.Lock()
defer s.Unlock()
@ -228,5 +269,4 @@ func (s *SingleChunkCacher) readChunkAt(buf []byte, offset int64) (int, error) {
}
return copy(buf, s.data[offset:]), nil
}
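The prioritized wait above (check done before ctx.Done) is what lets a reader that raced with cancellation still return data that has already arrived. A stripped-down sketch of just that wait, under assumed names:

package readerwait

import "context"

// waitDone mirrors the pattern in SingleChunkCacher.readChunkAt: consult the
// completion channel first, and only then block on either completion or the
// caller's cancellation.
func waitDone(ctx context.Context, done <-chan struct{}) error {
    select {
    case <-done:
        return nil // already finished; ignore any cancellation
    default:
    }
    select {
    case <-done:
        return nil
    case <-ctx.Done():
        return ctx.Err() // this reader gives up; the download keeps going
    }
}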

505
weed/filer/reader_cache_test.go

@ -0,0 +1,505 @@
package filer
import (
"context"
"fmt"
"sync"
"sync/atomic"
"testing"
"time"
)
// mockChunkCacheForReaderCache implements chunk cache for testing
type mockChunkCacheForReaderCache struct {
data map[string][]byte
hitCount int32
mu sync.Mutex
}
func newMockChunkCacheForReaderCache() *mockChunkCacheForReaderCache {
return &mockChunkCacheForReaderCache{
data: make(map[string][]byte),
}
}
func (m *mockChunkCacheForReaderCache) GetChunk(fileId string, minSize uint64) []byte {
m.mu.Lock()
defer m.mu.Unlock()
if d, ok := m.data[fileId]; ok {
atomic.AddInt32(&m.hitCount, 1)
return d
}
return nil
}
func (m *mockChunkCacheForReaderCache) ReadChunkAt(data []byte, fileId string, offset uint64) (int, error) {
m.mu.Lock()
defer m.mu.Unlock()
if d, ok := m.data[fileId]; ok && int(offset) < len(d) {
atomic.AddInt32(&m.hitCount, 1)
n := copy(data, d[offset:])
return n, nil
}
return 0, nil
}
func (m *mockChunkCacheForReaderCache) SetChunk(fileId string, data []byte) {
m.mu.Lock()
defer m.mu.Unlock()
m.data[fileId] = data
}
func (m *mockChunkCacheForReaderCache) GetMaxFilePartSizeInCache() uint64 {
return 1024 * 1024 // 1MB
}
func (m *mockChunkCacheForReaderCache) IsInCache(fileId string, lockNeeded bool) bool {
m.mu.Lock()
defer m.mu.Unlock()
_, ok := m.data[fileId]
return ok
}
// TestReaderCacheContextCancellation tests that a reader can cancel its wait
// while the download continues for other readers
func TestReaderCacheContextCancellation(t *testing.T) {
cache := newMockChunkCacheForReaderCache()
// Create a ReaderCache - we can't easily test the full flow without mocking HTTP,
// but we can test the context cancellation in readChunkAt
rc := NewReaderCache(10, cache, nil)
defer rc.destroy()
// Pre-populate cache to avoid HTTP calls
testData := []byte("test data for context cancellation")
cache.SetChunk("test-file-1", testData)
// Test that context cancellation works
ctx, cancel := context.WithCancel(context.Background())
buffer := make([]byte, len(testData))
n, err := rc.ReadChunkAt(ctx, buffer, "test-file-1", nil, false, 0, len(testData), true)
if err != nil {
t.Errorf("Expected no error, got: %v", err)
}
if n != len(testData) {
t.Errorf("Expected %d bytes, got %d", len(testData), n)
}
// Cancel context and verify it doesn't affect already completed reads
cancel()
// Subsequent read with cancelled context should still work from cache
buffer2 := make([]byte, len(testData))
n2, err2 := rc.ReadChunkAt(ctx, buffer2, "test-file-1", nil, false, 0, len(testData), true)
// Note: This may or may not error depending on whether it hits cache
_ = n2
_ = err2
}
// TestReaderCacheFallbackToChunkCache tests that when a cacher returns n=0, err=nil,
// we fall back to the chunkCache
func TestReaderCacheFallbackToChunkCache(t *testing.T) {
cache := newMockChunkCacheForReaderCache()
// Pre-populate the chunk cache with data
testData := []byte("fallback test data that should be found in chunk cache")
cache.SetChunk("fallback-file", testData)
rc := NewReaderCache(10, cache, nil)
defer rc.destroy()
// Read should hit the chunk cache
buffer := make([]byte, len(testData))
n, err := rc.ReadChunkAt(context.Background(), buffer, "fallback-file", nil, false, 0, len(testData), true)
if err != nil {
t.Errorf("Expected no error, got: %v", err)
}
if n != len(testData) {
t.Errorf("Expected %d bytes, got %d", len(testData), n)
}
// Verify cache was hit
if cache.hitCount == 0 {
t.Error("Expected chunk cache to be hit")
}
}
// TestReaderCacheMultipleReadersWaitForSameChunk tests that multiple readers
// can wait for the same chunk download to complete
func TestReaderCacheMultipleReadersWaitForSameChunk(t *testing.T) {
cache := newMockChunkCacheForReaderCache()
// Pre-populate cache so we don't need HTTP
testData := make([]byte, 1024)
for i := range testData {
testData[i] = byte(i % 256)
}
cache.SetChunk("shared-chunk", testData)
rc := NewReaderCache(10, cache, nil)
defer rc.destroy()
// Launch multiple concurrent readers for the same chunk
numReaders := 10
var wg sync.WaitGroup
errors := make(chan error, numReaders)
bytesRead := make(chan int, numReaders)
for i := 0; i < numReaders; i++ {
wg.Add(1)
go func() {
defer wg.Done()
buffer := make([]byte, len(testData))
n, err := rc.ReadChunkAt(context.Background(), buffer, "shared-chunk", nil, false, 0, len(testData), true)
if err != nil {
errors <- err
}
bytesRead <- n
}()
}
wg.Wait()
close(errors)
close(bytesRead)
// Check for errors
for err := range errors {
t.Errorf("Reader got error: %v", err)
}
// Verify all readers got the expected data
for n := range bytesRead {
if n != len(testData) {
t.Errorf("Expected %d bytes, got %d", len(testData), n)
}
}
}
// TestReaderCachePartialRead tests reading at different offsets
func TestReaderCachePartialRead(t *testing.T) {
cache := newMockChunkCacheForReaderCache()
testData := []byte("0123456789ABCDEFGHIJ")
cache.SetChunk("partial-read-file", testData)
rc := NewReaderCache(10, cache, nil)
defer rc.destroy()
tests := []struct {
name string
offset int64
size int
expected []byte
}{
{"read from start", 0, 5, []byte("01234")},
{"read from middle", 5, 5, []byte("56789")},
{"read to end", 15, 5, []byte("FGHIJ")},
{"read single byte", 10, 1, []byte("A")},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
buffer := make([]byte, tt.size)
n, err := rc.ReadChunkAt(context.Background(), buffer, "partial-read-file", nil, false, tt.offset, len(testData), true)
if err != nil {
t.Errorf("Expected no error, got: %v", err)
}
if n != tt.size {
t.Errorf("Expected %d bytes, got %d", tt.size, n)
}
if string(buffer[:n]) != string(tt.expected) {
t.Errorf("Expected %q, got %q", tt.expected, buffer[:n])
}
})
}
}
// TestReaderCacheCleanup tests that old downloaders are cleaned up
func TestReaderCacheCleanup(t *testing.T) {
cache := newMockChunkCacheForReaderCache()
// Create cache with limit of 3
rc := NewReaderCache(3, cache, nil)
defer rc.destroy()
// Add data for multiple files
for i := 0; i < 5; i++ {
fileId := string(rune('A' + i))
data := []byte("data for file " + fileId)
cache.SetChunk(fileId, data)
}
// Read from multiple files - should trigger cleanup when exceeding limit
for i := 0; i < 5; i++ {
fileId := string(rune('A' + i))
buffer := make([]byte, 20)
_, err := rc.ReadChunkAt(context.Background(), buffer, fileId, nil, false, 0, 20, true)
if err != nil {
t.Errorf("Read error for file %s: %v", fileId, err)
}
}
// Cache should still work - reads should succeed
for i := 0; i < 5; i++ {
fileId := string(rune('A' + i))
buffer := make([]byte, 20)
n, err := rc.ReadChunkAt(context.Background(), buffer, fileId, nil, false, 0, 20, true)
if err != nil {
t.Errorf("Second read error for file %s: %v", fileId, err)
}
if n == 0 {
t.Errorf("Expected data for file %s, got 0 bytes", fileId)
}
}
}
// TestSingleChunkCacherDoneSignal tests that done channel is always closed
func TestSingleChunkCacherDoneSignal(t *testing.T) {
cache := newMockChunkCacheForReaderCache()
rc := NewReaderCache(10, cache, nil)
defer rc.destroy()
// Test that we can read even when data is in cache (done channel should work)
testData := []byte("done signal test")
cache.SetChunk("done-signal-test", testData)
// Multiple goroutines reading same chunk
var wg sync.WaitGroup
for i := 0; i < 5; i++ {
wg.Add(1)
go func() {
defer wg.Done()
buffer := make([]byte, len(testData))
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
defer cancel()
n, err := rc.ReadChunkAt(ctx, buffer, "done-signal-test", nil, false, 0, len(testData), true)
if err != nil && err != context.DeadlineExceeded {
t.Errorf("Unexpected error: %v", err)
}
if n == 0 && err == nil {
t.Error("Got 0 bytes with no error")
}
}()
}
// Should complete without hanging
done := make(chan struct{})
go func() {
wg.Wait()
close(done)
}()
select {
case <-done:
// Success
case <-time.After(10 * time.Second):
t.Fatal("Test timed out - done channel may not be signaled correctly")
}
}
// ============================================================================
// Tests that exercise SingleChunkCacher concurrency logic
// ============================================================================
//
// These tests use blocking lookupFileIdFn to exercise the wait/cancellation
// logic in SingleChunkCacher without requiring HTTP calls.
// TestSingleChunkCacherLookupError tests handling of lookup errors
func TestSingleChunkCacherLookupError(t *testing.T) {
cache := newMockChunkCacheForReaderCache()
// Lookup function that returns an error
lookupFn := func(ctx context.Context, fileId string) ([]string, error) {
return nil, fmt.Errorf("lookup failed for %s", fileId)
}
rc := NewReaderCache(10, cache, lookupFn)
defer rc.destroy()
buffer := make([]byte, 100)
_, err := rc.ReadChunkAt(context.Background(), buffer, "error-test", nil, false, 0, 100, true)
if err == nil {
t.Error("Expected an error, got nil")
}
}
// TestSingleChunkCacherContextCancellationDuringLookup tests that a reader can
// cancel its wait while the lookup is in progress. This exercises the actual
// SingleChunkCacher wait/cancel logic.
func TestSingleChunkCacherContextCancellationDuringLookup(t *testing.T) {
cache := newMockChunkCacheForReaderCache()
lookupStarted := make(chan struct{})
lookupCanFinish := make(chan struct{})
// Lookup function that blocks to simulate slow operation
lookupFn := func(ctx context.Context, fileId string) ([]string, error) {
close(lookupStarted)
<-lookupCanFinish // Block until test allows completion
return nil, fmt.Errorf("lookup completed but reader should have cancelled")
}
rc := NewReaderCache(10, cache, lookupFn)
defer rc.destroy()
defer close(lookupCanFinish) // Ensure cleanup
ctx, cancel := context.WithCancel(context.Background())
readResult := make(chan error, 1)
go func() {
buffer := make([]byte, 100)
_, err := rc.ReadChunkAt(ctx, buffer, "cancel-during-lookup", nil, false, 0, 100, true)
readResult <- err
}()
// Wait for lookup to start, then cancel the reader's context
select {
case <-lookupStarted:
cancel() // Cancel the reader while lookup is blocked
case <-time.After(5 * time.Second):
t.Fatal("Lookup never started")
}
// Read should return with context.Canceled
select {
case err := <-readResult:
if err != context.Canceled {
t.Errorf("Expected context.Canceled, got: %v", err)
}
case <-time.After(5 * time.Second):
t.Fatal("Read did not complete after context cancellation")
}
}
// TestSingleChunkCacherMultipleReadersWaitForDownload tests that multiple readers
// can wait for the same SingleChunkCacher download to complete. When lookup fails,
// all readers should receive the same error.
func TestSingleChunkCacherMultipleReadersWaitForDownload(t *testing.T) {
cache := newMockChunkCacheForReaderCache()
lookupStarted := make(chan struct{})
lookupCanFinish := make(chan struct{})
var lookupStartedOnce sync.Once
// Lookup function that blocks to simulate slow operation
lookupFn := func(ctx context.Context, fileId string) ([]string, error) {
lookupStartedOnce.Do(func() { close(lookupStarted) })
<-lookupCanFinish
return nil, fmt.Errorf("simulated lookup error")
}
rc := NewReaderCache(10, cache, lookupFn)
defer rc.destroy()
numReaders := 5
var wg sync.WaitGroup
errors := make(chan error, numReaders)
// Start multiple readers for the same chunk
for i := 0; i < numReaders; i++ {
wg.Add(1)
go func() {
defer wg.Done()
buffer := make([]byte, 100)
_, err := rc.ReadChunkAt(context.Background(), buffer, "shared-chunk", nil, false, 0, 100, true)
errors <- err
}()
}
// Wait for lookup to start, then allow completion
select {
case <-lookupStarted:
close(lookupCanFinish)
case <-time.After(5 * time.Second):
close(lookupCanFinish)
t.Fatal("Lookup never started")
}
wg.Wait()
close(errors)
// All readers should receive an error
errorCount := 0
for err := range errors {
if err != nil {
errorCount++
}
}
if errorCount != numReaders {
t.Errorf("Expected %d errors, got %d", numReaders, errorCount)
}
}
// TestSingleChunkCacherOneReaderCancelsOthersContinue tests that when one reader
// cancels, other readers waiting on the same chunk continue to wait.
func TestSingleChunkCacherOneReaderCancelsOthersContinue(t *testing.T) {
cache := newMockChunkCacheForReaderCache()
lookupStarted := make(chan struct{})
lookupCanFinish := make(chan struct{})
var lookupStartedOnce sync.Once
lookupFn := func(ctx context.Context, fileId string) ([]string, error) {
lookupStartedOnce.Do(func() { close(lookupStarted) })
<-lookupCanFinish
return nil, fmt.Errorf("simulated error after delay")
}
rc := NewReaderCache(10, cache, lookupFn)
defer rc.destroy()
cancelledReaderDone := make(chan error, 1)
otherReaderDone := make(chan error, 1)
ctx, cancel := context.WithCancel(context.Background())
// Start reader that will be cancelled
go func() {
buffer := make([]byte, 100)
_, err := rc.ReadChunkAt(ctx, buffer, "shared-chunk-2", nil, false, 0, 100, true)
cancelledReaderDone <- err
}()
// Start reader that will NOT be cancelled
go func() {
buffer := make([]byte, 100)
_, err := rc.ReadChunkAt(context.Background(), buffer, "shared-chunk-2", nil, false, 0, 100, true)
otherReaderDone <- err
}()
// Wait for lookup to start
select {
case <-lookupStarted:
case <-time.After(5 * time.Second):
t.Fatal("Lookup never started")
}
// Cancel the first reader
cancel()
// First reader should complete with context.Canceled quickly
select {
case err := <-cancelledReaderDone:
if err != context.Canceled {
t.Errorf("Cancelled reader: expected context.Canceled, got: %v", err)
}
case <-time.After(2 * time.Second):
t.Error("Cancelled reader did not complete quickly")
}
// Allow the download to complete
close(lookupCanFinish)
// Other reader should eventually complete (with error since lookup returns error)
select {
case err := <-otherReaderDone:
if err == nil || err == context.Canceled {
t.Errorf("Other reader: expected non-nil non-cancelled error, got: %v", err)
}
// Expected: "simulated error after delay"
case <-time.After(5 * time.Second):
t.Error("Other reader did not complete")
}
}

12
weed/operation/upload_content.go

@ -90,10 +90,9 @@ func (uploadResult *UploadResult) ToPbFileChunkWithSSE(fileId string, offset int
}
var (
fileNameEscaper = strings.NewReplacer(`\`, `\\`, `"`, `\"`, "\n", "")
uploader *Uploader
uploaderErr error
once sync.Once
uploader *Uploader
uploaderErr error
once sync.Once
)
// HTTPClient interface for testing
@ -336,8 +335,9 @@ func (uploader *Uploader) upload_content(ctx context.Context, fillBufferFunction
body_writer = multipart.NewWriter(option.BytesBuffer)
}
h := make(textproto.MIMEHeader)
filename := fileNameEscaper.Replace(option.Filename)
h.Set("Content-Disposition", fmt.Sprintf(`form-data; name="file"; filename="%s"`, filename))
// Use mime.FormatMediaType for RFC 6266 compliant Content-Disposition,
// properly handling non-ASCII characters and special characters
h.Set("Content-Disposition", mime.FormatMediaType("form-data", map[string]string{"name": "file", "filename": option.Filename}))
h.Set("Idempotency-Key", option.UploadUrl)
if option.MimeType == "" {
option.MimeType = mime.TypeByExtension(strings.ToLower(filepath.Ext(option.Filename)))
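For a quick sense of what mime.FormatMediaType emits compared to the old replacer, the snippet below formats a non-ASCII filename; the encoded output shown in the comment is indicative only.

package main

import (
    "fmt"
    "mime"
)

func main() {
    // Non-ASCII filenames are emitted using the RFC 2231 extended form
    // (filename*=utf-8''...), which the hand-rolled replacer could not do.
    cd := mime.FormatMediaType("form-data", map[string]string{
        "name":     "file",
        "filename": "résumé 日本.txt",
    })
    fmt.Println(cd)
    // prints something like: form-data; filename*=utf-8''...; name=file
}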

2
weed/pb/master.proto

@ -81,6 +81,7 @@ message Heartbeat {
map<string, uint32> max_volume_counts = 4;
uint32 grpc_port = 20;
repeated string location_uuids = 21;
string id = 22; // volume server id, independent of ip:port for stable identification
}
message HeartbeatResponse {
@ -289,6 +290,7 @@ message DataNodeInfo {
string id = 1;
map<string, DiskInfo> diskInfos = 2;
uint32 grpc_port = 3;
string address = 4; // ip:port for connecting to the volume server
}
message RackInfo {
string id = 1;

26
weed/pb/master_pb/master.pb.go

@ -44,6 +44,7 @@ type Heartbeat struct {
MaxVolumeCounts map[string]uint32 `protobuf:"bytes,4,rep,name=max_volume_counts,json=maxVolumeCounts,proto3" json:"max_volume_counts,omitempty" protobuf_key:"bytes,1,opt,name=key" protobuf_val:"varint,2,opt,name=value"`
GrpcPort uint32 `protobuf:"varint,20,opt,name=grpc_port,json=grpcPort,proto3" json:"grpc_port,omitempty"`
LocationUuids []string `protobuf:"bytes,21,rep,name=location_uuids,json=locationUuids,proto3" json:"location_uuids,omitempty"`
Id string `protobuf:"bytes,22,opt,name=id,proto3" json:"id,omitempty"` // volume server id, independent of ip:port for stable identification
unknownFields protoimpl.UnknownFields
sizeCache protoimpl.SizeCache
}
@ -204,6 +205,13 @@ func (x *Heartbeat) GetLocationUuids() []string {
return nil
}
func (x *Heartbeat) GetId() string {
if x != nil {
return x.Id
}
return ""
}
type HeartbeatResponse struct {
state protoimpl.MessageState `protogen:"open.v1"`
VolumeSizeLimit uint64 `protobuf:"varint,1,opt,name=volume_size_limit,json=volumeSizeLimit,proto3" json:"volume_size_limit,omitempty"`
@ -2039,6 +2047,7 @@ type DataNodeInfo struct {
Id string `protobuf:"bytes,1,opt,name=id,proto3" json:"id,omitempty"`
DiskInfos map[string]*DiskInfo `protobuf:"bytes,2,rep,name=diskInfos,proto3" json:"diskInfos,omitempty" protobuf_key:"bytes,1,opt,name=key" protobuf_val:"bytes,2,opt,name=value"`
GrpcPort uint32 `protobuf:"varint,3,opt,name=grpc_port,json=grpcPort,proto3" json:"grpc_port,omitempty"`
Address string `protobuf:"bytes,4,opt,name=address,proto3" json:"address,omitempty"` // ip:port for connecting to the volume server
unknownFields protoimpl.UnknownFields
sizeCache protoimpl.SizeCache
}
@ -2094,6 +2103,13 @@ func (x *DataNodeInfo) GetGrpcPort() uint32 {
return 0
}
func (x *DataNodeInfo) GetAddress() string {
if x != nil {
return x.Address
}
return ""
}
type RackInfo struct {
state protoimpl.MessageState `protogen:"open.v1"`
Id string `protobuf:"bytes,1,opt,name=id,proto3" json:"id,omitempty"`
@ -4038,7 +4054,7 @@ var File_master_proto protoreflect.FileDescriptor
const file_master_proto_rawDesc = "" +
"\n" +
"\fmaster.proto\x12\tmaster_pb\"\xc0\a\n" +
"\fmaster.proto\x12\tmaster_pb\"\xd0\a\n" +
"\tHeartbeat\x12\x0e\n" +
"\x02ip\x18\x01 \x01(\tR\x02ip\x12\x12\n" +
"\x04port\x18\x02 \x01(\rR\x04port\x12\x1d\n" +
@ -4063,7 +4079,8 @@ const file_master_proto_rawDesc = "" +
"\x10has_no_ec_shards\x18\x13 \x01(\bR\rhasNoEcShards\x12U\n" +
"\x11max_volume_counts\x18\x04 \x03(\v2).master_pb.Heartbeat.MaxVolumeCountsEntryR\x0fmaxVolumeCounts\x12\x1b\n" +
"\tgrpc_port\x18\x14 \x01(\rR\bgrpcPort\x12%\n" +
"\x0elocation_uuids\x18\x15 \x03(\tR\rlocationUuids\x1aB\n" +
"\x0elocation_uuids\x18\x15 \x03(\tR\rlocationUuids\x12\x0e\n" +
"\x02id\x18\x16 \x01(\tR\x02id\x1aB\n" +
"\x14MaxVolumeCountsEntry\x12\x10\n" +
"\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n" +
"\x05value\x18\x02 \x01(\rR\x05value:\x028\x01\"\xcd\x02\n" +
@ -4254,11 +4271,12 @@ const file_master_proto_rawDesc = "" +
"\fvolume_infos\x18\x06 \x03(\v2#.master_pb.VolumeInformationMessageR\vvolumeInfos\x12P\n" +
"\x0eec_shard_infos\x18\a \x03(\v2*.master_pb.VolumeEcShardInformationMessageR\fecShardInfos\x12.\n" +
"\x13remote_volume_count\x18\b \x01(\x03R\x11remoteVolumeCount\x12\x17\n" +
"\adisk_id\x18\t \x01(\rR\x06diskId\"\xd4\x01\n" +
"\adisk_id\x18\t \x01(\rR\x06diskId\"\xee\x01\n" +
"\fDataNodeInfo\x12\x0e\n" +
"\x02id\x18\x01 \x01(\tR\x02id\x12D\n" +
"\tdiskInfos\x18\x02 \x03(\v2&.master_pb.DataNodeInfo.DiskInfosEntryR\tdiskInfos\x12\x1b\n" +
"\tgrpc_port\x18\x03 \x01(\rR\bgrpcPort\x1aQ\n" +
"\tgrpc_port\x18\x03 \x01(\rR\bgrpcPort\x12\x18\n" +
"\aaddress\x18\x04 \x01(\tR\aaddress\x1aQ\n" +
"\x0eDiskInfosEntry\x12\x10\n" +
"\x03key\x18\x01 \x01(\tR\x03key\x12)\n" +
"\x05value\x18\x02 \x01(\v2\x13.master_pb.DiskInfoR\x05value:\x028\x01\"\xf0\x01\n" +

12
weed/pb/server_address.go

@ -2,11 +2,12 @@ package pb
import (
"fmt"
"github.com/seaweedfs/seaweedfs/weed/pb/master_pb"
"github.com/seaweedfs/seaweedfs/weed/util"
"net"
"strconv"
"strings"
"github.com/seaweedfs/seaweedfs/weed/pb/master_pb"
"github.com/seaweedfs/seaweedfs/weed/util"
)
type ServerAddress string
@ -32,7 +33,12 @@ func NewServerAddressWithGrpcPort(address string, grpcPort int) ServerAddress {
}
func NewServerAddressFromDataNode(dn *master_pb.DataNodeInfo) ServerAddress {
return NewServerAddressWithGrpcPort(dn.Id, int(dn.GrpcPort))
// Use Address field if available (new behavior), fall back to Id for backward compatibility
addr := dn.Address
if addr == "" {
addr = dn.Id // backward compatibility: old nodes use ip:port as id
}
return NewServerAddressWithGrpcPort(addr, int(dn.GrpcPort))
}
func NewServerAddressFromLocation(dn *master_pb.Location) ServerAddress {
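A hedged illustration of the fallback above: an older volume server still reports ip:port in Id, while a newer one fills Address and can use a stable Id. The field values below are made up.

package main

import (
    "fmt"

    "github.com/seaweedfs/seaweedfs/weed/pb"
    "github.com/seaweedfs/seaweedfs/weed/pb/master_pb"
)

func main() {
    oldNode := &master_pb.DataNodeInfo{Id: "10.0.0.5:8080", GrpcPort: 18080}
    newNode := &master_pb.DataNodeInfo{Id: "vol-node-1", Address: "10.0.0.6:8080", GrpcPort: 18080}

    fmt.Println(pb.NewServerAddressFromDataNode(oldNode)) // resolves via Id (old behavior)
    fmt.Println(pb.NewServerAddressFromDataNode(newNode)) // resolves via the new Address field
}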

19
weed/s3api/auth_signature_v4.go

@ -53,10 +53,11 @@ func (iam *IdentityAccessManagement) reqSignatureV4Verify(r *http.Request) (*Ide
// Constants specific to this file
const (
emptySHA256 = "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
streamingContentSHA256 = "STREAMING-AWS4-HMAC-SHA256-PAYLOAD"
streamingUnsignedPayload = "STREAMING-UNSIGNED-PAYLOAD-TRAILER"
unsignedPayload = "UNSIGNED-PAYLOAD"
emptySHA256 = "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
streamingContentSHA256 = "STREAMING-AWS4-HMAC-SHA256-PAYLOAD"
streamingContentSHA256Trailer = "STREAMING-AWS4-HMAC-SHA256-PAYLOAD-TRAILER"
streamingUnsignedPayload = "STREAMING-UNSIGNED-PAYLOAD-TRAILER"
unsignedPayload = "UNSIGNED-PAYLOAD"
// Limit for IAM/STS request body size to prevent DoS attacks
iamRequestBodyLimit = 10 * (1 << 20) // 10 MiB
)
@ -214,14 +215,14 @@ func (iam *IdentityAccessManagement) verifyV4Signature(r *http.Request, shouldCh
availableKeys = append(availableKeys, key)
}
iam.m.RUnlock()
glog.Warningf("InvalidAccessKeyId: attempted key '%s' not found. Available keys: %d, Auth enabled: %v",
authInfo.AccessKey, len(availableKeys), iam.isAuthEnabled)
if glog.V(2) && len(availableKeys) > 0 {
glog.V(2).Infof("Available access keys: %v", availableKeys)
}
return nil, nil, "", nil, s3err.ErrInvalidAccessKeyID
}
@ -562,10 +563,10 @@ func (iam *IdentityAccessManagement) doesPolicySignatureV4Match(formValues http.
iam.m.RLock()
availableKeyCount := len(iam.accessKeyIdent)
iam.m.RUnlock()
glog.Warningf("InvalidAccessKeyId (POST policy): attempted key '%s' not found. Available keys: %d, Auth enabled: %v",
credHeader.accessKey, availableKeyCount, iam.isAuthEnabled)
return s3err.ErrInvalidAccessKeyID
}

10
weed/s3api/chunked_reader_v4.go

@ -53,8 +53,8 @@ func (iam *IdentityAccessManagement) calculateSeedSignature(r *http.Request) (cr
// This check ensures we only proceed for streaming uploads.
switch authInfo.HashedPayload {
case streamingContentSHA256:
glog.V(3).Infof("streaming content sha256")
case streamingContentSHA256, streamingContentSHA256Trailer:
glog.V(3).Infof("streaming content sha256 (with trailer: %v)", authInfo.HashedPayload == streamingContentSHA256Trailer)
case streamingUnsignedPayload:
glog.V(3).Infof("streaming unsigned payload")
default:
@ -87,9 +87,9 @@ func (iam *IdentityAccessManagement) newChunkedReader(req *http.Request) (io.Rea
var errCode s3err.ErrorCode
switch contentSha256Header {
// Payload for STREAMING signature should be 'STREAMING-AWS4-HMAC-SHA256-PAYLOAD'
case streamingContentSHA256:
glog.V(3).Infof("streaming content sha256")
// Payload for STREAMING signature should be 'STREAMING-AWS4-HMAC-SHA256-PAYLOAD' or 'STREAMING-AWS4-HMAC-SHA256-PAYLOAD-TRAILER'
case streamingContentSHA256, streamingContentSHA256Trailer:
glog.V(3).Infof("streaming content sha256 (with trailer: %v)", contentSha256Header == streamingContentSHA256Trailer)
credential, seedSignature, region, service, seedDate, errCode = iam.calculateSeedSignature(req)
if errCode != s3err.ErrNone {
return nil, errCode
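For orientation, the body of a STREAMING-AWS4-HMAC-SHA256-PAYLOAD-TRAILER upload looks roughly like the constant below, with placeholders instead of real 64-hex signatures; the exact construction is in the new test further down.

// Indicative wire format only; "c" is the hex length of "hello world\n".
const exampleTrailerBody = "c;chunk-signature=<sig1>\r\n" +
    "hello world\n" +
    "\r\n" +
    "0;chunk-signature=<sigFinal>\r\n" +
    "x-amz-checksum-crc32:<base64-crc32>\r\n" +
    "x-amz-trailer-signature:<trailerSig>\r\n" +
    "\r\n"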

144
weed/s3api/chunked_reader_v4_test.go

@ -234,6 +234,150 @@ func TestSignedStreamingUpload(t *testing.T) {
assert.Equal(t, chunk1Data+chunk2Data, string(data))
}
// createTrailerStreamingRequest creates a streaming upload request with trailer for testing.
// If useValidTrailerSignature is true, uses a correctly calculated trailer signature;
// otherwise uses an intentionally wrong signature for negative testing.
func createTrailerStreamingRequest(t *testing.T, useValidTrailerSignature bool) (*http.Request, string) {
chunk1Data := "hello world\n"
chunk1DataLen := len(chunk1Data)
chunk1DataLenHex := fmt.Sprintf("%x", chunk1DataLen)
// Use current time for signatures
now := time.Now().UTC()
amzDate := now.Format(iso8601Format)
dateStamp := now.Format(yyyymmdd)
// Calculate seed signature
scope := dateStamp + "/" + defaultRegion + "/s3/aws4_request"
// Build canonical request for seed signature
hashedPayload := "STREAMING-AWS4-HMAC-SHA256-PAYLOAD-TRAILER"
canonicalHeaders := "content-encoding:aws-chunked\n" +
"host:s3.amazonaws.com\n" +
"x-amz-content-sha256:" + hashedPayload + "\n" +
"x-amz-date:" + amzDate + "\n" +
fmt.Sprintf("x-amz-decoded-content-length:%d\n", chunk1DataLen) +
"x-amz-trailer:x-amz-checksum-crc32\n"
signedHeaders := "content-encoding;host;x-amz-content-sha256;x-amz-date;x-amz-decoded-content-length;x-amz-trailer"
canonicalRequest := "PUT\n" +
"/test-bucket/test-object\n" +
"\n" +
canonicalHeaders + "\n" +
signedHeaders + "\n" +
hashedPayload
canonicalRequestHash := getSHA256Hash([]byte(canonicalRequest))
stringToSign := "AWS4-HMAC-SHA256\n" + amzDate + "\n" + scope + "\n" + canonicalRequestHash
signingKey := getSigningKey(defaultSecretAccessKey, dateStamp, defaultRegion, "s3")
seedSignature := getSignature(signingKey, stringToSign)
// Calculate chunk signatures
chunk1Hash := getSHA256Hash([]byte(chunk1Data))
chunk1StringToSign := "AWS4-HMAC-SHA256-PAYLOAD\n" + amzDate + "\n" + scope + "\n" +
seedSignature + "\n" + emptySHA256 + "\n" + chunk1Hash
chunk1Signature := getSignature(signingKey, chunk1StringToSign)
// Final chunk (0 bytes)
finalStringToSign := "AWS4-HMAC-SHA256-PAYLOAD\n" + amzDate + "\n" + scope + "\n" +
chunk1Signature + "\n" + emptySHA256 + "\n" + emptySHA256
finalSignature := getSignature(signingKey, finalStringToSign)
// Calculate CRC32 checksum for trailer
crcWriter := crc32.NewIEEE()
_, crcErr := crcWriter.Write([]byte(chunk1Data))
assert.NoError(t, crcErr)
checksum := crcWriter.Sum(nil)
base64EncodedChecksum := base64.StdEncoding.EncodeToString(checksum)
// The on-wire trailer format uses \r\n (HTTP/aws-chunked convention)
trailerOnWire := "x-amz-checksum-crc32:" + base64EncodedChecksum + "\r\n"
// Calculate or use wrong trailer signature
var trailerSignature string
if useValidTrailerSignature {
// The canonical trailer content uses \n for signing (per AWS SigV4 spec)
trailerCanonical := "x-amz-checksum-crc32:" + base64EncodedChecksum + "\n"
trailerHash := getSHA256Hash([]byte(trailerCanonical))
trailerStringToSign := "AWS4-HMAC-SHA256-TRAILER\n" + amzDate + "\n" + scope + "\n" +
finalSignature + "\n" + trailerHash
trailerSignature = getSignature(signingKey, trailerStringToSign)
} else {
// Intentionally wrong signature for negative testing
trailerSignature = "0000000000000000000000000000000000000000000000000000000000000000"
}
// Build the chunked payload with trailer and trailer signature
payload := fmt.Sprintf("%s;chunk-signature=%s\r\n%s\r\n", chunk1DataLenHex, chunk1Signature, chunk1Data) +
fmt.Sprintf("0;chunk-signature=%s\r\n", finalSignature) +
trailerOnWire +
"x-amz-trailer-signature:" + trailerSignature + "\r\n" +
"\r\n"
// Create the request
req, err := http.NewRequest("PUT", "http://s3.amazonaws.com/test-bucket/test-object",
bytes.NewReader([]byte(payload)))
assert.NoError(t, err)
req.Header.Set("Host", "s3.amazonaws.com")
req.Header.Set("x-amz-date", amzDate)
req.Header.Set("x-amz-content-sha256", hashedPayload)
req.Header.Set("Content-Encoding", "aws-chunked")
req.Header.Set("x-amz-decoded-content-length", fmt.Sprintf("%d", chunk1DataLen))
req.Header.Set("x-amz-trailer", "x-amz-checksum-crc32")
authHeader := fmt.Sprintf("AWS4-HMAC-SHA256 Credential=%s/%s, SignedHeaders=%s, Signature=%s",
defaultAccessKeyId, scope, signedHeaders, seedSignature)
req.Header.Set("Authorization", authHeader)
return req, chunk1Data
}
// TestSignedStreamingUploadWithTrailer tests streaming uploads with signed chunks and trailers
// This tests the STREAMING-AWS4-HMAC-SHA256-PAYLOAD-TRAILER content-sha256 header value
// which is used by AWS SDK v2 when checksum validation is enabled
func TestSignedStreamingUploadWithTrailer(t *testing.T) {
iam := setupIam()
req, expectedData := createTrailerStreamingRequest(t, true)
// Test the chunked reader
reader, errCode := iam.newChunkedReader(req)
assert.Equal(t, s3err.ErrNone, errCode)
assert.NotNil(t, reader)
// Read and verify the payload
data, err := io.ReadAll(reader)
assert.NoError(t, err)
assert.Equal(t, expectedData, string(data))
}
// TestSignedStreamingUploadWithTrailerInvalidSignature tests behavior with invalid trailer signatures.
// This is a negative test case for trailer signature validation. It currently verifies that an invalid
// signature doesn't break content reading, and is prepared for when validation is implemented.
func TestSignedStreamingUploadWithTrailerInvalidSignature(t *testing.T) {
iam := setupIam()
req, expectedData := createTrailerStreamingRequest(t, false)
// Test the chunked reader - it should be created successfully
reader, errCode := iam.newChunkedReader(req)
assert.Equal(t, s3err.ErrNone, errCode)
assert.NotNil(t, reader)
// Read the payload - currently trailer signature validation may not be implemented,
// but this test documents the expected behavior and will catch regressions
// if trailer signature validation is added in the future
data, err := io.ReadAll(reader)
// Note: If trailer signature validation is implemented, this should fail with an error
// For now, we just verify the content is correctly extracted
if err != nil {
assert.Contains(t, err.Error(), "signature", "Error should indicate signature mismatch")
} else {
// If no error, content should still be correct (trailer sig validation not yet implemented)
assert.Equal(t, expectedData, string(data))
}
}
// TestSignedStreamingUploadInvalidSignature tests that invalid chunk signatures are rejected
// This is a negative test case to ensure signature validation is actually working
func TestSignedStreamingUploadInvalidSignature(t *testing.T) {

5
weed/s3api/filer_multipart.go

@ -187,7 +187,10 @@ func (s3a *S3ApiServer) completeMultipartUpload(r *http.Request, input *s3.Compl
sort.Ints(completedPartNumbers)
uploadDirectory := s3a.genUploadsFolder(*input.Bucket) + "/" + *input.UploadId
entries, _, err := s3a.list(uploadDirectory, "", "", false, 0)
// Use explicit limit to ensure all parts are listed (up to S3's max of 10,000 parts)
// Previously limit=0 relied on server's DirListingLimit default (1000 in weed server mode),
// which caused CompleteMultipartUpload to fail for uploads with more than 1000 parts.
entries, _, err := s3a.list(uploadDirectory, "", "", false, s3_constants.MaxS3MultipartParts+1)
if err != nil {
glog.Errorf("completeMultipartUpload %s %s error: %v, entries:%d", *input.Bucket, *input.UploadId, err, len(entries))
stats.S3HandlerCounter.WithLabelValues(stats.ErrorCompletedNoSuchUpload).Inc()

16
weed/s3api/s3api_auth.go

@ -48,14 +48,22 @@ func isRequestPostPolicySignatureV4(r *http.Request) bool {
}
// Verify if the request has AWS Streaming Signature Version '4'. This is only valid for 'PUT' operation.
// Supports both with and without trailer variants:
// - STREAMING-AWS4-HMAC-SHA256-PAYLOAD (original)
// - STREAMING-AWS4-HMAC-SHA256-PAYLOAD-TRAILER (with trailing checksums)
func isRequestSignStreamingV4(r *http.Request) bool {
return r.Header.Get("x-amz-content-sha256") == streamingContentSHA256 &&
r.Method == http.MethodPut
if r.Method != http.MethodPut {
return false
}
contentSha256 := r.Header.Get("x-amz-content-sha256")
return contentSha256 == streamingContentSHA256 || contentSha256 == streamingContentSHA256Trailer
}
func isRequestUnsignedStreaming(r *http.Request) bool {
return r.Header.Get("x-amz-content-sha256") == streamingUnsignedPayload &&
r.Method == http.MethodPut
if r.Method != http.MethodPut {
return false
}
return r.Header.Get("x-amz-content-sha256") == streamingUnsignedPayload
}
// Authorization type.

17
weed/s3api/s3api_bucket_config.go

@ -514,12 +514,27 @@ func (s3a *S3ApiServer) isVersioningConfigured(bucket string) (bool, error) {
return config.Versioning != "" || config.ObjectLockConfig != nil, nil
}
// isObjectLockEnabled checks if Object Lock is enabled for a bucket (with caching)
func (s3a *S3ApiServer) isObjectLockEnabled(bucket string) (bool, error) {
config, errCode := s3a.getBucketConfig(bucket)
if errCode != s3err.ErrNone {
if errCode == s3err.ErrNoSuchBucket {
return false, filer_pb.ErrNotFound
}
return false, fmt.Errorf("failed to get bucket config: %v", errCode)
}
return config.ObjectLockConfig != nil, nil
}
// getVersioningState returns the detailed versioning state for a bucket
func (s3a *S3ApiServer) getVersioningState(bucket string) (string, error) {
config, errCode := s3a.getBucketConfig(bucket)
if errCode != s3err.ErrNone {
if errCode == s3err.ErrNoSuchBucket {
return "", nil
// Signal to callers that the bucket does not exist so they can
// decide whether to auto-create it (e.g., in PUT handlers).
return "", filer_pb.ErrNotFound
}
glog.Errorf("getVersioningState: failed to get bucket config for %s: %v", bucket, errCode)
return "", fmt.Errorf("failed to get bucket config: %v", errCode)

78
weed/s3api/s3api_bucket_handlers.go

@ -244,46 +244,64 @@ func (s3a *S3ApiServer) PutBucketHandler(w http.ResponseWriter, r *http.Request)
return
}
// create the folder for bucket, but lazily create actual collection
if err := s3a.mkdir(s3a.option.BucketsPath, bucket, setBucketOwner(r)); err != nil {
glog.Errorf("PutBucketHandler mkdir: %v", err)
s3err.WriteErrorResponse(w, r, s3err.ErrInternalError)
return
}
// Check for x-amz-bucket-object-lock-enabled header BEFORE creating bucket
// This allows us to create the bucket with Object Lock configuration atomically
objectLockEnabled := strings.EqualFold(r.Header.Get(s3_constants.AmzBucketObjectLockEnabled), "true")
// Remove bucket from negative cache after successful creation
if s3a.bucketConfigCache != nil {
s3a.bucketConfigCache.RemoveNegativeCache(bucket)
}
// Capture any Object Lock configuration error from within the callback
// The mkdir callback doesn't support returning errors, so we capture it here
var objectLockSetupError error
// Check for x-amz-bucket-object-lock-enabled header (S3 standard compliance)
if objectLockHeaderValue := r.Header.Get(s3_constants.AmzBucketObjectLockEnabled); strings.EqualFold(objectLockHeaderValue, "true") {
glog.V(3).Infof("PutBucketHandler: enabling Object Lock and Versioning for bucket %s due to x-amz-bucket-object-lock-enabled header", bucket)
// Create the folder for bucket with all settings atomically
// This ensures Object Lock configuration is set in the same CreateEntry call,
// preventing race conditions where the bucket exists without Object Lock enabled
if err := s3a.mkdir(s3a.option.BucketsPath, bucket, func(entry *filer_pb.Entry) {
// Set bucket owner
setBucketOwner(r)(entry)
// Set Object Lock configuration atomically during bucket creation
if objectLockEnabled {
glog.V(3).Infof("PutBucketHandler: enabling Object Lock and Versioning for bucket %s atomically", bucket)
if entry.Extended == nil {
entry.Extended = make(map[string][]byte)
}
// Atomically update the configuration of the specified bucket. See the updateBucketConfig
// function definition for detailed documentation on parameters and behavior.
errCode := s3a.updateBucketConfig(bucket, func(bucketConfig *BucketConfig) error {
// Enable versioning (required for Object Lock)
bucketConfig.Versioning = s3_constants.VersioningEnabled
entry.Extended[s3_constants.ExtVersioningKey] = []byte(s3_constants.VersioningEnabled)
// Create basic Object Lock configuration (enabled without default retention)
// Create and store Object Lock configuration
objectLockConfig := &ObjectLockConfiguration{
ObjectLockEnabled: s3_constants.ObjectLockEnabled,
}
if err := StoreObjectLockConfigurationInExtended(entry, objectLockConfig); err != nil {
glog.Errorf("PutBucketHandler: failed to store Object Lock config for bucket %s: %v", bucket, err)
objectLockSetupError = err
// Note: The entry will still be created, but we'll roll it back below
} else {
glog.V(3).Infof("PutBucketHandler: set ObjectLockConfig for bucket %s: %+v", bucket, objectLockConfig)
}
}
}); err != nil {
glog.Errorf("PutBucketHandler mkdir: %v", err)
s3err.WriteErrorResponse(w, r, s3err.ErrInternalError)
return
}
// Set the cached Object Lock configuration
bucketConfig.ObjectLockConfig = objectLockConfig
glog.V(3).Infof("PutBucketHandler: set ObjectLockConfig for bucket %s: %+v", bucket, objectLockConfig)
return nil
})
if errCode != s3err.ErrNone {
glog.Errorf("PutBucketHandler: failed to enable Object Lock for bucket %s: %v", bucket, errCode)
s3err.WriteErrorResponse(w, r, errCode)
return
// If Object Lock setup failed, roll back the bucket creation
// This ensures we don't leave a bucket without the requested Object Lock configuration
if objectLockSetupError != nil {
glog.Errorf("PutBucketHandler: rolling back bucket %s creation due to Object Lock setup failure: %v", bucket, objectLockSetupError)
if deleteErr := s3a.rm(s3a.option.BucketsPath, bucket, true, true); deleteErr != nil {
glog.Errorf("PutBucketHandler: failed to rollback bucket %s after Object Lock setup failure: %v", bucket, deleteErr)
}
glog.V(3).Infof("PutBucketHandler: enabled Object Lock and Versioning for bucket %s", bucket)
s3err.WriteErrorResponse(w, r, s3err.ErrInternalError)
return
}
// Remove bucket from negative cache after successful creation
if s3a.bucketConfigCache != nil {
s3a.bucketConfigCache.RemoveNegativeCache(bucket)
}
w.Header().Set("Location", "/"+bucket)

42
weed/s3api/s3api_object_handlers.go

@ -659,16 +659,14 @@ func (s3a *S3ApiServer) GetObjectHandler(w http.ResponseWriter, r *http.Request)
glog.V(3).Infof("GetObject: Set PartsCount=%d for multipart GET with PartNumber=%d", partsCount, partNumber)
// Calculate the byte range for this part
// Note: ETag is NOT overridden - AWS S3 returns the complete object's ETag
// even when requesting a specific part via PartNumber
var startOffset, endOffset int64
if partInfo != nil {
// Use part boundaries from metadata (accurate for multi-chunk parts)
startOffset = objectEntryForSSE.Chunks[partInfo.StartChunk].Offset
lastChunk := objectEntryForSSE.Chunks[partInfo.EndChunk-1]
endOffset = lastChunk.Offset + int64(lastChunk.Size) - 1
// Override ETag with the part's ETag from metadata
w.Header().Set("ETag", "\""+partInfo.ETag+"\"")
glog.V(3).Infof("GetObject: Override ETag with part %d ETag: %s (from metadata)", partNumber, partInfo.ETag)
} else {
// Fallback: assume 1:1 part-to-chunk mapping (backward compatibility)
chunkIndex := partNumber - 1
@ -680,15 +678,6 @@ func (s3a *S3ApiServer) GetObjectHandler(w http.ResponseWriter, r *http.Request)
partChunk := objectEntryForSSE.Chunks[chunkIndex]
startOffset = partChunk.Offset
endOffset = partChunk.Offset + int64(partChunk.Size) - 1
// Override ETag with chunk's ETag (fallback)
if partChunk.ETag != "" {
if md5Bytes, decodeErr := base64.StdEncoding.DecodeString(partChunk.ETag); decodeErr == nil {
partETag := fmt.Sprintf("%x", md5Bytes)
w.Header().Set("ETag", "\""+partETag+"\"")
glog.V(3).Infof("GetObject: Override ETag with part %d ETag: %s (fallback from chunk)", partNumber, partETag)
}
}
}
// Check if client supplied a Range header - if so, apply it within the part's boundaries
@ -2266,7 +2255,7 @@ func (s3a *S3ApiServer) HeadObjectHandler(w http.ResponseWriter, r *http.Request
if partNumberStr != "" {
if partNumber, parseErr := strconv.Atoi(partNumberStr); parseErr == nil && partNumber > 0 {
// Get actual parts count from metadata (not chunk count)
partsCount, partInfo := s3a.getMultipartInfo(objectEntryForSSE, partNumber)
partsCount, _ := s3a.getMultipartInfo(objectEntryForSSE, partNumber)
// Validate part number
if partNumber > partsCount {
@ -2276,31 +2265,10 @@ func (s3a *S3ApiServer) HeadObjectHandler(w http.ResponseWriter, r *http.Request
}
// Set parts count header
// Note: ETag is NOT overridden - AWS S3 returns the complete object's ETag
// even when requesting a specific part via PartNumber
w.Header().Set(s3_constants.AmzMpPartsCount, strconv.Itoa(partsCount))
glog.V(3).Infof("HeadObject: Set PartsCount=%d for part %d", partsCount, partNumber)
// Override ETag with the part's ETag
if partInfo != nil {
// Use part ETag from metadata (accurate for multi-chunk parts)
w.Header().Set("ETag", "\""+partInfo.ETag+"\"")
glog.V(3).Infof("HeadObject: Override ETag with part %d ETag: %s (from metadata)", partNumber, partInfo.ETag)
} else {
// Fallback: use chunk's ETag (backward compatibility)
chunkIndex := partNumber - 1
if chunkIndex >= len(objectEntryForSSE.Chunks) {
glog.Warningf("HeadObject: Part %d chunk index %d out of range (chunks: %d)", partNumber, chunkIndex, len(objectEntryForSSE.Chunks))
s3err.WriteErrorResponse(w, r, s3err.ErrInvalidPart)
return
}
partChunk := objectEntryForSSE.Chunks[chunkIndex]
if partChunk.ETag != "" {
if md5Bytes, decodeErr := base64.StdEncoding.DecodeString(partChunk.ETag); decodeErr == nil {
partETag := fmt.Sprintf("%x", md5Bytes)
w.Header().Set("ETag", "\""+partETag+"\"")
glog.V(3).Infof("HeadObject: Override ETag with part %d ETag: %s (fallback from chunk)", partNumber, partETag)
}
}
}
}
}

58
weed/s3api/s3api_object_handlers_delete.go

@ -1,12 +1,10 @@
package s3api
import (
"context"
"encoding/xml"
"fmt"
"io"
"net/http"
"slices"
"strings"
"github.com/seaweedfs/seaweedfs/weed/filer"
@ -127,23 +125,11 @@ func (s3a *S3ApiServer) DeleteObjectHandler(w http.ResponseWriter, r *http.Reque
dir, name := target.DirAndName()
err := s3a.WithFilerClient(false, func(client filer_pb.SeaweedFilerClient) error {
// Use operation context that won't be cancelled if request terminates
// This ensures deletion completes atomically to avoid inconsistent state
opCtx := context.WithoutCancel(r.Context())
if err := doDeleteEntry(client, dir, name, true, false); err != nil {
return err
}
// Cleanup empty directories
if !s3a.option.AllowEmptyFolder && strings.LastIndex(object, "/") > 0 {
bucketPath := fmt.Sprintf("%s/%s", s3a.option.BucketsPath, bucket)
// Recursively delete empty parent directories, stop at bucket path
filer_pb.DoDeleteEmptyParentDirectories(opCtx, client, util.FullPath(dir), util.FullPath(bucketPath), nil)
}
return nil
return doDeleteEntry(client, dir, name, true, false)
// Note: Empty folder cleanup is now handled asynchronously by EmptyFolderCleaner
// which listens to metadata events and uses consistent hashing for coordination
})
if err != nil {
s3err.WriteErrorResponse(w, r, s3err.ErrInternalError)
return
@ -222,8 +208,6 @@ func (s3a *S3ApiServer) DeleteMultipleObjectsHandler(w http.ResponseWriter, r *h
var deleteErrors []DeleteError
var auditLog *s3err.AccessLog
directoriesWithDeletion := make(map[string]bool)
if s3err.Logger != nil {
auditLog = s3err.GetAccessLog(r, http.StatusNoContent, s3err.ErrNone)
}
@ -245,10 +229,6 @@ func (s3a *S3ApiServer) DeleteMultipleObjectsHandler(w http.ResponseWriter, r *h
versioningConfigured := (versioningState != "")
s3a.WithFilerClient(false, func(client filer_pb.SeaweedFilerClient) error {
// Use operation context that won't be cancelled if request terminates
// This ensures batch deletion completes atomically to avoid inconsistent state
opCtx := context.WithoutCancel(r.Context())
// delete file entries
for _, object := range deleteObjects.Objects {
if object.Key == "" {
@ -357,10 +337,6 @@ func (s3a *S3ApiServer) DeleteMultipleObjectsHandler(w http.ResponseWriter, r *h
err := doDeleteEntry(client, parentDirectoryPath, entryName, isDeleteData, isRecursive)
if err == nil {
// Track directory for empty directory cleanup
if !s3a.option.AllowEmptyFolder {
directoriesWithDeletion[parentDirectoryPath] = true
}
deletedObjects = append(deletedObjects, object)
} else if strings.Contains(err.Error(), filer.MsgFailDelNonEmptyFolder) {
deletedObjects = append(deletedObjects, object)
@ -380,30 +356,8 @@ func (s3a *S3ApiServer) DeleteMultipleObjectsHandler(w http.ResponseWriter, r *h
}
}
// Cleanup empty directories - optimize by processing deepest first
if !s3a.option.AllowEmptyFolder && len(directoriesWithDeletion) > 0 {
bucketPath := fmt.Sprintf("%s/%s", s3a.option.BucketsPath, bucket)
// Collect and sort directories by depth (deepest first) to avoid redundant checks
var allDirs []string
for dirPath := range directoriesWithDeletion {
allDirs = append(allDirs, dirPath)
}
// Sort by depth (deeper directories first)
slices.SortFunc(allDirs, func(a, b string) int {
return strings.Count(b, "/") - strings.Count(a, "/")
})
// Track already-checked directories to avoid redundant work
checked := make(map[string]bool)
for _, dirPath := range allDirs {
if !checked[dirPath] {
// Recursively delete empty parent directories, stop at bucket path
// Mark this directory and all its parents as checked during recursion
filer_pb.DoDeleteEmptyParentDirectories(opCtx, client, util.FullPath(dirPath), util.FullPath(bucketPath), checked)
}
}
}
// Note: Empty folder cleanup is now handled asynchronously by EmptyFolderCleaner
// which listens to metadata events and uses consistent hashing for coordination
return nil
})
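The asynchronous EmptyFolderCleaner referenced in the comments above coordinates across filers with consistent hashing so that only one peer cleans a given directory. A loose illustration of that idea follows; the hash function, peer list, and ownership rule here are assumptions, not the actual implementation.

package main

import (
	"fmt"
	"hash/fnv"
)

// ownsDirectory decides whether this filer should clean up a given directory,
// so that exactly one peer acts on each path. Illustrative sketch only; the
// real cleaner's hashing and peer discovery may differ.
func ownsDirectory(dir, self string, peers []string) bool {
	h := fnv.New32a()
	h.Write([]byte(dir))
	idx := int(h.Sum32() % uint32(len(peers)))
	return peers[idx] == self
}

func main() {
	peers := []string{"filer-0:8888", "filer-1:8888", "filer-2:8888"}
	dir := "/buckets/my-bucket/photos/2024"
	fmt.Println("filer-1 owns cleanup:", ownsDirectory(dir, "filer-1:8888", peers))
}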

10
weed/s3api/s3api_object_handlers_list.go

@ -554,15 +554,7 @@ func (s3a *S3ApiServer) doListFilerEntries(client filer_pb.SeaweedFilerClient, d
}
// println("doListFilerEntries2 nextMarker", nextMarker)
} else {
var isEmpty bool
if !s3a.option.AllowEmptyFolder && entry.IsOlderDir() {
//if isEmpty, err = s3a.ensureDirectoryAllEmpty(client, dir, entry.Name); err != nil {
// glog.Errorf("check empty folder %s: %v", dir, err)
//}
}
if !isEmpty {
eachEntryFn(dir, entry)
}
eachEntryFn(dir, entry)
}
} else {
eachEntryFn(dir, entry)

30
weed/s3api/s3api_object_handlers_put.go

@ -30,14 +30,14 @@ import (
// Object lock validation errors
var (
ErrObjectLockVersioningRequired = errors.New("object lock headers can only be used on versioned buckets")
ErrObjectLockVersioningRequired = errors.New("object lock headers can only be used on buckets with Object Lock enabled")
ErrInvalidObjectLockMode = errors.New("invalid object lock mode")
ErrInvalidLegalHoldStatus = errors.New("invalid legal hold status")
ErrInvalidRetentionDateFormat = errors.New("invalid retention until date format")
ErrRetentionDateMustBeFuture = errors.New("retain until date must be in the future")
ErrObjectLockModeRequiresDate = errors.New("object lock mode requires retention until date")
ErrRetentionDateRequiresMode = errors.New("retention until date requires object lock mode")
ErrGovernanceBypassVersioningRequired = errors.New("governance bypass header can only be used on versioned buckets")
ErrGovernanceBypassVersioningRequired = errors.New("governance bypass header can only be used on buckets with Object Lock enabled")
ErrInvalidObjectLockDuration = errors.New("object lock duration must be greater than 0 days")
ErrObjectLockDurationExceeded = errors.New("object lock duration exceeds maximum allowed days")
ErrObjectLockConfigurationMissingEnabled = errors.New("object lock configuration must specify ObjectLockEnabled")
@ -159,8 +159,16 @@ func (s3a *S3ApiServer) PutObjectHandler(w http.ResponseWriter, r *http.Request)
glog.V(3).Infof("PutObjectHandler: bucket=%s, object=%s, versioningState='%s', versioningEnabled=%v, versioningConfigured=%v", bucket, object, versioningState, versioningEnabled, versioningConfigured)
// Check if Object Lock is enabled for this bucket
objectLockEnabled, err := s3a.isObjectLockEnabled(bucket)
if err != nil && !errors.Is(err, filer_pb.ErrNotFound) {
glog.Errorf("Error checking Object Lock status for bucket %s: %v", bucket, err)
s3err.WriteErrorResponse(w, r, s3err.ErrInternalError)
return
}
// Validate object lock headers before processing
if err := s3a.validateObjectLockHeaders(r, versioningEnabled); err != nil {
if err := s3a.validateObjectLockHeaders(r, objectLockEnabled); err != nil {
glog.V(2).Infof("PutObjectHandler: object lock header validation failed for bucket %s, object %s: %v", bucket, object, err)
s3err.WriteErrorResponse(w, r, mapValidationErrorToS3Error(err))
return
@ -1311,7 +1319,8 @@ func (s3a *S3ApiServer) applyBucketDefaultRetention(bucket string, entry *filer_
}
// validateObjectLockHeaders validates object lock headers in PUT requests
func (s3a *S3ApiServer) validateObjectLockHeaders(r *http.Request, versioningEnabled bool) error {
// objectLockEnabled should be true only if the bucket has Object Lock configured
func (s3a *S3ApiServer) validateObjectLockHeaders(r *http.Request, objectLockEnabled bool) error {
// Extract object lock headers from request
mode := r.Header.Get(s3_constants.AmzObjectLockMode)
retainUntilDateStr := r.Header.Get(s3_constants.AmzObjectLockRetainUntilDate)
@ -1320,8 +1329,11 @@ func (s3a *S3ApiServer) validateObjectLockHeaders(r *http.Request, versioningEna
// Check if any object lock headers are present
hasObjectLockHeaders := mode != "" || retainUntilDateStr != "" || legalHold != ""
// Object lock headers can only be used on versioned buckets
if hasObjectLockHeaders && !versioningEnabled {
// Object lock headers can only be used on buckets with Object Lock enabled
// Per AWS S3: Object Lock can only be enabled at bucket creation, and once enabled,
// objects can have retention/legal-hold metadata. Without Object Lock enabled,
// these headers must be rejected.
if hasObjectLockHeaders && !objectLockEnabled {
return ErrObjectLockVersioningRequired
}
@ -1362,11 +1374,11 @@ func (s3a *S3ApiServer) validateObjectLockHeaders(r *http.Request, versioningEna
}
}
// Check for governance bypass header - only valid for versioned buckets
// Check for governance bypass header - only valid for buckets with Object Lock enabled
bypassGovernance := r.Header.Get("x-amz-bypass-governance-retention") == "true"
// Governance bypass headers are only valid for versioned buckets (like object lock headers)
if bypassGovernance && !versioningEnabled {
// Governance bypass headers are only valid for buckets with Object Lock enabled
if bypassGovernance && !objectLockEnabled {
return ErrGovernanceBypassVersioningRequired
}
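As a usage illustration (not from the diff), the headers validated above are the standard AWS Object Lock headers; with this change a PUT like the following is rejected on a bucket without Object Lock enabled and accepted on one that has it. Endpoint and names are hypothetical and signing is omitted.

package main

import (
	"fmt"
	"net/http"
	"strings"
	"time"
)

func main() {
	// Hypothetical endpoint/bucket/key; request signing is omitted for brevity.
	url := "http://localhost:8333/locked-bucket/report.pdf"
	req, err := http.NewRequest(http.MethodPut, url, strings.NewReader("data"))
	if err != nil {
		panic(err)
	}

	// Object Lock headers: accepted only when the bucket has Object Lock enabled.
	req.Header.Set("X-Amz-Object-Lock-Mode", "GOVERNANCE")
	req.Header.Set("X-Amz-Object-Lock-Retain-Until-Date",
		time.Now().Add(24*time.Hour).UTC().Format(time.RFC3339))

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	fmt.Println("status:", resp.Status)
}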

18
weed/s3api/s3api_object_retention.go

@ -586,10 +586,26 @@ func (s3a *S3ApiServer) evaluateGovernanceBypassRequest(r *http.Request, bucket,
// enforceObjectLockProtections enforces object lock protections for operations
func (s3a *S3ApiServer) enforceObjectLockProtections(request *http.Request, bucket, object, versionId string, governanceBypassAllowed bool) error {
// Quick check: if bucket doesn't have Object Lock enabled, skip the expensive entry lookup
// This optimization avoids a filer gRPC call for every DELETE operation on buckets without Object Lock
objectLockEnabled, err := s3a.isObjectLockEnabled(bucket)
if err != nil {
if errors.Is(err, filer_pb.ErrNotFound) {
// Bucket does not exist, so no protections to enforce
return nil
}
// For other errors, we can't determine lock status, so we should fail.
glog.Errorf("enforceObjectLockProtections: failed to check object lock for bucket %s: %v", bucket, err)
return err
}
if !objectLockEnabled {
// Object Lock is not enabled on this bucket, no protections to enforce
return nil
}
// Get the object entry to check both retention and legal hold
// For delete operations without versionId, we need to check the latest version
var entry *filer_pb.Entry
var err error
if versionId != "" {
// Check specific version

1
weed/s3api/s3api_server.go

@ -43,7 +43,6 @@ type S3ApiServerOption struct {
AllowedOrigins []string
BucketsPath string
GrpcDialOption grpc.DialOption
AllowEmptyFolder bool
AllowDeleteBucketNotEmpty bool
LocalFilerSocket string
DataCenter string

11
weed/server/common.go

@ -9,9 +9,9 @@ import (
"fmt"
"io"
"io/fs"
"mime"
"mime/multipart"
"net/http"
"net/url"
"path/filepath"
"strconv"
"strings"
@ -286,14 +286,15 @@ func adjustHeaderContentDisposition(w http.ResponseWriter, r *http.Request, file
return
}
if filename != "" {
filename = url.QueryEscape(filename)
contentDisposition := "inline"
dispositionType := "inline"
if r.FormValue("dl") != "" {
if dl, _ := strconv.ParseBool(r.FormValue("dl")); dl {
contentDisposition = "attachment"
dispositionType = "attachment"
}
}
w.Header().Set("Content-Disposition", contentDisposition+`; filename="`+fileNameEscaper.Replace(filename)+`"`)
// Use mime.FormatMediaType for RFC 6266 compliant Content-Disposition,
// properly handling non-ASCII characters and special characters
w.Header().Set("Content-Disposition", mime.FormatMediaType(dispositionType, map[string]string{"filename": filename}))
}
}
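A small standalone sketch of the new Content-Disposition formatting, assuming a reasonably recent Go toolchain (which percent-encodes non-ASCII parameter values using the RFC 2231 extended syntax referenced by RFC 6266):

package main

import (
	"fmt"
	"mime"
)

func main() {
	// ASCII filename: emitted directly as a parameter.
	fmt.Println(mime.FormatMediaType("attachment", map[string]string{"filename": "report.pdf"}))

	// Non-ASCII filename: recent Go versions emit the extended filename*= form
	// instead of an invalid raw value.
	fmt.Println(mime.FormatMediaType("inline", map[string]string{"filename": "résumé.pdf"}))
}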

4
weed/server/master_grpc_server.go

@ -137,8 +137,8 @@ func (ms *MasterServer) SendHeartbeat(stream master_pb.Seaweed_SendHeartbeatServ
dcName, rackName := ms.Topo.Configuration.Locate(heartbeat.Ip, heartbeat.DataCenter, heartbeat.Rack)
dc := ms.Topo.GetOrCreateDataCenter(dcName)
rack := dc.GetOrCreateRack(rackName)
dn = rack.GetOrCreateDataNode(heartbeat.Ip, int(heartbeat.Port), int(heartbeat.GrpcPort), heartbeat.PublicUrl, heartbeat.MaxVolumeCounts)
glog.V(0).Infof("added volume server %d: %v:%d %v", dn.Counter, heartbeat.GetIp(), heartbeat.GetPort(), heartbeat.LocationUuids)
dn = rack.GetOrCreateDataNode(heartbeat.Ip, int(heartbeat.Port), int(heartbeat.GrpcPort), heartbeat.PublicUrl, heartbeat.Id, heartbeat.MaxVolumeCounts)
glog.V(0).Infof("added volume server %d: %v (id=%s, ip=%v:%d) %v", dn.Counter, dn.Id(), heartbeat.Id, heartbeat.GetIp(), heartbeat.GetPort(), heartbeat.LocationUuids)
uuidlist, err := ms.RegisterUuids(heartbeat)
if err != nil {
if stream_err := stream.Send(&master_pb.HeartbeatResponse{

2
weed/server/master_grpc_server_volume.go

@ -253,7 +253,7 @@ func (ms *MasterServer) LookupEcVolume(ctx context.Context, req *master_pb.Looku
var locations []*master_pb.Location
for _, dn := range shardLocations {
locations = append(locations, &master_pb.Location{
Url: string(dn.Id()),
Url: dn.Url(),
PublicUrl: dn.PublicUrl,
DataCenter: dn.GetDataCenterId(),
})

12
weed/server/volume_grpc_copy.go

@ -115,7 +115,7 @@ func (vs *VolumeServer) VolumeCopy(req *volume_server_pb.VolumeCopyRequest, stre
var sendErr error
var ioBytePerSecond int64
if req.IoBytePerSecond <= 0 {
ioBytePerSecond = vs.compactionBytePerSecond
ioBytePerSecond = vs.maintenanceBytePerSecond
} else {
ioBytePerSecond = req.IoBytePerSecond
}
@ -199,7 +199,7 @@ func (vs *VolumeServer) VolumeCopy(req *volume_server_pb.VolumeCopyRequest, stre
}
func (vs *VolumeServer) doCopyFile(client volume_server_pb.VolumeServerClient, isEcVolume bool, collection string, vid, compactRevision uint32, stopOffset uint64, baseFileName, ext string, isAppend, ignoreSourceFileNotFound bool, progressFn storage.ProgressFunc) (modifiedTsNs int64, err error) {
return vs.doCopyFileWithThrottler(client, isEcVolume, collection, vid, compactRevision, stopOffset, baseFileName, ext, isAppend, ignoreSourceFileNotFound, progressFn, util.NewWriteThrottler(vs.compactionBytePerSecond))
return vs.doCopyFileWithThrottler(client, isEcVolume, collection, vid, compactRevision, stopOffset, baseFileName, ext, isAppend, ignoreSourceFileNotFound, progressFn, util.NewWriteThrottler(vs.maintenanceBytePerSecond))
}
func (vs *VolumeServer) doCopyFileWithThrottler(client volume_server_pb.VolumeServerClient, isEcVolume bool, collection string, vid, compactRevision uint32, stopOffset uint64, baseFileName, ext string, isAppend, ignoreSourceFileNotFound bool, progressFn storage.ProgressFunc, throttler *util.WriteThrottler) (modifiedTsNs int64, err error) {
@ -264,7 +264,7 @@ func writeToFile(client volume_server_pb.VolumeServer_CopyFileClient, fileName s
}
dst, err := os.OpenFile(fileName, flags, 0644)
if err != nil {
return modifiedTsNs, nil
return modifiedTsNs, fmt.Errorf("open file %s: %w", fileName, err)
}
defer dst.Close()
@ -278,9 +278,11 @@ func writeToFile(client volume_server_pb.VolumeServer_CopyFileClient, fileName s
modifiedTsNs = resp.ModifiedTsNs
}
if receiveErr != nil {
return modifiedTsNs, fmt.Errorf("receiving %s: %v", fileName, receiveErr)
return modifiedTsNs, fmt.Errorf("receiving %s: %w", fileName, receiveErr)
}
if _, writeErr := dst.Write(resp.FileContent); writeErr != nil {
return modifiedTsNs, fmt.Errorf("write file %s: %w", fileName, writeErr)
}
dst.Write(resp.FileContent)
progressedBytes += int64(len(resp.FileContent))
if progressFn != nil {
if !progressFn(progressedBytes) {
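One reason for switching these messages from %v to %w (generic illustrative sketch, not code from the diff): wrapped errors remain inspectable with errors.Is/errors.As further up the call chain.

package main

import (
	"errors"
	"fmt"
	"io"
)

func receive() error {
	// Simulate a stream receive failing with a sentinel error.
	return fmt.Errorf("receiving volume file: %w", io.ErrUnexpectedEOF)
}

func main() {
	err := receive()
	// %w preserves the wrapped error, so callers can still match on it.
	fmt.Println("is unexpected EOF:", errors.Is(err, io.ErrUnexpectedEOF))
}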

27
weed/server/volume_server.go

@ -42,20 +42,21 @@ type VolumeServer struct {
guard *security.Guard
grpcDialOption grpc.DialOption
needleMapKind storage.NeedleMapKind
ldbTimout int64
FixJpgOrientation bool
ReadMode string
compactionBytePerSecond int64
metricsAddress string
metricsIntervalSec int
fileSizeLimitBytes int64
isHeartbeating bool
stopChan chan bool
needleMapKind storage.NeedleMapKind
ldbTimout int64
FixJpgOrientation bool
ReadMode string
compactionBytePerSecond int64
maintenanceBytePerSecond int64
metricsAddress string
metricsIntervalSec int
fileSizeLimitBytes int64
isHeartbeating bool
stopChan chan bool
}
func NewVolumeServer(adminMux, publicMux *http.ServeMux, ip string,
port int, grpcPort int, publicUrl string,
port int, grpcPort int, publicUrl string, id string,
folders []string, maxCounts []int32, minFreeSpaces []util.MinFreeSpace, diskTypes []types.DiskType,
idxFolder string,
needleMapKind storage.NeedleMapKind,
@ -65,6 +66,7 @@ func NewVolumeServer(adminMux, publicMux *http.ServeMux, ip string,
fixJpgOrientation bool,
readMode string,
compactionMBPerSecond int,
maintenanceMBPerSecond int,
fileSizeLimitMB int,
concurrentUploadLimit int64,
concurrentDownloadLimit int64,
@ -94,6 +96,7 @@ func NewVolumeServer(adminMux, publicMux *http.ServeMux, ip string,
ReadMode: readMode,
grpcDialOption: security.LoadClientTLS(util.GetViper(), "grpc.volume"),
compactionBytePerSecond: int64(compactionMBPerSecond) * 1024 * 1024,
maintenanceBytePerSecond: int64(maintenanceMBPerSecond) * 1024 * 1024,
fileSizeLimitBytes: int64(fileSizeLimitMB) * 1024 * 1024,
isHeartbeating: true,
stopChan: make(chan bool),
@ -114,7 +117,7 @@ func NewVolumeServer(adminMux, publicMux *http.ServeMux, ip string,
vs.checkWithMaster()
vs.store = storage.NewStore(vs.grpcDialOption, ip, port, grpcPort, publicUrl, folders, maxCounts, minFreeSpaces, idxFolder, vs.needleMapKind, diskTypes, ldbTimeout)
vs.store = storage.NewStore(vs.grpcDialOption, ip, port, grpcPort, publicUrl, id, folders, maxCounts, minFreeSpaces, idxFolder, vs.needleMapKind, diskTypes, ldbTimeout)
vs.guard = security.NewGuard(whiteList, signingKey, expiresAfterSec, readSigningKey, readExpiresAfterSec)
handleStaticResources(adminMux)

31
weed/server/volume_server_handlers_admin.go

@ -4,28 +4,33 @@ import (
"net/http"
"path/filepath"
"github.com/seaweedfs/seaweedfs/weed/topology"
"github.com/seaweedfs/seaweedfs/weed/util/version"
"github.com/seaweedfs/seaweedfs/weed/pb/volume_server_pb"
"github.com/seaweedfs/seaweedfs/weed/stats"
)
// healthzHandler checks the local health of the volume server.
// It only checks local conditions to avoid cascading failures when remote
// volume servers go down. Previously, this handler checked if all replicated
// volumes could reach their remote replicas, which caused healthy volume
// servers to fail health checks when a peer went down.
// See https://github.com/seaweedfs/seaweedfs/issues/6823
func (vs *VolumeServer) healthzHandler(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Server", "SeaweedFS Volume "+version.VERSION)
volumeInfos := vs.store.VolumeInfos()
for _, vinfo := range volumeInfos {
if len(vinfo.Collection) == 0 {
continue
}
if vinfo.ReplicaPlacement.GetCopyCount() > 1 {
_, err := topology.GetWritableRemoteReplications(vs.store, vs.grpcDialOption, vinfo.Id, vs.GetMaster)
if err != nil {
w.WriteHeader(http.StatusServiceUnavailable)
return
}
}
// Check if the server is shutting down
if vs.store.IsStopping() {
w.WriteHeader(http.StatusServiceUnavailable)
return
}
// Check if we can communicate with master
if !vs.isHeartbeating {
w.WriteHeader(http.StatusServiceUnavailable)
return
}
w.WriteHeader(http.StatusOK)
}
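A trivial probe sketch showing how the simplified handler is consumed: local-only failures (stopping, not heartbeating) surface as 503 without depending on remote replicas. The address is hypothetical and the /healthz route is assumed.

package main

import (
	"fmt"
	"net/http"
)

func main() {
	// Hypothetical volume server address.
	resp, err := http.Get("http://localhost:8080/healthz")
	if err != nil {
		fmt.Println("unreachable:", err)
		return
	}
	defer resp.Body.Close()

	if resp.StatusCode == http.StatusOK {
		fmt.Println("healthy")
	} else {
		fmt.Println("unhealthy:", resp.Status) // e.g. 503 while stopping or not heartbeating
	}
}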

2
weed/server/volume_server_handlers_read.go

@ -34,8 +34,6 @@ import (
const reqIsProxied = "proxied"
var fileNameEscaper = strings.NewReplacer(`\`, `\\`, `"`, `\"`)
func NotFound(w http.ResponseWriter) {
stats.VolumeServerHandlerCounter.WithLabelValues(stats.ErrorGetNotFound).Inc()
w.WriteHeader(http.StatusNotFound)

5
weed/sftpd/sftp_file_writer.go

@ -72,6 +72,7 @@ func (l listerat) ListAt(ls []os.FileInfo, offset int64) (int, error) {
type SeaweedSftpFileWriter struct {
fs SftpServer
req *sftp.Request
absPath string // Absolute path after HomeDir translation
mu sync.Mutex
tmpFile *os.File
permissions os.FileMode
@ -105,6 +106,6 @@ func (w *SeaweedSftpFileWriter) Close() error {
return err
}
// Stream the file instead of loading it
return w.fs.putFile(w.req.Filepath, w.tmpFile, w.fs.user)
// Stream the file to the absolute path (after HomeDir translation)
return w.fs.putFile(w.absPath, w.tmpFile, w.fs.user)
}

82
weed/sftpd/sftp_filer.go

@ -100,18 +100,26 @@ func (fs *SftpServer) withTimeoutContext(fn func(ctx context.Context) error) err
// ==================== Command Dispatcher ====================
func (fs *SftpServer) dispatchCmd(r *sftp.Request) error {
glog.V(0).Infof("Dispatch: %s %s", r.Method, r.Filepath)
absPath, err := fs.toAbsolutePath(r.Filepath)
if err != nil {
return err
}
glog.V(1).Infof("Dispatch: %s %s (absolute: %s)", r.Method, r.Filepath, absPath)
switch r.Method {
case "Remove":
return fs.removeEntry(r)
return fs.removeEntry(absPath)
case "Rename":
return fs.renameEntry(r)
absTarget, err := fs.toAbsolutePath(r.Target)
if err != nil {
return err
}
return fs.renameEntry(absPath, absTarget)
case "Mkdir":
return fs.makeDir(r)
return fs.makeDir(absPath)
case "Rmdir":
return fs.removeDir(r)
return fs.removeDir(absPath)
case "Setstat":
return fs.setFileStat(r)
return fs.setFileStatWithRequest(absPath, r)
default:
return fmt.Errorf("unsupported: %s", r.Method)
}
@ -120,10 +128,14 @@ func (fs *SftpServer) dispatchCmd(r *sftp.Request) error {
// ==================== File Operations ====================
func (fs *SftpServer) readFile(r *sftp.Request) (io.ReaderAt, error) {
if err := fs.checkFilePermission(r.Filepath, "read"); err != nil {
absPath, err := fs.toAbsolutePath(r.Filepath)
if err != nil {
return nil, err
}
if err := fs.checkFilePermission(absPath, "read"); err != nil {
return nil, err
}
entry, err := fs.getEntry(r.Filepath)
entry, err := fs.getEntry(absPath)
if err != nil {
return nil, err
}
@ -131,7 +143,11 @@ func (fs *SftpServer) readFile(r *sftp.Request) (io.ReaderAt, error) {
}
func (fs *SftpServer) newFileWriter(r *sftp.Request) (io.WriterAt, error) {
dir, _ := util.FullPath(r.Filepath).DirAndName()
absPath, err := fs.toAbsolutePath(r.Filepath)
if err != nil {
return nil, err
}
dir, _ := util.FullPath(absPath).DirAndName()
if err := fs.checkFilePermission(dir, "write"); err != nil {
glog.Errorf("Permission denied for %s", dir)
return nil, err
@ -145,6 +161,7 @@ func (fs *SftpServer) newFileWriter(r *sftp.Request) (io.WriterAt, error) {
return &SeaweedSftpFileWriter{
fs: *fs,
req: r,
absPath: absPath,
tmpFile: tmpFile,
permissions: 0644,
uid: fs.user.Uid,
@ -153,16 +170,20 @@ func (fs *SftpServer) newFileWriter(r *sftp.Request) (io.WriterAt, error) {
}, nil
}
func (fs *SftpServer) removeEntry(r *sftp.Request) error {
return fs.deleteEntry(r.Filepath, false)
func (fs *SftpServer) removeEntry(absPath string) error {
return fs.deleteEntry(absPath, false)
}
func (fs *SftpServer) renameEntry(r *sftp.Request) error {
if err := fs.checkFilePermission(r.Filepath, "rename"); err != nil {
func (fs *SftpServer) renameEntry(absPath, absTarget string) error {
if err := fs.checkFilePermission(absPath, "rename"); err != nil {
return err
}
targetDir, _ := util.FullPath(absTarget).DirAndName()
if err := fs.checkFilePermission(targetDir, "write"); err != nil {
return err
}
oldDir, oldName := util.FullPath(r.Filepath).DirAndName()
newDir, newName := util.FullPath(r.Target).DirAndName()
oldDir, oldName := util.FullPath(absPath).DirAndName()
newDir, newName := util.FullPath(absTarget).DirAndName()
return fs.callWithClient(false, func(ctx context.Context, client filer_pb.SeaweedFilerClient) error {
_, err := client.AtomicRenameEntry(ctx, &filer_pb.AtomicRenameEntryRequest{
OldDirectory: oldDir, OldName: oldName,
@ -172,15 +193,15 @@ func (fs *SftpServer) renameEntry(r *sftp.Request) error {
})
}
func (fs *SftpServer) setFileStat(r *sftp.Request) error {
if err := fs.checkFilePermission(r.Filepath, "write"); err != nil {
func (fs *SftpServer) setFileStatWithRequest(absPath string, r *sftp.Request) error {
if err := fs.checkFilePermission(absPath, "write"); err != nil {
return err
}
entry, err := fs.getEntry(r.Filepath)
entry, err := fs.getEntry(absPath)
if err != nil {
return err
}
dir, _ := util.FullPath(r.Filepath).DirAndName()
dir, _ := util.FullPath(absPath).DirAndName()
// apply attrs
if r.AttrFlags().Permissions {
entry.Attributes.FileMode = uint32(r.Attributes().FileMode())
@ -201,18 +222,22 @@ func (fs *SftpServer) setFileStat(r *sftp.Request) error {
// ==================== Directory Operations ====================
func (fs *SftpServer) listDir(r *sftp.Request) (sftp.ListerAt, error) {
if err := fs.checkFilePermission(r.Filepath, "list"); err != nil {
absPath, err := fs.toAbsolutePath(r.Filepath)
if err != nil {
return nil, err
}
if err := fs.checkFilePermission(absPath, "list"); err != nil {
return nil, err
}
if r.Method == "Stat" || r.Method == "Lstat" {
entry, err := fs.getEntry(r.Filepath)
entry, err := fs.getEntry(absPath)
if err != nil {
return nil, err
}
fi := &EnhancedFileInfo{FileInfo: FileInfoFromEntry(entry), uid: entry.Attributes.Uid, gid: entry.Attributes.Gid}
return listerat([]os.FileInfo{fi}), nil
}
return fs.listAllPages(r.Filepath)
return fs.listAllPages(absPath)
}
func (fs *SftpServer) listAllPages(dirPath string) (sftp.ListerAt, error) {
@ -259,18 +284,19 @@ func (fs *SftpServer) fetchDirectoryPage(dirPath, start string) ([]os.FileInfo,
}
// makeDir creates a new directory with proper permissions.
func (fs *SftpServer) makeDir(r *sftp.Request) error {
func (fs *SftpServer) makeDir(absPath string) error {
if fs.user == nil {
return fmt.Errorf("cannot create directory: no user info")
}
dir, name := util.FullPath(r.Filepath).DirAndName()
if err := fs.checkFilePermission(r.Filepath, "mkdir"); err != nil {
dir, name := util.FullPath(absPath).DirAndName()
if err := fs.checkFilePermission(dir, "write"); err != nil {
return err
}
// default mode and ownership
err := filer_pb.Mkdir(context.Background(), fs, string(dir), name, func(entry *filer_pb.Entry) {
mode := uint32(0755 | os.ModeDir)
if strings.HasPrefix(r.Filepath, fs.user.HomeDir) {
// Defensive check: all paths should be under HomeDir after toAbsolutePath translation
if absPath == fs.user.HomeDir || strings.HasPrefix(absPath, fs.user.HomeDir+"/") {
mode = uint32(0700 | os.ModeDir)
}
entry.Attributes.FileMode = mode
@ -288,8 +314,8 @@ func (fs *SftpServer) makeDir(r *sftp.Request) error {
}
// removeDir deletes a directory.
func (fs *SftpServer) removeDir(r *sftp.Request) error {
return fs.deleteEntry(r.Filepath, false)
func (fs *SftpServer) removeDir(absPath string) error {
return fs.deleteEntry(absPath, false)
}
func (fs *SftpServer) putFile(filepath string, reader io.Reader, user *user.User) error {

24
weed/sftpd/sftp_server.go

@ -6,6 +6,8 @@ import (
"fmt"
"io"
"os"
"path"
"strings"
"time"
"github.com/pkg/sftp"
@ -37,6 +39,28 @@ func NewSftpServer(filerAddr pb.ServerAddress, grpcDialOption grpc.DialOption, d
}
}
// toAbsolutePath translates a user-relative path to an absolute filer path.
// When a user has HomeDir="/sftp/user", their view of "/" maps to "/sftp/user".
// This implements chroot-like behavior where the user's home directory
// becomes their root.
func (fs *SftpServer) toAbsolutePath(userPath string) (string, error) {
// If user has root as home directory, no translation needed
if fs.user.HomeDir == "" || fs.user.HomeDir == "/" {
return path.Clean(userPath), nil
}
// Concatenate home directory with user path, then clean to resolve any ".." components
p := path.Join(fs.user.HomeDir, strings.TrimPrefix(userPath, "/"))
// Security check: ensure the final path is within the home directory.
// This prevents path traversal attacks like `../..` that could escape the chroot jail.
if !strings.HasPrefix(p, fs.user.HomeDir+"/") && p != fs.user.HomeDir {
return "", fmt.Errorf("path traversal attempt: %s resolves to %s which is outside home dir %s", userPath, p, fs.user.HomeDir)
}
return p, nil
}
// Fileread is invoked for “get” requests.
func (fs *SftpServer) Fileread(req *sftp.Request) (io.ReaderAt, error) {
return fs.readFile(req)

103
weed/sftpd/sftp_server_test.go

@ -0,0 +1,103 @@
package sftpd
import (
"testing"
"github.com/seaweedfs/seaweedfs/weed/sftpd/user"
"github.com/stretchr/testify/assert"
)
func stringPtr(s string) *string {
return &s
}
func TestToAbsolutePath(t *testing.T) {
tests := []struct {
name string
homeDir *string // Use pointer to distinguish between unset and empty
userPath string
expected string
expectError bool
}{
{
name: "normal path",
userPath: "/foo.txt",
expected: "/sftp/testuser/foo.txt",
},
{
name: "root path",
userPath: "/",
expected: "/sftp/testuser",
},
{
name: "path with dot",
userPath: "/./foo.txt",
expected: "/sftp/testuser/foo.txt",
},
{
name: "path traversal attempts",
userPath: "/../foo.txt",
expectError: true,
},
{
name: "path traversal attempts 2",
userPath: "../../foo.txt",
expectError: true,
},
{
name: "path traversal attempts 3",
userPath: "/subdir/../../foo.txt",
expectError: true,
},
{
name: "empty path",
userPath: "",
expected: "/sftp/testuser",
},
{
name: "multiple slashes",
userPath: "//foo.txt",
expected: "/sftp/testuser/foo.txt",
},
{
name: "trailing slash",
userPath: "/foo/",
expected: "/sftp/testuser/foo",
},
{
name: "empty HomeDir passthrough",
homeDir: stringPtr(""),
userPath: "/foo.txt",
expected: "/foo.txt",
},
{
name: "root HomeDir passthrough",
homeDir: stringPtr("/"),
userPath: "/foo.txt",
expected: "/foo.txt",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
homeDir := "/sftp/testuser" // default
if tt.homeDir != nil {
homeDir = *tt.homeDir
}
fs := &SftpServer{
user: &user.User{
HomeDir: homeDir,
},
}
got, err := fs.toAbsolutePath(tt.userPath)
if tt.expectError {
assert.Error(t, err)
} else {
assert.NoError(t, err)
assert.Equal(t, tt.expected, got)
}
})
}
}

4
weed/sftpd/sftp_service.go

@ -284,8 +284,8 @@ func (s *SFTPService) handleChannel(newChannel ssh.NewChannel, fs *SftpServer) {
// handleSFTP starts the SFTP server on the SSH channel.
func (s *SFTPService) handleSFTP(channel ssh.Channel, fs *SftpServer) {
// Create server options with initial working directory set to user's home
serverOptions := sftp.WithStartDirectory(fs.user.HomeDir)
// Start at virtual root "/" - toAbsolutePath translates this to the user's HomeDir
serverOptions := sftp.WithStartDirectory("/")
server := sftp.NewRequestServer(channel, sftp.Handlers{
FileGet: fs,
FilePut: fs,

5
weed/sftpd/user/filestore.go

@ -5,6 +5,7 @@ import (
"encoding/json"
"fmt"
"os"
"path"
"sync"
"golang.org/x/crypto/ssh"
@ -99,6 +100,10 @@ func (s *FileStore) loadUsers() error {
user.PublicKeys[i] = string(pubKey.Marshal())
}
}
// Clean HomeDir to handle trailing slashes and normalize path
if user.HomeDir != "" {
user.HomeDir = path.Clean(user.HomeDir)
}
s.users[user.Username] = user
}

55
weed/shell/command_volume_check_disk.go

@ -64,9 +64,9 @@ func (c *commandVolumeCheckDisk) Help() string {
append entries in B and not in A to A
optionally, for each non-writable volume replica A
if volume is not full
select a writable volume replica B
if entries in A don't match B
prune late volume entries not matching its index file
select a writable volume replica B
append missing entries from B into A
mark the volume as writable (healthy)
@ -179,9 +179,16 @@ func (vcd *volumeCheckDisk) checkWritableVolumes(volumeReplicas map[uint32][]*Vo
writableReplicas = append(writableReplicas[:1], writableReplicas[2:]...)
continue
}
if err := vcd.syncTwoReplicas(a, b, true); err != nil {
vcd.write("sync volume %d on %s and %s: %v", a.info.Id, a.location.dataNode.Id, b.location.dataNode.Id, err)
modified, err := vcd.syncTwoReplicas(a, b, true)
if err != nil {
vcd.write("failed to sync volumes %d on %s and %s: %v", a.info.Id, a.location.dataNode.Id, b.location.dataNode.Id, err)
} else {
if modified {
vcd.write("synced %s and %s for volume %d", a.location.dataNode.Id, b.location.dataNode.Id, a.info.Id)
}
}
// always choose the larger volume to be the source
if a.info.FileCount > b.info.FileCount {
writableReplicas = append(writableReplicas[:1], writableReplicas[2:]...)
@ -280,19 +287,25 @@ func (vcd *volumeCheckDisk) checkReadOnlyVolumes(volumeReplicas map[uint32][]*Vo
return err
}
// ...fix it...
// TODO: test whether syncTwoReplicas() is enough to prune garbage entries on broken volumes.
if err := vcd.syncTwoReplicas(source, r, false); err != nil {
vcd.write("sync read-only volume %d on %s from %s: %v\n", vid, r.location.dataNode.Id, source.location.dataNode.Id, err)
// ...try to fix it...
// TODO: test whether syncTwoReplicas() is enough to prune garbage entries on broken volumes...
modified, err := vcd.syncTwoReplicas(source, r, false)
if err != nil {
vcd.write("sync read-only volume %d on %s from %s: %v", vid, r.location.dataNode.Id, source.location.dataNode.Id, err)
// ...or revert it back to read-only, if something went wrong.
// TODO: we should keep unchanged volumes as read-only, so we don't modify valid volumes which are full.
if roErr := vcd.makeVolumeReadonly(vid, r); roErr != nil {
return fmt.Errorf("failed to make volume %d on %s readonly after: %v: %v", vid, r.location.dataNode.Id, err, roErr)
return fmt.Errorf("failed to revert volume %d on %s to readonly after: %v: %v", vid, r.location.dataNode.Id, err, roErr)
}
vcd.write("volume %d on %s is now read-only\n", vid, r.location.dataNode.Id)
return err
} else {
if modified {
vcd.write("volume %d on %s is now synced to %d and writable", vid, r.location.dataNode.Id, source.location.dataNode.Id)
} else {
// ...or restore back to read-only, if no changes were made.
if err := vcd.makeVolumeReadonly(vid, r); err != nil {
return fmt.Errorf("failed to revert volume %d on %s to readonly: %v", vid, r.location.dataNode.Id, err)
}
}
}
return nil
@ -411,35 +424,39 @@ func (vcd *volumeCheckDisk) shouldSkipVolume(a, b *VolumeReplica) (bool, error)
// syncTwoReplicas attempts to sync all entries from a source volume replica into a target. If bi-directional mode
// is enabled, changes from target are also synced back into the source.
func (vcd *volumeCheckDisk) syncTwoReplicas(source, target *VolumeReplica, bidi bool) (err error) {
// Returns true if source and/or target were modified, false otherwise.
func (vcd *volumeCheckDisk) syncTwoReplicas(source, target *VolumeReplica, bidi bool) (modified bool, err error) {
sourceHasChanges, targetHasChanges := true, true
const maxIterations = 5
iteration := 0
modified = false
for (sourceHasChanges || targetHasChanges) && iteration < maxIterations {
iteration++
vcd.writeVerbose("sync iteration %d/%d for volume %d", iteration, maxIterations, source.info.Id)
prevSourceHasChanges, prevTargetHasChanges := sourceHasChanges, targetHasChanges
if sourceHasChanges, targetHasChanges, err = vcd.checkBoth(source, target, bidi); err != nil {
return err
return modified, err
}
modified = modified || sourceHasChanges || targetHasChanges
// Detect if we're stuck in a loop with no progress
if iteration > 1 && prevSourceHasChanges == sourceHasChanges && prevTargetHasChanges == targetHasChanges && (sourceHasChanges || targetHasChanges) {
vcd.write("volume %d sync is not making progress between %s and %s after iteration %d, stopping to prevent infinite loop",
source.info.Id, source.location.dataNode.Id, target.location.dataNode.Id, iteration)
return fmt.Errorf("sync not making progress after %d iterations", iteration)
return modified, fmt.Errorf("sync not making progress after %d iterations", iteration)
}
}
if iteration >= maxIterations && (sourceHasChanges || targetHasChanges) {
vcd.write("volume %d sync reached maximum iterations (%d) between %s and %s, may need manual intervention",
source.info.Id, maxIterations, source.location.dataNode.Id, target.location.dataNode.Id)
return fmt.Errorf("reached maximum sync iterations (%d)", maxIterations)
return modified, fmt.Errorf("reached maximum sync iterations (%d)", maxIterations)
}
return nil
return modified, nil
}
// checkBoth performs a sync between source and target volume replicas. If bi-directional mode is enabled, changes from target are also synced back into the source.
@ -628,7 +645,7 @@ func (vcd *volumeCheckDisk) copyVolumeIndexFile(collection string, volumeId uint
copyFileClient, err := volumeServerClient.CopyFile(context.Background(), &volume_server_pb.CopyFileRequest{
VolumeId: volumeId,
Ext: ".idx",
Ext: ext,
CompactionRevision: math.MaxUint32,
StopOffset: math.MaxInt64,
Collection: collection,

45
weed/shell/command_volume_server_evacuate.go

@ -4,7 +4,6 @@ import (
"flag"
"fmt"
"io"
"os"
"slices"
@ -159,6 +158,9 @@ func (c *commandVolumeServerEvacuate) evacuateNormalVolumes(commandEnv *CommandE
func (c *commandVolumeServerEvacuate) evacuateEcVolumes(commandEnv *CommandEnv, volumeServer string, skipNonMoveable, applyChange bool, writer io.Writer) error {
// Evacuate EC volumes for all disk types
// We need to handle each disk type separately because shards should be moved to nodes with the same disk type
// We collect topology once at the start and track capacity changes ourselves
// (via freeEcSlot decrement after each move) rather than repeatedly refreshing,
// which would give a false sense of correctness since topology could be stale.
diskTypes := []types.DiskType{types.HardDriveType, types.SsdType}
for _, diskType := range diskTypes {
@ -176,9 +178,9 @@ func (c *commandVolumeServerEvacuate) evacuateEcVolumes(commandEnv *CommandEnv,
continue
}
for _, ecShardInfo := range diskInfo.EcShardInfos {
hasMoved, err := c.moveAwayOneEcVolume(commandEnv, ecShardInfo, thisNode, otherNodes, applyChange, diskType)
hasMoved, err := c.moveAwayOneEcVolume(commandEnv, ecShardInfo, thisNode, otherNodes, applyChange, diskType, writer)
if err != nil {
fmt.Fprintf(writer, "move away volume %d from %s: %v", ecShardInfo.Id, volumeServer, err)
fmt.Fprintf(writer, "move away volume %d from %s: %v\n", ecShardInfo.Id, volumeServer, err)
}
if !hasMoved {
if skipNonMoveable {
@ -193,14 +195,31 @@ func (c *commandVolumeServerEvacuate) evacuateEcVolumes(commandEnv *CommandEnv,
return nil
}
func (c *commandVolumeServerEvacuate) moveAwayOneEcVolume(commandEnv *CommandEnv, ecShardInfo *master_pb.VolumeEcShardInformationMessage, thisNode *EcNode, otherNodes []*EcNode, applyChange bool, diskType types.DiskType) (hasMoved bool, err error) {
func (c *commandVolumeServerEvacuate) moveAwayOneEcVolume(commandEnv *CommandEnv, ecShardInfo *master_pb.VolumeEcShardInformationMessage, thisNode *EcNode, otherNodes []*EcNode, applyChange bool, diskType types.DiskType, writer io.Writer) (hasMoved bool, err error) {
for _, shardId := range erasure_coding.ShardBits(ecShardInfo.EcIndexBits).ShardIds() {
// Sort by: 1) fewest shards of this volume, 2) most free EC slots
// This ensures we prefer nodes with capacity and balanced shard distribution
slices.SortFunc(otherNodes, func(a, b *EcNode) int {
return a.localShardIdCount(ecShardInfo.Id) - b.localShardIdCount(ecShardInfo.Id)
aShards := a.localShardIdCount(ecShardInfo.Id)
bShards := b.localShardIdCount(ecShardInfo.Id)
if aShards != bShards {
return aShards - bShards // Prefer fewer shards
}
return b.freeEcSlot - a.freeEcSlot // Then prefer more free slots
})
shardMoved := false
skippedNodes := 0
for i := 0; i < len(otherNodes); i++ {
emptyNode := otherNodes[i]
// Skip nodes with no free EC slots
if emptyNode.freeEcSlot <= 0 {
skippedNodes++
continue
}
collectionPrefix := ""
if ecShardInfo.Collection != "" {
collectionPrefix = ecShardInfo.Collection + "_"
@ -209,19 +228,29 @@ func (c *commandVolumeServerEvacuate) moveAwayOneEcVolume(commandEnv *CommandEnv
// For evacuation, prefer same disk type but allow fallback to other types
destDiskId := pickBestDiskOnNode(emptyNode, vid, diskType, false)
if destDiskId > 0 {
fmt.Fprintf(os.Stdout, "moving ec volume %s%d.%d %s => %s (disk %d)\n", collectionPrefix, ecShardInfo.Id, shardId, thisNode.info.Id, emptyNode.info.Id, destDiskId)
fmt.Fprintf(writer, "moving ec volume %s%d.%d %s => %s (disk %d)\n", collectionPrefix, ecShardInfo.Id, shardId, thisNode.info.Id, emptyNode.info.Id, destDiskId)
} else {
fmt.Fprintf(os.Stdout, "moving ec volume %s%d.%d %s => %s\n", collectionPrefix, ecShardInfo.Id, shardId, thisNode.info.Id, emptyNode.info.Id)
fmt.Fprintf(writer, "moving ec volume %s%d.%d %s => %s\n", collectionPrefix, ecShardInfo.Id, shardId, thisNode.info.Id, emptyNode.info.Id)
}
err = moveMountedShardToEcNode(commandEnv, thisNode, ecShardInfo.Collection, vid, shardId, emptyNode, destDiskId, applyChange, diskType)
if err != nil {
hasMoved = false
return
} else {
hasMoved = true
shardMoved = true
// Update the node's free slot count after successful move
emptyNode.freeEcSlot--
break
}
}
if !hasMoved {
if !shardMoved {
if skippedNodes > 0 {
fmt.Fprintf(writer, "no available destination for ec shard %d.%d: %d nodes have no free slots\n",
ecShardInfo.Id, shardId, skippedNodes)
}
// Ensure partial moves are reported as failures to prevent data loss
hasMoved = false
return
}
}
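The destination ordering used above (fewest shards of the evacuating volume first, then most free EC slots) can be isolated into a small two-key comparator; the node type here is a stand-in for illustration, not the real EcNode.

package main

import (
	"fmt"
	"slices"
)

type node struct {
	name       string
	shardCount int // shards of the volume being evacuated already on this node
	freeEcSlot int
}

func main() {
	nodes := []node{
		{"n1", 2, 10},
		{"n2", 0, 3},
		{"n3", 0, 8},
	}
	// Prefer fewer existing shards, then more free EC slots.
	slices.SortFunc(nodes, func(a, b node) int {
		if a.shardCount != b.shardCount {
			return a.shardCount - b.shardCount
		}
		return b.freeEcSlot - a.freeEcSlot
	})
	fmt.Println(nodes) // [{n3 0 8} {n2 0 3} {n1 2 10}]
}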

6
weed/storage/needle/needle_parse_upload.go

@ -128,7 +128,7 @@ func parseUpload(r *http.Request, sizeLimit int64, pu *ParsedUpload) (e error) {
pu.FileName = part.FileName()
if pu.FileName != "" {
pu.FileName = path.Base(pu.FileName)
pu.FileName = util.CleanWindowsPathBase(pu.FileName)
}
dataSize, e = pu.bytesBuffer.ReadFrom(io.LimitReader(part, sizeLimit+1))
@ -169,7 +169,7 @@ func parseUpload(r *http.Request, sizeLimit int64, pu *ParsedUpload) (e error) {
// update
pu.Data = pu.bytesBuffer.Bytes()
pu.FileName = path.Base(fName)
pu.FileName = util.CleanWindowsPathBase(fName)
contentType = part.Header.Get("Content-Type")
part = part2
break
@ -207,7 +207,7 @@ func parseUpload(r *http.Request, sizeLimit int64, pu *ParsedUpload) (e error) {
}
if pu.FileName != "" {
pu.FileName = path.Base(pu.FileName)
pu.FileName = util.CleanWindowsPathBase(pu.FileName)
} else {
pu.FileName = path.Base(r.URL.Path)
}

12
weed/storage/store.go

@ -63,6 +63,7 @@ type Store struct {
Port int
GrpcPort int
PublicUrl string
Id string // volume server id, independent of ip:port for stable identification
Locations []*DiskLocation
dataCenter string // optional information, overwriting master setting if exists
rack string // optional information, overwriting master setting if exists
@ -76,13 +77,13 @@ type Store struct {
}
func (s *Store) String() (str string) {
str = fmt.Sprintf("Ip:%s, Port:%d, GrpcPort:%d PublicUrl:%s, dataCenter:%s, rack:%s, connected:%v, volumeSizeLimit:%d", s.Ip, s.Port, s.GrpcPort, s.PublicUrl, s.dataCenter, s.rack, s.connected, s.GetVolumeSizeLimit())
str = fmt.Sprintf("Id:%s, Ip:%s, Port:%d, GrpcPort:%d PublicUrl:%s, dataCenter:%s, rack:%s, connected:%v, volumeSizeLimit:%d", s.Id, s.Ip, s.Port, s.GrpcPort, s.PublicUrl, s.dataCenter, s.rack, s.connected, s.GetVolumeSizeLimit())
return
}
func NewStore(grpcDialOption grpc.DialOption, ip string, port int, grpcPort int, publicUrl string, dirnames []string, maxVolumeCounts []int32,
func NewStore(grpcDialOption grpc.DialOption, ip string, port int, grpcPort int, publicUrl string, id string, dirnames []string, maxVolumeCounts []int32,
minFreeSpaces []util.MinFreeSpace, idxFolder string, needleMapKind NeedleMapKind, diskTypes []DiskType, ldbTimeout int64) (s *Store) {
s = &Store{grpcDialOption: grpcDialOption, Port: port, Ip: ip, GrpcPort: grpcPort, PublicUrl: publicUrl, NeedleMapKind: needleMapKind}
s = &Store{grpcDialOption: grpcDialOption, Port: port, Ip: ip, GrpcPort: grpcPort, PublicUrl: publicUrl, Id: id, NeedleMapKind: needleMapKind}
s.Locations = make([]*DiskLocation, 0)
var wg sync.WaitGroup
@ -414,6 +415,7 @@ func (s *Store) CollectHeartbeat() *master_pb.Heartbeat {
Port: uint32(s.Port),
GrpcPort: uint32(s.GrpcPort),
PublicUrl: s.PublicUrl,
Id: s.Id,
MaxVolumeCounts: maxVolumeCounts,
MaxFileKey: NeedleIdToUint64(maxFileKey),
DataCenter: s.dataCenter,
@ -467,6 +469,10 @@ func (s *Store) SetStopping() {
}
}
func (s *Store) IsStopping() bool {
return s.isStopping
}
func (s *Store) LoadNewVolumes() {
for _, location := range s.Locations {
location.loadExistingVolumes(s.NeedleMapKind, 0)

17
weed/storage/store_ec_delete.go

@ -3,6 +3,7 @@ package storage
import (
"context"
"fmt"
"github.com/seaweedfs/seaweedfs/weed/pb"
"github.com/seaweedfs/seaweedfs/weed/glog"
@ -21,7 +22,8 @@ func (s *Store) DeleteEcShardNeedle(ecVolume *erasure_coding.EcVolume, n *needle
return 0, err
}
if cookie != n.Cookie {
// cookie == 0 indicates SkipCookieCheck was requested (e.g., orphan cleanup)
if cookie != 0 && cookie != n.Cookie {
return 0, fmt.Errorf("unexpected cookie %x", cookie)
}
@ -45,22 +47,17 @@ func (s *Store) doDeleteNeedleFromAtLeastOneRemoteEcShards(ecVolume *erasure_cod
shardId, _ := intervals[0].ToShardIdAndOffset(erasure_coding.ErasureCodingLargeBlockSize, erasure_coding.ErasureCodingSmallBlockSize)
hasDeletionSuccess := false
err = s.doDeleteNeedleFromRemoteEcShardServers(shardId, ecVolume, needleId)
if err == nil {
hasDeletionSuccess = true
return nil
}
for shardId = erasure_coding.DataShardsCount; shardId < erasure_coding.TotalShardsCount; shardId++ {
if parityDeletionError := s.doDeleteNeedleFromRemoteEcShardServers(shardId, ecVolume, needleId); parityDeletionError == nil {
hasDeletionSuccess = true
return nil
}
}
if hasDeletionSuccess {
return nil
}
return err
}
@ -77,11 +74,9 @@ func (s *Store) doDeleteNeedleFromRemoteEcShardServers(shardId erasure_coding.Sh
for _, sourceDataNode := range sourceDataNodes {
glog.V(4).Infof("delete from remote ec shard %d.%d from %s", ecVolume.VolumeId, shardId, sourceDataNode)
err := s.doDeleteNeedleFromRemoteEcShard(sourceDataNode, ecVolume.VolumeId, ecVolume.Collection, ecVolume.Version, needleId)
if err != nil {
if err := s.doDeleteNeedleFromRemoteEcShard(sourceDataNode, ecVolume.VolumeId, ecVolume.Collection, ecVolume.Version, needleId); err != nil {
return err
}
glog.V(1).Infof("delete from remote ec shard %d.%d from %s: %v", ecVolume.VolumeId, shardId, sourceDataNode, err)
}
return nil

2
weed/storage/store_load_balancing_test.go

@ -31,7 +31,7 @@ func newTestStore(t *testing.T, numDirs int) *Store {
diskTypes = append(diskTypes, types.HardDriveType)
}
store := NewStore(nil, "localhost", 8080, 18080, "http://localhost:8080",
store := NewStore(nil, "localhost", 8080, 18080, "http://localhost:8080", "",
dirs, maxCounts, minFreeSpaces, "", NeedleMapInMemory, diskTypes, 3)
// Consume channel messages to prevent blocking

1
weed/topology/data_node.go

@ -269,6 +269,7 @@ func (dn *DataNode) ToDataNodeInfo() *master_pb.DataNodeInfo {
Id: string(dn.Id()),
DiskInfos: make(map[string]*master_pb.DiskInfo),
GrpcPort: uint32(dn.GrpcPort),
Address: dn.Url(), // ip:port for connecting to the volume server
}
for _, c := range dn.Children() {
disk := c.(*Disk)

69
weed/topology/rack.go

@ -5,6 +5,7 @@ import (
"strings"
"time"
"github.com/seaweedfs/seaweedfs/weed/glog"
"github.com/seaweedfs/seaweedfs/weed/pb/master_pb"
"github.com/seaweedfs/seaweedfs/weed/storage/types"
"github.com/seaweedfs/seaweedfs/weed/util"
@ -34,17 +35,73 @@ func (r *Rack) FindDataNode(ip string, port int) *DataNode {
}
return nil
}
func (r *Rack) GetOrCreateDataNode(ip string, port int, grpcPort int, publicUrl string, maxVolumeCounts map[string]uint32) *DataNode {
// FindDataNodeById finds a DataNode by its ID using O(1) map lookup
func (r *Rack) FindDataNodeById(id string) *DataNode {
r.RLock()
defer r.RUnlock()
if c, ok := r.children[NodeId(id)]; ok {
return c.(*DataNode)
}
return nil
}
func (r *Rack) GetOrCreateDataNode(ip string, port int, grpcPort int, publicUrl string, id string, maxVolumeCounts map[string]uint32) *DataNode {
r.Lock()
defer r.Unlock()
for _, c := range r.children {
// Normalize the id parameter (trim whitespace)
id = strings.TrimSpace(id)
// Determine the node ID: use provided id, or fall back to ip:port for backward compatibility
nodeId := util.GetVolumeServerId(id, ip, port)
// First, try to find by node ID using O(1) map lookup (stable identity)
if c, ok := r.children[NodeId(nodeId)]; ok {
dn := c.(*DataNode)
if dn.MatchLocation(ip, port) {
dn.LastSeen = time.Now().Unix()
return dn
// Log if IP or Port changed (e.g., pod rescheduled in K8s)
if dn.Ip != ip || dn.Port != port {
glog.V(0).Infof("DataNode %s address changed from %s:%d to %s:%d", nodeId, dn.Ip, dn.Port, ip, port)
}
// Update the IP/Port in case they changed
dn.Ip = ip
dn.Port = port
dn.GrpcPort = grpcPort
dn.PublicUrl = publicUrl
dn.LastSeen = time.Now().Unix()
return dn
}
dn := NewDataNode(util.JoinHostPort(ip, port))
// For backward compatibility: if explicit id was provided, also check by ip:port
// to handle transition from old (ip:port) to new (explicit id) behavior
ipPortId := util.JoinHostPort(ip, port)
if nodeId != ipPortId {
for oldId, c := range r.children {
dn := c.(*DataNode)
if dn.MatchLocation(ip, port) {
// Only transition if the oldId exactly matches ip:port (legacy identification).
// If oldId is different, this is a node with an explicit id that happens to
// reuse the same ip:port - don't incorrectly merge them.
if string(oldId) != ipPortId {
glog.Warningf("Volume server with id %s has ip:port %s which is used by node %s", nodeId, ipPortId, oldId)
continue
}
// Found a legacy node identified by ip:port, transition it to use the new explicit id
glog.V(0).Infof("Volume server %s transitioning id from %s to %s", dn.Url(), oldId, nodeId)
// Re-key the node in the children map with the new id
delete(r.children, oldId)
dn.id = NodeId(nodeId)
r.children[NodeId(nodeId)] = dn
// Update connection info in case they changed
dn.GrpcPort = grpcPort
dn.PublicUrl = publicUrl
dn.LastSeen = time.Now().Unix()
return dn
}
}
}
dn := NewDataNode(nodeId)
dn.Ip = ip
dn.Port = port
dn.GrpcPort = grpcPort

119
weed/topology/topology_test.go

@ -34,7 +34,7 @@ func TestHandlingVolumeServerHeartbeat(t *testing.T) {
maxVolumeCounts := make(map[string]uint32)
maxVolumeCounts[""] = 25
maxVolumeCounts["ssd"] = 12
dn := rack.GetOrCreateDataNode("127.0.0.1", 34534, 0, "127.0.0.1", maxVolumeCounts)
dn := rack.GetOrCreateDataNode("127.0.0.1", 34534, 0, "127.0.0.1", "", maxVolumeCounts)
{
volumeCount := 7
@ -180,7 +180,7 @@ func TestAddRemoveVolume(t *testing.T) {
maxVolumeCounts := make(map[string]uint32)
maxVolumeCounts[""] = 25
maxVolumeCounts["ssd"] = 12
dn := rack.GetOrCreateDataNode("127.0.0.1", 34534, 0, "127.0.0.1", maxVolumeCounts)
dn := rack.GetOrCreateDataNode("127.0.0.1", 34534, 0, "127.0.0.1", "", maxVolumeCounts)
v := storage.VolumeInfo{
Id: needle.VolumeId(1),
@ -218,7 +218,7 @@ func TestVolumeReadOnlyStatusChange(t *testing.T) {
rack := dc.GetOrCreateRack("rack1")
maxVolumeCounts := make(map[string]uint32)
maxVolumeCounts[""] = 25
dn := rack.GetOrCreateDataNode("127.0.0.1", 34534, 0, "127.0.0.1", maxVolumeCounts)
dn := rack.GetOrCreateDataNode("127.0.0.1", 34534, 0, "127.0.0.1", "", maxVolumeCounts)
// Create a writable volume
v := storage.VolumeInfo{
@ -267,7 +267,7 @@ func TestVolumeReadOnlyAndRemoteStatusChange(t *testing.T) {
rack := dc.GetOrCreateRack("rack1")
maxVolumeCounts := make(map[string]uint32)
maxVolumeCounts[""] = 25
dn := rack.GetOrCreateDataNode("127.0.0.1", 34534, 0, "127.0.0.1", maxVolumeCounts)
dn := rack.GetOrCreateDataNode("127.0.0.1", 34534, 0, "127.0.0.1", "", maxVolumeCounts)
// Create a writable, local volume
v := storage.VolumeInfo{
@ -331,7 +331,7 @@ func TestListCollections(t *testing.T) {
topo := NewTopology("weedfs", sequence.NewMemorySequencer(), 32*1024, 5, false)
dc := topo.GetOrCreateDataCenter("dc1")
rack := dc.GetOrCreateRack("rack1")
dn := rack.GetOrCreateDataNode("127.0.0.1", 34534, 0, "127.0.0.1", nil)
dn := rack.GetOrCreateDataNode("127.0.0.1", 34534, 0, "127.0.0.1", "", nil)
topo.RegisterVolumeLayout(storage.VolumeInfo{
Id: needle.VolumeId(1111),
@ -396,3 +396,112 @@ func TestListCollections(t *testing.T) {
})
}
}
func TestDataNodeIdBasedIdentification(t *testing.T) {
topo := NewTopology("weedfs", sequence.NewMemorySequencer(), 32*1024, 5, false)
dc := topo.GetOrCreateDataCenter("dc1")
rack := dc.GetOrCreateRack("rack1")
maxVolumeCounts := make(map[string]uint32)
maxVolumeCounts[""] = 10
// Test 1: Create a DataNode with explicit id
dn1 := rack.GetOrCreateDataNode("10.0.0.1", 8080, 18080, "10.0.0.1:8080", "node-1", maxVolumeCounts)
if string(dn1.Id()) != "node-1" {
t.Errorf("expected node id 'node-1', got '%s'", dn1.Id())
}
if dn1.Ip != "10.0.0.1" {
t.Errorf("expected ip '10.0.0.1', got '%s'", dn1.Ip)
}
// Test 2: Same id with different IP should return the same DataNode (K8s pod reschedule scenario)
dn2 := rack.GetOrCreateDataNode("10.0.0.2", 8080, 18080, "10.0.0.2:8080", "node-1", maxVolumeCounts)
if dn1 != dn2 {
t.Errorf("expected same DataNode for same id, got different nodes")
}
// IP should be updated to the new value
if dn2.Ip != "10.0.0.2" {
t.Errorf("expected ip to be updated to '10.0.0.2', got '%s'", dn2.Ip)
}
if dn2.PublicUrl != "10.0.0.2:8080" {
t.Errorf("expected publicUrl to be updated to '10.0.0.2:8080', got '%s'", dn2.PublicUrl)
}
// Test 3: Different id should create a new DataNode
dn3 := rack.GetOrCreateDataNode("10.0.0.3", 8080, 18080, "10.0.0.3:8080", "node-2", maxVolumeCounts)
if string(dn3.Id()) != "node-2" {
t.Errorf("expected node id 'node-2', got '%s'", dn3.Id())
}
if dn1 == dn3 {
t.Errorf("expected different DataNode for different id")
}
// Test 4: Empty id should fall back to ip:port (backward compatibility)
dn4 := rack.GetOrCreateDataNode("10.0.0.4", 8080, 18080, "10.0.0.4:8080", "", maxVolumeCounts)
if string(dn4.Id()) != "10.0.0.4:8080" {
t.Errorf("expected node id '10.0.0.4:8080' for empty id, got '%s'", dn4.Id())
}
// Test 5: Same ip:port with empty id should return the same DataNode
dn5 := rack.GetOrCreateDataNode("10.0.0.4", 8080, 18080, "10.0.0.4:8080", "", maxVolumeCounts)
if dn4 != dn5 {
t.Errorf("expected same DataNode for same ip:port with empty id")
}
// Verify we have 3 unique DataNodes total:
// - node-1 (dn1/dn2 share the same id)
// - node-2 (dn3)
// - 10.0.0.4:8080 (dn4/dn5 share the same ip:port)
children := rack.Children()
if len(children) != 3 {
t.Errorf("expected 3 DataNodes, got %d", len(children))
}
// Test 6: Transition from ip:port to explicit id
// First, the node exists with ip:port as id (dn4/dn5)
// Now the same volume server starts sending an explicit id
dn6 := rack.GetOrCreateDataNode("10.0.0.4", 8080, 18080, "10.0.0.4:8080", "node-4-explicit", maxVolumeCounts)
// Should return the same DataNode instance
if dn6 != dn4 {
t.Errorf("expected same DataNode instance during transition")
}
// But the id should now be updated to the explicit id
if string(dn6.Id()) != "node-4-explicit" {
t.Errorf("expected node id to transition to 'node-4-explicit', got '%s'", dn6.Id())
}
// The node should be re-keyed in the children map
if rack.FindDataNodeById("node-4-explicit") != dn6 {
t.Errorf("expected to find DataNode by new explicit id")
}
// Old ip:port key should no longer work
if rack.FindDataNodeById("10.0.0.4:8080") != nil {
t.Errorf("expected old ip:port id to be removed from children map")
}
// Still 3 unique DataNodes (node-1, node-2, node-4-explicit)
children = rack.Children()
if len(children) != 3 {
t.Errorf("expected 3 DataNodes after transition, got %d", len(children))
}
// Test 7: Prevent incorrect transition when a new node reuses ip:port of a node with explicit id
// Scenario: node-1 runs at 10.0.0.1:8080, dies, new node-99 starts at same ip:port
// The transition should NOT happen because node-1 already has an explicit id
dn7 := rack.GetOrCreateDataNode("10.0.0.1", 8080, 18080, "10.0.0.1:8080", "node-99", maxVolumeCounts)
// Should create a NEW DataNode, not reuse node-1
if dn7 == dn1 {
t.Errorf("expected new DataNode for node-99, got reused node-1")
}
if string(dn7.Id()) != "node-99" {
t.Errorf("expected node id 'node-99', got '%s'", dn7.Id())
}
// node-1 should still exist with its original id
if rack.FindDataNodeById("node-1") == nil {
t.Errorf("node-1 should still exist")
}
// Now we have 4 DataNodes
children = rack.Children()
if len(children) != 4 {
t.Errorf("expected 4 DataNodes, got %d", len(children))
}
}

13
weed/util/fullpath.go

@ -1,6 +1,7 @@
package util
import (
"path"
"path/filepath"
"strings"
)
@ -85,3 +86,15 @@ func StringSplit(separatedValues string, sep string) []string {
}
return strings.Split(separatedValues, sep)
}
// CleanWindowsPath normalizes Windows-style backslashes to forward slashes.
// This handles paths from Windows clients where paths use backslashes.
func CleanWindowsPath(p string) string {
return strings.ReplaceAll(p, "\\", "/")
}
// CleanWindowsPathBase normalizes Windows-style backslashes to forward slashes
// and returns the base name of the path.
func CleanWindowsPathBase(p string) string {
return path.Base(strings.ReplaceAll(p, "\\", "/"))
}
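A quick illustration of the new helper's effect. To keep the sketch self-contained outside the repo, the same logic is inlined here rather than importing the util package.

package main

import (
	"fmt"
	"path"
	"strings"
)

// cleanWindowsPathBase mirrors util.CleanWindowsPathBase above:
// normalize backslashes to forward slashes, then take the base name.
func cleanWindowsPathBase(p string) string {
	return path.Base(strings.ReplaceAll(p, "\\", "/"))
}

func main() {
	fmt.Println(cleanWindowsPathBase(`C:\Users\alice\photo.jpg`)) // photo.jpg
	fmt.Println(cleanWindowsPathBase("docs/report.pdf"))          // report.pdf
	fmt.Println(cleanWindowsPathBase("photo.jpg"))                // photo.jpg
}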

108
weed/util/http/http_global_client_util.go

@ -487,6 +487,12 @@ func RetriedFetchChunkData(ctx context.Context, buffer []byte, urlStrings []stri
)
}
// For unencrypted, non-gzipped full chunks, use direct buffer read
// This avoids the 64KB intermediate buffer and callback overhead
if cipherKey == nil && !isGzipped && isFullChunk {
return retriedFetchChunkDataDirect(ctx, buffer, urlStrings, string(jwt))
}
var shouldRetry bool
for waitTime := time.Second; waitTime < util.RetryWaitTime; waitTime += waitTime / 2 {
@ -551,3 +557,105 @@ func RetriedFetchChunkData(ctx context.Context, buffer []byte, urlStrings []stri
return n, err
}
// retriedFetchChunkDataDirect reads chunk data directly into the buffer without
// intermediate buffering. This reduces memory copies and improves throughput
// for large chunk reads.
func retriedFetchChunkDataDirect(ctx context.Context, buffer []byte, urlStrings []string, jwt string) (n int, err error) {
var shouldRetry bool
for waitTime := time.Second; waitTime < util.RetryWaitTime; waitTime += waitTime / 2 {
select {
case <-ctx.Done():
return 0, ctx.Err()
default:
}
for _, urlString := range urlStrings {
select {
case <-ctx.Done():
return 0, ctx.Err()
default:
}
n, shouldRetry, err = readUrlDirectToBuffer(ctx, urlString+"?readDeleted=true", jwt, buffer)
if err == nil {
return n, nil
}
if !shouldRetry {
break
}
glog.V(0).InfofCtx(ctx, "read %s failed, err: %v", urlString, err)
}
if err != nil && shouldRetry {
glog.V(0).InfofCtx(ctx, "retry reading in %v", waitTime)
timer := time.NewTimer(waitTime)
select {
case <-ctx.Done():
timer.Stop()
return 0, ctx.Err()
case <-timer.C:
}
} else {
break
}
}
return n, err
}
// readUrlDirectToBuffer reads HTTP response directly into the provided buffer,
// avoiding intermediate buffer allocations and copies.
func readUrlDirectToBuffer(ctx context.Context, fileUrl, jwt string, buffer []byte) (n int, retryable bool, err error) {
req, err := http.NewRequestWithContext(ctx, http.MethodGet, fileUrl, nil)
if err != nil {
return 0, false, err
}
maybeAddAuth(req, jwt)
request_id.InjectToRequest(ctx, req)
r, err := GetGlobalHttpClient().Do(req)
if err != nil {
return 0, true, err
}
defer CloseResponse(r)
if r.StatusCode >= 400 {
if r.StatusCode == http.StatusNotFound {
return 0, true, fmt.Errorf("%s: %s: %w", fileUrl, r.Status, ErrNotFound)
}
if r.StatusCode == http.StatusTooManyRequests {
return 0, false, fmt.Errorf("%s: %s: %w", fileUrl, r.Status, ErrTooManyRequests)
}
retryable = r.StatusCode >= 499
return 0, retryable, fmt.Errorf("%s: %s", fileUrl, r.Status)
}
// Read directly into the buffer without intermediate copying
// This is significantly faster for large chunks (16MB+)
var totalRead int
for totalRead < len(buffer) {
select {
case <-ctx.Done():
return totalRead, false, ctx.Err()
default:
}
m, readErr := r.Body.Read(buffer[totalRead:])
totalRead += m
if readErr != nil {
if readErr == io.EOF {
// Return io.ErrUnexpectedEOF if we haven't filled the buffer
// This prevents silent data corruption from truncated responses
if totalRead < len(buffer) {
return totalRead, true, io.ErrUnexpectedEOF
}
return totalRead, false, nil
}
return totalRead, true, readErr
}
}
return totalRead, false, nil
}
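
The read loop above is effectively io.ReadFull plus a per-iteration context check: a body shorter than the buffer must surface as an error rather than as a silent short read. A small standalone sketch of that semantic (illustrative only, not part of the diff):

package main

import (
    "fmt"
    "io"
    "strings"
)

func main() {
    // A 4-byte body cannot fill a 10-byte buffer. io.ReadFull reports
    // io.ErrUnexpectedEOF instead of silently returning a short read,
    // which is the same signal the direct reader treats as retryable.
    buffer := make([]byte, 10)
    n, err := io.ReadFull(strings.NewReader("body"), buffer)
    fmt.Println(n, err) // 4 unexpected EOF
}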

119
weed/util/net_timeout.go

@@ -9,22 +9,11 @@ import (
"github.com/seaweedfs/seaweedfs/weed/stats"
)
- const (
- // minThroughputBytesPerSecond defines the minimum expected throughput (4KB/s)
- // Used to calculate timeout scaling based on data transferred
- minThroughputBytesPerSecond = 4000
- // graceTimeCapMultiplier caps the grace period for slow clients at 3x base timeout
- // This prevents indefinite connections while allowing time for server-side chunk fetches
- graceTimeCapMultiplier = 3
- )
// Listener wraps a net.Listener, and gives a place to store the timeout
// parameters. On Accept, it will wrap the net.Conn with our own Conn for us.
type Listener struct {
net.Listener
- ReadTimeout time.Duration
- WriteTimeout time.Duration
+ Timeout time.Duration
}
func (l *Listener) Accept() (net.Conn, error) {
@@ -34,103 +23,50 @@ func (l *Listener) Accept() (net.Conn, error) {
}
stats.ConnectionOpen()
tc := &Conn{
- Conn: c,
- ReadTimeout: l.ReadTimeout,
- WriteTimeout: l.WriteTimeout,
+ Conn: c,
+ Timeout: l.Timeout,
}
return tc, nil
}
- // Conn wraps a net.Conn, and sets a deadline for every read
- // and write operation.
+ // Conn wraps a net.Conn and implements a "no activity timeout".
+ // Any activity (read or write) resets the deadline, so the connection
+ // only times out when there's no activity in either direction.
type Conn struct {
net.Conn
- ReadTimeout time.Duration
- WriteTimeout time.Duration
- isClosed bool
- bytesRead int64
- bytesWritten int64
- lastWrite time.Time
+ Timeout time.Duration
+ isClosed bool
}
- // calculateBytesPerTimeout calculates the expected number of bytes that should
- // be transferred during one timeout period, based on the minimum throughput.
- // Returns at least 1 to prevent division by zero.
- func calculateBytesPerTimeout(timeout time.Duration) int64 {
- bytesPerTimeout := int64(float64(minThroughputBytesPerSecond) * timeout.Seconds())
- if bytesPerTimeout <= 0 {
- return 1 // Prevent division by zero
+ // extendDeadline extends the connection deadline from now.
+ // This implements "no activity timeout" - any activity keeps the connection alive.
+ func (c *Conn) extendDeadline() error {
+ if c.Timeout > 0 {
+ return c.Conn.SetDeadline(time.Now().Add(c.Timeout))
}
- return bytesPerTimeout
+ return nil
}
func (c *Conn) Read(b []byte) (count int, e error) {
- if c.ReadTimeout != 0 {
- // Calculate expected bytes per timeout period based on minimum throughput (4KB/s)
- // Example: with ReadTimeout=30s, bytesPerTimeout = 4000 * 30 = 120KB
- // After reading 1MB: multiplier = 1,000,000/120,000 + 1 ≈ 9, deadline = 30s * 9 = 270s
- bytesPerTimeout := calculateBytesPerTimeout(c.ReadTimeout)
- timeoutMultiplier := time.Duration(c.bytesRead/bytesPerTimeout + 1)
- err := c.Conn.SetReadDeadline(time.Now().Add(c.ReadTimeout * timeoutMultiplier))
- if err != nil {
- return 0, err
- }
+ // Extend deadline before reading - any activity keeps connection alive
+ if err := c.extendDeadline(); err != nil {
+ return 0, err
+ }
count, e = c.Conn.Read(b)
if e == nil {
stats.BytesIn(int64(count))
- c.bytesRead += int64(count)
}
return
}
func (c *Conn) Write(b []byte) (count int, e error) {
- if c.WriteTimeout != 0 {
- now := time.Now()
- // Calculate timeout with two components:
- // 1. Base timeout scaled by cumulative data (minimum throughput of 4KB/s)
- // 2. Additional grace period if there was a gap since last write (for chunk fetch delays)
- // Calculate expected bytes per timeout period based on minimum throughput (4KB/s)
- // Example: with WriteTimeout=30s, bytesPerTimeout = 4000 * 30 = 120KB
- // After writing 1MB: multiplier = 1,000,000/120,000 + 1 ≈ 9, baseTimeout = 30s * 9 = 270s
- bytesPerTimeout := calculateBytesPerTimeout(c.WriteTimeout)
- timeoutMultiplier := time.Duration(c.bytesWritten/bytesPerTimeout + 1)
- baseTimeout := c.WriteTimeout * timeoutMultiplier
- // If it's been a while since last write, add grace time for server-side chunk fetches
- // But cap it to avoid keeping slow clients connected indefinitely
- //
- // The comparison uses unscaled WriteTimeout intentionally: triggers grace when idle time
- // exceeds base timeout, independent of throughput scaling.
- if !c.lastWrite.IsZero() {
- timeSinceLastWrite := now.Sub(c.lastWrite)
- if timeSinceLastWrite > c.WriteTimeout {
- // Add grace time capped at graceTimeCapMultiplier * scaled timeout.
- // This allows total deadline up to 4x scaled timeout for server-side delays.
- //
- // Example: WriteTimeout=30s, 1MB written (multiplier≈9), baseTimeout=270s
- // If 400s gap occurs fetching chunks: graceTime capped at 270s*3=810s
- // Final deadline: 270s + 810s = 1080s (~18min) to accommodate slow storage
- // But if only 50s gap: graceTime = 50s, final deadline = 270s + 50s = 320s
- graceTime := timeSinceLastWrite
- if graceTime > baseTimeout*graceTimeCapMultiplier {
- graceTime = baseTimeout * graceTimeCapMultiplier
- }
- baseTimeout += graceTime
- }
- }
- err := c.Conn.SetWriteDeadline(now.Add(baseTimeout))
- if err != nil {
- return 0, err
- }
+ // Extend deadline before writing - any activity keeps connection alive
+ if err := c.extendDeadline(); err != nil {
+ return 0, err
+ }
count, e = c.Conn.Write(b)
if e == nil {
stats.BytesOut(int64(count))
- c.bytesWritten += int64(count)
- c.lastWrite = time.Now()
}
return
}
@@ -153,9 +89,8 @@ func NewListener(addr string, timeout time.Duration) (ipListener net.Listener, e
}
ipListener = &Listener{
- Listener: listener,
- ReadTimeout: timeout,
- WriteTimeout: timeout,
+ Listener: listener,
+ Timeout: timeout,
}
return
@@ -168,9 +103,8 @@ func NewIpAndLocalListeners(host string, port int, timeout time.Duration) (ipLis
}
ipListener = &Listener{
- Listener: listener,
- ReadTimeout: timeout,
- WriteTimeout: timeout,
+ Listener: listener,
+ Timeout: timeout,
}
if host != "localhost" && host != "" && host != "0.0.0.0" && host != "127.0.0.1" && host != "[::]" && host != "[::1]" {
@@ -181,9 +115,8 @@ func NewIpAndLocalListeners(host string, port int, timeout time.Duration) (ipLis
}
localListener = &Listener{
- Listener: listener,
- ReadTimeout: timeout,
- WriteTimeout: timeout,
+ Listener: listener,
+ Timeout: timeout,
}
}
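
For reference, the "no activity timeout" pattern that replaces the throughput-scaled deadlines can be summarized in a small standalone sketch (hypothetical wrapper names, not SeaweedFS code): any successful read or write pushes one shared deadline forward, so the connection only drops after Timeout passes with no traffic in either direction.

package idletimeout

import (
    "net"
    "time"
)

// idleConn mirrors the idea above: one deadline, refreshed on any activity.
type idleConn struct {
    net.Conn
    Timeout time.Duration
}

func (c *idleConn) extend() error {
    if c.Timeout > 0 {
        return c.Conn.SetDeadline(time.Now().Add(c.Timeout))
    }
    return nil
}

func (c *idleConn) Read(b []byte) (int, error) {
    if err := c.extend(); err != nil {
        return 0, err
    }
    return c.Conn.Read(b)
}

func (c *idleConn) Write(b []byte) (int, error) {
    if err := c.extend(); err != nil {
        return 0, err
    }
    return c.Conn.Write(b)
}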

11
weed/util/network.go

@@ -64,3 +64,14 @@ func JoinHostPort(host string, port int) string {
}
return net.JoinHostPort(host, portStr)
}
// GetVolumeServerId returns the volume server ID.
// If id is provided (non-empty after trimming), use it as the identifier.
// Otherwise, fall back to ip:port for backward compatibility.
func GetVolumeServerId(id, ip string, port int) string {
volumeServerId := strings.TrimSpace(id)
if volumeServerId == "" {
volumeServerId = JoinHostPort(ip, port)
}
return volumeServerId
}
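
A quick usage sketch for GetVolumeServerId (illustrative values; import path assumed from the repo layout): an explicit, non-blank id wins, otherwise the ip:port pair preserves the old identifier.

package main

import (
    "fmt"

    "github.com/seaweedfs/seaweedfs/weed/util"
)

func main() {
    fmt.Println(util.GetVolumeServerId("vs-1", "10.0.0.5", 8080)) // "vs-1"
    fmt.Println(util.GetVolumeServerId("   ", "10.0.0.5", 8080))  // "10.0.0.5:8080"
}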