Browse Source

start master, volume servers, filer

Current Status
 Master: Healthy and running (port 9333)
 Filer: Healthy and running (port 8888)
 Volume Servers: All 6 servers running (ports 8080-8085)
🔄 Admin/Workers: Will start when dependencies are ready
worker-execute-ec-tasks
chrislu 4 months ago
parent
commit
167ab29eb6
  1. 33
      docker/admin_integration/Dockerfile.admin
  2. 44
      docker/admin_integration/Dockerfile.load
  3. 48
      docker/admin_integration/Dockerfile.monitor
  4. 33
      docker/admin_integration/Dockerfile.worker
  5. 433
      docker/admin_integration/EC-TESTING-README.md
  6. 301
      docker/admin_integration/Makefile
  7. 153
      docker/admin_integration/admin-entrypoint.sh
  8. 393
      docker/admin_integration/docker-compose-ec-test.yml
  9. 21
      docker/admin_integration/load-entrypoint.sh
  10. 352
      docker/admin_integration/load-generator.go
  11. 38
      docker/admin_integration/monitor-entrypoint.sh
  12. 366
      docker/admin_integration/monitor.go
  13. 106
      docker/admin_integration/run-ec-test.sh
  14. 230
      docker/admin_integration/worker-entrypoint.sh

33
docker/admin_integration/Dockerfile.admin

@ -0,0 +1,33 @@
# Final stage
FROM alpine:latest
# Install dependencies including Go for the entrypoint script
RUN apk --no-cache add curl ca-certificates go
WORKDIR /root/
# Copy admin server binary (if it exists) or create a simple one
COPY ./docker/admin_integration/admin-entrypoint.sh /entrypoint.sh
RUN chmod +x /entrypoint.sh
# Create directories
RUN mkdir -p /data /config /work
# Expose admin port
EXPOSE 9900
# Set environment variables
ENV MASTER_ADDRESS="master:9333"
ENV ADMIN_PORT="9900"
ENV SCAN_INTERVAL="30s"
ENV WORKER_TIMEOUT="5m"
ENV TASK_TIMEOUT="30m"
ENV MAX_RETRIES="3"
ENV MAX_CONCURRENT_TASKS="5"
# Health check
HEALTHCHECK --interval=15s --timeout=5s --start-period=30s --retries=3 \
CMD curl -f http://localhost:9900/health || exit 1
# Start admin server
ENTRYPOINT ["/entrypoint.sh"]

44
docker/admin_integration/Dockerfile.load

@ -0,0 +1,44 @@
FROM golang:1.24-alpine AS builder
# Install dependencies
RUN apk add --no-cache git build-base
# Set working directory
WORKDIR /app
# Copy and create load generator
COPY ./docker/admin_integration/load-generator.go .
COPY go.mod go.sum ./
RUN go mod download
RUN CGO_ENABLED=0 GOOS=linux go build -a -installsuffix cgo -o load-generator load-generator.go
# Final stage
FROM alpine:latest
# Install dependencies
RUN apk --no-cache add curl ca-certificates openssl
WORKDIR /root/
# Copy the binary
COPY --from=builder /app/load-generator .
# Copy load generator script
COPY ./docker/admin_integration/load-entrypoint.sh /entrypoint.sh
RUN chmod +x /entrypoint.sh
# Create directories for test data
RUN mkdir -p /test-data /temp
# Set environment variables
ENV FILER_ADDRESS="filer:8888"
ENV MASTER_ADDRESS="master:9333"
ENV WRITE_RATE="10"
ENV DELETE_RATE="2"
ENV FILE_SIZE_MIN="1MB"
ENV FILE_SIZE_MAX="5MB"
ENV TEST_DURATION="3600"
ENV COLLECTION=""
# Start load generator
ENTRYPOINT ["/entrypoint.sh"]

48
docker/admin_integration/Dockerfile.monitor

@ -0,0 +1,48 @@
FROM golang:1.24-alpine AS builder
# Install dependencies
RUN apk add --no-cache git build-base
# Set working directory
WORKDIR /app
# Copy and create monitor
COPY ./docker/admin_integration/monitor.go .
COPY go.mod go.sum ./
RUN go mod download
RUN CGO_ENABLED=0 GOOS=linux go build -a -installsuffix cgo -o monitor monitor.go
# Final stage
FROM alpine:latest
# Install dependencies
RUN apk --no-cache add curl ca-certificates jq
WORKDIR /root/
# Copy the binary
COPY --from=builder /app/monitor .
# Copy monitor scripts
COPY ./docker/admin_integration/monitor-entrypoint.sh /entrypoint.sh
RUN chmod +x /entrypoint.sh
# Create monitoring directories
RUN mkdir -p /monitor-data /logs
# Expose monitor port
EXPOSE 9999
# Set environment variables
ENV MASTER_ADDRESS="master:9333"
ENV ADMIN_ADDRESS="admin:9900"
ENV FILER_ADDRESS="filer:8888"
ENV MONITOR_INTERVAL="10s"
ENV LOG_LEVEL="info"
# Health check
HEALTHCHECK --interval=30s --timeout=5s --start-period=30s --retries=3 \
CMD curl -f http://localhost:9999/health || exit 1
# Start monitor
ENTRYPOINT ["/entrypoint.sh"]

33
docker/admin_integration/Dockerfile.worker

@ -0,0 +1,33 @@
# Final stage
FROM alpine:latest
# Install dependencies including Go for the entrypoint script
RUN apk --no-cache add curl ca-certificates go
WORKDIR /root/
# Copy worker entrypoint script
COPY ./docker/admin_integration/worker-entrypoint.sh /entrypoint.sh
RUN chmod +x /entrypoint.sh
# Create working directories
RUN mkdir -p /work /tmp/ec_work
# Expose worker port
EXPOSE 9001
# Set environment variables
ENV ADMIN_ADDRESS="admin:9900"
ENV WORKER_ID="worker-1"
ENV WORKER_ADDRESS="worker:9001"
ENV CAPABILITIES="erasure_coding"
ENV MAX_CONCURRENT="2"
ENV WORK_DIR="/work"
ENV HEARTBEAT_INTERVAL="10s"
# Health check
HEALTHCHECK --interval=15s --timeout=5s --start-period=30s --retries=3 \
CMD curl -f http://localhost:9001/health || exit 1
# Start worker
ENTRYPOINT ["/entrypoint.sh"]

433
docker/admin_integration/EC-TESTING-README.md

@ -0,0 +1,433 @@
# SeaweedFS EC Worker Testing Environment
This Docker Compose setup provides a comprehensive testing environment for SeaweedFS Erasure Coding (EC) workers with real workload simulation.
## 📂 Directory Structure
The testing environment is located in `docker/admin_integration/` and includes:
```
docker/admin_integration/
├── Makefile # Main management interface
├── docker-compose-ec-test.yml # Docker compose configuration
├── EC-TESTING-README.md # This documentation
├── Dockerfile.admin # Admin server image
├── Dockerfile.worker # EC worker image
├── Dockerfile.load # Load generator image
├── Dockerfile.monitor # Monitor service image
├── admin-entrypoint.sh # Admin server startup script
├── worker-entrypoint.sh # Worker startup script
├── load-generator.go # Load generator source code
├── load-entrypoint.sh # Load generator startup script
├── monitor.go # Monitor service source code
└── monitor-entrypoint.sh # Monitor startup script
```
## 🏗️ Architecture
The testing environment includes:
- **1 Master Server** (port 9333) - Coordinates the cluster with 50MB volume size limit
- **6 Volume Servers** (ports 8080-8085) - Distributed across 2 data centers and 3 racks for diversity
- **1 Filer** (port 8888) - Provides file system interface
- **1 Admin Server** (port 9900) - Detects volumes needing EC and manages workers
- **3 EC Workers** - Execute erasure coding tasks with different capabilities
- **1 Load Generator** - Continuously writes and deletes files to trigger EC
- **1 Monitor** (port 9999) - Tracks cluster health and EC progress
## 🚀 Quick Start
### Prerequisites
- Docker and Docker Compose installed
- GNU Make installed
- At least 4GB RAM available for containers
- Ports 8080-8085, 8888, 9333, 9900, 9999 available
### Start the Environment
```bash
# Navigate to the admin integration directory
cd docker/admin_integration/
# Show available commands
make help
# Start the complete testing environment
make start
```
The `make start` command will:
1. Build all necessary Docker images
2. Start all services in the correct order
3. Wait for services to be ready
4. Display monitoring URLs and run health checks
### Alternative Commands
```bash
# Quick start aliases
make up # Same as 'make start'
# Development mode (higher load for faster testing)
make dev-start
# Build images without starting
make build
```
## 📋 Available Make Targets
Run `make help` to see all available targets:
### **🚀 Main Operations**
- `make start` - Start the complete EC testing environment
- `make stop` - Stop all services
- `make restart` - Restart all services
- `make clean` - Complete cleanup (containers, volumes, images)
### **📊 Monitoring & Status**
- `make health` - Check health of all services
- `make status` - Show status of all containers
- `make urls` - Display all monitoring URLs
- `make monitor` - Open monitor dashboard in browser
- `make monitor-status` - Show monitor status via API
- `make volume-status` - Show volume status from master
- `make admin-status` - Show admin server status
- `make cluster-status` - Show complete cluster status
### **📋 Logs Management**
- `make logs` - Show logs from all services
- `make logs-admin` - Show admin server logs
- `make logs-workers` - Show all worker logs
- `make logs-worker1/2/3` - Show specific worker logs
- `make logs-load` - Show load generator logs
- `make logs-monitor` - Show monitor logs
- `make backup-logs` - Backup all logs to files
### **⚖️ Scaling & Testing**
- `make scale-workers WORKERS=5` - Scale workers to 5 instances
- `make scale-load RATE=25` - Increase load generation rate
- `make test-ec` - Run focused EC test scenario
### **🔧 Development & Debug**
- `make shell-admin` - Open shell in admin container
- `make shell-worker1` - Open shell in worker container
- `make debug` - Show debug information
- `make troubleshoot` - Run troubleshooting checks
## 📊 Monitoring URLs
| Service | URL | Description |
|---------|-----|-------------|
| Master UI | http://localhost:9333 | Cluster status and topology |
| Filer | http://localhost:8888 | File operations |
| Admin Server | http://localhost:9900/status | Task management |
| Monitor | http://localhost:9999/status | Complete cluster monitoring |
| Volume Servers | http://localhost:8080-8085/status | Individual volume server stats |
Quick access: `make urls` or `make monitor`
## 🔄 How EC Testing Works
### 1. Continuous Load Generation
- **Write Rate**: 10 files/second (1-5MB each)
- **Delete Rate**: 2 files/second
- **Target**: Fill volumes to 50MB limit quickly
### 2. Volume Detection
- Admin server scans master every 30 seconds
- Identifies volumes >40MB (80% of 50MB limit)
- Queues EC tasks for eligible volumes
### 3. EC Worker Assignment
- **Worker 1**: EC specialist (max 2 concurrent tasks)
- **Worker 2**: EC + Vacuum hybrid (max 2 concurrent tasks)
- **Worker 3**: EC + Vacuum hybrid (max 1 concurrent task)
### 4. Comprehensive EC Process
Each EC task follows 6 phases:
1. **Copy Volume Data** (5-15%) - Stream .dat/.idx files locally
2. **Mark Read-Only** (20-25%) - Ensure data consistency
3. **Local Encoding** (30-60%) - Create 14 shards (10+4 Reed-Solomon)
4. **Calculate Placement** (65-70%) - Smart rack-aware distribution
5. **Distribute Shards** (75-90%) - Upload to optimal servers
6. **Verify & Cleanup** (95-100%) - Validate and clean temporary files
### 5. Real-Time Monitoring
- Volume analysis and EC candidate detection
- Worker health and task progress
- No data loss verification
- Performance metrics
## 📋 Key Features Tested
### ✅ EC Implementation Features
- [x] Local volume data copying with progress tracking
- [x] Local Reed-Solomon encoding (10+4 shards)
- [x] Intelligent shard placement with rack awareness
- [x] Load balancing across available servers
- [x] Backup server selection for redundancy
- [x] Detailed step-by-step progress tracking
- [x] Comprehensive error handling and recovery
### ✅ Infrastructure Features
- [x] Multi-datacenter topology (dc1, dc2)
- [x] Rack diversity (rack1, rack2, rack3)
- [x] Volume size limits (50MB)
- [x] Worker capability matching
- [x] Health monitoring and alerting
- [x] Continuous workload simulation
## 🛠️ Common Usage Patterns
### Basic Testing Workflow
```bash
# Start environment
make start
# Watch progress
make monitor-status
# Check for EC candidates
make volume-status
# View worker activity
make logs-workers
# Stop when done
make stop
```
### High-Load Testing
```bash
# Start with higher load
make dev-start
# Scale up workers and load
make scale-workers WORKERS=5
make scale-load RATE=50
# Monitor intensive EC activity
make logs-admin
```
### Debugging Issues
```bash
# Check port conflicts and system state
make troubleshoot
# View specific service logs
make logs-admin
make logs-worker1
# Get shell access for debugging
make shell-admin
make shell-worker1
# Check detailed status
make debug
```
### Development Iteration
```bash
# Quick restart after code changes
make restart
# Rebuild and restart
make clean
make start
# Monitor specific components
make logs-monitor
```
## 📈 Expected Results
### Successful EC Testing Shows:
1. **Volume Growth**: Steady increase in volume sizes toward 50MB limit
2. **EC Detection**: Admin server identifies volumes >40MB for EC
3. **Task Assignment**: Workers receive and execute EC tasks
4. **Shard Distribution**: 14 shards distributed across 6 volume servers
5. **No Data Loss**: All files remain accessible during and after EC
6. **Performance**: EC tasks complete within estimated timeframes
### Sample Monitor Output:
```bash
# Check current status
make monitor-status
# Output example:
{
"monitor": {
"uptime": "15m30s",
"master_addr": "master:9333",
"admin_addr": "admin:9900"
},
"stats": {
"VolumeCount": 12,
"ECTasksDetected": 3,
"WorkersActive": 3
}
}
```
## 🔧 Configuration
### Environment Variables
You can customize the environment by setting variables:
```bash
# High load testing
WRITE_RATE=25 DELETE_RATE=5 make start
# Extended test duration
TEST_DURATION=7200 make start # 2 hours
```
### Scaling Examples
```bash
# Scale workers
make scale-workers WORKERS=6
# Increase load generation
make scale-load RATE=30
# Combined scaling
make scale-workers WORKERS=4
make scale-load RATE=40
```
## 🧹 Cleanup Options
```bash
# Stop services only
make stop
# Remove containers but keep volumes
make down
# Remove data volumes only
make clean-volumes
# Remove built images only
make clean-images
# Complete cleanup (everything)
make clean
```
## 🐛 Troubleshooting
### Quick Diagnostics
```bash
# Run complete troubleshooting
make troubleshoot
# Check specific components
make health
make debug
make status
```
### Common Issues
**Services not starting:**
```bash
# Check port availability
make troubleshoot
# View startup logs
make logs-master
make logs-admin
```
**No EC tasks being created:**
```bash
# Check volume status
make volume-status
# Increase load to fill volumes faster
make scale-load RATE=30
# Check admin detection
make logs-admin
```
**Workers not responding:**
```bash
# Check worker registration
make admin-status
# View worker logs
make logs-workers
# Restart workers
make restart
```
### Performance Tuning
**For faster testing:**
```bash
make dev-start # Higher default load
make scale-load RATE=50 # Very high load
```
**For stress testing:**
```bash
make scale-workers WORKERS=8
make scale-load RATE=100
```
## 📚 Technical Details
### Network Architecture
- Custom bridge network (172.20.0.0/16)
- Service discovery via container names
- Health checks for all services
### Storage Layout
- Each volume server: max 100 volumes
- Data centers: dc1, dc2
- Racks: rack1, rack2, rack3
- Volume limit: 50MB per volume
### EC Algorithm
- Reed-Solomon RS(10,4)
- 10 data shards + 4 parity shards
- Rack-aware distribution
- Backup server redundancy
### Make Integration
- Color-coded output for better readability
- Comprehensive help system (`make help`)
- Parallel execution support
- Error handling and cleanup
- Cross-platform compatibility
## 🎯 Quick Reference
```bash
# Essential commands
make help # Show all available targets
make start # Start complete environment
make health # Check all services
make monitor # Open dashboard
make logs-admin # View admin activity
make clean # Complete cleanup
# Monitoring
make volume-status # Check for EC candidates
make admin-status # Check task queue
make monitor-status # Full cluster status
# Scaling & Testing
make test-ec # Run focused EC test
make scale-load RATE=X # Increase load
make troubleshoot # Diagnose issues
```
This environment provides a realistic testing scenario for SeaweedFS EC workers with actual data operations, comprehensive monitoring, and easy management through Make targets.

301
docker/admin_integration/Makefile

@ -0,0 +1,301 @@
# SeaweedFS EC Worker Testing Environment Makefile
# Usage: make <target>
.PHONY: help start stop clean logs status monitor health up down restart scale docs test
# Default target
.DEFAULT_GOAL := help
# Docker compose file
COMPOSE_FILE := docker-compose-ec-test.yml
# Color codes for output
GREEN := \033[32m
YELLOW := \033[33m
BLUE := \033[34m
RED := \033[31m
NC := \033[0m # No Color
help: ## Show this help message
@echo "$(BLUE)🧪 SeaweedFS EC Worker Testing Environment$(NC)"
@echo "$(BLUE)===========================================$(NC)"
@echo ""
@echo "$(YELLOW)Available targets:$(NC)"
@awk 'BEGIN {FS = ":.*?## "} /^[a-zA-Z_-]+:.*?## / {printf " $(GREEN)%-15s$(NC) %s\n", $$1, $$2}' $(MAKEFILE_LIST)
@echo ""
@echo "$(YELLOW)Quick start:$(NC) make start"
@echo "$(YELLOW)Monitor:$(NC) make monitor"
@echo "$(YELLOW)Cleanup:$(NC) make clean"
start: ## Start the complete EC testing environment
@echo "$(GREEN)🚀 Starting SeaweedFS EC testing environment...$(NC)"
@echo "$(BLUE)This will start:$(NC)"
@echo " • 1 Master server (port 9333)"
@echo " • 6 Volume servers (ports 8080-8085) with 50MB volume limit"
@echo " • 1 Filer (port 8888)"
@echo " • 1 Admin server (port 9900)"
@echo " • 3 EC Workers"
@echo " • 1 Load generator (continuous read/write)"
@echo " • 1 Monitor (port 9999)"
@echo ""
@mkdir -p monitor-data admin-config
@chmod +x *.sh 2>/dev/null || true
@docker-compose -f $(COMPOSE_FILE) down -v 2>/dev/null || true
@docker-compose -f $(COMPOSE_FILE) up --build -d
@echo ""
@echo "$(GREEN)✅ Environment started successfully!$(NC)"
@echo ""
@$(MAKE) urls
@echo ""
@echo "$(YELLOW)⏳ Waiting for services to be ready...$(NC)"
@sleep 10
@$(MAKE) health
stop: ## Stop all services
@echo "$(YELLOW)🛑 Stopping all services...$(NC)"
@docker-compose -f $(COMPOSE_FILE) stop
@echo "$(GREEN)✅ All services stopped$(NC)"
down: ## Stop and remove all containers
@echo "$(YELLOW)🛑 Stopping and removing containers...$(NC)"
@docker-compose -f $(COMPOSE_FILE) down
@echo "$(GREEN)✅ Containers stopped and removed$(NC)"
clean: ## Stop and remove all containers, networks, volumes, and images
@echo "$(RED)🧹 Cleaning up entire environment...$(NC)"
@docker-compose -f $(COMPOSE_FILE) down -v --rmi all 2>/dev/null || true
@docker system prune -f
@echo "$(GREEN)✅ Environment cleaned up$(NC)"
restart: ## Restart all services
@echo "$(YELLOW)🔄 Restarting all services...$(NC)"
@docker-compose -f $(COMPOSE_FILE) restart
@echo "$(GREEN)✅ All services restarted$(NC)"
up: start ## Alias for start
status: ## Show status of all services
@echo "$(BLUE)📊 Service Status:$(NC)"
@docker-compose -f $(COMPOSE_FILE) ps
logs: ## Show logs from all services
@echo "$(BLUE)📋 Showing logs from all services (Ctrl+C to exit):$(NC)"
@docker-compose -f $(COMPOSE_FILE) logs -f
logs-admin: ## Show admin server logs
@echo "$(BLUE)📋 Admin Server Logs:$(NC)"
@docker-compose -f $(COMPOSE_FILE) logs -f admin
logs-workers: ## Show all worker logs
@echo "$(BLUE)📋 Worker Logs:$(NC)"
@docker-compose -f $(COMPOSE_FILE) logs -f worker1 worker2 worker3
logs-worker1: ## Show worker1 logs
@docker-compose -f $(COMPOSE_FILE) logs -f worker1
logs-worker2: ## Show worker2 logs
@docker-compose -f $(COMPOSE_FILE) logs -f worker2
logs-worker3: ## Show worker3 logs
@docker-compose -f $(COMPOSE_FILE) logs -f worker3
logs-load: ## Show load generator logs
@echo "$(BLUE)📋 Load Generator Logs:$(NC)"
@docker-compose -f $(COMPOSE_FILE) logs -f load_generator
logs-monitor: ## Show monitor logs
@echo "$(BLUE)📋 Monitor Logs:$(NC)"
@docker-compose -f $(COMPOSE_FILE) logs -f monitor
logs-master: ## Show master logs
@docker-compose -f $(COMPOSE_FILE) logs -f master
logs-volumes: ## Show all volume server logs
@echo "$(BLUE)📋 Volume Server Logs:$(NC)"
@docker-compose -f $(COMPOSE_FILE) logs -f volume1 volume2 volume3 volume4 volume5 volume6
urls: ## Show monitoring URLs
@echo "$(BLUE)📊 Monitoring URLs:$(NC)"
@echo " • Master UI: http://localhost:9333"
@echo " • Filer: http://localhost:8888"
@echo " • Admin Server: http://localhost:9900/status"
@echo " • Monitor: http://localhost:9999/status"
@echo ""
@echo "$(BLUE)📈 Volume Servers:$(NC)"
@echo " • Volume1: http://localhost:8080/status"
@echo " • Volume2: http://localhost:8081/status"
@echo " • Volume3: http://localhost:8082/status"
@echo " • Volume4: http://localhost:8083/status"
@echo " • Volume5: http://localhost:8084/status"
@echo " • Volume6: http://localhost:8085/status"
health: ## Check health of all services
@echo "$(BLUE)🔍 Checking service health...$(NC)"
@echo -n " Master: "; \
if curl -s http://localhost:9333/cluster/status > /dev/null 2>&1; then \
echo "$(GREEN)✅ Healthy$(NC)"; \
else \
echo "$(RED)❌ Not responding$(NC)"; \
fi
@echo -n " Filer: "; \
if curl -s http://localhost:8888/ > /dev/null 2>&1; then \
echo "$(GREEN)✅ Healthy$(NC)"; \
else \
echo "$(RED)❌ Not responding$(NC)"; \
fi
@echo -n " Admin: "; \
if curl -s http://localhost:9900/health > /dev/null 2>&1; then \
echo "$(GREEN)✅ Healthy$(NC)"; \
else \
echo "$(RED)❌ Not responding$(NC)"; \
fi
@echo -n " Monitor: "; \
if curl -s http://localhost:9999/health > /dev/null 2>&1; then \
echo "$(GREEN)✅ Healthy$(NC)"; \
else \
echo "$(RED)❌ Not responding$(NC)"; \
fi
monitor: ## Open monitor dashboard in browser
@echo "$(BLUE)📊 Opening monitor dashboard...$(NC)"
@echo "Monitor URL: http://localhost:9999/status"
@command -v open >/dev/null 2>&1 && open http://localhost:9999/status || \
command -v xdg-open >/dev/null 2>&1 && xdg-open http://localhost:9999/status || \
echo "Please open http://localhost:9999/status in your browser"
monitor-status: ## Show current monitoring status via API
@echo "$(BLUE)📊 Current Monitor Status:$(NC)"
@curl -s http://localhost:9999/status | jq . 2>/dev/null || \
curl -s http://localhost:9999/status 2>/dev/null || \
echo "Monitor not available"
volume-status: ## Show volume status from master
@echo "$(BLUE)💾 Volume Status:$(NC)"
@curl -s http://localhost:9333/vol/status | jq . 2>/dev/null || \
curl -s http://localhost:9333/vol/status 2>/dev/null || \
echo "Master not available"
admin-status: ## Show admin server status
@echo "$(BLUE)🏭 Admin Server Status:$(NC)"
@curl -s http://localhost:9900/status | jq . 2>/dev/null || \
curl -s http://localhost:9900/status 2>/dev/null || \
echo "Admin server not available"
cluster-status: ## Show complete cluster status
@echo "$(BLUE)🌐 Cluster Status:$(NC)"
@curl -s http://localhost:9333/cluster/status | jq . 2>/dev/null || \
curl -s http://localhost:9333/cluster/status 2>/dev/null || \
echo "Master not available"
scale-workers: ## Scale workers (usage: make scale-workers WORKERS=5)
@echo "$(YELLOW)⚖️ Scaling workers to $(or $(WORKERS),3)...$(NC)"
@docker-compose -f $(COMPOSE_FILE) up -d --scale worker2=$(or $(WORKERS),3)
scale-load: ## Restart load generator with higher rate (usage: make scale-load RATE=20)
@echo "$(YELLOW)📈 Scaling load generation to $(or $(RATE),20) files/sec...$(NC)"
@docker-compose -f $(COMPOSE_FILE) stop load_generator
@docker-compose -f $(COMPOSE_FILE) run -d --name temp_load_generator \
-e WRITE_RATE=$(or $(RATE),20) -e DELETE_RATE=$(or $(shell expr $(or $(RATE),20) / 4),5) \
load_generator
@echo "$(GREEN)✅ Load generator restarted with higher rate$(NC)"
test-ec: ## Run a focused EC test scenario
@echo "$(YELLOW)🧪 Running focused EC test...$(NC)"
@$(MAKE) scale-load RATE=25
@echo "$(BLUE)Monitoring EC detection...$(NC)"
@echo "Watch for volumes >40MB that trigger EC conversion"
@echo "Monitor at: http://localhost:9999/status"
shell-admin: ## Open shell in admin container
@docker-compose -f $(COMPOSE_FILE) exec admin /bin/sh
shell-worker1: ## Open shell in worker1 container
@docker-compose -f $(COMPOSE_FILE) exec worker1 /bin/sh
shell-master: ## Open shell in master container
@docker-compose -f $(COMPOSE_FILE) exec master /bin/sh
docs: ## Show documentation
@echo "$(BLUE)📖 EC Testing Documentation:$(NC)"
@echo ""
@cat EC-TESTING-README.md
build: ## Build all Docker images without starting
@echo "$(YELLOW)🔨 Building all Docker images...$(NC)"
@docker-compose -f $(COMPOSE_FILE) build
@echo "$(GREEN)✅ All images built$(NC)"
pull: ## Pull latest SeaweedFS image
@echo "$(YELLOW)📥 Pulling latest SeaweedFS image...$(NC)"
@docker pull chrislusf/seaweedfs:latest
@echo "$(GREEN)✅ Latest image pulled$(NC)"
debug: ## Show debug information
@echo "$(BLUE)🔍 Debug Information:$(NC)"
@echo ""
@echo "$(YELLOW)Docker Compose Version:$(NC)"
@docker-compose --version
@echo ""
@echo "$(YELLOW)Docker Version:$(NC)"
@docker --version
@echo ""
@echo "$(YELLOW)Current Directory:$(NC)"
@pwd
@echo ""
@echo "$(YELLOW)Available Files:$(NC)"
@ls -la *.yml *.sh *.md 2>/dev/null || echo "No config files found"
@echo ""
@echo "$(YELLOW)Running Containers:$(NC)"
@docker ps --format "table {{.Names}}\t{{.Status}}\t{{.Ports}}"
# Targets for development and testing
dev-start: ## Start with development settings (faster iteration)
@echo "$(YELLOW)🛠️ Starting development environment...$(NC)"
@mkdir -p monitor-data admin-config
@WRITE_RATE=50 DELETE_RATE=10 docker-compose -f $(COMPOSE_FILE) up --build -d
@echo "$(GREEN)✅ Development environment started with high load$(NC)"
dev-stop: stop ## Stop development environment
# Clean specific components
clean-volumes: ## Remove only data volumes
@echo "$(YELLOW)🗄️ Removing data volumes...$(NC)"
@docker-compose -f $(COMPOSE_FILE) down -v
@echo "$(GREEN)✅ Data volumes removed$(NC)"
clean-images: ## Remove built images
@echo "$(YELLOW)🖼️ Removing built images...$(NC)"
@docker-compose -f $(COMPOSE_FILE) down --rmi local
@echo "$(GREEN)✅ Built images removed$(NC)"
# Backup and restore
backup-logs: ## Backup all service logs
@echo "$(YELLOW)💾 Backing up service logs...$(NC)"
@mkdir -p logs-backup
@docker-compose -f $(COMPOSE_FILE) logs admin > logs-backup/admin.log 2>&1
@docker-compose -f $(COMPOSE_FILE) logs worker1 > logs-backup/worker1.log 2>&1
@docker-compose -f $(COMPOSE_FILE) logs worker2 > logs-backup/worker2.log 2>&1
@docker-compose -f $(COMPOSE_FILE) logs worker3 > logs-backup/worker3.log 2>&1
@docker-compose -f $(COMPOSE_FILE) logs monitor > logs-backup/monitor.log 2>&1
@docker-compose -f $(COMPOSE_FILE) logs load_generator > logs-backup/load_generator.log 2>&1
@echo "$(GREEN)✅ Logs backed up to logs-backup/$(NC)"
# Quick troubleshooting
troubleshoot: ## Run troubleshooting checks
@echo "$(BLUE)🔧 Running troubleshooting checks...$(NC)"
@echo ""
@echo "$(YELLOW)1. Checking required ports:$(NC)"
@for port in 9333 8888 9900 9999 8080 8081 8082 8083 8084 8085; do \
echo -n " Port $$port: "; \
if lsof -i :$$port >/dev/null 2>&1; then \
echo "$(RED)❌ In use$(NC)"; \
else \
echo "$(GREEN)✅ Available$(NC)"; \
fi; \
done
@echo ""
@echo "$(YELLOW)2. Docker resources:$(NC)"
@docker system df
@echo ""
@echo "$(YELLOW)3. Service health:$(NC)"
@$(MAKE) health

153
docker/admin_integration/admin-entrypoint.sh

@ -0,0 +1,153 @@
#!/bin/sh
set -e
echo "Starting SeaweedFS Admin Server..."
echo "Master Address: $MASTER_ADDRESS"
echo "Admin Port: $ADMIN_PORT"
echo "Scan Interval: $SCAN_INTERVAL"
# Wait for master to be ready
echo "Waiting for master to be ready..."
until curl -f http://$MASTER_ADDRESS/cluster/status > /dev/null 2>&1; do
echo "Master not ready, waiting..."
sleep 5
done
echo "Master is ready!"
# For now, use a simple HTTP server to simulate admin functionality
# In a real implementation, this would start the actual admin server
cat > /tmp/admin_server.go << 'EOF'
package main
import (
"encoding/json"
"fmt"
"log"
"net/http"
"os"
"strconv"
"time"
)
type AdminServer struct {
masterAddr string
port string
startTime time.Time
tasks []Task
workers []Worker
}
type Task struct {
ID string `json:"id"`
Type string `json:"type"`
VolumeID int `json:"volume_id"`
Status string `json:"status"`
Progress float64 `json:"progress"`
Created time.Time `json:"created"`
}
type Worker struct {
ID string `json:"id"`
Address string `json:"address"`
Capabilities []string `json:"capabilities"`
Status string `json:"status"`
LastSeen time.Time `json:"last_seen"`
}
func (s *AdminServer) healthHandler(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Content-Type", "application/json")
json.NewEncoder(w).Encode(map[string]interface{}{
"status": "healthy",
"uptime": time.Since(s.startTime).String(),
"tasks": len(s.tasks),
"workers": len(s.workers),
})
}
func (s *AdminServer) statusHandler(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Content-Type", "application/json")
json.NewEncoder(w).Encode(map[string]interface{}{
"admin_server": "running",
"master_addr": s.masterAddr,
"tasks": s.tasks,
"workers": s.workers,
"uptime": time.Since(s.startTime).String(),
})
}
func (s *AdminServer) detectVolumesForEC() {
// Simulate volume detection logic
// In real implementation, this would query the master for volume status
ticker := time.NewTicker(30 * time.Second)
go func() {
for range ticker.C {
log.Println("Scanning for volumes requiring EC...")
// Check master for volume status
resp, err := http.Get(fmt.Sprintf("http://%s/vol/status", s.masterAddr))
if err != nil {
log.Printf("Error checking master: %v", err)
continue
}
resp.Body.Close()
// Simulate detecting a volume that needs EC
if len(s.tasks) < 5 { // Don't create too many tasks
taskID := fmt.Sprintf("ec-task-%d", len(s.tasks)+1)
volumeID := 1000 + len(s.tasks)
task := Task{
ID: taskID,
Type: "erasure_coding",
VolumeID: volumeID,
Status: "pending",
Progress: 0.0,
Created: time.Now(),
}
s.tasks = append(s.tasks, task)
log.Printf("Created EC task %s for volume %d", taskID, volumeID)
}
}
}()
}
func main() {
masterAddr := os.Getenv("MASTER_ADDRESS")
if masterAddr == "" {
masterAddr = "master:9333"
}
port := os.Getenv("ADMIN_PORT")
if port == "" {
port = "9900"
}
server := &AdminServer{
masterAddr: masterAddr,
port: port,
startTime: time.Now(),
tasks: make([]Task, 0),
workers: make([]Worker, 0),
}
http.HandleFunc("/health", server.healthHandler)
http.HandleFunc("/status", server.statusHandler)
// Start volume detection
server.detectVolumesForEC()
log.Printf("Admin server starting on port %s", port)
log.Printf("Master address: %s", masterAddr)
if err := http.ListenAndServe(":"+port, nil); err != nil {
log.Fatal("Server failed to start:", err)
}
}
EOF
# Compile and run the admin server
cd /tmp
go mod init admin-server
go run admin_server.go

393
docker/admin_integration/docker-compose-ec-test.yml

@ -0,0 +1,393 @@
services:
# Master server - coordinates the cluster
master:
image: chrislusf/seaweedfs:latest
container_name: seaweed-master
ports:
- "9333:9333"
- "19333:19333"
command: >
master
-ip=master
-port=9333
-volumeSizeLimitMB=50
-defaultReplication=001
volumes:
- master_data:/data
networks:
- seaweed_net
healthcheck:
test: ["CMD", "wget", "--quiet", "--tries=1", "--spider", "http://master:9333/cluster/status"]
interval: 10s
timeout: 5s
retries: 3
# Volume Server 1
volume1:
image: chrislusf/seaweedfs:latest
container_name: seaweed-volume1
ports:
- "8080:8080"
- "18080:18080"
command: >
volume
-mserver=master:9333
-ip=volume1
-port=8080
-dir=/data
-max=100
-dataCenter=dc1
-rack=rack1
volumes:
- volume1_data:/data
depends_on:
master:
condition: service_healthy
networks:
- seaweed_net
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:8080/status"]
interval: 10s
timeout: 5s
retries: 3
# Volume Server 2
volume2:
image: chrislusf/seaweedfs:latest
container_name: seaweed-volume2
ports:
- "8081:8080"
- "18081:18080"
command: >
volume
-mserver=master:9333
-ip=volume2
-port=8080
-dir=/data
-max=100
-dataCenter=dc1
-rack=rack1
volumes:
- volume2_data:/data
depends_on:
master:
condition: service_healthy
networks:
- seaweed_net
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:8080/status"]
interval: 10s
timeout: 5s
retries: 3
# Volume Server 3
volume3:
image: chrislusf/seaweedfs:latest
container_name: seaweed-volume3
ports:
- "8082:8080"
- "18082:18080"
command: >
volume
-mserver=master:9333
-ip=volume3
-port=8080
-dir=/data
-max=100
-dataCenter=dc1
-rack=rack2
volumes:
- volume3_data:/data
depends_on:
master:
condition: service_healthy
networks:
- seaweed_net
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:8080/status"]
interval: 10s
timeout: 5s
retries: 3
# Volume Server 4
volume4:
image: chrislusf/seaweedfs:latest
container_name: seaweed-volume4
ports:
- "8083:8080"
- "18083:18080"
command: >
volume
-mserver=master:9333
-ip=volume4
-port=8080
-dir=/data
-max=100
-dataCenter=dc2
-rack=rack1
volumes:
- volume4_data:/data
depends_on:
master:
condition: service_healthy
networks:
- seaweed_net
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:8080/status"]
interval: 10s
timeout: 5s
retries: 3
# Volume Server 5
volume5:
image: chrislusf/seaweedfs:latest
container_name: seaweed-volume5
ports:
- "8084:8080"
- "18084:18080"
command: >
volume
-mserver=master:9333
-ip=volume5
-port=8080
-dir=/data
-max=100
-dataCenter=dc2
-rack=rack2
volumes:
- volume5_data:/data
depends_on:
master:
condition: service_healthy
networks:
- seaweed_net
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:8080/status"]
interval: 10s
timeout: 5s
retries: 3
# Volume Server 6
volume6:
image: chrislusf/seaweedfs:latest
container_name: seaweed-volume6
ports:
- "8085:8080"
- "18085:18080"
command: >
volume
-mserver=master:9333
-ip=volume6
-port=8080
-dir=/data
-max=100
-dataCenter=dc2
-rack=rack3
volumes:
- volume6_data:/data
depends_on:
master:
condition: service_healthy
networks:
- seaweed_net
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:8080/status"]
interval: 10s
timeout: 5s
retries: 3
# Filer for easier data access
filer:
image: chrislusf/seaweedfs:latest
container_name: seaweed-filer
ports:
- "8888:8888"
- "18888:18888"
command: >
filer
-master=master:9333
-ip=filer
-port=8888
depends_on:
master:
condition: service_healthy
networks:
- seaweed_net
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:8888/"]
interval: 10s
timeout: 5s
retries: 3
# Admin Server - manages EC tasks
admin:
build:
context: ../../
dockerfile: docker/admin_integration/Dockerfile.admin
container_name: seaweed-admin
ports:
- "9900:9900"
environment:
- MASTER_ADDRESS=master:9333
- ADMIN_PORT=9900
- SCAN_INTERVAL=30s
- WORKER_TIMEOUT=5m
- TASK_TIMEOUT=30m
- MAX_RETRIES=3
- MAX_CONCURRENT_TASKS=5
volumes:
- admin_data:/data
- ./admin-config:/config
depends_on:
master:
condition: service_healthy
filer:
condition: service_healthy
networks:
- seaweed_net
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:9900/health"]
interval: 15s
timeout: 5s
retries: 3
# EC Worker 1
worker1:
build:
context: ../../
dockerfile: docker/admin_integration/Dockerfile.worker
container_name: seaweed-worker1
environment:
- ADMIN_ADDRESS=admin:9900
- WORKER_ID=worker-1
- WORKER_ADDRESS=worker1:9001
- CAPABILITIES=erasure_coding
- MAX_CONCURRENT=2
- WORK_DIR=/work
volumes:
- worker1_data:/work
depends_on:
admin:
condition: service_healthy
networks:
- seaweed_net
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:9001/health"]
interval: 15s
timeout: 5s
retries: 3
# EC Worker 2
worker2:
build:
context: ../../
dockerfile: docker/admin_integration/Dockerfile.worker
container_name: seaweed-worker2
environment:
- ADMIN_ADDRESS=admin:9900
- WORKER_ID=worker-2
- WORKER_ADDRESS=worker2:9001
- CAPABILITIES=erasure_coding,vacuum
- MAX_CONCURRENT=2
- WORK_DIR=/work
volumes:
- worker2_data:/work
depends_on:
admin:
condition: service_healthy
networks:
- seaweed_net
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:9001/health"]
interval: 15s
timeout: 5s
retries: 3
# EC Worker 3
worker3:
build:
context: ../../
dockerfile: docker/admin_integration/Dockerfile.worker
container_name: seaweed-worker3
environment:
- ADMIN_ADDRESS=admin:9900
- WORKER_ID=worker-3
- WORKER_ADDRESS=worker3:9001
- CAPABILITIES=erasure_coding,vacuum
- MAX_CONCURRENT=1
- WORK_DIR=/work
volumes:
- worker3_data:/work
depends_on:
admin:
condition: service_healthy
networks:
- seaweed_net
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:9001/health"]
interval: 15s
timeout: 5s
retries: 3
# Continuous Load Generator
load_generator:
build:
context: ../../
dockerfile: docker/admin_integration/Dockerfile.load
container_name: seaweed-load
environment:
- FILER_ADDRESS=filer:8888
- MASTER_ADDRESS=master:9333
- WRITE_RATE=10 # files per second
- DELETE_RATE=2 # files per second
- FILE_SIZE_MIN=1MB
- FILE_SIZE_MAX=5MB
- TEST_DURATION=3600 # 1 hour
depends_on:
filer:
condition: service_healthy
admin:
condition: service_healthy
networks:
- seaweed_net
# Monitoring and Health Check
monitor:
build:
context: ../../
dockerfile: docker/admin_integration/Dockerfile.monitor
container_name: seaweed-monitor
ports:
- "9999:9999"
environment:
- MASTER_ADDRESS=master:9333
- ADMIN_ADDRESS=admin:9900
- FILER_ADDRESS=filer:8888
- MONITOR_INTERVAL=10s
depends_on:
admin:
condition: service_healthy
networks:
- seaweed_net
volumes:
- ./monitor-data:/monitor-data
volumes:
master_data:
volume1_data:
volume2_data:
volume3_data:
volume4_data:
volume5_data:
volume6_data:
admin_data:
worker1_data:
worker2_data:
worker3_data:
networks:
seaweed_net:
driver: bridge
ipam:
config:
- subnet: 172.20.0.0/16

21
docker/admin_integration/load-entrypoint.sh

@ -0,0 +1,21 @@
#!/bin/sh
set -e
echo "Starting Load Generator..."
echo "Filer Address: $FILER_ADDRESS"
echo "Write Rate: $WRITE_RATE files/sec"
echo "Delete Rate: $DELETE_RATE files/sec"
echo "File Size Range: $FILE_SIZE_MIN - $FILE_SIZE_MAX"
echo "Test Duration: $TEST_DURATION seconds"
# Wait for filer to be ready
echo "Waiting for filer to be ready..."
until curl -f http://$FILER_ADDRESS/ > /dev/null 2>&1; do
echo "Filer not ready, waiting..."
sleep 5
done
echo "Filer is ready!"
# Start the load generator
exec ./load-generator

352
docker/admin_integration/load-generator.go

@ -0,0 +1,352 @@
package main
import (
"bytes"
"crypto/rand"
"fmt"
"io"
"log"
"net/http"
"os"
"strconv"
"strings"
"sync"
"time"
)
type LoadGenerator struct {
filerAddr string
masterAddr string
writeRate int
deleteRate int
fileSizeMin int64
fileSizeMax int64
testDuration int
collection string
// State tracking
createdFiles []string
mutex sync.RWMutex
stats LoadStats
}
type LoadStats struct {
FilesWritten int64
FilesDeleted int64
BytesWritten int64
Errors int64
StartTime time.Time
LastOperation time.Time
}
// parseSize converts size strings like "1MB", "5MB" to bytes
func parseSize(sizeStr string) int64 {
sizeStr = strings.ToUpper(strings.TrimSpace(sizeStr))
var multiplier int64 = 1
if strings.HasSuffix(sizeStr, "KB") {
multiplier = 1024
sizeStr = strings.TrimSuffix(sizeStr, "KB")
} else if strings.HasSuffix(sizeStr, "MB") {
multiplier = 1024 * 1024
sizeStr = strings.TrimSuffix(sizeStr, "MB")
} else if strings.HasSuffix(sizeStr, "GB") {
multiplier = 1024 * 1024 * 1024
sizeStr = strings.TrimSuffix(sizeStr, "GB")
}
size, err := strconv.ParseInt(sizeStr, 10, 64)
if err != nil {
return 1024 * 1024 // Default to 1MB
}
return size * multiplier
}
// generateRandomData creates random data of specified size
func (lg *LoadGenerator) generateRandomData(size int64) []byte {
data := make([]byte, size)
_, err := rand.Read(data)
if err != nil {
// Fallback to deterministic data
for i := range data {
data[i] = byte(i % 256)
}
}
return data
}
// uploadFile uploads a file to SeaweedFS via filer
func (lg *LoadGenerator) uploadFile(filename string, data []byte) error {
url := fmt.Sprintf("http://%s/%s", lg.filerAddr, filename)
if lg.collection != "" {
url = fmt.Sprintf("http://%s/%s/%s", lg.filerAddr, lg.collection, filename)
}
req, err := http.NewRequest("POST", url, bytes.NewReader(data))
if err != nil {
return err
}
req.Header.Set("Content-Type", "application/octet-stream")
client := &http.Client{Timeout: 30 * time.Second}
resp, err := client.Do(req)
if err != nil {
return err
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK && resp.StatusCode != http.StatusCreated {
return fmt.Errorf("upload failed with status: %d", resp.StatusCode)
}
return nil
}
// deleteFile deletes a file from SeaweedFS via filer
func (lg *LoadGenerator) deleteFile(filename string) error {
url := fmt.Sprintf("http://%s/%s", lg.filerAddr, filename)
if lg.collection != "" {
url = fmt.Sprintf("http://%s/%s/%s", lg.filerAddr, lg.collection, filename)
}
req, err := http.NewRequest("DELETE", url, nil)
if err != nil {
return err
}
client := &http.Client{Timeout: 10 * time.Second}
resp, err := client.Do(req)
if err != nil {
return err
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK && resp.StatusCode != http.StatusNoContent && resp.StatusCode != http.StatusNotFound {
return fmt.Errorf("delete failed with status: %d", resp.StatusCode)
}
return nil
}
// writeFiles continuously writes files at the specified rate
func (lg *LoadGenerator) writeFiles() {
writeInterval := time.Second / time.Duration(lg.writeRate)
ticker := time.NewTicker(writeInterval)
defer ticker.Stop()
fileCounter := 0
for range ticker.C {
fileCounter++
// Random file size between min and max
sizeDiff := lg.fileSizeMax - lg.fileSizeMin
randomSize := lg.fileSizeMin
if sizeDiff > 0 {
randomSize += int64(time.Now().UnixNano()) % sizeDiff
}
// Generate filename
filename := fmt.Sprintf("test-data/file-%d-%d.bin", time.Now().Unix(), fileCounter)
// Generate random data
data := lg.generateRandomData(randomSize)
// Upload file
err := lg.uploadFile(filename, data)
if err != nil {
log.Printf("Error uploading file %s: %v", filename, err)
lg.stats.Errors++
} else {
lg.mutex.Lock()
lg.createdFiles = append(lg.createdFiles, filename)
lg.stats.FilesWritten++
lg.stats.BytesWritten += randomSize
lg.stats.LastOperation = time.Now()
lg.mutex.Unlock()
log.Printf("Uploaded file: %s (size: %d bytes, total files: %d)",
filename, randomSize, lg.stats.FilesWritten)
}
}
}
// deleteFiles continuously deletes files at the specified rate
func (lg *LoadGenerator) deleteFiles() {
deleteInterval := time.Second / time.Duration(lg.deleteRate)
ticker := time.NewTicker(deleteInterval)
defer ticker.Stop()
for range ticker.C {
lg.mutex.Lock()
if len(lg.createdFiles) == 0 {
lg.mutex.Unlock()
continue
}
// Pick a random file to delete
index := int(time.Now().UnixNano()) % len(lg.createdFiles)
filename := lg.createdFiles[index]
// Remove from slice
lg.createdFiles = append(lg.createdFiles[:index], lg.createdFiles[index+1:]...)
lg.mutex.Unlock()
// Delete file
err := lg.deleteFile(filename)
if err != nil {
log.Printf("Error deleting file %s: %v", filename, err)
lg.stats.Errors++
} else {
lg.stats.FilesDeleted++
lg.stats.LastOperation = time.Now()
log.Printf("Deleted file: %s (remaining files: %d)", filename, len(lg.createdFiles))
}
}
}
// printStats periodically prints load generation statistics
func (lg *LoadGenerator) printStats() {
ticker := time.NewTicker(30 * time.Second)
defer ticker.Stop()
for range ticker.C {
uptime := time.Since(lg.stats.StartTime)
writeRate := float64(lg.stats.FilesWritten) / uptime.Seconds()
deleteRate := float64(lg.stats.FilesDeleted) / uptime.Seconds()
lg.mutex.RLock()
pendingFiles := len(lg.createdFiles)
lg.mutex.RUnlock()
log.Printf("STATS: Files written=%d, deleted=%d, pending=%d, errors=%d",
lg.stats.FilesWritten, lg.stats.FilesDeleted, pendingFiles, lg.stats.Errors)
log.Printf("RATES: Write=%.2f/sec, Delete=%.2f/sec, Data=%.2f MB written",
writeRate, deleteRate, float64(lg.stats.BytesWritten)/(1024*1024))
}
}
// checkClusterHealth periodically checks cluster status
func (lg *LoadGenerator) checkClusterHealth() {
ticker := time.NewTicker(1 * time.Minute)
defer ticker.Stop()
for range ticker.C {
// Check master status
resp, err := http.Get(fmt.Sprintf("http://%s/cluster/status", lg.masterAddr))
if err != nil {
log.Printf("WARNING: Cannot reach master: %v", err)
continue
}
body, err := io.ReadAll(resp.Body)
resp.Body.Close()
if err != nil {
log.Printf("WARNING: Cannot read master response: %v", err)
continue
}
if resp.StatusCode == http.StatusOK {
log.Printf("Cluster health check: OK (response size: %d bytes)", len(body))
} else {
log.Printf("WARNING: Cluster health check failed with status: %d", resp.StatusCode)
}
}
}
func main() {
filerAddr := os.Getenv("FILER_ADDRESS")
if filerAddr == "" {
filerAddr = "filer:8888"
}
masterAddr := os.Getenv("MASTER_ADDRESS")
if masterAddr == "" {
masterAddr = "master:9333"
}
writeRate, _ := strconv.Atoi(os.Getenv("WRITE_RATE"))
if writeRate <= 0 {
writeRate = 10
}
deleteRate, _ := strconv.Atoi(os.Getenv("DELETE_RATE"))
if deleteRate <= 0 {
deleteRate = 2
}
fileSizeMin := parseSize(os.Getenv("FILE_SIZE_MIN"))
if fileSizeMin <= 0 {
fileSizeMin = 1024 * 1024 // 1MB
}
fileSizeMax := parseSize(os.Getenv("FILE_SIZE_MAX"))
if fileSizeMax <= fileSizeMin {
fileSizeMax = 5 * 1024 * 1024 // 5MB
}
testDuration, _ := strconv.Atoi(os.Getenv("TEST_DURATION"))
if testDuration <= 0 {
testDuration = 3600 // 1 hour
}
collection := os.Getenv("COLLECTION")
lg := &LoadGenerator{
filerAddr: filerAddr,
masterAddr: masterAddr,
writeRate: writeRate,
deleteRate: deleteRate,
fileSizeMin: fileSizeMin,
fileSizeMax: fileSizeMax,
testDuration: testDuration,
collection: collection,
createdFiles: make([]string, 0),
stats: LoadStats{
StartTime: time.Now(),
},
}
log.Printf("Starting load generator...")
log.Printf("Filer: %s", filerAddr)
log.Printf("Master: %s", masterAddr)
log.Printf("Write rate: %d files/sec", writeRate)
log.Printf("Delete rate: %d files/sec", deleteRate)
log.Printf("File size: %d - %d bytes", fileSizeMin, fileSizeMax)
log.Printf("Test duration: %d seconds", testDuration)
log.Printf("Collection: '%s'", collection)
// Wait for filer to be ready
log.Println("Waiting for filer to be ready...")
for {
resp, err := http.Get(fmt.Sprintf("http://%s/", filerAddr))
if err == nil && resp.StatusCode == http.StatusOK {
resp.Body.Close()
break
}
if resp != nil {
resp.Body.Close()
}
log.Println("Filer not ready, waiting...")
time.Sleep(5 * time.Second)
}
log.Println("Filer is ready!")
// Start background goroutines
go lg.writeFiles()
go lg.deleteFiles()
go lg.printStats()
go lg.checkClusterHealth()
// Run for specified duration
log.Printf("Load test will run for %d seconds...", testDuration)
time.Sleep(time.Duration(testDuration) * time.Second)
log.Println("Load test completed!")
log.Printf("Final stats: Files written=%d, deleted=%d, errors=%d, total data=%.2f MB",
lg.stats.FilesWritten, lg.stats.FilesDeleted, lg.stats.Errors,
float64(lg.stats.BytesWritten)/(1024*1024))
}

38
docker/admin_integration/monitor-entrypoint.sh

@ -0,0 +1,38 @@
#!/bin/sh
set -e
echo "Starting Cluster Monitor..."
echo "Master Address: $MASTER_ADDRESS"
echo "Admin Address: $ADMIN_ADDRESS"
echo "Filer Address: $FILER_ADDRESS"
echo "Monitor Interval: $MONITOR_INTERVAL"
# Wait for core services to be ready
echo "Waiting for core services to be ready..."
echo "Waiting for master..."
until curl -f http://$MASTER_ADDRESS/cluster/status > /dev/null 2>&1; do
echo "Master not ready, waiting..."
sleep 5
done
echo "Master is ready!"
echo "Waiting for admin..."
until curl -f http://$ADMIN_ADDRESS/health > /dev/null 2>&1; do
echo "Admin not ready, waiting..."
sleep 5
done
echo "Admin is ready!"
echo "Waiting for filer..."
until curl -f http://$FILER_ADDRESS/ > /dev/null 2>&1; do
echo "Filer not ready, waiting..."
sleep 5
done
echo "Filer is ready!"
echo "All services ready! Starting monitor..."
# Start the monitor
exec ./monitor

366
docker/admin_integration/monitor.go

@ -0,0 +1,366 @@
package main
import (
"encoding/json"
"fmt"
"io"
"log"
"net/http"
"os"
"time"
)
type Monitor struct {
masterAddr string
adminAddr string
filerAddr string
interval time.Duration
startTime time.Time
stats MonitorStats
}
type MonitorStats struct {
TotalChecks int64
MasterHealthy int64
AdminHealthy int64
FilerHealthy int64
VolumeCount int64
LastVolumeCheck time.Time
ECTasksDetected int64
WorkersActive int64
LastWorkerCheck time.Time
}
type ClusterStatus struct {
IsLeader bool `json:"IsLeader"`
Leader string `json:"Leader"`
Peers []string `json:"Peers"`
}
type VolumeStatus struct {
Volumes []VolumeInfo `json:"Volumes"`
}
type VolumeInfo struct {
Id uint32 `json:"Id"`
Size uint64 `json:"Size"`
Collection string `json:"Collection"`
FileCount int64 `json:"FileCount"`
DeleteCount int64 `json:"DeleteCount"`
DeletedByteCount uint64 `json:"DeletedByteCount"`
ReadOnly bool `json:"ReadOnly"`
CompactRevision uint32 `json:"CompactRevision"`
Version uint32 `json:"Version"`
}
type AdminStatus struct {
Status string `json:"status"`
Uptime string `json:"uptime"`
Tasks int `json:"tasks"`
Workers int `json:"workers"`
}
// checkMasterHealth checks the master server health
func (m *Monitor) checkMasterHealth() bool {
resp, err := http.Get(fmt.Sprintf("http://%s/cluster/status", m.masterAddr))
if err != nil {
log.Printf("ERROR: Cannot reach master %s: %v", m.masterAddr, err)
return false
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
log.Printf("ERROR: Master returned status %d", resp.StatusCode)
return false
}
var status ClusterStatus
body, err := io.ReadAll(resp.Body)
if err != nil {
log.Printf("ERROR: Cannot read master response: %v", err)
return false
}
err = json.Unmarshal(body, &status)
if err != nil {
log.Printf("WARNING: Cannot parse master status: %v", err)
// Still consider it healthy if we got a response
return true
}
log.Printf("Master status: Leader=%s, IsLeader=%t, Peers=%d",
status.Leader, status.IsLeader, len(status.Peers))
m.stats.MasterHealthy++
return true
}
// checkAdminHealth checks the admin server health
func (m *Monitor) checkAdminHealth() bool {
resp, err := http.Get(fmt.Sprintf("http://%s/health", m.adminAddr))
if err != nil {
log.Printf("ERROR: Cannot reach admin %s: %v", m.adminAddr, err)
return false
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
log.Printf("ERROR: Admin returned status %d", resp.StatusCode)
return false
}
var status AdminStatus
body, err := io.ReadAll(resp.Body)
if err != nil {
log.Printf("ERROR: Cannot read admin response: %v", err)
return false
}
err = json.Unmarshal(body, &status)
if err != nil {
log.Printf("WARNING: Cannot parse admin status: %v", err)
return true
}
log.Printf("Admin status: %s, Uptime=%s, Tasks=%d, Workers=%d",
status.Status, status.Uptime, status.Tasks, status.Workers)
m.stats.AdminHealthy++
m.stats.ECTasksDetected += int64(status.Tasks)
m.stats.WorkersActive = int64(status.Workers)
m.stats.LastWorkerCheck = time.Now()
return true
}
// checkFilerHealth checks the filer health
func (m *Monitor) checkFilerHealth() bool {
resp, err := http.Get(fmt.Sprintf("http://%s/", m.filerAddr))
if err != nil {
log.Printf("ERROR: Cannot reach filer %s: %v", m.filerAddr, err)
return false
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
log.Printf("ERROR: Filer returned status %d", resp.StatusCode)
return false
}
m.stats.FilerHealthy++
return true
}
// checkVolumeStatus checks volume information from master
func (m *Monitor) checkVolumeStatus() {
resp, err := http.Get(fmt.Sprintf("http://%s/vol/status", m.masterAddr))
if err != nil {
log.Printf("ERROR: Cannot get volume status: %v", err)
return
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
log.Printf("ERROR: Volume status returned status %d", resp.StatusCode)
return
}
body, err := io.ReadAll(resp.Body)
if err != nil {
log.Printf("ERROR: Cannot read volume status: %v", err)
return
}
var volumeStatus VolumeStatus
err = json.Unmarshal(body, &volumeStatus)
if err != nil {
log.Printf("WARNING: Cannot parse volume status: %v", err)
return
}
m.stats.VolumeCount = int64(len(volumeStatus.Volumes))
m.stats.LastVolumeCheck = time.Now()
// Analyze volumes
var readOnlyCount, fullVolumeCount, ecCandidates int
var totalSize, totalFiles uint64
for _, vol := range volumeStatus.Volumes {
totalSize += vol.Size
totalFiles += uint64(vol.FileCount)
if vol.ReadOnly {
readOnlyCount++
}
// Volume is close to full (>40MB for 50MB limit)
if vol.Size > 40*1024*1024 {
fullVolumeCount++
if !vol.ReadOnly {
ecCandidates++
}
}
}
log.Printf("Volume analysis: Total=%d, ReadOnly=%d, Full=%d, EC_Candidates=%d",
len(volumeStatus.Volumes), readOnlyCount, fullVolumeCount, ecCandidates)
log.Printf("Storage stats: Total_Size=%.2fMB, Total_Files=%d",
float64(totalSize)/(1024*1024), totalFiles)
if ecCandidates > 0 {
log.Printf("⚠️ DETECTED %d volumes that should be EC'd!", ecCandidates)
}
}
// healthHandler provides a health endpoint for the monitor itself
func (m *Monitor) healthHandler(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Content-Type", "application/json")
json.NewEncoder(w).Encode(map[string]interface{}{
"status": "healthy",
"uptime": time.Since(m.startTime).String(),
"checks": m.stats.TotalChecks,
"last_check": m.stats.LastVolumeCheck.Format(time.RFC3339),
})
}
// statusHandler provides detailed monitoring status
func (m *Monitor) statusHandler(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Content-Type", "application/json")
json.NewEncoder(w).Encode(map[string]interface{}{
"monitor": map[string]interface{}{
"uptime": time.Since(m.startTime).String(),
"master_addr": m.masterAddr,
"admin_addr": m.adminAddr,
"filer_addr": m.filerAddr,
"interval": m.interval.String(),
},
"stats": m.stats,
"health": map[string]interface{}{
"master_healthy": m.stats.MasterHealthy > 0 && time.Since(m.stats.LastVolumeCheck) < 2*m.interval,
"admin_healthy": m.stats.AdminHealthy > 0 && time.Since(m.stats.LastWorkerCheck) < 2*m.interval,
"filer_healthy": m.stats.FilerHealthy > 0,
},
})
}
// runMonitoring runs the main monitoring loop
func (m *Monitor) runMonitoring() {
ticker := time.NewTicker(m.interval)
defer ticker.Stop()
log.Printf("Starting monitoring loop every %v", m.interval)
for {
m.stats.TotalChecks++
log.Printf("=== Monitoring Check #%d ===", m.stats.TotalChecks)
// Check master health
if m.checkMasterHealth() {
// If master is healthy, check volumes
m.checkVolumeStatus()
}
// Check admin health
m.checkAdminHealth()
// Check filer health
m.checkFilerHealth()
// Print summary
log.Printf("Health Summary: Master=%t, Admin=%t, Filer=%t, Volumes=%d, Workers=%d",
m.stats.MasterHealthy > 0,
m.stats.AdminHealthy > 0,
m.stats.FilerHealthy > 0,
m.stats.VolumeCount,
m.stats.WorkersActive)
log.Printf("=== End Check #%d ===", m.stats.TotalChecks)
<-ticker.C
}
}
func main() {
masterAddr := os.Getenv("MASTER_ADDRESS")
if masterAddr == "" {
masterAddr = "master:9333"
}
adminAddr := os.Getenv("ADMIN_ADDRESS")
if adminAddr == "" {
adminAddr = "admin:9900"
}
filerAddr := os.Getenv("FILER_ADDRESS")
if filerAddr == "" {
filerAddr = "filer:8888"
}
intervalStr := os.Getenv("MONITOR_INTERVAL")
interval, err := time.ParseDuration(intervalStr)
if err != nil {
interval = 10 * time.Second
}
monitor := &Monitor{
masterAddr: masterAddr,
adminAddr: adminAddr,
filerAddr: filerAddr,
interval: interval,
startTime: time.Now(),
stats: MonitorStats{},
}
log.Printf("Starting SeaweedFS Cluster Monitor")
log.Printf("Master: %s", masterAddr)
log.Printf("Admin: %s", adminAddr)
log.Printf("Filer: %s", filerAddr)
log.Printf("Interval: %v", interval)
// Setup HTTP endpoints
http.HandleFunc("/health", monitor.healthHandler)
http.HandleFunc("/status", monitor.statusHandler)
// Start HTTP server in background
go func() {
log.Println("Monitor HTTP server starting on :9999")
if err := http.ListenAndServe(":9999", nil); err != nil {
log.Printf("Monitor HTTP server error: %v", err)
}
}()
// Wait for services to be ready
log.Println("Waiting for services to be ready...")
for {
masterOK := false
adminOK := false
filerOK := false
if resp, err := http.Get(fmt.Sprintf("http://%s/cluster/status", masterAddr)); err == nil && resp.StatusCode == http.StatusOK {
masterOK = true
resp.Body.Close()
}
if resp, err := http.Get(fmt.Sprintf("http://%s/health", adminAddr)); err == nil && resp.StatusCode == http.StatusOK {
adminOK = true
resp.Body.Close()
}
if resp, err := http.Get(fmt.Sprintf("http://%s/", filerAddr)); err == nil && resp.StatusCode == http.StatusOK {
filerOK = true
resp.Body.Close()
}
if masterOK && adminOK && filerOK {
log.Println("All services are ready!")
break
}
log.Printf("Services ready: Master=%t, Admin=%t, Filer=%t", masterOK, adminOK, filerOK)
time.Sleep(5 * time.Second)
}
// Start monitoring
monitor.runMonitoring()
}

106
docker/admin_integration/run-ec-test.sh

@ -0,0 +1,106 @@
#!/bin/bash
set -e
# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color
echo -e "${BLUE}🧪 SeaweedFS EC Worker Testing Environment${NC}"
echo -e "${BLUE}===========================================${NC}"
# Check if docker-compose is available
if ! command -v docker-compose &> /dev/null; then
echo -e "${RED}❌ docker-compose is required but not installed${NC}"
exit 1
fi
# Create necessary directories
echo -e "${YELLOW}📁 Creating required directories...${NC}"
mkdir -p monitor-data admin-config
# Make scripts executable
echo -e "${YELLOW}🔧 Making scripts executable...${NC}"
chmod +x *.sh
# Stop any existing containers
echo -e "${YELLOW}🛑 Stopping any existing containers...${NC}"
docker-compose -f docker-compose-ec-test.yml down -v 2>/dev/null || true
# Build and start the environment
echo -e "${GREEN}🚀 Starting SeaweedFS EC testing environment...${NC}"
echo -e "${BLUE}This will start:${NC}"
echo -e " • 1 Master server (port 9333)"
echo -e " • 6 Volume servers (ports 8080-8085) with 50MB volume limit"
echo -e " • 1 Filer (port 8888)"
echo -e " • 1 Admin server (port 9900)"
echo -e " • 3 EC Workers"
echo -e " • 1 Load generator (continuous read/write)"
echo -e " • 1 Monitor (port 9999)"
echo ""
docker-compose -f docker-compose-ec-test.yml up --build -d
echo -e "${GREEN}✅ Environment started successfully!${NC}"
echo ""
echo -e "${BLUE}📊 Monitoring URLs:${NC}"
echo -e " • Master UI: http://localhost:9333"
echo -e " • Filer: http://localhost:8888"
echo -e " • Admin Server: http://localhost:9900/status"
echo -e " • Monitor: http://localhost:9999/status"
echo ""
echo -e "${BLUE}📈 Volume Servers:${NC}"
echo -e " • Volume1: http://localhost:8080/status"
echo -e " • Volume2: http://localhost:8081/status"
echo -e " • Volume3: http://localhost:8082/status"
echo -e " • Volume4: http://localhost:8083/status"
echo -e " • Volume5: http://localhost:8084/status"
echo -e " • Volume6: http://localhost:8085/status"
echo ""
echo -e "${YELLOW}⏳ Waiting for services to be ready...${NC}"
sleep 10
# Check service health
echo -e "${BLUE}🔍 Checking service health...${NC}"
check_service() {
local name=$1
local url=$2
if curl -s "$url" > /dev/null 2>&1; then
echo -e "$name: ${GREEN}Healthy${NC}"
return 0
else
echo -e "$name: ${RED}Not responding${NC}"
return 1
fi
}
check_service "Master" "http://localhost:9333/cluster/status"
check_service "Filer" "http://localhost:8888/"
check_service "Admin" "http://localhost:9900/health"
check_service "Monitor" "http://localhost:9999/health"
echo ""
echo -e "${GREEN}🎯 Test Environment is Ready!${NC}"
echo ""
echo -e "${BLUE}What's happening:${NC}"
echo -e " 1. 📝 Load generator continuously writes 1-5MB files at 10 files/sec"
echo -e " 2. 🗑️ Load generator deletes files at 2 files/sec"
echo -e " 3. 📊 Volumes fill up to 50MB limit and trigger EC conversion"
echo -e " 4. 🏭 Admin server detects volumes needing EC and assigns to workers"
echo -e " 5. ⚡ Workers perform comprehensive EC (copy→encode→distribute)"
echo -e " 6. 📈 Monitor tracks all activity and volume states"
echo ""
echo -e "${YELLOW}📋 Useful Commands:${NC}"
echo -e " • View logs: docker-compose -f docker-compose-ec-test.yml logs -f [service]"
echo -e " • Check worker status: docker-compose -f docker-compose-ec-test.yml logs worker1"
echo -e " • Stop environment: docker-compose -f docker-compose-ec-test.yml down -v"
echo -e " • Monitor logs: docker-compose -f docker-compose-ec-test.yml logs -f monitor"
echo ""
echo -e "${GREEN}🔥 The test will run for 1 hour by default${NC}"
echo -e "${BLUE}Monitor progress at: http://localhost:9999/status${NC}"

230
docker/admin_integration/worker-entrypoint.sh

@ -0,0 +1,230 @@
#!/bin/sh
set -e
echo "Starting SeaweedFS EC Worker..."
echo "Worker ID: $WORKER_ID"
echo "Admin Address: $ADMIN_ADDRESS"
echo "Capabilities: $CAPABILITIES"
# Wait for admin server to be ready
echo "Waiting for admin server to be ready..."
until curl -f http://$ADMIN_ADDRESS/health > /dev/null 2>&1; do
echo "Admin server not ready, waiting..."
sleep 5
done
echo "Admin server is ready!"
# Create worker simulation
cat > /tmp/worker.go << 'EOF'
package main
import (
"encoding/json"
"fmt"
"log"
"net/http"
"os"
"strings"
"time"
)
type Worker struct {
id string
adminAddr string
address string
capabilities []string
maxConcurrent int
workDir string
startTime time.Time
activeTasks map[string]*Task
}
type Task struct {
ID string `json:"id"`
Type string `json:"type"`
VolumeID int `json:"volume_id"`
Status string `json:"status"`
Progress float64 `json:"progress"`
Started time.Time `json:"started"`
}
func (w *Worker) healthHandler(res http.ResponseWriter, req *http.Request) {
res.Header().Set("Content-Type", "application/json")
json.NewEncoder(res).Encode(map[string]interface{}{
"status": "healthy",
"worker_id": w.id,
"uptime": time.Since(w.startTime).String(),
"active_tasks": len(w.activeTasks),
"capabilities": w.capabilities,
})
}
func (w *Worker) statusHandler(res http.ResponseWriter, req *http.Request) {
res.Header().Set("Content-Type", "application/json")
json.NewEncoder(res).Encode(map[string]interface{}{
"worker_id": w.id,
"admin_addr": w.adminAddr,
"capabilities": w.capabilities,
"max_concurrent": w.maxConcurrent,
"active_tasks": w.activeTasks,
"uptime": time.Since(w.startTime).String(),
})
}
func (w *Worker) simulateECTask(taskID string, volumeID int) {
log.Printf("Starting EC task %s for volume %d", taskID, volumeID)
task := &Task{
ID: taskID,
Type: "erasure_coding",
VolumeID: volumeID,
Status: "running",
Progress: 0.0,
Started: time.Now(),
}
w.activeTasks[taskID] = task
// Simulate EC process phases
phases := []struct {
progress float64
phase string
duration time.Duration
}{
{5.0, "Copying volume data locally", 10 * time.Second},
{25.0, "Marking volume read-only", 2 * time.Second},
{60.0, "Performing local EC encoding", 30 * time.Second},
{70.0, "Calculating optimal shard placement", 5 * time.Second},
{90.0, "Distributing shards to servers", 20 * time.Second},
{100.0, "Verification and cleanup", 3 * time.Second},
}
go func() {
for _, phase := range phases {
if task.Status != "running" {
break
}
time.Sleep(phase.duration)
task.Progress = phase.progress
log.Printf("Task %s: %.1f%% - %s", taskID, phase.progress, phase.phase)
}
if task.Status == "running" {
task.Status = "completed"
task.Progress = 100.0
log.Printf("Task %s completed successfully", taskID)
}
// Remove from active tasks after completion
time.Sleep(5 * time.Second)
delete(w.activeTasks, taskID)
}()
}
func (w *Worker) registerWithAdmin() {
ticker := time.NewTicker(30 * time.Second)
go func() {
for {
// Register/heartbeat with admin server
log.Printf("Sending heartbeat to admin server...")
data := map[string]interface{}{
"worker_id": w.id,
"address": w.address,
"capabilities": w.capabilities,
"max_concurrent": w.maxConcurrent,
"active_tasks": len(w.activeTasks),
"status": "active",
}
jsonData, _ := json.Marshal(data)
// In real implementation, this would be a proper gRPC call
resp, err := http.Post(
fmt.Sprintf("http://%s/register-worker", w.adminAddr),
"application/json",
strings.NewReader(string(jsonData)),
)
if err != nil {
log.Printf("Failed to register with admin: %v", err)
} else {
resp.Body.Close()
log.Printf("Successfully sent heartbeat to admin")
}
// Simulate requesting new tasks
if len(w.activeTasks) < w.maxConcurrent {
// In real implementation, worker would request tasks from admin
// For simulation, we'll create some tasks periodically
if len(w.activeTasks) == 0 && time.Since(w.startTime) > 1*time.Minute {
taskID := fmt.Sprintf("%s-task-%d", w.id, time.Now().Unix())
volumeID := 2000 + int(time.Now().Unix()%1000)
w.simulateECTask(taskID, volumeID)
}
}
<-ticker.C
}
}()
}
func main() {
workerID := os.Getenv("WORKER_ID")
if workerID == "" {
workerID = "worker-1"
}
adminAddr := os.Getenv("ADMIN_ADDRESS")
if adminAddr == "" {
adminAddr = "admin:9900"
}
address := os.Getenv("WORKER_ADDRESS")
if address == "" {
address = "worker:9001"
}
capabilities := strings.Split(os.Getenv("CAPABILITIES"), ",")
if len(capabilities) == 0 || capabilities[0] == "" {
capabilities = []string{"erasure_coding"}
}
worker := &Worker{
id: workerID,
adminAddr: adminAddr,
address: address,
capabilities: capabilities,
maxConcurrent: 2,
workDir: "/work",
startTime: time.Now(),
activeTasks: make(map[string]*Task),
}
http.HandleFunc("/health", worker.healthHandler)
http.HandleFunc("/status", worker.statusHandler)
// Start registration and heartbeat
worker.registerWithAdmin()
log.Printf("Worker %s starting on address %s", workerID, address)
log.Printf("Admin address: %s", adminAddr)
log.Printf("Capabilities: %v", capabilities)
port := ":9001"
if strings.Contains(address, ":") {
parts := strings.Split(address, ":")
port = ":" + parts[1]
}
if err := http.ListenAndServe(port, nil); err != nil {
log.Fatal("Worker failed to start:", err)
}
}
EOF
# Compile and run the worker
cd /tmp
go mod init worker
go run worker.go
Loading…
Cancel
Save