start master, volume servers, filer

Current status: ✅ Master: healthy and running (port 9333) · ✅ Filer: healthy and running (port 8888) · ✅ Volume servers: all 6 running (ports 8080-8085) · 🔄 Admin/workers: start once their dependencies are ready

Branch: worker-execute-ec-tasks
14 changed files with 2551 additions and 0 deletions
- docker/admin_integration/Dockerfile.admin (+33)
- docker/admin_integration/Dockerfile.load (+44)
- docker/admin_integration/Dockerfile.monitor (+48)
- docker/admin_integration/Dockerfile.worker (+33)
- docker/admin_integration/EC-TESTING-README.md (+433)
- docker/admin_integration/Makefile (+301)
- docker/admin_integration/admin-entrypoint.sh (+153)
- docker/admin_integration/docker-compose-ec-test.yml (+393)
- docker/admin_integration/load-entrypoint.sh (+21)
- docker/admin_integration/load-generator.go (+352)
- docker/admin_integration/monitor-entrypoint.sh (+38)
- docker/admin_integration/monitor.go (+366)
- docker/admin_integration/run-ec-test.sh (+106)
- docker/admin_integration/worker-entrypoint.sh (+230)
@@ -0,0 +1,33 @@ docker/admin_integration/Dockerfile.admin
# Final stage
FROM alpine:latest

# Install dependencies including Go for the entrypoint script
RUN apk --no-cache add curl ca-certificates go

WORKDIR /root/

# Copy admin server binary (if it exists) or create a simple one
COPY ./docker/admin_integration/admin-entrypoint.sh /entrypoint.sh
RUN chmod +x /entrypoint.sh

# Create directories
RUN mkdir -p /data /config /work

# Expose admin port
EXPOSE 9900

# Set environment variables
ENV MASTER_ADDRESS="master:9333"
ENV ADMIN_PORT="9900"
ENV SCAN_INTERVAL="30s"
ENV WORKER_TIMEOUT="5m"
ENV TASK_TIMEOUT="30m"
ENV MAX_RETRIES="3"
ENV MAX_CONCURRENT_TASKS="5"

# Health check
HEALTHCHECK --interval=15s --timeout=5s --start-period=30s --retries=3 \
  CMD curl -f http://localhost:9900/health || exit 1

# Start admin server
ENTRYPOINT ["/entrypoint.sh"]
@@ -0,0 +1,44 @@ docker/admin_integration/Dockerfile.load
FROM golang:1.24-alpine AS builder

# Install dependencies
RUN apk add --no-cache git build-base

# Set working directory
WORKDIR /app

# Copy and create load generator
COPY ./docker/admin_integration/load-generator.go .
COPY go.mod go.sum ./
RUN go mod download
RUN CGO_ENABLED=0 GOOS=linux go build -a -installsuffix cgo -o load-generator load-generator.go

# Final stage
FROM alpine:latest

# Install dependencies
RUN apk --no-cache add curl ca-certificates openssl

WORKDIR /root/

# Copy the binary
COPY --from=builder /app/load-generator .

# Copy load generator script
COPY ./docker/admin_integration/load-entrypoint.sh /entrypoint.sh
RUN chmod +x /entrypoint.sh

# Create directories for test data
RUN mkdir -p /test-data /temp

# Set environment variables
ENV FILER_ADDRESS="filer:8888"
ENV MASTER_ADDRESS="master:9333"
ENV WRITE_RATE="10"
ENV DELETE_RATE="2"
ENV FILE_SIZE_MIN="1MB"
ENV FILE_SIZE_MAX="5MB"
ENV TEST_DURATION="3600"
ENV COLLECTION=""

# Start load generator
ENTRYPOINT ["/entrypoint.sh"]
@@ -0,0 +1,48 @@ docker/admin_integration/Dockerfile.monitor
FROM golang:1.24-alpine AS builder

# Install dependencies
RUN apk add --no-cache git build-base

# Set working directory
WORKDIR /app

# Copy and create monitor
COPY ./docker/admin_integration/monitor.go .
COPY go.mod go.sum ./
RUN go mod download
RUN CGO_ENABLED=0 GOOS=linux go build -a -installsuffix cgo -o monitor monitor.go

# Final stage
FROM alpine:latest

# Install dependencies
RUN apk --no-cache add curl ca-certificates jq

WORKDIR /root/

# Copy the binary
COPY --from=builder /app/monitor .

# Copy monitor scripts
COPY ./docker/admin_integration/monitor-entrypoint.sh /entrypoint.sh
RUN chmod +x /entrypoint.sh

# Create monitoring directories
RUN mkdir -p /monitor-data /logs

# Expose monitor port
EXPOSE 9999

# Set environment variables
ENV MASTER_ADDRESS="master:9333"
ENV ADMIN_ADDRESS="admin:9900"
ENV FILER_ADDRESS="filer:8888"
ENV MONITOR_INTERVAL="10s"
ENV LOG_LEVEL="info"

# Health check
HEALTHCHECK --interval=30s --timeout=5s --start-period=30s --retries=3 \
  CMD curl -f http://localhost:9999/health || exit 1

# Start monitor
ENTRYPOINT ["/entrypoint.sh"]
@@ -0,0 +1,33 @@ docker/admin_integration/Dockerfile.worker
# Final stage
FROM alpine:latest

# Install dependencies including Go for the entrypoint script
RUN apk --no-cache add curl ca-certificates go

WORKDIR /root/

# Copy worker entrypoint script
COPY ./docker/admin_integration/worker-entrypoint.sh /entrypoint.sh
RUN chmod +x /entrypoint.sh

# Create working directories
RUN mkdir -p /work /tmp/ec_work

# Expose worker port
EXPOSE 9001

# Set environment variables
ENV ADMIN_ADDRESS="admin:9900"
ENV WORKER_ID="worker-1"
ENV WORKER_ADDRESS="worker:9001"
ENV CAPABILITIES="erasure_coding"
ENV MAX_CONCURRENT="2"
ENV WORK_DIR="/work"
ENV HEARTBEAT_INTERVAL="10s"

# Health check
HEALTHCHECK --interval=15s --timeout=5s --start-period=30s --retries=3 \
  CMD curl -f http://localhost:9001/health || exit 1

# Start worker
ENTRYPOINT ["/entrypoint.sh"]
@@ -0,0 +1,433 @@ docker/admin_integration/EC-TESTING-README.md
# SeaweedFS EC Worker Testing Environment

This Docker Compose setup provides a comprehensive testing environment for SeaweedFS Erasure Coding (EC) workers with real workload simulation.

## 📂 Directory Structure

The testing environment is located in `docker/admin_integration/` and includes:

```
docker/admin_integration/
├── Makefile                     # Main management interface
├── docker-compose-ec-test.yml   # Docker compose configuration
├── EC-TESTING-README.md         # This documentation
├── Dockerfile.admin             # Admin server image
├── Dockerfile.worker            # EC worker image
├── Dockerfile.load              # Load generator image
├── Dockerfile.monitor           # Monitor service image
├── admin-entrypoint.sh          # Admin server startup script
├── worker-entrypoint.sh         # Worker startup script
├── load-generator.go            # Load generator source code
├── load-entrypoint.sh           # Load generator startup script
├── monitor.go                   # Monitor service source code
└── monitor-entrypoint.sh        # Monitor startup script
```

## 🏗️ Architecture

The testing environment includes:

- **1 Master Server** (port 9333) - Coordinates the cluster with a 50MB volume size limit
- **6 Volume Servers** (ports 8080-8085) - Distributed across 2 data centers and 3 racks for diversity
- **1 Filer** (port 8888) - Provides the file system interface
- **1 Admin Server** (port 9900) - Detects volumes needing EC and manages workers
- **3 EC Workers** - Execute erasure coding tasks with different capabilities
- **1 Load Generator** - Continuously writes and deletes files to trigger EC
- **1 Monitor** (port 9999) - Tracks cluster health and EC progress

## 🚀 Quick Start

### Prerequisites

- Docker and Docker Compose installed
- GNU Make installed
- At least 4GB RAM available for containers
- Ports 8080-8085, 8888, 9333, 9900, and 9999 available

### Start the Environment

```bash
# Navigate to the admin integration directory
cd docker/admin_integration/

# Show available commands
make help

# Start the complete testing environment
make start
```

The `make start` command will:
1. Build all necessary Docker images
2. Start all services in the correct order
3. Wait for services to be ready
4. Display monitoring URLs and run health checks

### Alternative Commands

```bash
# Quick start aliases
make up        # Same as 'make start'

# Development mode (higher load for faster testing)
make dev-start

# Build images without starting
make build
```

## 📋 Available Make Targets

Run `make help` to see all available targets:

### **🚀 Main Operations**
- `make start` - Start the complete EC testing environment
- `make stop` - Stop all services
- `make restart` - Restart all services
- `make clean` - Complete cleanup (containers, volumes, images)

### **📊 Monitoring & Status**
- `make health` - Check health of all services
- `make status` - Show status of all containers
- `make urls` - Display all monitoring URLs
- `make monitor` - Open monitor dashboard in browser
- `make monitor-status` - Show monitor status via API
- `make volume-status` - Show volume status from master
- `make admin-status` - Show admin server status
- `make cluster-status` - Show complete cluster status

### **📋 Logs Management**
- `make logs` - Show logs from all services
- `make logs-admin` - Show admin server logs
- `make logs-workers` - Show all worker logs
- `make logs-worker1/2/3` - Show specific worker logs
- `make logs-load` - Show load generator logs
- `make logs-monitor` - Show monitor logs
- `make backup-logs` - Back up all logs to files

### **⚖️ Scaling & Testing**
- `make scale-workers WORKERS=5` - Scale workers to 5 instances
- `make scale-load RATE=25` - Increase load generation rate
- `make test-ec` - Run a focused EC test scenario

### **🔧 Development & Debug**
- `make shell-admin` - Open shell in admin container
- `make shell-worker1` - Open shell in worker container
- `make debug` - Show debug information
- `make troubleshoot` - Run troubleshooting checks

## 📊 Monitoring URLs

| Service | URL | Description |
|---------|-----|-------------|
| Master UI | http://localhost:9333 | Cluster status and topology |
| Filer | http://localhost:8888 | File operations |
| Admin Server | http://localhost:9900/status | Task management |
| Monitor | http://localhost:9999/status | Complete cluster monitoring |
| Volume Servers | http://localhost:8080-8085/status | Individual volume server stats |

Quick access: `make urls` or `make monitor`
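For scripted checks, the same endpoints the Make targets wrap can be queried directly with `curl` (these mirror the `monitor-status`, `volume-status`, and `admin-status` targets; `jq` is optional pretty-printing):

```bash
# Cluster topology and leader info from the master
curl -s http://localhost:9333/cluster/status | jq .

# Volume sizes - watch for volumes approaching the 50MB limit
curl -s http://localhost:9333/vol/status | jq .

# Admin task queue and registered workers
curl -s http://localhost:9900/status | jq .
```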
## 🔄 How EC Testing Works

### 1. Continuous Load Generation
- **Write Rate**: 10 files/second (1-5MB each)
- **Delete Rate**: 2 files/second
- **Target**: Fill volumes to the 50MB limit quickly

At the default rates the generator writes roughly 10 × ~3MB ≈ 30MB/s while deleting ~6MB/s, so volumes reach the 50MB limit within seconds to minutes.

### 2. Volume Detection
- Admin server scans the master every 30 seconds
- Identifies volumes >40MB (80% of the 50MB limit)
- Queues EC tasks for eligible volumes (see the sketch after this list)
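A minimal sketch of that detection pass, assuming the master's `/vol/status` response decodes into the same `Volumes` array shape that `monitor.go` uses (only the `Id` and `Size` fields matter here):

```go
package main

import (
	"encoding/json"
	"fmt"
	"net/http"
)

// volumeInfo mirrors the two fields of the master's /vol/status
// response that the scan actually needs.
type volumeInfo struct {
	Id   uint32 `json:"Id"`
	Size uint64 `json:"Size"`
}

// findECCandidates returns the IDs of volumes above the EC threshold
// (40MB, i.e. 80% of the 50MB volume size limit).
func findECCandidates(masterAddr string) ([]uint32, error) {
	resp, err := http.Get(fmt.Sprintf("http://%s/vol/status", masterAddr))
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	var status struct {
		Volumes []volumeInfo `json:"Volumes"`
	}
	if err := json.NewDecoder(resp.Body).Decode(&status); err != nil {
		return nil, err
	}

	const threshold = 40 * 1024 * 1024 // 40MB
	var candidates []uint32
	for _, v := range status.Volumes {
		if v.Size > threshold {
			candidates = append(candidates, v.Id)
		}
	}
	return candidates, nil
}

func main() {
	ids, err := findECCandidates("localhost:9333")
	if err != nil {
		fmt.Println("scan failed:", err)
		return
	}
	fmt.Println("EC candidates:", ids)
}
```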
### 3. EC Worker Assignment
Tasks go only to workers that advertise the required capability and still have spare concurrency (sketch after this list):
- **Worker 1**: EC specialist (max 2 concurrent tasks)
- **Worker 2**: EC + Vacuum hybrid (max 2 concurrent tasks)
- **Worker 3**: EC + Vacuum hybrid (max 1 concurrent task)
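A minimal sketch of that matching rule, using a hypothetical `Worker` type modeled on the `CAPABILITIES` and `MAX_CONCURRENT` settings in the compose file (the real admin server's dispatch logic may differ):

```go
package main

import "fmt"

// Worker is a hypothetical view of a registered worker, modeled on the
// CAPABILITIES and MAX_CONCURRENT settings in docker-compose-ec-test.yml.
type Worker struct {
	ID            string
	Capabilities  []string // e.g. ["erasure_coding", "vacuum"]
	MaxConcurrent int
	ActiveTasks   int
}

// canAccept reports whether a worker may take a task of the given type:
// it must advertise the capability and still have spare concurrency.
func canAccept(w Worker, taskType string) bool {
	if w.ActiveTasks >= w.MaxConcurrent {
		return false
	}
	for _, c := range w.Capabilities {
		if c == taskType {
			return true
		}
	}
	return false
}

func main() {
	w1 := Worker{ID: "worker-1", Capabilities: []string{"erasure_coding"}, MaxConcurrent: 2}
	w3 := Worker{ID: "worker-3", Capabilities: []string{"erasure_coding", "vacuum"}, MaxConcurrent: 1, ActiveTasks: 1}
	fmt.Println(canAccept(w1, "erasure_coding")) // true
	fmt.Println(canAccept(w3, "erasure_coding")) // false: at its concurrency limit
}
```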
### 4. Comprehensive EC Process
Each EC task follows 6 phases:
1. **Copy Volume Data** (5-15%) - Stream .dat/.idx files locally
2. **Mark Read-Only** (20-25%) - Ensure data consistency
3. **Local Encoding** (30-60%) - Create 14 shards (10+4 Reed-Solomon; encoding sketch after this list)
4. **Calculate Placement** (65-70%) - Smart rack-aware distribution
5. **Distribute Shards** (75-90%) - Upload to optimal servers
6. **Verify & Cleanup** (95-100%) - Validate and clean temporary files
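For reference, a minimal sketch of the RS(10,4) split-and-encode step in phase 3, assuming the third-party `github.com/klauspost/reedsolomon` library rather than SeaweedFS's internal erasure-coding package:

```go
package main

import (
	"fmt"
	"log"

	"github.com/klauspost/reedsolomon"
)

func main() {
	// RS(10,4): 10 data shards plus 4 parity shards, matching the
	// 14-shard layout described above.
	enc, err := reedsolomon.New(10, 4)
	if err != nil {
		log.Fatal(err)
	}

	data := make([]byte, 50*1024*1024) // stand-in for a 50MB volume's bytes

	// Split pads and slices the input into 10 equally sized data shards;
	// Encode then fills in the 4 parity shards.
	shards, err := enc.Split(data)
	if err != nil {
		log.Fatal(err)
	}
	if err := enc.Encode(shards); err != nil {
		log.Fatal(err)
	}

	ok, err := enc.Verify(shards)
	fmt.Println("shards:", len(shards), "verified:", ok, err)
	// Any 10 of the 14 shards are enough to reconstruct the data,
	// so up to 4 shards (e.g. a whole rack) can be lost.
}
```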
### 5. Real-Time Monitoring
- Volume analysis and EC candidate detection
- Worker health and task progress
- No-data-loss verification
- Performance metrics

## 📋 Key Features Tested

### ✅ EC Implementation Features
- [x] Local volume data copying with progress tracking
- [x] Local Reed-Solomon encoding (10+4 shards)
- [x] Intelligent shard placement with rack awareness
- [x] Load balancing across available servers
- [x] Backup server selection for redundancy
- [x] Detailed step-by-step progress tracking
- [x] Comprehensive error handling and recovery

### ✅ Infrastructure Features
- [x] Multi-datacenter topology (dc1, dc2)
- [x] Rack diversity (rack1, rack2, rack3)
- [x] Volume size limits (50MB)
- [x] Worker capability matching
- [x] Health monitoring and alerting
- [x] Continuous workload simulation

## 🛠️ Common Usage Patterns

### Basic Testing Workflow
```bash
# Start environment
make start

# Watch progress
make monitor-status

# Check for EC candidates
make volume-status

# View worker activity
make logs-workers

# Stop when done
make stop
```

### High-Load Testing
```bash
# Start with higher load
make dev-start

# Scale up workers and load
make scale-workers WORKERS=5
make scale-load RATE=50

# Monitor intensive EC activity
make logs-admin
```

### Debugging Issues
```bash
# Check port conflicts and system state
make troubleshoot

# View specific service logs
make logs-admin
make logs-worker1

# Get shell access for debugging
make shell-admin
make shell-worker1

# Check detailed status
make debug
```

### Development Iteration
```bash
# Quick restart after code changes
make restart

# Rebuild and restart
make clean
make start

# Monitor specific components
make logs-monitor
```

## 📈 Expected Results

### Successful EC Testing Shows:
1. **Volume Growth**: Steady increase in volume sizes toward the 50MB limit
2. **EC Detection**: Admin server identifies volumes >40MB for EC
3. **Task Assignment**: Workers receive and execute EC tasks
4. **Shard Distribution**: 14 shards distributed across the 6 volume servers
5. **No Data Loss**: All files remain accessible during and after EC
6. **Performance**: EC tasks complete within estimated timeframes

### Sample Monitor Output:
```bash
# Check current status
make monitor-status

# Output example:
{
  "monitor": {
    "uptime": "15m30s",
    "master_addr": "master:9333",
    "admin_addr": "admin:9900"
  },
  "stats": {
    "VolumeCount": 12,
    "ECTasksDetected": 3,
    "WorkersActive": 3
  }
}
```

## 🔧 Configuration

### Environment Variables

You can customize the environment by setting variables:

```bash
# High load testing
WRITE_RATE=25 DELETE_RATE=5 make start

# Extended test duration
TEST_DURATION=7200 make start  # 2 hours
```

### Scaling Examples

```bash
# Scale workers
make scale-workers WORKERS=6

# Increase load generation
make scale-load RATE=30

# Combined scaling
make scale-workers WORKERS=4
make scale-load RATE=40
```

## 🧹 Cleanup Options

```bash
# Stop services only
make stop

# Remove containers but keep volumes
make down

# Remove data volumes only
make clean-volumes

# Remove built images only
make clean-images

# Complete cleanup (everything)
make clean
```

## 🐛 Troubleshooting

### Quick Diagnostics
```bash
# Run complete troubleshooting
make troubleshoot

# Check specific components
make health
make debug
make status
```

### Common Issues

**Services not starting:**
```bash
# Check port availability
make troubleshoot

# View startup logs
make logs-master
make logs-admin
```

**No EC tasks being created:**
```bash
# Check volume status
make volume-status

# Increase load to fill volumes faster
make scale-load RATE=30

# Check admin detection
make logs-admin
```

**Workers not responding:**
```bash
# Check worker registration
make admin-status

# View worker logs
make logs-workers

# Restart workers
make restart
```

### Performance Tuning

**For faster testing:**
```bash
make dev-start           # Higher default load
make scale-load RATE=50  # Very high load
```

**For stress testing:**
```bash
make scale-workers WORKERS=8
make scale-load RATE=100
```

## 📚 Technical Details

### Network Architecture
- Custom bridge network (172.20.0.0/16)
- Service discovery via container names
- Health checks for all services

### Storage Layout
- Each volume server: max 100 volumes
- Data centers: dc1, dc2
- Racks: rack1, rack2, rack3
- Volume limit: 50MB per volume

### EC Algorithm
- Reed-Solomon RS(10,4)
- 10 data shards + 4 parity shards
- Any 10 of the 14 shards reconstruct the data, so up to 4 shard losses are tolerated at a 1.4× storage overhead
- Rack-aware distribution
- Backup server redundancy

### Make Integration
- Color-coded output for better readability
- Comprehensive help system (`make help`)
- Parallel execution support
- Error handling and cleanup
- Cross-platform compatibility

## 🎯 Quick Reference

```bash
# Essential commands
make help           # Show all available targets
make start          # Start complete environment
make health         # Check all services
make monitor        # Open dashboard
make logs-admin     # View admin activity
make clean          # Complete cleanup

# Monitoring
make volume-status  # Check for EC candidates
make admin-status   # Check task queue
make monitor-status # Full cluster status

# Scaling & Testing
make test-ec            # Run focused EC test
make scale-load RATE=X  # Increase load
make troubleshoot       # Diagnose issues
```

This environment provides a realistic testing scenario for SeaweedFS EC workers, with actual data operations, comprehensive monitoring, and easy management through Make targets.
@@ -0,0 +1,301 @@ docker/admin_integration/Makefile
# SeaweedFS EC Worker Testing Environment Makefile
# Usage: make <target>

.PHONY: help start stop clean logs status monitor health up down restart scale docs test

# Default target
.DEFAULT_GOAL := help

# Docker compose file
COMPOSE_FILE := docker-compose-ec-test.yml

# Color codes for output
GREEN := \033[32m
YELLOW := \033[33m
BLUE := \033[34m
RED := \033[31m
NC := \033[0m # No Color

help: ## Show this help message
	@echo "$(BLUE)🧪 SeaweedFS EC Worker Testing Environment$(NC)"
	@echo "$(BLUE)===========================================$(NC)"
	@echo ""
	@echo "$(YELLOW)Available targets:$(NC)"
	@awk 'BEGIN {FS = ":.*?## "} /^[a-zA-Z_-]+:.*?## / {printf "  $(GREEN)%-15s$(NC) %s\n", $$1, $$2}' $(MAKEFILE_LIST)
	@echo ""
	@echo "$(YELLOW)Quick start:$(NC) make start"
	@echo "$(YELLOW)Monitor:$(NC)     make monitor"
	@echo "$(YELLOW)Cleanup:$(NC)     make clean"

start: ## Start the complete EC testing environment
	@echo "$(GREEN)🚀 Starting SeaweedFS EC testing environment...$(NC)"
	@echo "$(BLUE)This will start:$(NC)"
	@echo "  • 1 Master server (port 9333)"
	@echo "  • 6 Volume servers (ports 8080-8085) with 50MB volume limit"
	@echo "  • 1 Filer (port 8888)"
	@echo "  • 1 Admin server (port 9900)"
	@echo "  • 3 EC Workers"
	@echo "  • 1 Load generator (continuous read/write)"
	@echo "  • 1 Monitor (port 9999)"
	@echo ""
	@mkdir -p monitor-data admin-config
	@chmod +x *.sh 2>/dev/null || true
	@docker-compose -f $(COMPOSE_FILE) down -v 2>/dev/null || true
	@docker-compose -f $(COMPOSE_FILE) up --build -d
	@echo ""
	@echo "$(GREEN)✅ Environment started successfully!$(NC)"
	@echo ""
	@$(MAKE) urls
	@echo ""
	@echo "$(YELLOW)⏳ Waiting for services to be ready...$(NC)"
	@sleep 10
	@$(MAKE) health

stop: ## Stop all services
	@echo "$(YELLOW)🛑 Stopping all services...$(NC)"
	@docker-compose -f $(COMPOSE_FILE) stop
	@echo "$(GREEN)✅ All services stopped$(NC)"

down: ## Stop and remove all containers
	@echo "$(YELLOW)🛑 Stopping and removing containers...$(NC)"
	@docker-compose -f $(COMPOSE_FILE) down
	@echo "$(GREEN)✅ Containers stopped and removed$(NC)"

clean: ## Stop and remove all containers, networks, volumes, and images
	@echo "$(RED)🧹 Cleaning up entire environment...$(NC)"
	@docker-compose -f $(COMPOSE_FILE) down -v --rmi all 2>/dev/null || true
	@docker system prune -f
	@echo "$(GREEN)✅ Environment cleaned up$(NC)"

restart: ## Restart all services
	@echo "$(YELLOW)🔄 Restarting all services...$(NC)"
	@docker-compose -f $(COMPOSE_FILE) restart
	@echo "$(GREEN)✅ All services restarted$(NC)"

up: start ## Alias for start

status: ## Show status of all services
	@echo "$(BLUE)📊 Service Status:$(NC)"
	@docker-compose -f $(COMPOSE_FILE) ps

logs: ## Show logs from all services
	@echo "$(BLUE)📋 Showing logs from all services (Ctrl+C to exit):$(NC)"
	@docker-compose -f $(COMPOSE_FILE) logs -f

logs-admin: ## Show admin server logs
	@echo "$(BLUE)📋 Admin Server Logs:$(NC)"
	@docker-compose -f $(COMPOSE_FILE) logs -f admin

logs-workers: ## Show all worker logs
	@echo "$(BLUE)📋 Worker Logs:$(NC)"
	@docker-compose -f $(COMPOSE_FILE) logs -f worker1 worker2 worker3

logs-worker1: ## Show worker1 logs
	@docker-compose -f $(COMPOSE_FILE) logs -f worker1

logs-worker2: ## Show worker2 logs
	@docker-compose -f $(COMPOSE_FILE) logs -f worker2

logs-worker3: ## Show worker3 logs
	@docker-compose -f $(COMPOSE_FILE) logs -f worker3

logs-load: ## Show load generator logs
	@echo "$(BLUE)📋 Load Generator Logs:$(NC)"
	@docker-compose -f $(COMPOSE_FILE) logs -f load_generator

logs-monitor: ## Show monitor logs
	@echo "$(BLUE)📋 Monitor Logs:$(NC)"
	@docker-compose -f $(COMPOSE_FILE) logs -f monitor

logs-master: ## Show master logs
	@docker-compose -f $(COMPOSE_FILE) logs -f master

logs-volumes: ## Show all volume server logs
	@echo "$(BLUE)📋 Volume Server Logs:$(NC)"
	@docker-compose -f $(COMPOSE_FILE) logs -f volume1 volume2 volume3 volume4 volume5 volume6

urls: ## Show monitoring URLs
	@echo "$(BLUE)📊 Monitoring URLs:$(NC)"
	@echo "  • Master UI:     http://localhost:9333"
	@echo "  • Filer:         http://localhost:8888"
	@echo "  • Admin Server:  http://localhost:9900/status"
	@echo "  • Monitor:       http://localhost:9999/status"
	@echo ""
	@echo "$(BLUE)📈 Volume Servers:$(NC)"
	@echo "  • Volume1: http://localhost:8080/status"
	@echo "  • Volume2: http://localhost:8081/status"
	@echo "  • Volume3: http://localhost:8082/status"
	@echo "  • Volume4: http://localhost:8083/status"
	@echo "  • Volume5: http://localhost:8084/status"
	@echo "  • Volume6: http://localhost:8085/status"

health: ## Check health of all services
	@echo "$(BLUE)🔍 Checking service health...$(NC)"
	@echo -n "  Master:  "; \
	if curl -s http://localhost:9333/cluster/status > /dev/null 2>&1; then \
		echo "$(GREEN)✅ Healthy$(NC)"; \
	else \
		echo "$(RED)❌ Not responding$(NC)"; \
	fi
	@echo -n "  Filer:   "; \
	if curl -s http://localhost:8888/ > /dev/null 2>&1; then \
		echo "$(GREEN)✅ Healthy$(NC)"; \
	else \
		echo "$(RED)❌ Not responding$(NC)"; \
	fi
	@echo -n "  Admin:   "; \
	if curl -s http://localhost:9900/health > /dev/null 2>&1; then \
		echo "$(GREEN)✅ Healthy$(NC)"; \
	else \
		echo "$(RED)❌ Not responding$(NC)"; \
	fi
	@echo -n "  Monitor: "; \
	if curl -s http://localhost:9999/health > /dev/null 2>&1; then \
		echo "$(GREEN)✅ Healthy$(NC)"; \
	else \
		echo "$(RED)❌ Not responding$(NC)"; \
	fi

monitor: ## Open monitor dashboard in browser
	@echo "$(BLUE)📊 Opening monitor dashboard...$(NC)"
	@echo "Monitor URL: http://localhost:9999/status"
	@command -v open >/dev/null 2>&1 && open http://localhost:9999/status || \
	command -v xdg-open >/dev/null 2>&1 && xdg-open http://localhost:9999/status || \
	echo "Please open http://localhost:9999/status in your browser"

monitor-status: ## Show current monitoring status via API
	@echo "$(BLUE)📊 Current Monitor Status:$(NC)"
	@curl -s http://localhost:9999/status | jq . 2>/dev/null || \
	curl -s http://localhost:9999/status 2>/dev/null || \
	echo "Monitor not available"

volume-status: ## Show volume status from master
	@echo "$(BLUE)💾 Volume Status:$(NC)"
	@curl -s http://localhost:9333/vol/status | jq . 2>/dev/null || \
	curl -s http://localhost:9333/vol/status 2>/dev/null || \
	echo "Master not available"

admin-status: ## Show admin server status
	@echo "$(BLUE)🏭 Admin Server Status:$(NC)"
	@curl -s http://localhost:9900/status | jq . 2>/dev/null || \
	curl -s http://localhost:9900/status 2>/dev/null || \
	echo "Admin server not available"

cluster-status: ## Show complete cluster status
	@echo "$(BLUE)🌐 Cluster Status:$(NC)"
	@curl -s http://localhost:9333/cluster/status | jq . 2>/dev/null || \
	curl -s http://localhost:9333/cluster/status 2>/dev/null || \
	echo "Master not available"

scale-workers: ## Scale workers (usage: make scale-workers WORKERS=5)
	@echo "$(YELLOW)⚖️ Scaling workers to $(or $(WORKERS),3)...$(NC)"
	@docker-compose -f $(COMPOSE_FILE) up -d --scale worker2=$(or $(WORKERS),3)

scale-load: ## Restart load generator with higher rate (usage: make scale-load RATE=20)
	@echo "$(YELLOW)📈 Scaling load generation to $(or $(RATE),20) files/sec...$(NC)"
	@docker-compose -f $(COMPOSE_FILE) stop load_generator
	@docker-compose -f $(COMPOSE_FILE) run -d --name temp_load_generator \
		-e WRITE_RATE=$(or $(RATE),20) -e DELETE_RATE=$(or $(shell expr $(or $(RATE),20) / 4),5) \
		load_generator
	@echo "$(GREEN)✅ Load generator restarted with higher rate$(NC)"

test-ec: ## Run a focused EC test scenario
	@echo "$(YELLOW)🧪 Running focused EC test...$(NC)"
	@$(MAKE) scale-load RATE=25
	@echo "$(BLUE)Monitoring EC detection...$(NC)"
	@echo "Watch for volumes >40MB that trigger EC conversion"
	@echo "Monitor at: http://localhost:9999/status"

shell-admin: ## Open shell in admin container
	@docker-compose -f $(COMPOSE_FILE) exec admin /bin/sh

shell-worker1: ## Open shell in worker1 container
	@docker-compose -f $(COMPOSE_FILE) exec worker1 /bin/sh

shell-master: ## Open shell in master container
	@docker-compose -f $(COMPOSE_FILE) exec master /bin/sh

docs: ## Show documentation
	@echo "$(BLUE)📖 EC Testing Documentation:$(NC)"
	@echo ""
	@cat EC-TESTING-README.md

build: ## Build all Docker images without starting
	@echo "$(YELLOW)🔨 Building all Docker images...$(NC)"
	@docker-compose -f $(COMPOSE_FILE) build
	@echo "$(GREEN)✅ All images built$(NC)"

pull: ## Pull latest SeaweedFS image
	@echo "$(YELLOW)📥 Pulling latest SeaweedFS image...$(NC)"
	@docker pull chrislusf/seaweedfs:latest
	@echo "$(GREEN)✅ Latest image pulled$(NC)"

debug: ## Show debug information
	@echo "$(BLUE)🔍 Debug Information:$(NC)"
	@echo ""
	@echo "$(YELLOW)Docker Compose Version:$(NC)"
	@docker-compose --version
	@echo ""
	@echo "$(YELLOW)Docker Version:$(NC)"
	@docker --version
	@echo ""
	@echo "$(YELLOW)Current Directory:$(NC)"
	@pwd
	@echo ""
	@echo "$(YELLOW)Available Files:$(NC)"
	@ls -la *.yml *.sh *.md 2>/dev/null || echo "No config files found"
	@echo ""
	@echo "$(YELLOW)Running Containers:$(NC)"
	@docker ps --format "table {{.Names}}\t{{.Status}}\t{{.Ports}}"

# Targets for development and testing
dev-start: ## Start with development settings (faster iteration)
	@echo "$(YELLOW)🛠️ Starting development environment...$(NC)"
	@mkdir -p monitor-data admin-config
	@WRITE_RATE=50 DELETE_RATE=10 docker-compose -f $(COMPOSE_FILE) up --build -d
	@echo "$(GREEN)✅ Development environment started with high load$(NC)"

dev-stop: stop ## Stop development environment

# Clean specific components
clean-volumes: ## Remove only data volumes
	@echo "$(YELLOW)🗄️ Removing data volumes...$(NC)"
	@docker-compose -f $(COMPOSE_FILE) down -v
	@echo "$(GREEN)✅ Data volumes removed$(NC)"

clean-images: ## Remove built images
	@echo "$(YELLOW)🖼️ Removing built images...$(NC)"
	@docker-compose -f $(COMPOSE_FILE) down --rmi local
	@echo "$(GREEN)✅ Built images removed$(NC)"

# Backup and restore
backup-logs: ## Back up all service logs
	@echo "$(YELLOW)💾 Backing up service logs...$(NC)"
	@mkdir -p logs-backup
	@docker-compose -f $(COMPOSE_FILE) logs admin > logs-backup/admin.log 2>&1
	@docker-compose -f $(COMPOSE_FILE) logs worker1 > logs-backup/worker1.log 2>&1
	@docker-compose -f $(COMPOSE_FILE) logs worker2 > logs-backup/worker2.log 2>&1
	@docker-compose -f $(COMPOSE_FILE) logs worker3 > logs-backup/worker3.log 2>&1
	@docker-compose -f $(COMPOSE_FILE) logs monitor > logs-backup/monitor.log 2>&1
	@docker-compose -f $(COMPOSE_FILE) logs load_generator > logs-backup/load_generator.log 2>&1
	@echo "$(GREEN)✅ Logs backed up to logs-backup/$(NC)"

# Quick troubleshooting
troubleshoot: ## Run troubleshooting checks
	@echo "$(BLUE)🔧 Running troubleshooting checks...$(NC)"
	@echo ""
	@echo "$(YELLOW)1. Checking required ports:$(NC)"
	@for port in 9333 8888 9900 9999 8080 8081 8082 8083 8084 8085; do \
		echo -n "  Port $$port: "; \
		if lsof -i :$$port >/dev/null 2>&1; then \
			echo "$(RED)❌ In use$(NC)"; \
		else \
			echo "$(GREEN)✅ Available$(NC)"; \
		fi; \
	done
	@echo ""
	@echo "$(YELLOW)2. Docker resources:$(NC)"
	@docker system df
	@echo ""
	@echo "$(YELLOW)3. Service health:$(NC)"
	@$(MAKE) health
@@ -0,0 +1,153 @@ docker/admin_integration/admin-entrypoint.sh
#!/bin/sh

set -e

echo "Starting SeaweedFS Admin Server..."
echo "Master Address: $MASTER_ADDRESS"
echo "Admin Port: $ADMIN_PORT"
echo "Scan Interval: $SCAN_INTERVAL"

# Wait for master to be ready
echo "Waiting for master to be ready..."
until curl -f http://$MASTER_ADDRESS/cluster/status > /dev/null 2>&1; do
  echo "Master not ready, waiting..."
  sleep 5
done
echo "Master is ready!"

# For now, use a simple HTTP server to simulate admin functionality.
# In a real implementation, this would start the actual admin server.
cat > /tmp/admin_server.go << 'EOF'
package main

import (
	"encoding/json"
	"fmt"
	"log"
	"net/http"
	"os"
	"time"
)

type AdminServer struct {
	masterAddr string
	port       string
	startTime  time.Time
	tasks      []Task
	workers    []Worker
}

type Task struct {
	ID       string    `json:"id"`
	Type     string    `json:"type"`
	VolumeID int       `json:"volume_id"`
	Status   string    `json:"status"`
	Progress float64   `json:"progress"`
	Created  time.Time `json:"created"`
}

type Worker struct {
	ID           string    `json:"id"`
	Address      string    `json:"address"`
	Capabilities []string  `json:"capabilities"`
	Status       string    `json:"status"`
	LastSeen     time.Time `json:"last_seen"`
}

func (s *AdminServer) healthHandler(w http.ResponseWriter, r *http.Request) {
	w.Header().Set("Content-Type", "application/json")
	json.NewEncoder(w).Encode(map[string]interface{}{
		"status":  "healthy",
		"uptime":  time.Since(s.startTime).String(),
		"tasks":   len(s.tasks),
		"workers": len(s.workers),
	})
}

func (s *AdminServer) statusHandler(w http.ResponseWriter, r *http.Request) {
	w.Header().Set("Content-Type", "application/json")
	json.NewEncoder(w).Encode(map[string]interface{}{
		"admin_server": "running",
		"master_addr":  s.masterAddr,
		"tasks":        s.tasks,
		"workers":      s.workers,
		"uptime":       time.Since(s.startTime).String(),
	})
}

func (s *AdminServer) detectVolumesForEC() {
	// Simulate volume detection logic.
	// In a real implementation, this would query the master for volume status.
	ticker := time.NewTicker(30 * time.Second)
	go func() {
		for range ticker.C {
			log.Println("Scanning for volumes requiring EC...")

			// Check master for volume status
			resp, err := http.Get(fmt.Sprintf("http://%s/vol/status", s.masterAddr))
			if err != nil {
				log.Printf("Error checking master: %v", err)
				continue
			}
			resp.Body.Close()

			// Simulate detecting a volume that needs EC
			if len(s.tasks) < 5 { // Don't create too many tasks
				taskID := fmt.Sprintf("ec-task-%d", len(s.tasks)+1)
				volumeID := 1000 + len(s.tasks)

				task := Task{
					ID:       taskID,
					Type:     "erasure_coding",
					VolumeID: volumeID,
					Status:   "pending",
					Progress: 0.0,
					Created:  time.Now(),
				}

				s.tasks = append(s.tasks, task)
				log.Printf("Created EC task %s for volume %d", taskID, volumeID)
			}
		}
	}()
}

func main() {
	masterAddr := os.Getenv("MASTER_ADDRESS")
	if masterAddr == "" {
		masterAddr = "master:9333"
	}

	port := os.Getenv("ADMIN_PORT")
	if port == "" {
		port = "9900"
	}

	server := &AdminServer{
		masterAddr: masterAddr,
		port:       port,
		startTime:  time.Now(),
		tasks:      make([]Task, 0),
		workers:    make([]Worker, 0),
	}

	http.HandleFunc("/health", server.healthHandler)
	http.HandleFunc("/status", server.statusHandler)

	// Start volume detection
	server.detectVolumesForEC()

	log.Printf("Admin server starting on port %s", port)
	log.Printf("Master address: %s", masterAddr)

	if err := http.ListenAndServe(":"+port, nil); err != nil {
		log.Fatal("Server failed to start:", err)
	}
}
EOF

# Compile and run the admin server
cd /tmp
go mod init admin-server
go run admin_server.go
@@ -0,0 +1,393 @@ docker/admin_integration/docker-compose-ec-test.yml
services:
  # Master server - coordinates the cluster
  master:
    image: chrislusf/seaweedfs:latest
    container_name: seaweed-master
    ports:
      - "9333:9333"
      - "19333:19333"
    command: >
      master
      -ip=master
      -port=9333
      -volumeSizeLimitMB=50
      -defaultReplication=001
    volumes:
      - master_data:/data
    networks:
      - seaweed_net
    healthcheck:
      test: ["CMD", "wget", "--quiet", "--tries=1", "--spider", "http://master:9333/cluster/status"]
      interval: 10s
      timeout: 5s
      retries: 3

  # Volume Server 1
  volume1:
    image: chrislusf/seaweedfs:latest
    container_name: seaweed-volume1
    ports:
      - "8080:8080"
      - "18080:18080"
    command: >
      volume
      -mserver=master:9333
      -ip=volume1
      -port=8080
      -dir=/data
      -max=100
      -dataCenter=dc1
      -rack=rack1
    volumes:
      - volume1_data:/data
    depends_on:
      master:
        condition: service_healthy
    networks:
      - seaweed_net
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8080/status"]
      interval: 10s
      timeout: 5s
      retries: 3

  # Volume Server 2
  volume2:
    image: chrislusf/seaweedfs:latest
    container_name: seaweed-volume2
    ports:
      - "8081:8080"
      - "18081:18080"
    command: >
      volume
      -mserver=master:9333
      -ip=volume2
      -port=8080
      -dir=/data
      -max=100
      -dataCenter=dc1
      -rack=rack1
    volumes:
      - volume2_data:/data
    depends_on:
      master:
        condition: service_healthy
    networks:
      - seaweed_net
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8080/status"]
      interval: 10s
      timeout: 5s
      retries: 3

  # Volume Server 3
  volume3:
    image: chrislusf/seaweedfs:latest
    container_name: seaweed-volume3
    ports:
      - "8082:8080"
      - "18082:18080"
    command: >
      volume
      -mserver=master:9333
      -ip=volume3
      -port=8080
      -dir=/data
      -max=100
      -dataCenter=dc1
      -rack=rack2
    volumes:
      - volume3_data:/data
    depends_on:
      master:
        condition: service_healthy
    networks:
      - seaweed_net
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8080/status"]
      interval: 10s
      timeout: 5s
      retries: 3

  # Volume Server 4
  volume4:
    image: chrislusf/seaweedfs:latest
    container_name: seaweed-volume4
    ports:
      - "8083:8080"
      - "18083:18080"
    command: >
      volume
      -mserver=master:9333
      -ip=volume4
      -port=8080
      -dir=/data
      -max=100
      -dataCenter=dc2
      -rack=rack1
    volumes:
      - volume4_data:/data
    depends_on:
      master:
        condition: service_healthy
    networks:
      - seaweed_net
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8080/status"]
      interval: 10s
      timeout: 5s
      retries: 3

  # Volume Server 5
  volume5:
    image: chrislusf/seaweedfs:latest
    container_name: seaweed-volume5
    ports:
      - "8084:8080"
      - "18084:18080"
    command: >
      volume
      -mserver=master:9333
      -ip=volume5
      -port=8080
      -dir=/data
      -max=100
      -dataCenter=dc2
      -rack=rack2
    volumes:
      - volume5_data:/data
    depends_on:
      master:
        condition: service_healthy
    networks:
      - seaweed_net
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8080/status"]
      interval: 10s
      timeout: 5s
      retries: 3

  # Volume Server 6
  volume6:
    image: chrislusf/seaweedfs:latest
    container_name: seaweed-volume6
    ports:
      - "8085:8080"
      - "18085:18080"
    command: >
      volume
      -mserver=master:9333
      -ip=volume6
      -port=8080
      -dir=/data
      -max=100
      -dataCenter=dc2
      -rack=rack3
    volumes:
      - volume6_data:/data
    depends_on:
      master:
        condition: service_healthy
    networks:
      - seaweed_net
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8080/status"]
      interval: 10s
      timeout: 5s
      retries: 3

  # Filer for easier data access
  filer:
    image: chrislusf/seaweedfs:latest
    container_name: seaweed-filer
    ports:
      - "8888:8888"
      - "18888:18888"
    command: >
      filer
      -master=master:9333
      -ip=filer
      -port=8888
    depends_on:
      master:
        condition: service_healthy
    networks:
      - seaweed_net
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8888/"]
      interval: 10s
      timeout: 5s
      retries: 3

  # Admin Server - manages EC tasks
  admin:
    build:
      context: ../../
      dockerfile: docker/admin_integration/Dockerfile.admin
    container_name: seaweed-admin
    ports:
      - "9900:9900"
    environment:
      - MASTER_ADDRESS=master:9333
      - ADMIN_PORT=9900
      - SCAN_INTERVAL=30s
      - WORKER_TIMEOUT=5m
      - TASK_TIMEOUT=30m
      - MAX_RETRIES=3
      - MAX_CONCURRENT_TASKS=5
    volumes:
      - admin_data:/data
      - ./admin-config:/config
    depends_on:
      master:
        condition: service_healthy
      filer:
        condition: service_healthy
    networks:
      - seaweed_net
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:9900/health"]
      interval: 15s
      timeout: 5s
      retries: 3

  # EC Worker 1
  worker1:
    build:
      context: ../../
      dockerfile: docker/admin_integration/Dockerfile.worker
    container_name: seaweed-worker1
    environment:
      - ADMIN_ADDRESS=admin:9900
      - WORKER_ID=worker-1
      - WORKER_ADDRESS=worker1:9001
      - CAPABILITIES=erasure_coding
      - MAX_CONCURRENT=2
      - WORK_DIR=/work
    volumes:
      - worker1_data:/work
    depends_on:
      admin:
        condition: service_healthy
    networks:
      - seaweed_net
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:9001/health"]
      interval: 15s
      timeout: 5s
      retries: 3

  # EC Worker 2
  worker2:
    build:
      context: ../../
      dockerfile: docker/admin_integration/Dockerfile.worker
    container_name: seaweed-worker2
    environment:
      - ADMIN_ADDRESS=admin:9900
      - WORKER_ID=worker-2
      - WORKER_ADDRESS=worker2:9001
      - CAPABILITIES=erasure_coding,vacuum
      - MAX_CONCURRENT=2
      - WORK_DIR=/work
    volumes:
      - worker2_data:/work
    depends_on:
      admin:
        condition: service_healthy
    networks:
      - seaweed_net
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:9001/health"]
      interval: 15s
      timeout: 5s
      retries: 3

  # EC Worker 3
  worker3:
    build:
      context: ../../
      dockerfile: docker/admin_integration/Dockerfile.worker
    container_name: seaweed-worker3
    environment:
      - ADMIN_ADDRESS=admin:9900
      - WORKER_ID=worker-3
      - WORKER_ADDRESS=worker3:9001
      - CAPABILITIES=erasure_coding,vacuum
      - MAX_CONCURRENT=1
      - WORK_DIR=/work
    volumes:
      - worker3_data:/work
    depends_on:
      admin:
        condition: service_healthy
    networks:
      - seaweed_net
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:9001/health"]
      interval: 15s
      timeout: 5s
      retries: 3

  # Continuous Load Generator
  load_generator:
    build:
      context: ../../
      dockerfile: docker/admin_integration/Dockerfile.load
    container_name: seaweed-load
    environment:
      - FILER_ADDRESS=filer:8888
      - MASTER_ADDRESS=master:9333
      - WRITE_RATE=10      # files per second
      - DELETE_RATE=2      # files per second
      - FILE_SIZE_MIN=1MB
      - FILE_SIZE_MAX=5MB
      - TEST_DURATION=3600 # 1 hour
    depends_on:
      filer:
        condition: service_healthy
      admin:
        condition: service_healthy
    networks:
      - seaweed_net

  # Monitoring and Health Check
  monitor:
    build:
      context: ../../
      dockerfile: docker/admin_integration/Dockerfile.monitor
    container_name: seaweed-monitor
    ports:
      - "9999:9999"
    environment:
      - MASTER_ADDRESS=master:9333
      - ADMIN_ADDRESS=admin:9900
      - FILER_ADDRESS=filer:8888
      - MONITOR_INTERVAL=10s
    depends_on:
      admin:
        condition: service_healthy
    networks:
      - seaweed_net
    volumes:
      - ./monitor-data:/monitor-data

volumes:
  master_data:
  volume1_data:
  volume2_data:
  volume3_data:
  volume4_data:
  volume5_data:
  volume6_data:
  admin_data:
  worker1_data:
  worker2_data:
  worker3_data:

networks:
  seaweed_net:
    driver: bridge
    ipam:
      config:
        - subnet: 172.20.0.0/16
@@ -0,0 +1,21 @@ docker/admin_integration/load-entrypoint.sh
#!/bin/sh

set -e

echo "Starting Load Generator..."
echo "Filer Address: $FILER_ADDRESS"
echo "Write Rate: $WRITE_RATE files/sec"
echo "Delete Rate: $DELETE_RATE files/sec"
echo "File Size Range: $FILE_SIZE_MIN - $FILE_SIZE_MAX"
echo "Test Duration: $TEST_DURATION seconds"

# Wait for filer to be ready
echo "Waiting for filer to be ready..."
until curl -f http://$FILER_ADDRESS/ > /dev/null 2>&1; do
  echo "Filer not ready, waiting..."
  sleep 5
done
echo "Filer is ready!"

# Start the load generator
exec ./load-generator
@@ -0,0 +1,352 @@ docker/admin_integration/load-generator.go
package main

import (
	"bytes"
	"crypto/rand"
	"fmt"
	"io"
	"log"
	"net/http"
	"os"
	"strconv"
	"strings"
	"sync"
	"time"
)

type LoadGenerator struct {
	filerAddr    string
	masterAddr   string
	writeRate    int
	deleteRate   int
	fileSizeMin  int64
	fileSizeMax  int64
	testDuration int
	collection   string

	// State tracking
	createdFiles []string
	mutex        sync.RWMutex
	stats        LoadStats
}

type LoadStats struct {
	FilesWritten  int64
	FilesDeleted  int64
	BytesWritten  int64
	Errors        int64
	StartTime     time.Time
	LastOperation time.Time
}

// parseSize converts size strings like "1MB", "5MB" to bytes
func parseSize(sizeStr string) int64 {
	sizeStr = strings.ToUpper(strings.TrimSpace(sizeStr))

	var multiplier int64 = 1
	if strings.HasSuffix(sizeStr, "KB") {
		multiplier = 1024
		sizeStr = strings.TrimSuffix(sizeStr, "KB")
	} else if strings.HasSuffix(sizeStr, "MB") {
		multiplier = 1024 * 1024
		sizeStr = strings.TrimSuffix(sizeStr, "MB")
	} else if strings.HasSuffix(sizeStr, "GB") {
		multiplier = 1024 * 1024 * 1024
		sizeStr = strings.TrimSuffix(sizeStr, "GB")
	}

	size, err := strconv.ParseInt(sizeStr, 10, 64)
	if err != nil {
		return 1024 * 1024 // Default to 1MB
	}

	return size * multiplier
}

// generateRandomData creates random data of the specified size
func (lg *LoadGenerator) generateRandomData(size int64) []byte {
	data := make([]byte, size)
	_, err := rand.Read(data)
	if err != nil {
		// Fall back to deterministic data
		for i := range data {
			data[i] = byte(i % 256)
		}
	}
	return data
}

// uploadFile uploads a file to SeaweedFS via the filer
func (lg *LoadGenerator) uploadFile(filename string, data []byte) error {
	url := fmt.Sprintf("http://%s/%s", lg.filerAddr, filename)
	if lg.collection != "" {
		url = fmt.Sprintf("http://%s/%s/%s", lg.filerAddr, lg.collection, filename)
	}

	req, err := http.NewRequest("POST", url, bytes.NewReader(data))
	if err != nil {
		return err
	}

	req.Header.Set("Content-Type", "application/octet-stream")

	client := &http.Client{Timeout: 30 * time.Second}
	resp, err := client.Do(req)
	if err != nil {
		return err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK && resp.StatusCode != http.StatusCreated {
		return fmt.Errorf("upload failed with status: %d", resp.StatusCode)
	}

	return nil
}

// deleteFile deletes a file from SeaweedFS via the filer
func (lg *LoadGenerator) deleteFile(filename string) error {
	url := fmt.Sprintf("http://%s/%s", lg.filerAddr, filename)
	if lg.collection != "" {
		url = fmt.Sprintf("http://%s/%s/%s", lg.filerAddr, lg.collection, filename)
	}

	req, err := http.NewRequest("DELETE", url, nil)
	if err != nil {
		return err
	}

	client := &http.Client{Timeout: 10 * time.Second}
	resp, err := client.Do(req)
	if err != nil {
		return err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK && resp.StatusCode != http.StatusNoContent && resp.StatusCode != http.StatusNotFound {
		return fmt.Errorf("delete failed with status: %d", resp.StatusCode)
	}

	return nil
}

// writeFiles continuously writes files at the specified rate
func (lg *LoadGenerator) writeFiles() {
	writeInterval := time.Second / time.Duration(lg.writeRate)
	ticker := time.NewTicker(writeInterval)
	defer ticker.Stop()

	fileCounter := 0

	for range ticker.C {
		fileCounter++

		// Random file size between min and max
		sizeDiff := lg.fileSizeMax - lg.fileSizeMin
		randomSize := lg.fileSizeMin
		if sizeDiff > 0 {
			randomSize += int64(time.Now().UnixNano()) % sizeDiff
		}

		// Generate filename
		filename := fmt.Sprintf("test-data/file-%d-%d.bin", time.Now().Unix(), fileCounter)

		// Generate random data
		data := lg.generateRandomData(randomSize)

		// Upload file
		err := lg.uploadFile(filename, data)
		if err != nil {
			log.Printf("Error uploading file %s: %v", filename, err)
			lg.stats.Errors++
		} else {
			lg.mutex.Lock()
			lg.createdFiles = append(lg.createdFiles, filename)
			lg.stats.FilesWritten++
			lg.stats.BytesWritten += randomSize
			lg.stats.LastOperation = time.Now()
			lg.mutex.Unlock()

			log.Printf("Uploaded file: %s (size: %d bytes, total files: %d)",
				filename, randomSize, lg.stats.FilesWritten)
		}
	}
}

// deleteFiles continuously deletes files at the specified rate
func (lg *LoadGenerator) deleteFiles() {
	deleteInterval := time.Second / time.Duration(lg.deleteRate)
	ticker := time.NewTicker(deleteInterval)
	defer ticker.Stop()

	for range ticker.C {
		lg.mutex.Lock()
		if len(lg.createdFiles) == 0 {
			lg.mutex.Unlock()
			continue
		}

		// Pick a random file to delete
		index := int(time.Now().UnixNano()) % len(lg.createdFiles)
		filename := lg.createdFiles[index]

		// Remove from slice
		lg.createdFiles = append(lg.createdFiles[:index], lg.createdFiles[index+1:]...)
		lg.mutex.Unlock()

		// Delete file
		err := lg.deleteFile(filename)
		if err != nil {
			log.Printf("Error deleting file %s: %v", filename, err)
			lg.stats.Errors++
		} else {
			lg.stats.FilesDeleted++
			lg.stats.LastOperation = time.Now()
			log.Printf("Deleted file: %s (remaining files: %d)", filename, len(lg.createdFiles))
		}
	}
}

// printStats periodically prints load generation statistics
func (lg *LoadGenerator) printStats() {
	ticker := time.NewTicker(30 * time.Second)
	defer ticker.Stop()

	for range ticker.C {
		uptime := time.Since(lg.stats.StartTime)
		writeRate := float64(lg.stats.FilesWritten) / uptime.Seconds()
		deleteRate := float64(lg.stats.FilesDeleted) / uptime.Seconds()

		lg.mutex.RLock()
		pendingFiles := len(lg.createdFiles)
		lg.mutex.RUnlock()

		log.Printf("STATS: Files written=%d, deleted=%d, pending=%d, errors=%d",
			lg.stats.FilesWritten, lg.stats.FilesDeleted, pendingFiles, lg.stats.Errors)
		log.Printf("RATES: Write=%.2f/sec, Delete=%.2f/sec, Data=%.2f MB written",
			writeRate, deleteRate, float64(lg.stats.BytesWritten)/(1024*1024))
	}
}

// checkClusterHealth periodically checks cluster status
func (lg *LoadGenerator) checkClusterHealth() {
	ticker := time.NewTicker(1 * time.Minute)
	defer ticker.Stop()

	for range ticker.C {
		// Check master status
		resp, err := http.Get(fmt.Sprintf("http://%s/cluster/status", lg.masterAddr))
		if err != nil {
			log.Printf("WARNING: Cannot reach master: %v", err)
			continue
		}

		body, err := io.ReadAll(resp.Body)
		resp.Body.Close()

		if err != nil {
			log.Printf("WARNING: Cannot read master response: %v", err)
			continue
		}

		if resp.StatusCode == http.StatusOK {
			log.Printf("Cluster health check: OK (response size: %d bytes)", len(body))
		} else {
			log.Printf("WARNING: Cluster health check failed with status: %d", resp.StatusCode)
		}
	}
}

func main() {
	filerAddr := os.Getenv("FILER_ADDRESS")
	if filerAddr == "" {
		filerAddr = "filer:8888"
	}

	masterAddr := os.Getenv("MASTER_ADDRESS")
	if masterAddr == "" {
		masterAddr = "master:9333"
	}

	writeRate, _ := strconv.Atoi(os.Getenv("WRITE_RATE"))
	if writeRate <= 0 {
		writeRate = 10
	}

	deleteRate, _ := strconv.Atoi(os.Getenv("DELETE_RATE"))
	if deleteRate <= 0 {
		deleteRate = 2
	}

	fileSizeMin := parseSize(os.Getenv("FILE_SIZE_MIN"))
	if fileSizeMin <= 0 {
		fileSizeMin = 1024 * 1024 // 1MB
	}

	fileSizeMax := parseSize(os.Getenv("FILE_SIZE_MAX"))
	if fileSizeMax <= fileSizeMin {
		fileSizeMax = 5 * 1024 * 1024 // 5MB
	}

	testDuration, _ := strconv.Atoi(os.Getenv("TEST_DURATION"))
	if testDuration <= 0 {
		testDuration = 3600 // 1 hour
	}

	collection := os.Getenv("COLLECTION")

	lg := &LoadGenerator{
		filerAddr:    filerAddr,
		masterAddr:   masterAddr,
		writeRate:    writeRate,
		deleteRate:   deleteRate,
		fileSizeMin:  fileSizeMin,
		fileSizeMax:  fileSizeMax,
		testDuration: testDuration,
		collection:   collection,
		createdFiles: make([]string, 0),
		stats: LoadStats{
			StartTime: time.Now(),
		},
	}

	log.Printf("Starting load generator...")
	log.Printf("Filer: %s", filerAddr)
	log.Printf("Master: %s", masterAddr)
	log.Printf("Write rate: %d files/sec", writeRate)
	log.Printf("Delete rate: %d files/sec", deleteRate)
	log.Printf("File size: %d - %d bytes", fileSizeMin, fileSizeMax)
	log.Printf("Test duration: %d seconds", testDuration)
	log.Printf("Collection: '%s'", collection)

	// Wait for the filer to be ready
	log.Println("Waiting for filer to be ready...")
	for {
		resp, err := http.Get(fmt.Sprintf("http://%s/", filerAddr))
		if err == nil && resp.StatusCode == http.StatusOK {
			resp.Body.Close()
			break
		}
		if resp != nil {
			resp.Body.Close()
		}
		log.Println("Filer not ready, waiting...")
		time.Sleep(5 * time.Second)
	}
	log.Println("Filer is ready!")

	// Start background goroutines
	go lg.writeFiles()
	go lg.deleteFiles()
	go lg.printStats()
	go lg.checkClusterHealth()

	// Run for the specified duration
	log.Printf("Load test will run for %d seconds...", testDuration)
	time.Sleep(time.Duration(testDuration) * time.Second)

	log.Println("Load test completed!")
	log.Printf("Final stats: Files written=%d, deleted=%d, errors=%d, total data=%.2f MB",
		lg.stats.FilesWritten, lg.stats.FilesDeleted, lg.stats.Errors,
		float64(lg.stats.BytesWritten)/(1024*1024))
}
@ -0,0 +1,38 @@
#!/bin/sh

set -e

echo "Starting Cluster Monitor..."
echo "Master Address: $MASTER_ADDRESS"
echo "Admin Address: $ADMIN_ADDRESS"
echo "Filer Address: $FILER_ADDRESS"
echo "Monitor Interval: $MONITOR_INTERVAL"

# Wait for core services to be ready
echo "Waiting for core services to be ready..."

echo "Waiting for master..."
until curl -f http://$MASTER_ADDRESS/cluster/status > /dev/null 2>&1; do
    echo "Master not ready, waiting..."
    sleep 5
done
echo "Master is ready!"

echo "Waiting for admin..."
until curl -f http://$ADMIN_ADDRESS/health > /dev/null 2>&1; do
    echo "Admin not ready, waiting..."
    sleep 5
done
echo "Admin is ready!"

echo "Waiting for filer..."
until curl -f http://$FILER_ADDRESS/ > /dev/null 2>&1; do
    echo "Filer not ready, waiting..."
    sleep 5
done
echo "Filer is ready!"

echo "All services ready! Starting monitor..."

# Start the monitor
exec ./monitor
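Note that these until loops wait indefinitely. If a bounded wait is preferred, the same gate can be capped with a retry limit; a sketch (not part of the shipped script, busybox-compatible):

for i in $(seq 1 60); do
    curl -f "http://$MASTER_ADDRESS/cluster/status" > /dev/null 2>&1 && break
    echo "Master not ready ($i/60), waiting..."
    sleep 5
done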
@ -0,0 +1,366 @@
package main

import (
	"encoding/json"
	"fmt"
	"io"
	"log"
	"net/http"
	"os"
	"time"
)

type Monitor struct {
	masterAddr string
	adminAddr  string
	filerAddr  string
	interval   time.Duration
	startTime  time.Time
	stats      MonitorStats
}

// MonitorStats is written only by the monitoring loop; the HTTP handlers
// read it best-effort without synchronization.
type MonitorStats struct {
	TotalChecks     int64
	MasterHealthy   int64
	AdminHealthy    int64
	FilerHealthy    int64
	VolumeCount     int64
	LastVolumeCheck time.Time
	ECTasksDetected int64
	WorkersActive   int64
	LastWorkerCheck time.Time
}

type ClusterStatus struct {
	IsLeader bool     `json:"IsLeader"`
	Leader   string   `json:"Leader"`
	Peers    []string `json:"Peers"`
}

type VolumeStatus struct {
	Volumes []VolumeInfo `json:"Volumes"`
}

type VolumeInfo struct {
	Id               uint32 `json:"Id"`
	Size             uint64 `json:"Size"`
	Collection       string `json:"Collection"`
	FileCount        int64  `json:"FileCount"`
	DeleteCount      int64  `json:"DeleteCount"`
	DeletedByteCount uint64 `json:"DeletedByteCount"`
	ReadOnly         bool   `json:"ReadOnly"`
	CompactRevision  uint32 `json:"CompactRevision"`
	Version          uint32 `json:"Version"`
}

type AdminStatus struct {
	Status  string `json:"status"`
	Uptime  string `json:"uptime"`
	Tasks   int    `json:"tasks"`
	Workers int    `json:"workers"`
}

// checkMasterHealth checks the master server health
func (m *Monitor) checkMasterHealth() bool {
	resp, err := http.Get(fmt.Sprintf("http://%s/cluster/status", m.masterAddr))
	if err != nil {
		log.Printf("ERROR: Cannot reach master %s: %v", m.masterAddr, err)
		return false
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		log.Printf("ERROR: Master returned status %d", resp.StatusCode)
		return false
	}

	var status ClusterStatus
	body, err := io.ReadAll(resp.Body)
	if err != nil {
		log.Printf("ERROR: Cannot read master response: %v", err)
		return false
	}

	err = json.Unmarshal(body, &status)
	if err != nil {
		log.Printf("WARNING: Cannot parse master status: %v", err)
		// Still consider it healthy if we got a response
		return true
	}

	log.Printf("Master status: Leader=%s, IsLeader=%t, Peers=%d",
		status.Leader, status.IsLeader, len(status.Peers))

	m.stats.MasterHealthy++
	return true
}

// checkAdminHealth checks the admin server health
func (m *Monitor) checkAdminHealth() bool {
	resp, err := http.Get(fmt.Sprintf("http://%s/health", m.adminAddr))
	if err != nil {
		log.Printf("ERROR: Cannot reach admin %s: %v", m.adminAddr, err)
		return false
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		log.Printf("ERROR: Admin returned status %d", resp.StatusCode)
		return false
	}

	var status AdminStatus
	body, err := io.ReadAll(resp.Body)
	if err != nil {
		log.Printf("ERROR: Cannot read admin response: %v", err)
		return false
	}

	err = json.Unmarshal(body, &status)
	if err != nil {
		log.Printf("WARNING: Cannot parse admin status: %v", err)
		return true
	}

	log.Printf("Admin status: %s, Uptime=%s, Tasks=%d, Workers=%d",
		status.Status, status.Uptime, status.Tasks, status.Workers)

	m.stats.AdminHealthy++
	m.stats.ECTasksDetected += int64(status.Tasks)
	m.stats.WorkersActive = int64(status.Workers)
	m.stats.LastWorkerCheck = time.Now()

	return true
}

// checkFilerHealth checks the filer health
func (m *Monitor) checkFilerHealth() bool {
	resp, err := http.Get(fmt.Sprintf("http://%s/", m.filerAddr))
	if err != nil {
		log.Printf("ERROR: Cannot reach filer %s: %v", m.filerAddr, err)
		return false
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		log.Printf("ERROR: Filer returned status %d", resp.StatusCode)
		return false
	}

	m.stats.FilerHealthy++
	return true
}

// checkVolumeStatus checks volume information from master
func (m *Monitor) checkVolumeStatus() {
	resp, err := http.Get(fmt.Sprintf("http://%s/vol/status", m.masterAddr))
	if err != nil {
		log.Printf("ERROR: Cannot get volume status: %v", err)
		return
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		log.Printf("ERROR: Volume status returned status %d", resp.StatusCode)
		return
	}

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		log.Printf("ERROR: Cannot read volume status: %v", err)
		return
	}

	var volumeStatus VolumeStatus
	err = json.Unmarshal(body, &volumeStatus)
	if err != nil {
		log.Printf("WARNING: Cannot parse volume status: %v", err)
		return
	}

	m.stats.VolumeCount = int64(len(volumeStatus.Volumes))
	m.stats.LastVolumeCheck = time.Now()

	// Analyze volumes
	var readOnlyCount, fullVolumeCount, ecCandidates int
	var totalSize, totalFiles uint64

	for _, vol := range volumeStatus.Volumes {
		totalSize += vol.Size
		totalFiles += uint64(vol.FileCount)

		if vol.ReadOnly {
			readOnlyCount++
		}

		// Volume is close to full (>40MB against the 50MB limit)
		if vol.Size > 40*1024*1024 {
			fullVolumeCount++
			if !vol.ReadOnly {
				ecCandidates++
			}
		}
	}

	log.Printf("Volume analysis: Total=%d, ReadOnly=%d, Full=%d, EC_Candidates=%d",
		len(volumeStatus.Volumes), readOnlyCount, fullVolumeCount, ecCandidates)
	log.Printf("Storage stats: Total_Size=%.2fMB, Total_Files=%d",
		float64(totalSize)/(1024*1024), totalFiles)

	if ecCandidates > 0 {
		log.Printf("⚠️ DETECTED %d volumes that should be EC'd!", ecCandidates)
	}
}

// healthHandler provides a health endpoint for the monitor itself
func (m *Monitor) healthHandler(w http.ResponseWriter, r *http.Request) {
	w.Header().Set("Content-Type", "application/json")
	json.NewEncoder(w).Encode(map[string]interface{}{
		"status":     "healthy",
		"uptime":     time.Since(m.startTime).String(),
		"checks":     m.stats.TotalChecks,
		"last_check": m.stats.LastVolumeCheck.Format(time.RFC3339),
	})
}

// statusHandler provides detailed monitoring status
func (m *Monitor) statusHandler(w http.ResponseWriter, r *http.Request) {
	w.Header().Set("Content-Type", "application/json")
	json.NewEncoder(w).Encode(map[string]interface{}{
		"monitor": map[string]interface{}{
			"uptime":      time.Since(m.startTime).String(),
			"master_addr": m.masterAddr,
			"admin_addr":  m.adminAddr,
			"filer_addr":  m.filerAddr,
			"interval":    m.interval.String(),
		},
		"stats": m.stats,
		"health": map[string]interface{}{
			"master_healthy": m.stats.MasterHealthy > 0 && time.Since(m.stats.LastVolumeCheck) < 2*m.interval,
			"admin_healthy":  m.stats.AdminHealthy > 0 && time.Since(m.stats.LastWorkerCheck) < 2*m.interval,
			"filer_healthy":  m.stats.FilerHealthy > 0,
		},
	})
}

// runMonitoring runs the main monitoring loop
func (m *Monitor) runMonitoring() {
	ticker := time.NewTicker(m.interval)
	defer ticker.Stop()

	log.Printf("Starting monitoring loop every %v", m.interval)

	for {
		m.stats.TotalChecks++

		log.Printf("=== Monitoring Check #%d ===", m.stats.TotalChecks)

		// Check master health
		if m.checkMasterHealth() {
			// If master is healthy, check volumes
			m.checkVolumeStatus()
		}

		// Check admin health
		m.checkAdminHealth()

		// Check filer health
		m.checkFilerHealth()

		// Print summary
		log.Printf("Health Summary: Master=%t, Admin=%t, Filer=%t, Volumes=%d, Workers=%d",
			m.stats.MasterHealthy > 0,
			m.stats.AdminHealthy > 0,
			m.stats.FilerHealthy > 0,
			m.stats.VolumeCount,
			m.stats.WorkersActive)

		log.Printf("=== End Check #%d ===", m.stats.TotalChecks)

		<-ticker.C
	}
}

func main() {
	masterAddr := os.Getenv("MASTER_ADDRESS")
	if masterAddr == "" {
		masterAddr = "master:9333"
	}

	adminAddr := os.Getenv("ADMIN_ADDRESS")
	if adminAddr == "" {
		adminAddr = "admin:9900"
	}

	filerAddr := os.Getenv("FILER_ADDRESS")
	if filerAddr == "" {
		filerAddr = "filer:8888"
	}

	intervalStr := os.Getenv("MONITOR_INTERVAL")
	interval, err := time.ParseDuration(intervalStr)
	if err != nil {
		interval = 10 * time.Second
	}

	monitor := &Monitor{
		masterAddr: masterAddr,
		adminAddr:  adminAddr,
		filerAddr:  filerAddr,
		interval:   interval,
		startTime:  time.Now(),
		stats:      MonitorStats{},
	}

	log.Printf("Starting SeaweedFS Cluster Monitor")
	log.Printf("Master: %s", masterAddr)
	log.Printf("Admin: %s", adminAddr)
	log.Printf("Filer: %s", filerAddr)
	log.Printf("Interval: %v", interval)

	// Setup HTTP endpoints
	http.HandleFunc("/health", monitor.healthHandler)
	http.HandleFunc("/status", monitor.statusHandler)

	// Start HTTP server in background
	go func() {
		log.Println("Monitor HTTP server starting on :9999")
		if err := http.ListenAndServe(":9999", nil); err != nil {
			log.Printf("Monitor HTTP server error: %v", err)
		}
	}()

	// Wait for services to be ready
	log.Println("Waiting for services to be ready...")
	for {
		masterOK := false
		adminOK := false
		filerOK := false

		if resp, err := http.Get(fmt.Sprintf("http://%s/cluster/status", masterAddr)); err == nil {
			if resp.StatusCode == http.StatusOK {
				masterOK = true
			}
			// Close the body in every case so the wait loop does not leak connections.
			resp.Body.Close()
		}

		if resp, err := http.Get(fmt.Sprintf("http://%s/health", adminAddr)); err == nil {
			if resp.StatusCode == http.StatusOK {
				adminOK = true
			}
			resp.Body.Close()
		}

		if resp, err := http.Get(fmt.Sprintf("http://%s/", filerAddr)); err == nil {
			if resp.StatusCode == http.StatusOK {
				filerOK = true
			}
			resp.Body.Close()
		}

		if masterOK && adminOK && filerOK {
			log.Println("All services are ready!")
			break
		}

		log.Printf("Services ready: Master=%t, Admin=%t, Filer=%t", masterOK, adminOK, filerOK)
		time.Sleep(5 * time.Second)
	}

	// Start monitoring
	monitor.runMonitoring()
}
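With the monitor running, both of its endpoints can be spot-checked from the host (port 9999 as served above; jq is assumed only for pretty-printing):

curl -s http://localhost:9999/health
curl -s http://localhost:9999/status | jq .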
@ -0,0 +1,106 @@
#!/bin/bash

set -e

# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color

echo -e "${BLUE}🧪 SeaweedFS EC Worker Testing Environment${NC}"
echo -e "${BLUE}===========================================${NC}"

# Check if docker-compose is available
if ! command -v docker-compose &> /dev/null; then
    echo -e "${RED}❌ docker-compose is required but not installed${NC}"
    exit 1
fi

# Create necessary directories
echo -e "${YELLOW}📁 Creating required directories...${NC}"
mkdir -p monitor-data admin-config

# Make scripts executable
echo -e "${YELLOW}🔧 Making scripts executable...${NC}"
chmod +x *.sh

# Stop any existing containers
echo -e "${YELLOW}🛑 Stopping any existing containers...${NC}"
docker-compose -f docker-compose-ec-test.yml down -v 2>/dev/null || true

# Build and start the environment
echo -e "${GREEN}🚀 Starting SeaweedFS EC testing environment...${NC}"
echo -e "${BLUE}This will start:${NC}"
echo -e " • 1 Master server (port 9333)"
echo -e " • 6 Volume servers (ports 8080-8085) with 50MB volume limit"
echo -e " • 1 Filer (port 8888)"
echo -e " • 1 Admin server (port 9900)"
echo -e " • 3 EC Workers"
echo -e " • 1 Load generator (continuous read/write)"
echo -e " • 1 Monitor (port 9999)"
echo ""

docker-compose -f docker-compose-ec-test.yml up --build -d

echo -e "${GREEN}✅ Environment started successfully!${NC}"
echo ""
echo -e "${BLUE}📊 Monitoring URLs:${NC}"
echo -e " • Master UI: http://localhost:9333"
echo -e " • Filer: http://localhost:8888"
echo -e " • Admin Server: http://localhost:9900/status"
echo -e " • Monitor: http://localhost:9999/status"
echo ""
echo -e "${BLUE}📈 Volume Servers:${NC}"
echo -e " • Volume1: http://localhost:8080/status"
echo -e " • Volume2: http://localhost:8081/status"
echo -e " • Volume3: http://localhost:8082/status"
echo -e " • Volume4: http://localhost:8083/status"
echo -e " • Volume5: http://localhost:8084/status"
echo -e " • Volume6: http://localhost:8085/status"
echo ""

echo -e "${YELLOW}⏳ Waiting for services to be ready...${NC}"
sleep 10

# Check service health
echo -e "${BLUE}🔍 Checking service health...${NC}"

check_service() {
    local name=$1
    local url=$2

    if curl -s "$url" > /dev/null 2>&1; then
        echo -e " ✅ $name: ${GREEN}Healthy${NC}"
        return 0
    else
        echo -e " ❌ $name: ${RED}Not responding${NC}"
        return 1
    fi
}

# "|| true" keeps set -e from aborting the script when a service is still starting
check_service "Master" "http://localhost:9333/cluster/status" || true
check_service "Filer" "http://localhost:8888/" || true
check_service "Admin" "http://localhost:9900/health" || true
check_service "Monitor" "http://localhost:9999/health" || true

echo ""
echo -e "${GREEN}🎯 Test Environment is Ready!${NC}"
echo ""
echo -e "${BLUE}What's happening:${NC}"
echo -e " 1. 📝 Load generator continuously writes 1-5MB files at 10 files/sec"
echo -e " 2. 🗑️ Load generator deletes files at 2 files/sec"
echo -e " 3. 📊 Volumes fill up to the 50MB limit and trigger EC conversion"
echo -e " 4. 🏭 Admin server detects volumes needing EC and assigns them to workers"
echo -e " 5. ⚡ Workers perform comprehensive EC (copy→encode→distribute)"
echo -e " 6. 📈 Monitor tracks all activity and volume states"
echo ""
echo -e "${YELLOW}📋 Useful Commands:${NC}"
echo -e " • View logs: docker-compose -f docker-compose-ec-test.yml logs -f [service]"
echo -e " • Check worker status: docker-compose -f docker-compose-ec-test.yml logs worker1"
echo -e " • Stop environment: docker-compose -f docker-compose-ec-test.yml down -v"
echo -e " • Monitor logs: docker-compose -f docker-compose-ec-test.yml logs -f monitor"
echo ""
echo -e "${GREEN}🔥 The test will run for 1 hour by default${NC}"
echo -e "${BLUE}Monitor progress at: http://localhost:9999/status${NC}"
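While the test runs, the master's /vol/status endpoint (the same one the monitor polls) is handy for watching volumes approach the 50MB limit, e.g.:

curl -s http://localhost:9333/vol/status | jq '.Volumes[] | {Id, Size, ReadOnly}'

(jq and the exact JSON shape are assumptions here; this mirrors the VolumeStatus struct the monitor parses, and the raw JSON can of course be read directly.)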
@ -0,0 +1,230 @@
#!/bin/sh

set -e

echo "Starting SeaweedFS EC Worker..."
echo "Worker ID: $WORKER_ID"
echo "Admin Address: $ADMIN_ADDRESS"
echo "Capabilities: $CAPABILITIES"

# Wait for admin server to be ready
echo "Waiting for admin server to be ready..."
until curl -f http://$ADMIN_ADDRESS/health > /dev/null 2>&1; do
    echo "Admin server not ready, waiting..."
    sleep 5
done
echo "Admin server is ready!"

# Create worker simulation
cat > /tmp/worker.go << 'EOF'
package main

import (
	"encoding/json"
	"fmt"
	"log"
	"net/http"
	"os"
	"strings"
	"sync"
	"time"
)

type Worker struct {
	id            string
	adminAddr     string
	address       string
	capabilities  []string
	maxConcurrent int
	workDir       string
	startTime     time.Time
	mu            sync.Mutex // guards activeTasks and the Task fields it holds
	activeTasks   map[string]*Task
}

type Task struct {
	ID       string    `json:"id"`
	Type     string    `json:"type"`
	VolumeID int       `json:"volume_id"`
	Status   string    `json:"status"`
	Progress float64   `json:"progress"`
	Started  time.Time `json:"started"`
}

func (w *Worker) healthHandler(res http.ResponseWriter, req *http.Request) {
	w.mu.Lock()
	active := len(w.activeTasks)
	w.mu.Unlock()

	res.Header().Set("Content-Type", "application/json")
	json.NewEncoder(res).Encode(map[string]interface{}{
		"status":       "healthy",
		"worker_id":    w.id,
		"uptime":       time.Since(w.startTime).String(),
		"active_tasks": active,
		"capabilities": w.capabilities,
	})
}

func (w *Worker) statusHandler(res http.ResponseWriter, req *http.Request) {
	res.Header().Set("Content-Type", "application/json")
	// Hold the lock while encoding so the task map is not mutated mid-marshal.
	w.mu.Lock()
	defer w.mu.Unlock()
	json.NewEncoder(res).Encode(map[string]interface{}{
		"worker_id":      w.id,
		"admin_addr":     w.adminAddr,
		"capabilities":   w.capabilities,
		"max_concurrent": w.maxConcurrent,
		"active_tasks":   w.activeTasks,
		"uptime":         time.Since(w.startTime).String(),
	})
}

func (w *Worker) simulateECTask(taskID string, volumeID int) {
	log.Printf("Starting EC task %s for volume %d", taskID, volumeID)

	task := &Task{
		ID:       taskID,
		Type:     "erasure_coding",
		VolumeID: volumeID,
		Status:   "running",
		Progress: 0.0,
		Started:  time.Now(),
	}

	w.mu.Lock()
	w.activeTasks[taskID] = task
	w.mu.Unlock()

	// Simulate EC process phases
	phases := []struct {
		progress float64
		phase    string
		duration time.Duration
	}{
		{5.0, "Copying volume data locally", 10 * time.Second},
		{25.0, "Marking volume read-only", 2 * time.Second},
		{60.0, "Performing local EC encoding", 30 * time.Second},
		{70.0, "Calculating optimal shard placement", 5 * time.Second},
		{90.0, "Distributing shards to servers", 20 * time.Second},
		{100.0, "Verification and cleanup", 3 * time.Second},
	}

	go func() {
		for _, phase := range phases {
			w.mu.Lock()
			running := task.Status == "running"
			w.mu.Unlock()
			if !running {
				break
			}

			time.Sleep(phase.duration)
			w.mu.Lock()
			task.Progress = phase.progress
			w.mu.Unlock()
			log.Printf("Task %s: %.1f%% - %s", taskID, phase.progress, phase.phase)
		}

		w.mu.Lock()
		if task.Status == "running" {
			task.Status = "completed"
			task.Progress = 100.0
			log.Printf("Task %s completed successfully", taskID)
		}
		w.mu.Unlock()

		// Remove from active tasks after completion
		time.Sleep(5 * time.Second)
		w.mu.Lock()
		delete(w.activeTasks, taskID)
		w.mu.Unlock()
	}()
}

func (w *Worker) registerWithAdmin() {
	ticker := time.NewTicker(30 * time.Second)
	go func() {
		for {
			// Register/heartbeat with admin server
			log.Printf("Sending heartbeat to admin server...")

			w.mu.Lock()
			active := len(w.activeTasks)
			w.mu.Unlock()

			data := map[string]interface{}{
				"worker_id":      w.id,
				"address":        w.address,
				"capabilities":   w.capabilities,
				"max_concurrent": w.maxConcurrent,
				"active_tasks":   active,
				"status":         "active",
			}

			jsonData, _ := json.Marshal(data)

			// In a real implementation, this would be a proper gRPC call
			resp, err := http.Post(
				fmt.Sprintf("http://%s/register-worker", w.adminAddr),
				"application/json",
				strings.NewReader(string(jsonData)),
			)
			if err != nil {
				log.Printf("Failed to register with admin: %v", err)
			} else {
				resp.Body.Close()
				log.Printf("Successfully sent heartbeat to admin")
			}

			// Simulate requesting new tasks
			if active < w.maxConcurrent {
				// In a real implementation, the worker would request tasks from the admin.
				// For the simulation, create a task periodically once warmed up.
				if active == 0 && time.Since(w.startTime) > 1*time.Minute {
					taskID := fmt.Sprintf("%s-task-%d", w.id, time.Now().Unix())
					volumeID := 2000 + int(time.Now().Unix()%1000)
					w.simulateECTask(taskID, volumeID)
				}
			}

			<-ticker.C
		}
	}()
}

func main() {
	workerID := os.Getenv("WORKER_ID")
	if workerID == "" {
		workerID = "worker-1"
	}

	adminAddr := os.Getenv("ADMIN_ADDRESS")
	if adminAddr == "" {
		adminAddr = "admin:9900"
	}

	address := os.Getenv("WORKER_ADDRESS")
	if address == "" {
		address = "worker:9001"
	}

	capabilities := strings.Split(os.Getenv("CAPABILITIES"), ",")
	if len(capabilities) == 0 || capabilities[0] == "" {
		capabilities = []string{"erasure_coding"}
	}

	worker := &Worker{
		id:            workerID,
		adminAddr:     adminAddr,
		address:       address,
		capabilities:  capabilities,
		maxConcurrent: 2,
		workDir:       "/work",
		startTime:     time.Now(),
		activeTasks:   make(map[string]*Task),
	}

	http.HandleFunc("/health", worker.healthHandler)
	http.HandleFunc("/status", worker.statusHandler)

	// Start registration and heartbeat
	worker.registerWithAdmin()

	log.Printf("Worker %s starting on address %s", workerID, address)
	log.Printf("Admin address: %s", adminAddr)
	log.Printf("Capabilities: %v", capabilities)

	port := ":9001"
	if strings.Contains(address, ":") {
		parts := strings.Split(address, ":")
		port = ":" + parts[1]
	}

	if err := http.ListenAndServe(port, nil); err != nil {
		log.Fatal("Worker failed to start:", err)
	}
}
EOF

# Compile and run the worker (keep the init idempotent in case the container restarts)
cd /tmp
[ -f go.mod ] || go mod init worker
go run worker.go
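Each worker serves the same health/status pair on its own port (9001 by default), so once it is up a quick check is possible; a sketch, assuming curl is present in the worker image:

docker-compose -f docker-compose-ec-test.yml exec worker1 curl -s http://localhost:9001/health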