23 changed files with 450 additions and 3313 deletions
-
36docker/admin_integration/Dockerfile.admin
-
44docker/admin_integration/Dockerfile.load
-
18docker/admin_integration/Dockerfile.local
-
48docker/admin_integration/Dockerfile.monitor
-
35docker/admin_integration/Dockerfile.worker
-
45docker/admin_integration/EC-TESTING-README.md
-
428docker/admin_integration/Makefile
-
521docker/admin_integration/admin-entrypoint.sh
-
73docker/admin_integration/admin-grpc-entrypoint.sh
-
663docker/admin_integration/admin_grpc_server.go
-
423docker/admin_integration/docker-compose-ec-test.yml
-
21docker/admin_integration/load-entrypoint.sh
-
375docker/admin_integration/load-generator.go
-
38docker/admin_integration/monitor-entrypoint.sh
-
366docker/admin_integration/monitor.go
-
106docker/admin_integration/run-ec-test.sh
-
73docker/admin_integration/test-integration.sh
-
230docker/admin_integration/worker-entrypoint.sh
-
67docker/admin_integration/worker-grpc-entrypoint.sh
-
55weed/admin/dash/admin_server.go
-
20weed/admin/handlers/maintenance_handlers.go
-
11weed/worker/ec_worker.go
-
67weed/worker/main.go
@ -1,36 +0,0 @@ |
|||
# Final stage |
|||
FROM alpine:latest |
|||
|
|||
# Install dependencies including Go for the entrypoint script |
|||
RUN apk --no-cache add curl ca-certificates go |
|||
|
|||
WORKDIR /root/ |
|||
|
|||
# Copy gRPC admin files |
|||
COPY ./docker/admin_integration/admin-grpc-entrypoint.sh /entrypoint.sh |
|||
COPY ./docker/admin_integration/admin_grpc_server.go /admin_grpc_server.go |
|||
COPY ./weed/pb/worker.proto /worker.proto |
|||
RUN chmod +x /entrypoint.sh |
|||
|
|||
# Create directories |
|||
RUN mkdir -p /data /config /work |
|||
|
|||
# Expose admin ports (HTTP and gRPC) |
|||
EXPOSE 9900 9901 |
|||
|
|||
# Set environment variables |
|||
ENV MASTER_ADDRESS="master:9333" |
|||
ENV ADMIN_PORT="9900" |
|||
ENV GRPC_PORT="9901" |
|||
ENV SCAN_INTERVAL="30s" |
|||
ENV WORKER_TIMEOUT="5m" |
|||
ENV TASK_TIMEOUT="30m" |
|||
ENV MAX_RETRIES="3" |
|||
ENV MAX_CONCURRENT_TASKS="5" |
|||
|
|||
# Health check |
|||
HEALTHCHECK --interval=15s --timeout=5s --start-period=30s --retries=3 \ |
|||
CMD curl -f http://localhost:9900/health || exit 1 |
|||
|
|||
# Start admin server |
|||
ENTRYPOINT ["/entrypoint.sh"] |
|||
@ -1,44 +0,0 @@ |
|||
FROM golang:1.24-alpine AS builder |
|||
|
|||
# Install dependencies |
|||
RUN apk add --no-cache git build-base |
|||
|
|||
# Set working directory |
|||
WORKDIR /app |
|||
|
|||
# Copy and create load generator |
|||
COPY ./docker/admin_integration/load-generator.go . |
|||
COPY go.mod go.sum ./ |
|||
RUN go mod download |
|||
RUN CGO_ENABLED=0 GOOS=linux go build -a -installsuffix cgo -o load-generator load-generator.go |
|||
|
|||
# Final stage |
|||
FROM alpine:latest |
|||
|
|||
# Install dependencies |
|||
RUN apk --no-cache add curl ca-certificates openssl |
|||
|
|||
WORKDIR /root/ |
|||
|
|||
# Copy the binary |
|||
COPY --from=builder /app/load-generator . |
|||
|
|||
# Copy load generator script |
|||
COPY ./docker/admin_integration/load-entrypoint.sh /entrypoint.sh |
|||
RUN chmod +x /entrypoint.sh |
|||
|
|||
# Create directories for test data |
|||
RUN mkdir -p /test-data /temp |
|||
|
|||
# Set environment variables |
|||
ENV FILER_ADDRESS="filer:8888" |
|||
ENV MASTER_ADDRESS="master:9333" |
|||
ENV WRITE_RATE="10" |
|||
ENV DELETE_RATE="2" |
|||
ENV FILE_SIZE_MIN="1MB" |
|||
ENV FILE_SIZE_MAX="5MB" |
|||
ENV TEST_DURATION="3600" |
|||
ENV COLLECTION="" |
|||
|
|||
# Start load generator |
|||
ENTRYPOINT ["/entrypoint.sh"] |
|||
@ -0,0 +1,18 @@ |
|||
FROM alpine:latest |
|||
|
|||
# Install required packages |
|||
RUN apk add --no-cache \ |
|||
ca-certificates \ |
|||
fuse \ |
|||
curl \ |
|||
jq |
|||
|
|||
# Copy our locally built binary |
|||
COPY weed-local /usr/bin/weed |
|||
RUN chmod +x /usr/bin/weed |
|||
|
|||
# Create working directory |
|||
WORKDIR /data |
|||
|
|||
# Default command |
|||
ENTRYPOINT ["/usr/bin/weed"] |
|||
@ -1,48 +0,0 @@ |
|||
FROM golang:1.24-alpine AS builder |
|||
|
|||
# Install dependencies |
|||
RUN apk add --no-cache git build-base |
|||
|
|||
# Set working directory |
|||
WORKDIR /app |
|||
|
|||
# Copy and create monitor |
|||
COPY ./docker/admin_integration/monitor.go . |
|||
COPY go.mod go.sum ./ |
|||
RUN go mod download |
|||
RUN CGO_ENABLED=0 GOOS=linux go build -a -installsuffix cgo -o monitor monitor.go |
|||
|
|||
# Final stage |
|||
FROM alpine:latest |
|||
|
|||
# Install dependencies |
|||
RUN apk --no-cache add curl ca-certificates jq |
|||
|
|||
WORKDIR /root/ |
|||
|
|||
# Copy the binary |
|||
COPY --from=builder /app/monitor . |
|||
|
|||
# Copy monitor scripts |
|||
COPY ./docker/admin_integration/monitor-entrypoint.sh /entrypoint.sh |
|||
RUN chmod +x /entrypoint.sh |
|||
|
|||
# Create monitoring directories |
|||
RUN mkdir -p /monitor-data /logs |
|||
|
|||
# Expose monitor port |
|||
EXPOSE 9999 |
|||
|
|||
# Set environment variables |
|||
ENV MASTER_ADDRESS="master:9333" |
|||
ENV ADMIN_ADDRESS="admin:9900" |
|||
ENV FILER_ADDRESS="filer:8888" |
|||
ENV MONITOR_INTERVAL="10s" |
|||
ENV LOG_LEVEL="info" |
|||
|
|||
# Health check |
|||
HEALTHCHECK --interval=30s --timeout=5s --start-period=30s --retries=3 \ |
|||
CMD curl -f http://localhost:9999/health || exit 1 |
|||
|
|||
# Start monitor |
|||
ENTRYPOINT ["/entrypoint.sh"] |
|||
@ -1,35 +0,0 @@ |
|||
# Final stage |
|||
FROM alpine:latest |
|||
|
|||
# Install dependencies including Go for the entrypoint script |
|||
RUN apk --no-cache add curl ca-certificates go |
|||
|
|||
WORKDIR /root/ |
|||
|
|||
# Copy gRPC worker files |
|||
COPY ./docker/admin_integration/worker-grpc-entrypoint.sh /entrypoint.sh |
|||
COPY ./docker/admin_integration/worker_grpc_client.go /worker_grpc_client.go |
|||
COPY ./weed/pb/worker.proto /worker.proto |
|||
RUN chmod +x /entrypoint.sh |
|||
|
|||
# Create working directories |
|||
RUN mkdir -p /work /tmp/ec_work |
|||
|
|||
# Expose worker port |
|||
EXPOSE 9001 |
|||
|
|||
# Set environment variables |
|||
ENV ADMIN_GRPC_ADDRESS="admin:9901" |
|||
ENV WORKER_ID="worker-1" |
|||
ENV WORKER_ADDRESS="worker:9001" |
|||
ENV CAPABILITIES="erasure_coding" |
|||
ENV MAX_CONCURRENT="2" |
|||
ENV WORK_DIR="/work" |
|||
ENV HEARTBEAT_INTERVAL="10s" |
|||
|
|||
# Health check |
|||
HEALTHCHECK --interval=15s --timeout=5s --start-period=30s --retries=3 \ |
|||
CMD curl -f http://localhost:9001/health || exit 1 |
|||
|
|||
# Start worker |
|||
ENTRYPOINT ["/entrypoint.sh"] |
|||
@ -1,301 +1,195 @@ |
|||
# SeaweedFS EC Worker Testing Environment Makefile
|
|||
# Usage: make <target>
|
|||
# SeaweedFS Admin Integration Test Makefile
|
|||
# Tests the admin server and worker functionality using official weed commands
|
|||
|
|||
.PHONY: help start stop clean logs status monitor health up down restart scale docs test |
|||
|
|||
# Default target
|
|||
.PHONY: help start stop restart logs clean status test admin-ui worker-logs master-logs admin-logs |
|||
.DEFAULT_GOAL := help |
|||
|
|||
# Docker compose file
|
|||
COMPOSE_FILE := docker-compose-ec-test.yml |
|||
|
|||
# Color codes for output
|
|||
GREEN := \033[32m |
|||
YELLOW := \033[33m |
|||
BLUE := \033[34m |
|||
RED := \033[31m |
|||
NC := \033[0m # No Color |
|||
PROJECT_NAME := admin_integration |
|||
|
|||
help: ## Show this help message
|
|||
@echo "$(BLUE)🧪 SeaweedFS EC Worker Testing Environment$(NC)" |
|||
@echo "$(BLUE)===========================================$(NC)" |
|||
@echo "SeaweedFS Admin Integration Test" |
|||
@echo "================================" |
|||
@echo "Tests admin server task distribution to workers using official weed commands" |
|||
@echo "" |
|||
@echo "Available targets:" |
|||
@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf " %-15s %s\n", $$1, $$2}' |
|||
|
|||
start: ## Start the complete SeaweedFS cluster with admin and workers
|
|||
@echo "🚀 Starting SeaweedFS cluster with admin and workers..." |
|||
@docker-compose -f $(COMPOSE_FILE) up -d |
|||
@echo "✅ Cluster started!" |
|||
@echo "" |
|||
@echo "📊 Access points:" |
|||
@echo " • Admin UI: http://localhost:23646/" |
|||
@echo " • Master UI: http://localhost:9333/" |
|||
@echo " • Filer: http://localhost:8888/" |
|||
@echo "" |
|||
@echo "📈 Services starting up..." |
|||
@echo " • Master server: ✓" |
|||
@echo " • Volume servers: Starting (6 servers)..." |
|||
@echo " • Filer: Starting..." |
|||
@echo " • Admin server: Starting..." |
|||
@echo " • Workers: Starting (3 workers)..." |
|||
@echo "" |
|||
@echo "⏳ Use 'make status' to check startup progress" |
|||
@echo "💡 Use 'make logs' to watch the startup process" |
|||
|
|||
start-staged: ## Start services in proper order with delays
|
|||
@echo "🚀 Starting SeaweedFS cluster in stages..." |
|||
@echo "" |
|||
@echo "Stage 1: Starting Master server..." |
|||
@docker-compose -f $(COMPOSE_FILE) up -d master |
|||
@sleep 10 |
|||
@echo "" |
|||
@echo "$(YELLOW)Available targets:$(NC)" |
|||
@awk 'BEGIN {FS = ":.*?## "} /^[a-zA-Z_-]+:.*?## / {printf " $(GREEN)%-15s$(NC) %s\n", $$1, $$2}' $(MAKEFILE_LIST) |
|||
@echo "Stage 2: Starting Volume servers..." |
|||
@docker-compose -f $(COMPOSE_FILE) up -d volume1 volume2 volume3 volume4 volume5 volume6 |
|||
@sleep 15 |
|||
@echo "" |
|||
@echo "$(YELLOW)Quick start:$(NC) make start" |
|||
@echo "$(YELLOW)Monitor:$(NC) make monitor" |
|||
@echo "$(YELLOW)Cleanup:$(NC) make clean" |
|||
|
|||
start: ## Start the complete EC testing environment
|
|||
@echo "$(GREEN)🚀 Starting SeaweedFS EC testing environment...$(NC)" |
|||
@echo "$(BLUE)This will start:$(NC)" |
|||
@echo " • 1 Master server (port 9333)" |
|||
@echo " • 6 Volume servers (ports 8080-8085) with 50MB volume limit" |
|||
@echo " • 1 Filer (port 8888)" |
|||
@echo " • 1 Admin server (port 9900)" |
|||
@echo " • 3 EC Workers" |
|||
@echo " • 1 Load generator (continuous read/write)" |
|||
@echo " • 1 Monitor (port 9999)" |
|||
@echo "Stage 3: Starting Filer..." |
|||
@docker-compose -f $(COMPOSE_FILE) up -d filer |
|||
@sleep 10 |
|||
@echo "" |
|||
@mkdir -p monitor-data admin-config |
|||
@chmod +x *.sh 2>/dev/null || true |
|||
@docker-compose -f $(COMPOSE_FILE) down -v 2>/dev/null || true |
|||
@docker-compose -f $(COMPOSE_FILE) up --build -d |
|||
@echo "Stage 4: Starting Admin server..." |
|||
@docker-compose -f $(COMPOSE_FILE) up -d admin |
|||
@sleep 15 |
|||
@echo "" |
|||
@echo "$(GREEN)✅ Environment started successfully!$(NC)" |
|||
@echo "Stage 5: Starting Workers..." |
|||
@docker-compose -f $(COMPOSE_FILE) up -d worker1 worker2 worker3 |
|||
@sleep 10 |
|||
@echo "" |
|||
@$(MAKE) urls |
|||
@echo "Stage 6: Starting Load generator and Monitor..." |
|||
@docker-compose -f $(COMPOSE_FILE) up -d load_generator monitor |
|||
@echo "" |
|||
@echo "$(YELLOW)⏳ Waiting for services to be ready...$(NC)" |
|||
@sleep 10 |
|||
@$(MAKE) health |
|||
@echo "✅ All services started!" |
|||
@echo "" |
|||
@echo "📊 Access points:" |
|||
@echo " • Admin UI: http://localhost:23646/" |
|||
@echo " • Master UI: http://localhost:9333/" |
|||
@echo " • Filer: http://localhost:8888/" |
|||
@echo "" |
|||
@echo "⏳ Services are initializing... Use 'make status' to check progress" |
|||
|
|||
stop: ## Stop all services
|
|||
@echo "$(YELLOW)🛑 Stopping all services...$(NC)" |
|||
@docker-compose -f $(COMPOSE_FILE) stop |
|||
@echo "$(GREEN)✅ All services stopped$(NC)" |
|||
|
|||
down: ## Stop and remove all containers
|
|||
@echo "$(YELLOW)🛑 Stopping and removing containers...$(NC)" |
|||
@echo "🛑 Stopping SeaweedFS cluster..." |
|||
@docker-compose -f $(COMPOSE_FILE) down |
|||
@echo "$(GREEN)✅ Containers stopped and removed$(NC)" |
|||
@echo "✅ Cluster stopped" |
|||
|
|||
clean: ## Stop and remove all containers, networks, volumes, and images
|
|||
@echo "$(RED)🧹 Cleaning up entire environment...$(NC)" |
|||
@docker-compose -f $(COMPOSE_FILE) down -v --rmi all 2>/dev/null || true |
|||
@docker system prune -f |
|||
@echo "$(GREEN)✅ Environment cleaned up$(NC)" |
|||
|
|||
restart: ## Restart all services
|
|||
@echo "$(YELLOW)🔄 Restarting all services...$(NC)" |
|||
@docker-compose -f $(COMPOSE_FILE) restart |
|||
@echo "$(GREEN)✅ All services restarted$(NC)" |
|||
restart: stop start ## Restart the entire cluster
|
|||
|
|||
up: start ## Alias for start
|
|||
clean: ## Stop and remove all containers, networks, and volumes
|
|||
@echo "🧹 Cleaning up SeaweedFS test environment..." |
|||
@docker-compose -f $(COMPOSE_FILE) down -v --remove-orphans |
|||
@docker system prune -f |
|||
@rm -rf data/ |
|||
@echo "✅ Environment cleaned" |
|||
|
|||
status: ## Show status of all services
|
|||
@echo "$(BLUE)📊 Service Status:$(NC)" |
|||
status: ## Check the status of all services
|
|||
@echo "📊 SeaweedFS Cluster Status" |
|||
@echo "==========================" |
|||
@docker-compose -f $(COMPOSE_FILE) ps |
|||
@echo "" |
|||
@echo "📋 Service Health:" |
|||
@echo "Master:" |
|||
@curl -s http://localhost:9333/cluster/status | jq '.IsLeader' 2>/dev/null || echo " ❌ Master not ready" |
|||
@echo "Admin:" |
|||
@curl -s http://localhost:23646/ | grep -q "Admin" && echo " ✅ Admin ready" || echo " ❌ Admin not ready" |
|||
|
|||
logs: ## Show logs from all services
|
|||
@echo "$(BLUE)📋 Showing logs from all services (Ctrl+C to exit):$(NC)" |
|||
@echo "📜 Following logs from all services..." |
|||
@echo "💡 Press Ctrl+C to stop following logs" |
|||
@docker-compose -f $(COMPOSE_FILE) logs -f |
|||
|
|||
logs-admin: ## Show admin server logs
|
|||
@echo "$(BLUE)📋 Admin Server Logs:$(NC)" |
|||
admin-logs: ## Show logs from admin server only
|
|||
@echo "📜 Admin server logs:" |
|||
@docker-compose -f $(COMPOSE_FILE) logs -f admin |
|||
|
|||
logs-workers: ## Show all worker logs
|
|||
@echo "$(BLUE)📋 Worker Logs:$(NC)" |
|||
worker-logs: ## Show logs from all workers
|
|||
@echo "📜 Worker logs:" |
|||
@docker-compose -f $(COMPOSE_FILE) logs -f worker1 worker2 worker3 |
|||
|
|||
logs-worker1: ## Show worker1 logs
|
|||
@docker-compose -f $(COMPOSE_FILE) logs -f worker1 |
|||
|
|||
logs-worker2: ## Show worker2 logs
|
|||
@docker-compose -f $(COMPOSE_FILE) logs -f worker2 |
|||
|
|||
logs-worker3: ## Show worker3 logs
|
|||
@docker-compose -f $(COMPOSE_FILE) logs -f worker3 |
|||
|
|||
logs-load: ## Show load generator logs
|
|||
@echo "$(BLUE)📋 Load Generator Logs:$(NC)" |
|||
@docker-compose -f $(COMPOSE_FILE) logs -f load_generator |
|||
|
|||
logs-monitor: ## Show monitor logs
|
|||
@echo "$(BLUE)📋 Monitor Logs:$(NC)" |
|||
@docker-compose -f $(COMPOSE_FILE) logs -f monitor |
|||
|
|||
logs-master: ## Show master logs
|
|||
master-logs: ## Show logs from master server
|
|||
@echo "📜 Master server logs:" |
|||
@docker-compose -f $(COMPOSE_FILE) logs -f master |
|||
|
|||
logs-volumes: ## Show all volume server logs
|
|||
@echo "$(BLUE)📋 Volume Server Logs:$(NC)" |
|||
@docker-compose -f $(COMPOSE_FILE) logs -f volume1 volume2 volume3 volume4 volume5 volume6 |
|||
admin-ui: ## Open admin UI in browser (macOS)
|
|||
@echo "🌐 Opening admin UI in browser..." |
|||
@open http://localhost:23646/ || echo "💡 Manually open: http://localhost:23646/" |
|||
|
|||
urls: ## Show monitoring URLs
|
|||
@echo "$(BLUE)📊 Monitoring URLs:$(NC)" |
|||
@echo " • Master UI: http://localhost:9333" |
|||
@echo " • Filer: http://localhost:8888" |
|||
@echo " • Admin Server: http://localhost:9900/status" |
|||
@echo " • Monitor: http://localhost:9999/status" |
|||
test: ## Run integration test to verify task assignment and completion
|
|||
@echo "🧪 Running Admin-Worker Integration Test" |
|||
@echo "========================================" |
|||
@echo "" |
|||
@echo "$(BLUE)📈 Volume Servers:$(NC)" |
|||
@echo " • Volume1: http://localhost:8080/status" |
|||
@echo " • Volume2: http://localhost:8081/status" |
|||
@echo " • Volume3: http://localhost:8082/status" |
|||
@echo " • Volume4: http://localhost:8083/status" |
|||
@echo " • Volume5: http://localhost:8084/status" |
|||
@echo " • Volume6: http://localhost:8085/status" |
|||
|
|||
health: ## Check health of all services
|
|||
@echo "$(BLUE)🔍 Checking service health...$(NC)" |
|||
@echo -n " Master: "; \
|
|||
if curl -s http://localhost:9333/cluster/status > /dev/null 2>&1; then \
|
|||
echo "$(GREEN)✅ Healthy$(NC)"; \
|
|||
else \
|
|||
echo "$(RED)❌ Not responding$(NC)"; \
|
|||
fi |
|||
@echo -n " Filer: "; \
|
|||
if curl -s http://localhost:8888/ > /dev/null 2>&1; then \
|
|||
echo "$(GREEN)✅ Healthy$(NC)"; \
|
|||
else \
|
|||
echo "$(RED)❌ Not responding$(NC)"; \
|
|||
fi |
|||
@echo -n " Admin: "; \
|
|||
if curl -s http://localhost:9900/health > /dev/null 2>&1; then \
|
|||
echo "$(GREEN)✅ Healthy$(NC)"; \
|
|||
else \
|
|||
echo "$(RED)❌ Not responding$(NC)"; \
|
|||
fi |
|||
@echo -n " Monitor: "; \
|
|||
if curl -s http://localhost:9999/health > /dev/null 2>&1; then \
|
|||
echo "$(GREEN)✅ Healthy$(NC)"; \
|
|||
else \
|
|||
echo "$(RED)❌ Not responding$(NC)"; \
|
|||
fi |
|||
|
|||
monitor: ## Open monitor dashboard in browser
|
|||
@echo "$(BLUE)📊 Opening monitor dashboard...$(NC)" |
|||
@echo "Monitor URL: http://localhost:9999/status" |
|||
@command -v open >/dev/null 2>&1 && open http://localhost:9999/status || \
|
|||
command -v xdg-open >/dev/null 2>&1 && xdg-open http://localhost:9999/status || \
|
|||
echo "Please open http://localhost:9999/status in your browser" |
|||
|
|||
monitor-status: ## Show current monitoring status via API
|
|||
@echo "$(BLUE)📊 Current Monitor Status:$(NC)" |
|||
@curl -s http://localhost:9999/status | jq . 2>/dev/null || \
|
|||
curl -s http://localhost:9999/status 2>/dev/null || \
|
|||
echo "Monitor not available" |
|||
|
|||
volume-status: ## Show volume status from master
|
|||
@echo "$(BLUE)💾 Volume Status:$(NC)" |
|||
@curl -s http://localhost:9333/vol/status | jq . 2>/dev/null || \
|
|||
curl -s http://localhost:9333/vol/status 2>/dev/null || \
|
|||
echo "Master not available" |
|||
|
|||
admin-status: ## Show admin server status
|
|||
@echo "$(BLUE)🏭 Admin Server Status:$(NC)" |
|||
@curl -s http://localhost:9900/status | jq . 2>/dev/null || \
|
|||
curl -s http://localhost:9900/status 2>/dev/null || \
|
|||
echo "Admin server not available" |
|||
|
|||
cluster-status: ## Show complete cluster status
|
|||
@echo "$(BLUE)🌐 Cluster Status:$(NC)" |
|||
@curl -s http://localhost:9333/cluster/status | jq . 2>/dev/null || \
|
|||
curl -s http://localhost:9333/cluster/status 2>/dev/null || \
|
|||
echo "Master not available" |
|||
|
|||
scale-workers: ## Scale workers (usage: make scale-workers WORKERS=5)
|
|||
@echo "$(YELLOW)⚖️ Scaling workers to $(or $(WORKERS),3)...$(NC)" |
|||
@docker-compose -f $(COMPOSE_FILE) up -d --scale worker2=$(or $(WORKERS),3) |
|||
|
|||
scale-load: ## Restart load generator with higher rate (usage: make scale-load RATE=20)
|
|||
@echo "$(YELLOW)📈 Scaling load generation to $(or $(RATE),20) files/sec...$(NC)" |
|||
@docker-compose -f $(COMPOSE_FILE) stop load_generator |
|||
@docker-compose -f $(COMPOSE_FILE) run -d --name temp_load_generator \
|
|||
-e WRITE_RATE=$(or $(RATE),20) -e DELETE_RATE=$(or $(shell expr $(or $(RATE),20) / 4),5) \
|
|||
load_generator |
|||
@echo "$(GREEN)✅ Load generator restarted with higher rate$(NC)" |
|||
|
|||
test-ec: ## Run a focused EC test scenario
|
|||
@echo "$(YELLOW)🧪 Running focused EC test...$(NC)" |
|||
@$(MAKE) scale-load RATE=25 |
|||
@echo "$(BLUE)Monitoring EC detection...$(NC)" |
|||
@echo "Watch for volumes >40MB that trigger EC conversion" |
|||
@echo "Monitor at: http://localhost:9999/status" |
|||
|
|||
shell-admin: ## Open shell in admin container
|
|||
@docker-compose -f $(COMPOSE_FILE) exec admin /bin/sh |
|||
|
|||
shell-worker1: ## Open shell in worker1 container
|
|||
@docker-compose -f $(COMPOSE_FILE) exec worker1 /bin/sh |
|||
|
|||
shell-master: ## Open shell in master container
|
|||
@docker-compose -f $(COMPOSE_FILE) exec master /bin/sh |
|||
|
|||
docs: ## Show documentation
|
|||
@echo "$(BLUE)📖 EC Testing Documentation:$(NC)" |
|||
@echo "" |
|||
@cat EC-TESTING-README.md |
|||
|
|||
build: ## Build all Docker images without starting
|
|||
@echo "$(YELLOW)🔨 Building all Docker images...$(NC)" |
|||
@docker-compose -f $(COMPOSE_FILE) build |
|||
@echo "$(GREEN)✅ All images built$(NC)" |
|||
|
|||
pull: ## Pull latest SeaweedFS image
|
|||
@echo "$(YELLOW)📥 Pulling latest SeaweedFS image...$(NC)" |
|||
@docker pull chrislusf/seaweedfs:latest |
|||
@echo "$(GREEN)✅ Latest image pulled$(NC)" |
|||
|
|||
debug: ## Show debug information
|
|||
@echo "$(BLUE)🔍 Debug Information:$(NC)" |
|||
@echo "" |
|||
@echo "$(YELLOW)Docker Compose Version:$(NC)" |
|||
@docker-compose --version |
|||
@echo "1️⃣ Checking cluster health..." |
|||
@sleep 5 |
|||
@curl -s http://localhost:9333/cluster/status | jq '.IsLeader' > /dev/null && echo "✅ Master healthy" || echo "❌ Master not ready" |
|||
@curl -s http://localhost:23646/ | grep -q "Admin" && echo "✅ Admin healthy" || echo "❌ Admin not ready" |
|||
@echo "" |
|||
@echo "$(YELLOW)Docker Version:$(NC)" |
|||
@docker --version |
|||
@echo "" |
|||
@echo "$(YELLOW)Current Directory:$(NC)" |
|||
@pwd |
|||
@echo "" |
|||
@echo "$(YELLOW)Available Files:$(NC)" |
|||
@ls -la *.yml *.sh *.md 2>/dev/null || echo "No config files found" |
|||
@echo "" |
|||
@echo "$(YELLOW)Running Containers:$(NC)" |
|||
@docker ps --format "table {{.Names}}\t{{.Status}}\t{{.Ports}}" |
|||
|
|||
# Targets for development and testing
|
|||
dev-start: ## Start with development settings (faster iteration)
|
|||
@echo "$(YELLOW)🛠️ Starting development environment...$(NC)" |
|||
@mkdir -p monitor-data admin-config |
|||
@WRITE_RATE=50 DELETE_RATE=10 docker-compose -f $(COMPOSE_FILE) up --build -d |
|||
@echo "$(GREEN)✅ Development environment started with high load$(NC)" |
|||
|
|||
dev-stop: stop ## Stop development environment
|
|||
|
|||
# Clean specific components
|
|||
clean-volumes: ## Remove only data volumes
|
|||
@echo "$(YELLOW)🗄️ Removing data volumes...$(NC)" |
|||
@docker-compose -f $(COMPOSE_FILE) down -v |
|||
@echo "$(GREEN)✅ Data volumes removed$(NC)" |
|||
|
|||
clean-images: ## Remove built images
|
|||
@echo "$(YELLOW)🖼️ Removing built images...$(NC)" |
|||
@docker-compose -f $(COMPOSE_FILE) down --rmi local |
|||
@echo "$(GREEN)✅ Built images removed$(NC)" |
|||
|
|||
# Backup and restore
|
|||
backup-logs: ## Backup all service logs
|
|||
@echo "$(YELLOW)💾 Backing up service logs...$(NC)" |
|||
@mkdir -p logs-backup |
|||
@docker-compose -f $(COMPOSE_FILE) logs admin > logs-backup/admin.log 2>&1 |
|||
@docker-compose -f $(COMPOSE_FILE) logs worker1 > logs-backup/worker1.log 2>&1 |
|||
@docker-compose -f $(COMPOSE_FILE) logs worker2 > logs-backup/worker2.log 2>&1 |
|||
@docker-compose -f $(COMPOSE_FILE) logs worker3 > logs-backup/worker3.log 2>&1 |
|||
@docker-compose -f $(COMPOSE_FILE) logs monitor > logs-backup/monitor.log 2>&1 |
|||
@docker-compose -f $(COMPOSE_FILE) logs load_generator > logs-backup/load_generator.log 2>&1 |
|||
@echo "$(GREEN)✅ Logs backed up to logs-backup/$(NC)" |
|||
|
|||
# Quick troubleshooting
|
|||
troubleshoot: ## Run troubleshooting checks
|
|||
@echo "$(BLUE)🔧 Running troubleshooting checks...$(NC)" |
|||
@echo "" |
|||
@echo "$(YELLOW)1. Checking required ports:$(NC)" |
|||
@for port in 9333 8888 9900 9999 8080 8081 8082 8083 8084 8085; do \
|
|||
echo -n " Port $$port: "; \
|
|||
if lsof -i :$$port >/dev/null 2>&1; then \
|
|||
echo "$(RED)❌ In use$(NC)"; \
|
|||
else \
|
|||
echo "$(GREEN)✅ Available$(NC)"; \
|
|||
fi; \
|
|||
@echo "2️⃣ Checking worker registration..." |
|||
@sleep 10 |
|||
@echo "💡 Check admin UI for connected workers: http://localhost:23646/" |
|||
@echo "" |
|||
@echo "3️⃣ Generating load to trigger EC tasks..." |
|||
@echo "📝 Creating test files to fill volumes..." |
|||
@echo "Creating large files with random data to trigger EC (targeting ~60MB total to exceed 50MB limit)..." |
|||
@for i in {1..12}; do \
|
|||
echo "Creating 5MB random file $$i..."; \
|
|||
docker run --rm --network admin_integration_seaweed_net -v /tmp:/tmp --entrypoint sh chrislusf/seaweedfs:local -c "dd if=/dev/urandom of=/tmp/largefile$$i.dat bs=1M count=5 2>/dev/null && weed upload -master=master:9333 /tmp/largefile$$i.dat && rm /tmp/largefile$$i.dat"; \
|
|||
sleep 3; \
|
|||
done |
|||
@echo "" |
|||
@echo "$(YELLOW)2. Docker resources:$(NC)" |
|||
@docker system df |
|||
@echo "" |
|||
@echo "$(YELLOW)3. Service health:$(NC)" |
|||
@$(MAKE) health |
|||
@echo "4️⃣ Waiting for volumes to process large files and reach 50MB limit..." |
|||
@echo "This may take a few minutes as we're uploading 60MB of data..." |
|||
@sleep 60 |
|||
@echo "" |
|||
@echo "5️⃣ Checking for EC task creation and assignment..." |
|||
@echo "💡 Monitor the admin UI to see:" |
|||
@echo " • Tasks being created for volumes needing EC" |
|||
@echo " • Workers picking up tasks" |
|||
@echo " • Task progress (pending → running → completed)" |
|||
@echo " • EC shards being distributed" |
|||
@echo "" |
|||
@echo "✅ Integration test setup complete!" |
|||
@echo "📊 Monitor progress at: http://localhost:23646/" |
|||
|
|||
quick-test: ## Quick verification that core services are running
|
|||
@echo "⚡ Quick Health Check" |
|||
@echo "====================" |
|||
@echo "Master: $$(curl -s http://localhost:9333/cluster/status | jq -r '.IsLeader // "not ready"')" |
|||
@echo "Admin: $$(curl -s http://localhost:23646/ | grep -q "Admin" && echo "ready" || echo "not ready")" |
|||
@echo "Workers: $$(docker-compose -f $(COMPOSE_FILE) ps worker1 worker2 worker3 | grep -c Up) running" |
|||
|
|||
validate: ## Validate integration test configuration
|
|||
@echo "🔍 Validating Integration Test Configuration" |
|||
@echo "===========================================" |
|||
@chmod +x test-integration.sh |
|||
@./test-integration.sh |
|||
|
|||
demo: start ## Start cluster and run demonstration
|
|||
@echo "🎭 SeaweedFS Admin-Worker Demo" |
|||
@echo "=============================" |
|||
@echo "" |
|||
@echo "⏳ Waiting for services to start..." |
|||
@sleep 45 |
|||
@echo "" |
|||
@echo "🎯 Demo Overview:" |
|||
@echo " • 1 Master server (coordinates cluster)" |
|||
@echo " • 6 Volume servers (50MB volume limit)" |
|||
@echo " • 1 Admin server (task management)" |
|||
@echo " • 3 Workers (execute EC tasks)" |
|||
@echo " • Load generator (creates files continuously)" |
|||
@echo "" |
|||
@echo "📊 Watch the process:" |
|||
@echo " 1. Visit: http://localhost:23646/" |
|||
@echo " 2. Observe workers connecting" |
|||
@echo " 3. Watch tasks being created and assigned" |
|||
@echo " 4. See tasks progress from pending → completed" |
|||
@echo "" |
|||
@echo "🔄 The demo will:" |
|||
@echo " • Fill volumes to 50MB limit" |
|||
@echo " • Admin detects volumes needing EC" |
|||
@echo " • Workers receive and execute EC tasks" |
|||
@echo " • Tasks complete with shard distribution" |
|||
@echo "" |
|||
@echo "💡 Use 'make worker-logs' to see worker activity" |
|||
@echo "💡 Use 'make admin-logs' to see admin task management" |
|||
@ -1,521 +0,0 @@ |
|||
#!/bin/sh |
|||
|
|||
set -e |
|||
|
|||
echo "Starting SeaweedFS Admin Server..." |
|||
echo "Master Address: $MASTER_ADDRESS" |
|||
echo "Admin Port: $ADMIN_PORT" |
|||
echo "Scan Interval: $SCAN_INTERVAL" |
|||
|
|||
# Wait for master to be ready |
|||
echo "Waiting for master to be ready..." |
|||
until curl -f http://$MASTER_ADDRESS/cluster/status > /dev/null 2>&1; do |
|||
echo "Master not ready, waiting..." |
|||
sleep 5 |
|||
done |
|||
echo "Master is ready!" |
|||
|
|||
# For now, use a simple HTTP server to simulate admin functionality |
|||
# In a real implementation, this would start the actual admin server |
|||
cat > /tmp/admin_server.go << 'EOF' |
|||
package main |
|||
|
|||
import ( |
|||
"encoding/json" |
|||
"fmt" |
|||
"log" |
|||
"net/http" |
|||
"os" |
|||
"time" |
|||
) |
|||
|
|||
type AdminServer struct { |
|||
masterAddr string |
|||
port string |
|||
startTime time.Time |
|||
tasks []Task |
|||
workers []Worker |
|||
} |
|||
|
|||
type Task struct { |
|||
ID string `json:"id"` |
|||
Type string `json:"type"` |
|||
VolumeID int `json:"volume_id"` |
|||
Status string `json:"status"` |
|||
Progress float64 `json:"progress"` |
|||
Created time.Time `json:"created"` |
|||
} |
|||
|
|||
type Worker struct { |
|||
ID string `json:"id"` |
|||
Address string `json:"address"` |
|||
Capabilities []string `json:"capabilities"` |
|||
Status string `json:"status"` |
|||
LastSeen time.Time `json:"last_seen"` |
|||
} |
|||
|
|||
func (s *AdminServer) healthHandler(w http.ResponseWriter, r *http.Request) { |
|||
w.Header().Set("Content-Type", "application/json") |
|||
json.NewEncoder(w).Encode(map[string]interface{}{ |
|||
"status": "healthy", |
|||
"uptime": time.Since(s.startTime).String(), |
|||
"tasks": len(s.tasks), |
|||
"workers": len(s.workers), |
|||
}) |
|||
} |
|||
|
|||
func (s *AdminServer) statusHandler(w http.ResponseWriter, r *http.Request) { |
|||
w.Header().Set("Content-Type", "application/json") |
|||
json.NewEncoder(w).Encode(map[string]interface{}{ |
|||
"admin_server": "running", |
|||
"master_addr": s.masterAddr, |
|||
"tasks": s.tasks, |
|||
"workers": s.workers, |
|||
"uptime": time.Since(s.startTime).String(), |
|||
}) |
|||
} |
|||
|
|||
func (s *AdminServer) registerWorkerHandler(w http.ResponseWriter, r *http.Request) { |
|||
if r.Method != "POST" { |
|||
http.Error(w, "Method not allowed", http.StatusMethodNotAllowed) |
|||
return |
|||
} |
|||
|
|||
var worker Worker |
|||
if err := json.NewDecoder(r.Body).Decode(&worker); err != nil { |
|||
http.Error(w, "Invalid JSON", http.StatusBadRequest) |
|||
return |
|||
} |
|||
|
|||
worker.LastSeen = time.Now() |
|||
worker.Status = "active" |
|||
|
|||
// Check if worker already exists, update if so |
|||
found := false |
|||
for i, w := range s.workers { |
|||
if w.ID == worker.ID { |
|||
s.workers[i] = worker |
|||
found = true |
|||
break |
|||
} |
|||
} |
|||
|
|||
if !found { |
|||
s.workers = append(s.workers, worker) |
|||
log.Printf("Registered new worker: %s with capabilities: %v", worker.ID, worker.Capabilities) |
|||
} else { |
|||
log.Printf("Updated worker: %s", worker.ID) |
|||
} |
|||
|
|||
w.Header().Set("Content-Type", "application/json") |
|||
json.NewEncoder(w).Encode(map[string]string{"status": "registered"}) |
|||
} |
|||
|
|||
func (s *AdminServer) heartbeatHandler(w http.ResponseWriter, r *http.Request) { |
|||
if r.Method != "POST" { |
|||
http.Error(w, "Method not allowed", http.StatusMethodNotAllowed) |
|||
return |
|||
} |
|||
|
|||
var heartbeat struct { |
|||
WorkerID string `json:"worker_id"` |
|||
Status string `json:"status"` |
|||
} |
|||
|
|||
if err := json.NewDecoder(r.Body).Decode(&heartbeat); err != nil { |
|||
http.Error(w, "Invalid JSON", http.StatusBadRequest) |
|||
return |
|||
} |
|||
|
|||
// Update worker last seen time |
|||
for i, w := range s.workers { |
|||
if w.ID == heartbeat.WorkerID { |
|||
s.workers[i].LastSeen = time.Now() |
|||
s.workers[i].Status = heartbeat.Status |
|||
break |
|||
} |
|||
} |
|||
|
|||
w.Header().Set("Content-Type", "application/json") |
|||
json.NewEncoder(w).Encode(map[string]string{"status": "ok"}) |
|||
} |
|||
|
|||
func (s *AdminServer) assignTaskHandler(w http.ResponseWriter, r *http.Request) { |
|||
if r.Method != "POST" { |
|||
http.Error(w, "Method not allowed", http.StatusMethodNotAllowed) |
|||
return |
|||
} |
|||
|
|||
var request struct { |
|||
WorkerID string `json:"worker_id"` |
|||
Capabilities []string `json:"capabilities"` |
|||
} |
|||
|
|||
if err := json.NewDecoder(r.Body).Decode(&request); err != nil { |
|||
http.Error(w, "Invalid JSON", http.StatusBadRequest) |
|||
return |
|||
} |
|||
|
|||
// Find a pending task that matches worker capabilities |
|||
for i, task := range s.tasks { |
|||
if task.Status == "pending" { |
|||
// Assign task to worker |
|||
s.tasks[i].Status = "assigned" |
|||
|
|||
w.Header().Set("Content-Type", "application/json") |
|||
json.NewEncoder(w).Encode(map[string]interface{}{ |
|||
"task_id": task.ID, |
|||
"type": task.Type, |
|||
"volume_id": task.VolumeID, |
|||
"parameters": map[string]interface{}{ |
|||
"server": "volume1:8080", // Simplified assignment |
|||
}, |
|||
}) |
|||
|
|||
log.Printf("Assigned task %s to worker %s", task.ID, request.WorkerID) |
|||
return |
|||
} |
|||
} |
|||
|
|||
// No tasks available |
|||
w.Header().Set("Content-Type", "application/json") |
|||
json.NewEncoder(w).Encode(map[string]string{"status": "no_tasks"}) |
|||
} |
|||
|
|||
func (s *AdminServer) taskProgressHandler(w http.ResponseWriter, r *http.Request) { |
|||
if r.Method != "POST" { |
|||
http.Error(w, "Method not allowed", http.StatusMethodNotAllowed) |
|||
return |
|||
} |
|||
|
|||
var progress struct { |
|||
TaskID string `json:"task_id"` |
|||
Progress float64 `json:"progress"` |
|||
Status string `json:"status"` |
|||
Message string `json:"message"` |
|||
} |
|||
|
|||
if err := json.NewDecoder(r.Body).Decode(&progress); err != nil { |
|||
http.Error(w, "Invalid JSON", http.StatusBadRequest) |
|||
return |
|||
} |
|||
|
|||
// Update task progress |
|||
for i, task := range s.tasks { |
|||
if task.ID == progress.TaskID { |
|||
s.tasks[i].Progress = progress.Progress |
|||
s.tasks[i].Status = progress.Status |
|||
|
|||
log.Printf("Task %s: %.1f%% - %s", progress.TaskID, progress.Progress, progress.Message) |
|||
break |
|||
} |
|||
} |
|||
|
|||
w.Header().Set("Content-Type", "application/json") |
|||
json.NewEncoder(w).Encode(map[string]string{"status": "updated"}) |
|||
} |
|||
|
|||
func (s *AdminServer) webUIHandler(w http.ResponseWriter, r *http.Request) { |
|||
w.Header().Set("Content-Type", "text/html") |
|||
|
|||
html := `<!DOCTYPE html> |
|||
<html> |
|||
<head> |
|||
<title>SeaweedFS Admin - EC Task Monitor</title> |
|||
<meta charset="UTF-8"> |
|||
<meta name="viewport" content="width=device-width, initial-scale=1.0"> |
|||
<style> |
|||
body { |
|||
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif; |
|||
margin: 0; padding: 20px; background: #f5f5f5; |
|||
} |
|||
.container { max-width: 1200px; margin: 0 auto; } |
|||
.header { |
|||
background: #2c3e50; color: white; padding: 20px; border-radius: 8px; |
|||
margin-bottom: 20px; text-align: center; |
|||
} |
|||
.stats { |
|||
display: grid; grid-template-columns: repeat(auto-fit, minmax(200px, 1fr)); |
|||
gap: 20px; margin-bottom: 20px; |
|||
} |
|||
.stat-card { |
|||
background: white; padding: 20px; border-radius: 8px; |
|||
box-shadow: 0 2px 4px rgba(0,0,0,0.1); text-align: center; |
|||
} |
|||
.stat-number { font-size: 2em; font-weight: bold; color: #3498db; } |
|||
.stat-label { color: #7f8c8d; margin-top: 5px; } |
|||
.section { |
|||
background: white; border-radius: 8px; margin-bottom: 20px; |
|||
box-shadow: 0 2px 4px rgba(0,0,0,0.1); overflow: hidden; |
|||
} |
|||
.section-header { |
|||
background: #34495e; color: white; padding: 15px 20px; |
|||
font-weight: bold; font-size: 1.1em; |
|||
} |
|||
.section-content { padding: 20px; } |
|||
table { width: 100%; border-collapse: collapse; } |
|||
th, td { padding: 12px; text-align: left; border-bottom: 1px solid #ecf0f1; } |
|||
th { background: #f8f9fa; font-weight: 600; } |
|||
.status-pending { color: #f39c12; font-weight: bold; } |
|||
.status-assigned { color: #3498db; font-weight: bold; } |
|||
.status-running { color: #e67e22; font-weight: bold; } |
|||
.status-completed { color: #27ae60; font-weight: bold; } |
|||
.status-failed { color: #e74c3c; font-weight: bold; } |
|||
.progress-bar { |
|||
background: #ecf0f1; height: 20px; border-radius: 10px; |
|||
overflow: hidden; position: relative; |
|||
} |
|||
.progress-fill { |
|||
height: 100%; background: linear-gradient(90deg, #3498db, #2ecc71); |
|||
transition: width 0.3s ease; |
|||
} |
|||
.progress-text { |
|||
position: absolute; top: 50%; left: 50%; transform: translate(-50%, -50%); |
|||
font-size: 0.8em; font-weight: bold; color: white; text-shadow: 1px 1px 2px rgba(0,0,0,0.5); |
|||
} |
|||
.worker-online { color: #27ae60; } |
|||
.worker-offline { color: #e74c3c; } |
|||
.refresh-btn { |
|||
background: #3498db; color: white; padding: 10px 20px; |
|||
border: none; border-radius: 5px; cursor: pointer; margin-bottom: 20px; |
|||
} |
|||
.refresh-btn:hover { background: #2980b9; } |
|||
.last-updated { color: #7f8c8d; font-size: 0.9em; text-align: center; margin-top: 20px; } |
|||
</style> |
|||
</head> |
|||
<body> |
|||
<div class="container"> |
|||
<div class="header"> |
|||
<h1>🧪 SeaweedFS EC Task Monitor</h1> |
|||
<p>Real-time Erasure Coding Task Management Dashboard</p> |
|||
</div> |
|||
|
|||
<button class="refresh-btn" onclick="location.reload()">🔄 Refresh</button> |
|||
|
|||
<div class="stats" id="stats"> |
|||
<div class="stat-card"> |
|||
<div class="stat-number" id="total-tasks">0</div> |
|||
<div class="stat-label">Total Tasks</div> |
|||
</div> |
|||
<div class="stat-card"> |
|||
<div class="stat-number" id="active-workers">0</div> |
|||
<div class="stat-label">Active Workers</div> |
|||
</div> |
|||
<div class="stat-card"> |
|||
<div class="stat-number" id="completed-tasks">0</div> |
|||
<div class="stat-label">Completed</div> |
|||
</div> |
|||
<div class="stat-card"> |
|||
<div class="stat-number" id="uptime">--</div> |
|||
<div class="stat-label">Uptime</div> |
|||
</div> |
|||
</div> |
|||
|
|||
<div class="section"> |
|||
<div class="section-header">📋 EC Tasks</div> |
|||
<div class="section-content"> |
|||
<table> |
|||
<thead> |
|||
<tr> |
|||
<th>Task ID</th> |
|||
<th>Type</th> |
|||
<th>Volume ID</th> |
|||
<th>Status</th> |
|||
<th>Progress</th> |
|||
<th>Created</th> |
|||
</tr> |
|||
</thead> |
|||
<tbody id="tasks-table"> |
|||
<tr><td colspan="6" style="text-align: center; color: #7f8c8d;">Loading tasks...</td></tr> |
|||
</tbody> |
|||
</table> |
|||
</div> |
|||
</div> |
|||
|
|||
<div class="section"> |
|||
<div class="section-header">⚙️ Workers</div> |
|||
<div class="section-content"> |
|||
<table> |
|||
<thead> |
|||
<tr> |
|||
<th>Worker ID</th> |
|||
<th>Address</th> |
|||
<th>Status</th> |
|||
<th>Capabilities</th> |
|||
<th>Last Seen</th> |
|||
</tr> |
|||
</thead> |
|||
<tbody id="workers-table"> |
|||
<tr><td colspan="5" style="text-align: center; color: #7f8c8d;">Loading workers...</td></tr> |
|||
</tbody> |
|||
</table> |
|||
</div> |
|||
</div> |
|||
|
|||
<div class="last-updated"> |
|||
Last updated: <span id="last-updated">--</span> | |
|||
Auto-refresh every 5 seconds |
|||
</div> |
|||
</div> |
|||
|
|||
<script> |
|||
function formatTime(timestamp) { |
|||
return new Date(timestamp).toLocaleString(); |
|||
} |
|||
|
|||
function formatUptime(uptime) { |
|||
if (!uptime) return '--'; |
|||
const matches = uptime.match(/(\d+h)?(\d+m)?(\d+\.?\d*s)?/); |
|||
if (!matches) return uptime; |
|||
|
|||
let parts = []; |
|||
if (matches[1]) parts.push(matches[1]); |
|||
if (matches[2]) parts.push(matches[2]); |
|||
if (matches[3] && !matches[1]) parts.push(matches[3]); |
|||
|
|||
return parts.join(' ') || uptime; |
|||
} |
|||
|
|||
function updateData() { |
|||
fetch('/status') |
|||
.then(response => response.json()) |
|||
.then(data => { |
|||
// Update stats |
|||
document.getElementById('total-tasks').textContent = data.tasks.length; |
|||
document.getElementById('active-workers').textContent = data.workers.length; |
|||
document.getElementById('completed-tasks').textContent = |
|||
data.tasks.filter(function(t) { return t.status === 'completed'; }).length; |
|||
document.getElementById('uptime').textContent = formatUptime(data.uptime); |
|||
|
|||
// Update tasks table |
|||
const tasksTable = document.getElementById('tasks-table'); |
|||
if (data.tasks.length === 0) { |
|||
tasksTable.innerHTML = '<tr><td colspan="6" style="text-align: center; color: #7f8c8d;">No tasks available</td></tr>'; |
|||
} else { |
|||
tasksTable.innerHTML = data.tasks.map(task => '<tr>' + |
|||
'<td><code>' + task.id + '</code></td>' + |
|||
'<td>' + task.type + '</td>' + |
|||
'<td>' + task.volume_id + '</td>' + |
|||
'<td><span class="status-' + task.status + '">' + task.status.toUpperCase() + '</span></td>' + |
|||
'<td><div class="progress-bar">' + |
|||
'<div class="progress-fill" style="width: ' + task.progress + '%"></div>' + |
|||
'<div class="progress-text">' + task.progress.toFixed(1) + '%</div>' + |
|||
'</div></td>' + |
|||
'<td>' + formatTime(task.created) + '</td>' + |
|||
'</tr>').join(''); |
|||
} |
|||
|
|||
// Update workers table |
|||
const workersTable = document.getElementById('workers-table'); |
|||
if (data.workers.length === 0) { |
|||
workersTable.innerHTML = '<tr><td colspan="5" style="text-align: center; color: #7f8c8d;">No workers registered</td></tr>'; |
|||
} else { |
|||
workersTable.innerHTML = data.workers.map(worker => '<tr>' + |
|||
'<td><strong>' + worker.id + '</strong></td>' + |
|||
'<td>' + (worker.address || 'N/A') + '</td>' + |
|||
'<td><span class="worker-' + (worker.status === 'active' ? 'online' : 'offline') + '">' + worker.status.toUpperCase() + '</span></td>' + |
|||
'<td>' + worker.capabilities.join(', ') + '</td>' + |
|||
'<td>' + formatTime(worker.last_seen) + '</td>' + |
|||
'</tr>').join(''); |
|||
} |
|||
|
|||
document.getElementById('last-updated').textContent = new Date().toLocaleTimeString(); |
|||
}) |
|||
.catch(error => { |
|||
console.error('Failed to fetch data:', error); |
|||
}); |
|||
} |
|||
|
|||
// Initial load |
|||
updateData(); |
|||
|
|||
// Auto-refresh every 5 seconds |
|||
setInterval(updateData, 5000); |
|||
</script> |
|||
</body> |
|||
</html>` |
|||
|
|||
fmt.Fprint(w, html) |
|||
} |
|||
|
|||
func (s *AdminServer) detectVolumesForEC() { |
|||
// Simulate volume detection logic |
|||
// In real implementation, this would query the master for volume status |
|||
ticker := time.NewTicker(30 * time.Second) |
|||
go func() { |
|||
for range ticker.C { |
|||
log.Println("Scanning for volumes requiring EC...") |
|||
|
|||
// Check master for volume status |
|||
resp, err := http.Get(fmt.Sprintf("http://%s/vol/status", s.masterAddr)) |
|||
if err != nil { |
|||
log.Printf("Error checking master: %v", err) |
|||
continue |
|||
} |
|||
resp.Body.Close() |
|||
|
|||
// Simulate detecting a volume that needs EC |
|||
if len(s.tasks) < 5 { // Don't create too many tasks |
|||
taskID := fmt.Sprintf("ec-task-%d", len(s.tasks)+1) |
|||
volumeID := 1000 + len(s.tasks) |
|||
|
|||
task := Task{ |
|||
ID: taskID, |
|||
Type: "erasure_coding", |
|||
VolumeID: volumeID, |
|||
Status: "pending", |
|||
Progress: 0.0, |
|||
Created: time.Now(), |
|||
} |
|||
|
|||
s.tasks = append(s.tasks, task) |
|||
log.Printf("Created EC task %s for volume %d", taskID, volumeID) |
|||
} |
|||
} |
|||
}() |
|||
} |
|||
|
|||
func main() { |
|||
masterAddr := os.Getenv("MASTER_ADDRESS") |
|||
if masterAddr == "" { |
|||
masterAddr = "master:9333" |
|||
} |
|||
|
|||
port := os.Getenv("ADMIN_PORT") |
|||
if port == "" { |
|||
port = "9900" |
|||
} |
|||
|
|||
server := &AdminServer{ |
|||
masterAddr: masterAddr, |
|||
port: port, |
|||
startTime: time.Now(), |
|||
tasks: make([]Task, 0), |
|||
workers: make([]Worker, 0), |
|||
} |
|||
|
|||
http.HandleFunc("/health", server.healthHandler) |
|||
http.HandleFunc("/status", server.statusHandler) |
|||
http.HandleFunc("/register", server.registerWorkerHandler) |
|||
http.HandleFunc("/register-worker", server.registerWorkerHandler) // Worker compatibility |
|||
http.HandleFunc("/heartbeat", server.heartbeatHandler) |
|||
http.HandleFunc("/assign-task", server.assignTaskHandler) |
|||
http.HandleFunc("/task-progress", server.taskProgressHandler) |
|||
http.HandleFunc("/", server.webUIHandler) // Web UI |
|||
|
|||
// Start volume detection |
|||
server.detectVolumesForEC() |
|||
|
|||
log.Printf("Admin server starting on port %s", port) |
|||
log.Printf("Master address: %s", masterAddr) |
|||
|
|||
if err := http.ListenAndServe(":"+port, nil); err != nil { |
|||
log.Fatal("Server failed to start:", err) |
|||
} |
|||
} |
|||
EOF |
|||
|
|||
# Compile and run the admin server |
|||
cd /tmp |
|||
go mod init admin-server |
|||
go run admin_server.go |
|||
@ -1,73 +0,0 @@ |
|||
#!/bin/sh |
|||
|
|||
set -e |
|||
|
|||
echo "Starting SeaweedFS Admin Server (gRPC)..." |
|||
echo "Master Address: $MASTER_ADDRESS" |
|||
echo "Admin HTTP Port: $ADMIN_PORT" |
|||
echo "Admin gRPC Port: $GRPC_PORT" |
|||
|
|||
# Wait for master to be ready |
|||
echo "Waiting for master to be ready..." |
|||
until wget --quiet --tries=1 --spider http://$MASTER_ADDRESS/cluster/status > /dev/null 2>&1; do |
|||
echo "Master not ready, waiting..." |
|||
sleep 5 |
|||
done |
|||
echo "Master is ready!" |
|||
|
|||
# Install protobuf compiler and Go protobuf plugins |
|||
apk add --no-cache protobuf protobuf-dev |
|||
|
|||
# Set up Go environment |
|||
export GOPATH=/tmp/go |
|||
export PATH=$PATH:$GOPATH/bin |
|||
mkdir -p $GOPATH/src $GOPATH/bin $GOPATH/pkg |
|||
|
|||
# Install Go protobuf plugins globally first |
|||
export GOPATH=/tmp/go |
|||
mkdir -p $GOPATH |
|||
go install google.golang.org/protobuf/cmd/protoc-gen-go@latest |
|||
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@latest |
|||
|
|||
# Set up working directory for compilation |
|||
cd /tmp |
|||
mkdir -p admin-project |
|||
cd admin-project |
|||
|
|||
# Create a basic go.mod with required dependencies |
|||
cat > go.mod << 'EOF' |
|||
module admin-server |
|||
|
|||
go 1.24 |
|||
|
|||
require ( |
|||
google.golang.org/grpc v1.65.0 |
|||
google.golang.org/protobuf v1.34.2 |
|||
) |
|||
EOF |
|||
|
|||
go mod tidy |
|||
|
|||
# Add Go bin to PATH |
|||
export PATH=$PATH:$(go env GOPATH)/bin |
|||
|
|||
# Create directory structure for protobuf |
|||
mkdir -p worker_pb |
|||
|
|||
# Copy the admin server source and existing worker protobuf file |
|||
cp /admin_grpc_server.go . |
|||
cp /worker.proto . |
|||
|
|||
# Generate Go code from the existing worker protobuf |
|||
echo "Generating gRPC code from worker.proto..." |
|||
protoc --go_out=. --go_opt=paths=source_relative \ |
|||
--go-grpc_out=. --go-grpc_opt=paths=source_relative \ |
|||
worker.proto |
|||
|
|||
# Build and run the admin server |
|||
echo "Building admin server..." |
|||
go mod tidy |
|||
go build -o admin-server admin_grpc_server.go |
|||
|
|||
echo "Starting admin server..." |
|||
exec ./admin-server |
|||
@ -1,663 +0,0 @@ |
|||
package main |
|||
|
|||
import ( |
|||
"encoding/json" |
|||
"fmt" |
|||
"io" |
|||
"log" |
|||
"net" |
|||
"net/http" |
|||
"os" |
|||
"sync" |
|||
"time" |
|||
|
|||
pb "github.com/seaweedfs/seaweedfs/weed/pb/worker_pb" |
|||
"google.golang.org/grpc" |
|||
"google.golang.org/grpc/reflection" |
|||
) |
|||
|
|||
type AdminServer struct { |
|||
pb.UnimplementedWorkerServiceServer |
|||
|
|||
// Server configuration
|
|||
grpcPort string |
|||
httpPort string |
|||
masterAddr string |
|||
startTime time.Time |
|||
|
|||
// Data storage
|
|||
workers map[string]*WorkerInfo |
|||
tasks map[string]*TaskInfo |
|||
taskQueue []string |
|||
|
|||
// Synchronization
|
|||
mu sync.RWMutex |
|||
|
|||
// Active streams for workers
|
|||
workerStreams map[string]pb.WorkerService_WorkerStreamServer |
|||
streamMu sync.RWMutex |
|||
} |
|||
|
|||
type WorkerInfo struct { |
|||
ID string |
|||
Address string |
|||
Capabilities []string |
|||
MaxConcurrent int32 |
|||
Status string |
|||
CurrentLoad int32 |
|||
LastSeen time.Time |
|||
TasksCompleted int32 |
|||
TasksFailed int32 |
|||
UptimeSeconds int64 |
|||
Stream pb.WorkerService_WorkerStreamServer |
|||
} |
|||
|
|||
type TaskInfo struct { |
|||
ID string |
|||
Type string |
|||
VolumeID uint32 |
|||
Status string |
|||
Progress float32 |
|||
AssignedTo string |
|||
Created time.Time |
|||
Updated time.Time |
|||
Server string |
|||
Collection string |
|||
DataCenter string |
|||
Rack string |
|||
Parameters map[string]string |
|||
} |
|||
|
|||
// gRPC service implementation
|
|||
func (s *AdminServer) WorkerStream(stream pb.WorkerService_WorkerStreamServer) error { |
|||
var workerID string |
|||
|
|||
for { |
|||
req, err := stream.Recv() |
|||
if err == io.EOF { |
|||
log.Printf("Worker %s disconnected", workerID) |
|||
s.removeWorkerStream(workerID) |
|||
return nil |
|||
} |
|||
if err != nil { |
|||
log.Printf("Stream error from worker %s: %v", workerID, err) |
|||
s.removeWorkerStream(workerID) |
|||
return err |
|||
} |
|||
|
|||
// Handle different message types
|
|||
switch msg := req.Message.(type) { |
|||
case *pb.WorkerMessage_Registration: |
|||
workerID = msg.Registration.WorkerId |
|||
s.handleWorkerRegistration(workerID, msg.Registration, stream) |
|||
|
|||
case *pb.WorkerMessage_Heartbeat: |
|||
s.handleWorkerHeartbeat(msg.Heartbeat, stream) |
|||
|
|||
case *pb.WorkerMessage_TaskRequest: |
|||
s.handleTaskRequest(msg.TaskRequest, stream) |
|||
|
|||
case *pb.WorkerMessage_TaskUpdate: |
|||
s.handleTaskUpdate(msg.TaskUpdate) |
|||
|
|||
case *pb.WorkerMessage_TaskComplete: |
|||
s.handleTaskComplete(msg.TaskComplete) |
|||
|
|||
case *pb.WorkerMessage_Shutdown: |
|||
log.Printf("Worker %s shutting down: %s", msg.Shutdown.WorkerId, msg.Shutdown.Reason) |
|||
s.removeWorkerStream(msg.Shutdown.WorkerId) |
|||
return nil |
|||
} |
|||
} |
|||
} |
|||
|
|||
func (s *AdminServer) handleWorkerRegistration(workerID string, reg *pb.WorkerRegistration, stream pb.WorkerService_WorkerStreamServer) { |
|||
s.mu.Lock() |
|||
defer s.mu.Unlock() |
|||
|
|||
worker := &WorkerInfo{ |
|||
ID: reg.WorkerId, |
|||
Address: reg.Address, |
|||
Capabilities: reg.Capabilities, |
|||
MaxConcurrent: reg.MaxConcurrent, |
|||
Status: "active", |
|||
CurrentLoad: 0, |
|||
LastSeen: time.Now(), |
|||
TasksCompleted: 0, |
|||
TasksFailed: 0, |
|||
UptimeSeconds: 0, |
|||
Stream: stream, |
|||
} |
|||
|
|||
s.workers[reg.WorkerId] = worker |
|||
s.addWorkerStream(reg.WorkerId, stream) |
|||
|
|||
log.Printf("Registered worker %s with capabilities: %v", reg.WorkerId, reg.Capabilities) |
|||
|
|||
// Send registration response
|
|||
response := &pb.AdminMessage{ |
|||
AdminId: "admin-server", |
|||
Timestamp: time.Now().Unix(), |
|||
Message: &pb.AdminMessage_RegistrationResponse{ |
|||
RegistrationResponse: &pb.RegistrationResponse{ |
|||
Success: true, |
|||
Message: "Worker registered successfully", |
|||
AssignedWorkerId: reg.WorkerId, |
|||
}, |
|||
}, |
|||
} |
|||
|
|||
if err := stream.Send(response); err != nil { |
|||
log.Printf("Failed to send registration response to worker %s: %v", reg.WorkerId, err) |
|||
} |
|||
} |
|||
|
|||
func (s *AdminServer) handleWorkerHeartbeat(heartbeat *pb.WorkerHeartbeat, stream pb.WorkerService_WorkerStreamServer) { |
|||
s.mu.Lock() |
|||
defer s.mu.Unlock() |
|||
|
|||
if worker, exists := s.workers[heartbeat.WorkerId]; exists { |
|||
worker.Status = heartbeat.Status |
|||
worker.CurrentLoad = heartbeat.CurrentLoad |
|||
worker.LastSeen = time.Now() |
|||
worker.TasksCompleted = heartbeat.TasksCompleted |
|||
worker.TasksFailed = heartbeat.TasksFailed |
|||
worker.UptimeSeconds = heartbeat.UptimeSeconds |
|||
|
|||
log.Printf("Heartbeat from worker %s: status=%s, load=%d/%d", |
|||
heartbeat.WorkerId, heartbeat.Status, heartbeat.CurrentLoad, heartbeat.MaxConcurrent) |
|||
} |
|||
|
|||
// Send heartbeat response
|
|||
response := &pb.AdminMessage{ |
|||
AdminId: "admin-server", |
|||
Timestamp: time.Now().Unix(), |
|||
Message: &pb.AdminMessage_HeartbeatResponse{ |
|||
HeartbeatResponse: &pb.HeartbeatResponse{ |
|||
Success: true, |
|||
Message: "Heartbeat received", |
|||
}, |
|||
}, |
|||
} |
|||
|
|||
if err := stream.Send(response); err != nil { |
|||
log.Printf("Failed to send heartbeat response to worker %s: %v", heartbeat.WorkerId, err) |
|||
} |
|||
} |
|||
|
|||
func (s *AdminServer) handleTaskRequest(taskReq *pb.TaskRequest, stream pb.WorkerService_WorkerStreamServer) { |
|||
s.mu.Lock() |
|||
defer s.mu.Unlock() |
|||
|
|||
// Find a pending task that matches worker capabilities
|
|||
for taskID, task := range s.tasks { |
|||
if task.Status == "pending" { |
|||
// Check if worker has required capability
|
|||
hasCapability := false |
|||
for _, capability := range taskReq.Capabilities { |
|||
if capability == task.Type { |
|||
hasCapability = true |
|||
break |
|||
} |
|||
} |
|||
|
|||
if hasCapability && taskReq.AvailableSlots > 0 { |
|||
// Assign task to worker
|
|||
task.Status = "assigned" |
|||
task.AssignedTo = taskReq.WorkerId |
|||
task.Updated = time.Now() |
|||
|
|||
log.Printf("Assigned task %s (volume %d) to worker %s", taskID, task.VolumeID, taskReq.WorkerId) |
|||
|
|||
// Send task assignment
|
|||
response := &pb.AdminMessage{ |
|||
AdminId: "admin-server", |
|||
Timestamp: time.Now().Unix(), |
|||
Message: &pb.AdminMessage_TaskAssignment{ |
|||
TaskAssignment: &pb.TaskAssignment{ |
|||
TaskId: taskID, |
|||
TaskType: task.Type, |
|||
Priority: 1, |
|||
CreatedTime: task.Created.Unix(), |
|||
Params: &pb.TaskParams{ |
|||
VolumeId: task.VolumeID, |
|||
Server: task.Server, |
|||
Collection: task.Collection, |
|||
DataCenter: task.DataCenter, |
|||
Rack: task.Rack, |
|||
Parameters: task.Parameters, |
|||
}, |
|||
}, |
|||
}, |
|||
} |
|||
|
|||
if err := stream.Send(response); err != nil { |
|||
log.Printf("Failed to send task assignment to worker %s: %v", taskReq.WorkerId, err) |
|||
} |
|||
return |
|||
} |
|||
} |
|||
} |
|||
|
|||
log.Printf("No suitable tasks available for worker %s", taskReq.WorkerId) |
|||
} |
|||
|
|||
func (s *AdminServer) handleTaskUpdate(update *pb.TaskUpdate) { |
|||
s.mu.Lock() |
|||
defer s.mu.Unlock() |
|||
|
|||
if task, exists := s.tasks[update.TaskId]; exists { |
|||
task.Progress = update.Progress |
|||
task.Status = update.Status |
|||
task.Updated = time.Now() |
|||
|
|||
log.Printf("Task %s progress: %.1f%% - %s", update.TaskId, update.Progress, update.Message) |
|||
} |
|||
} |
|||
|
|||
func (s *AdminServer) handleTaskComplete(complete *pb.TaskComplete) { |
|||
s.mu.Lock() |
|||
defer s.mu.Unlock() |
|||
|
|||
if task, exists := s.tasks[complete.TaskId]; exists { |
|||
if complete.Success { |
|||
task.Status = "completed" |
|||
task.Progress = 100.0 |
|||
} else { |
|||
task.Status = "failed" |
|||
} |
|||
task.Updated = time.Now() |
|||
|
|||
// Update worker stats
|
|||
if worker, workerExists := s.workers[complete.WorkerId]; workerExists { |
|||
if complete.Success { |
|||
worker.TasksCompleted++ |
|||
} else { |
|||
worker.TasksFailed++ |
|||
} |
|||
if worker.CurrentLoad > 0 { |
|||
worker.CurrentLoad-- |
|||
} |
|||
} |
|||
|
|||
log.Printf("Task %s completed by worker %s: success=%v", complete.TaskId, complete.WorkerId, complete.Success) |
|||
} |
|||
} |
|||
|
|||
func (s *AdminServer) addWorkerStream(workerID string, stream pb.WorkerService_WorkerStreamServer) { |
|||
s.streamMu.Lock() |
|||
defer s.streamMu.Unlock() |
|||
s.workerStreams[workerID] = stream |
|||
} |
|||
|
|||
func (s *AdminServer) removeWorkerStream(workerID string) { |
|||
s.streamMu.Lock() |
|||
defer s.streamMu.Unlock() |
|||
delete(s.workerStreams, workerID) |
|||
|
|||
s.mu.Lock() |
|||
defer s.mu.Unlock() |
|||
delete(s.workers, workerID) |
|||
} |
|||
|
|||
// HTTP handlers for web UI
|
|||
func (s *AdminServer) statusHandler(w http.ResponseWriter, r *http.Request) { |
|||
s.mu.RLock() |
|||
defer s.mu.RUnlock() |
|||
|
|||
// Convert internal data to JSON-friendly format
|
|||
taskList := make([]map[string]interface{}, 0, len(s.tasks)) |
|||
for _, task := range s.tasks { |
|||
taskList = append(taskList, map[string]interface{}{ |
|||
"id": task.ID, |
|||
"type": task.Type, |
|||
"volume_id": task.VolumeID, |
|||
"status": task.Status, |
|||
"progress": task.Progress, |
|||
"created": task.Created, |
|||
}) |
|||
} |
|||
|
|||
workerList := make([]map[string]interface{}, 0, len(s.workers)) |
|||
for _, worker := range s.workers { |
|||
workerList = append(workerList, map[string]interface{}{ |
|||
"id": worker.ID, |
|||
"address": worker.Address, |
|||
"capabilities": worker.Capabilities, |
|||
"status": worker.Status, |
|||
"last_seen": worker.LastSeen, |
|||
}) |
|||
} |
|||
|
|||
w.Header().Set("Content-Type", "application/json") |
|||
json.NewEncoder(w).Encode(map[string]interface{}{ |
|||
"admin_server": "running", |
|||
"master_addr": s.masterAddr, |
|||
"tasks": taskList, |
|||
"workers": workerList, |
|||
"uptime": time.Since(s.startTime).String(), |
|||
}) |
|||
} |
|||
|
|||
func (s *AdminServer) healthHandler(w http.ResponseWriter, r *http.Request) { |
|||
s.mu.RLock() |
|||
defer s.mu.RUnlock() |
|||
|
|||
w.Header().Set("Content-Type", "application/json") |
|||
json.NewEncoder(w).Encode(map[string]interface{}{ |
|||
"status": "healthy", |
|||
"uptime": time.Since(s.startTime).String(), |
|||
"tasks": len(s.tasks), |
|||
"workers": len(s.workers), |
|||
}) |
|||
} |
|||
|
|||
func (s *AdminServer) webUIHandler(w http.ResponseWriter, r *http.Request) { |
|||
w.Header().Set("Content-Type", "text/html") |
|||
|
|||
html := `<!DOCTYPE html> |
|||
<html> |
|||
<head> |
|||
<title>SeaweedFS Admin - EC Task Monitor</title> |
|||
<meta charset="UTF-8"> |
|||
<meta name="viewport" content="width=device-width, initial-scale=1.0"> |
|||
<style> |
|||
body { |
|||
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif; |
|||
margin: 0; padding: 20px; background: #f5f5f5; |
|||
} |
|||
.container { max-width: 1200px; margin: 0 auto; } |
|||
.header { |
|||
background: #2c3e50; color: white; padding: 20px; border-radius: 8px; |
|||
margin-bottom: 20px; text-align: center; |
|||
} |
|||
.grpc-badge { |
|||
background: #9b59b6; color: white; padding: 4px 8px; |
|||
border-radius: 12px; font-size: 0.8em; margin-left: 10px; |
|||
} |
|||
.worker-badge { |
|||
background: #27ae60; color: white; padding: 4px 8px; |
|||
border-radius: 12px; font-size: 0.8em; margin-left: 10px; |
|||
} |
|||
.stats { |
|||
display: grid; grid-template-columns: repeat(auto-fit, minmax(200px, 1fr)); |
|||
gap: 20px; margin-bottom: 20px; |
|||
} |
|||
.stat-card { |
|||
background: white; padding: 20px; border-radius: 8px; |
|||
box-shadow: 0 2px 4px rgba(0,0,0,0.1); text-align: center; |
|||
} |
|||
.stat-number { font-size: 2em; font-weight: bold; color: #3498db; } |
|||
.stat-label { color: #7f8c8d; margin-top: 5px; } |
|||
.section { |
|||
background: white; border-radius: 8px; margin-bottom: 20px; |
|||
box-shadow: 0 2px 4px rgba(0,0,0,0.1); overflow: hidden; |
|||
} |
|||
.section-header { |
|||
background: #34495e; color: white; padding: 15px 20px; |
|||
font-weight: bold; font-size: 1.1em; |
|||
} |
|||
.section-content { padding: 20px; } |
|||
table { width: 100%; border-collapse: collapse; } |
|||
th, td { padding: 12px; text-align: left; border-bottom: 1px solid #ecf0f1; } |
|||
th { background: #f8f9fa; font-weight: 600; } |
|||
.status-pending { color: #f39c12; font-weight: bold; } |
|||
.status-assigned { color: #3498db; font-weight: bold; } |
|||
.status-running { color: #e67e22; font-weight: bold; } |
|||
.status-completed { color: #27ae60; font-weight: bold; } |
|||
.status-failed { color: #e74c3c; font-weight: bold; } |
|||
.worker-online { color: #27ae60; } |
|||
.refresh-btn { |
|||
background: #3498db; color: white; padding: 10px 20px; |
|||
border: none; border-radius: 5px; cursor: pointer; margin-bottom: 20px; |
|||
} |
|||
.refresh-btn:hover { background: #2980b9; } |
|||
</style> |
|||
</head> |
|||
<body> |
|||
<div class="container"> |
|||
<div class="header"> |
|||
<h1>🧪 SeaweedFS EC Task Monitor |
|||
<span class="grpc-badge">gRPC Streaming</span> |
|||
<span class="worker-badge">worker.proto</span> |
|||
</h1> |
|||
<p>Real-time Erasure Coding Task Management Dashboard</p> |
|||
</div> |
|||
|
|||
<button class="refresh-btn" onclick="location.reload()">🔄 Refresh</button> |
|||
|
|||
<div class="stats" id="stats"> |
|||
<div class="stat-card"> |
|||
<div class="stat-number" id="total-tasks">0</div> |
|||
<div class="stat-label">Total Tasks</div> |
|||
</div> |
|||
<div class="stat-card"> |
|||
<div class="stat-number" id="active-workers">0</div> |
|||
<div class="stat-label">Active Workers</div> |
|||
</div> |
|||
<div class="stat-card"> |
|||
<div class="stat-number" id="completed-tasks">0</div> |
|||
<div class="stat-label">Completed</div> |
|||
</div> |
|||
<div class="stat-card"> |
|||
<div class="stat-number" id="uptime">--</div> |
|||
<div class="stat-label">Uptime</div> |
|||
</div> |
|||
</div> |
|||
|
|||
<div class="section"> |
|||
<div class="section-header">📋 EC Tasks</div> |
|||
<div class="section-content"> |
|||
<table> |
|||
<thead> |
|||
<tr> |
|||
<th>Task ID</th> |
|||
<th>Type</th> |
|||
<th>Volume ID</th> |
|||
<th>Status</th> |
|||
<th>Progress</th> |
|||
<th>Created</th> |
|||
</tr> |
|||
</thead> |
|||
<tbody id="tasks-table"> |
|||
<tr><td colspan="6" style="text-align: center; color: #7f8c8d;">Loading tasks...</td></tr> |
|||
</tbody> |
|||
</table> |
|||
</div> |
|||
</div> |
|||
|
|||
<div class="section"> |
|||
<div class="section-header">⚙️ Workers (via gRPC Streaming)</div> |
|||
<div class="section-content"> |
|||
<table> |
|||
<thead> |
|||
<tr> |
|||
<th>Worker ID</th> |
|||
<th>Address</th> |
|||
<th>Status</th> |
|||
<th>Capabilities</th> |
|||
<th>Last Seen</th> |
|||
</tr> |
|||
</thead> |
|||
<tbody id="workers-table"> |
|||
<tr><td colspan="5" style="text-align: center; color: #7f8c8d;">Loading workers...</td></tr> |
|||
</tbody> |
|||
</table> |
|||
</div> |
|||
</div> |
|||
</div> |
|||
|
|||
<script> |
|||
function formatTime(timestamp) { |
|||
return new Date(timestamp).toLocaleString(); |
|||
} |
|||
|
|||
function formatUptime(uptime) { |
|||
if (!uptime) return '--'; |
|||
const matches = uptime.match(/(\d+h)?(\d+m)?(\d+\.?\d*s)?/); |
|||
if (!matches) return uptime; |
|||
|
|||
let parts = []; |
|||
if (matches[1]) parts.push(matches[1]); |
|||
if (matches[2]) parts.push(matches[2]); |
|||
if (matches[3] && !matches[1]) parts.push(matches[3]); |
|||
|
|||
return parts.join(' ') || uptime; |
|||
} |
|||
|
|||
function updateData() { |
|||
fetch('/status') |
|||
.then(response => response.json()) |
|||
.then(data => { |
|||
document.getElementById('total-tasks').textContent = data.tasks.length; |
|||
document.getElementById('active-workers').textContent = data.workers.length; |
|||
document.getElementById('completed-tasks').textContent = |
|||
data.tasks.filter(function(t) { return t.status === 'completed'; }).length; |
|||
document.getElementById('uptime').textContent = formatUptime(data.uptime); |
|||
|
|||
const tasksTable = document.getElementById('tasks-table'); |
|||
if (data.tasks.length === 0) { |
|||
tasksTable.innerHTML = '<tr><td colspan="6" style="text-align: center; color: #7f8c8d;">No tasks available</td></tr>'; |
|||
} else { |
|||
tasksTable.innerHTML = data.tasks.map(task => '<tr>' + |
|||
'<td><code>' + task.id + '</code></td>' + |
|||
'<td>' + task.type + '</td>' + |
|||
'<td>' + task.volume_id + '</td>' + |
|||
'<td><span class="status-' + task.status + '">' + task.status.toUpperCase() + '</span></td>' + |
|||
'<td>' + task.progress.toFixed(1) + '%</td>' + |
|||
'<td>' + formatTime(task.created) + '</td>' + |
|||
'</tr>').join(''); |
|||
} |
|||
|
|||
const workersTable = document.getElementById('workers-table'); |
|||
if (data.workers.length === 0) { |
|||
workersTable.innerHTML = '<tr><td colspan="5" style="text-align: center; color: #7f8c8d;">No workers registered</td></tr>'; |
|||
} else { |
|||
workersTable.innerHTML = data.workers.map(worker => '<tr>' + |
|||
'<td><strong>' + worker.id + '</strong></td>' + |
|||
'<td>' + (worker.address || 'N/A') + '</td>' + |
|||
'<td><span class="worker-online">' + worker.status.toUpperCase() + '</span></td>' + |
|||
'<td>' + worker.capabilities.join(', ') + '</td>' + |
|||
'<td>' + formatTime(worker.last_seen) + '</td>' + |
|||
'</tr>').join(''); |
|||
} |
|||
}) |
|||
.catch(error => { |
|||
console.error('Failed to fetch data:', error); |
|||
}); |
|||
} |
|||
|
|||
updateData(); |
|||
setInterval(updateData, 5000); |
|||
</script> |
|||
</body> |
|||
</html>` |
|||
|
|||
fmt.Fprint(w, html) |
|||
} |
|||
|
|||
// Task detection and creation
|
|||
func (s *AdminServer) detectVolumesForEC() { |
|||
ticker := time.NewTicker(30 * time.Second) |
|||
go func() { |
|||
for range ticker.C { |
|||
s.mu.Lock() |
|||
|
|||
log.Println("Scanning for volumes requiring EC...") |
|||
|
|||
// Simulate volume detection - in real implementation, query master
|
|||
if len(s.tasks) < 5 { // Don't create too many tasks
|
|||
taskID := fmt.Sprintf("ec-task-%d", time.Now().Unix()) |
|||
volumeID := uint32(1000 + len(s.tasks)) |
|||
|
|||
task := &TaskInfo{ |
|||
ID: taskID, |
|||
Type: "erasure_coding", |
|||
VolumeID: volumeID, |
|||
Status: "pending", |
|||
Progress: 0.0, |
|||
Created: time.Now(), |
|||
Updated: time.Now(), |
|||
Server: "volume1:8080", // Simplified
|
|||
Collection: "", |
|||
DataCenter: "dc1", |
|||
Rack: "rack1", |
|||
Parameters: map[string]string{ |
|||
"data_shards": "10", |
|||
"parity_shards": "4", |
|||
}, |
|||
} |
|||
|
|||
s.tasks[taskID] = task |
|||
log.Printf("Created EC task %s for volume %d", taskID, volumeID) |
|||
} |
|||
|
|||
s.mu.Unlock() |
|||
} |
|||
}() |
|||
} |
|||
|
|||
func main() { |
|||
grpcPort := os.Getenv("GRPC_PORT") |
|||
if grpcPort == "" { |
|||
grpcPort = "9901" |
|||
} |
|||
|
|||
httpPort := os.Getenv("ADMIN_PORT") |
|||
if httpPort == "" { |
|||
httpPort = "9900" |
|||
} |
|||
|
|||
masterAddr := os.Getenv("MASTER_ADDRESS") |
|||
if masterAddr == "" { |
|||
masterAddr = "master:9333" |
|||
} |
|||
|
|||
server := &AdminServer{ |
|||
grpcPort: grpcPort, |
|||
httpPort: httpPort, |
|||
masterAddr: masterAddr, |
|||
startTime: time.Now(), |
|||
workers: make(map[string]*WorkerInfo), |
|||
tasks: make(map[string]*TaskInfo), |
|||
taskQueue: make([]string, 0), |
|||
workerStreams: make(map[string]pb.WorkerService_WorkerStreamServer), |
|||
} |
|||
|
|||
// Start gRPC server
|
|||
go func() { |
|||
lis, err := net.Listen("tcp", ":"+grpcPort) |
|||
if err != nil { |
|||
log.Fatalf("Failed to listen on gRPC port %s: %v", grpcPort, err) |
|||
} |
|||
|
|||
grpcServer := grpc.NewServer() |
|||
pb.RegisterWorkerServiceServer(grpcServer, server) |
|||
reflection.Register(grpcServer) |
|||
|
|||
log.Printf("gRPC server starting on port %s", grpcPort) |
|||
if err := grpcServer.Serve(lis); err != nil { |
|||
log.Fatalf("Failed to serve gRPC: %v", err) |
|||
} |
|||
}() |
|||
|
|||
// Start HTTP server for web UI
|
|||
go func() { |
|||
http.HandleFunc("/", server.webUIHandler) |
|||
http.HandleFunc("/status", server.statusHandler) |
|||
http.HandleFunc("/health", server.healthHandler) |
|||
|
|||
log.Printf("HTTP server starting on port %s", httpPort) |
|||
if err := http.ListenAndServe(":"+httpPort, nil); err != nil { |
|||
log.Fatalf("Failed to serve HTTP: %v", err) |
|||
} |
|||
}() |
|||
|
|||
// Start task detection
|
|||
server.detectVolumesForEC() |
|||
|
|||
log.Printf("Admin server started - gRPC port: %s, HTTP port: %s, Master: %s", grpcPort, httpPort, masterAddr) |
|||
|
|||
// Keep the main goroutine running
|
|||
select {} |
|||
} |
|||
@ -1,395 +1,216 @@ |
|||
version: '3.8' |
|||
name: admin_integration |
|||
|
|||
networks: |
|||
seaweed_net: |
|||
driver: bridge |
|||
|
|||
services: |
|||
# Master server - coordinates the cluster |
|||
master: |
|||
image: chrislusf/seaweedfs:latest |
|||
container_name: seaweed-master |
|||
image: chrislusf/seaweedfs:local |
|||
ports: |
|||
- "9333:9333" |
|||
- "19333:19333" |
|||
command: > |
|||
master |
|||
-ip=master |
|||
-port=9333 |
|||
-volumeSizeLimitMB=50 |
|||
-defaultReplication=001 |
|||
command: "master -ip=master -mdir=/data -volumeSizeLimitMB=50" |
|||
environment: |
|||
- WEED_MASTER_VOLUME_GROWTH_COPY_1=1 |
|||
- WEED_MASTER_VOLUME_GROWTH_COPY_2=2 |
|||
- WEED_MASTER_VOLUME_GROWTH_COPY_OTHER=1 |
|||
volumes: |
|||
- master_data:/data |
|||
- ./data/master:/data |
|||
networks: |
|||
- seaweed_net |
|||
healthcheck: |
|||
test: ["CMD", "wget", "--quiet", "--tries=1", "--spider", "http://master:9333/cluster/status"] |
|||
interval: 10s |
|||
timeout: 5s |
|||
retries: 3 |
|||
|
|||
# Volume Server 1 |
|||
volume1: |
|||
image: chrislusf/seaweedfs:latest |
|||
container_name: seaweed-volume1 |
|||
image: chrislusf/seaweedfs:local |
|||
ports: |
|||
- "8080:8080" |
|||
- "18080:18080" |
|||
command: > |
|||
volume |
|||
-mserver=master:9333 |
|||
-ip=volume1 |
|||
-port=8080 |
|||
-dir=/data |
|||
-max=100 |
|||
-dataCenter=dc1 |
|||
-rack=rack1 |
|||
volumes: |
|||
- volume1_data:/data |
|||
command: "volume -mserver=master:9333 -ip=volume1 -dir=/data -max=10" |
|||
depends_on: |
|||
master: |
|||
condition: service_healthy |
|||
- master |
|||
volumes: |
|||
- ./data/volume1:/data |
|||
networks: |
|||
- seaweed_net |
|||
healthcheck: |
|||
test: ["CMD", "curl", "-f", "http://localhost:8080/status"] |
|||
interval: 10s |
|||
timeout: 5s |
|||
retries: 3 |
|||
|
|||
# Volume Server 2 |
|||
volume2: |
|||
image: chrislusf/seaweedfs:latest |
|||
container_name: seaweed-volume2 |
|||
image: chrislusf/seaweedfs:local |
|||
ports: |
|||
- "8081:8080" |
|||
- "18081:18080" |
|||
command: > |
|||
volume |
|||
-mserver=master:9333 |
|||
-ip=volume2 |
|||
-port=8080 |
|||
-dir=/data |
|||
-max=100 |
|||
-dataCenter=dc1 |
|||
-rack=rack1 |
|||
volumes: |
|||
- volume2_data:/data |
|||
command: "volume -mserver=master:9333 -ip=volume2 -dir=/data -max=10" |
|||
depends_on: |
|||
master: |
|||
condition: service_healthy |
|||
- master |
|||
volumes: |
|||
- ./data/volume2:/data |
|||
networks: |
|||
- seaweed_net |
|||
healthcheck: |
|||
test: ["CMD", "curl", "-f", "http://localhost:8080/status"] |
|||
interval: 10s |
|||
timeout: 5s |
|||
retries: 3 |
|||
|
|||
# Volume Server 3 |
|||
volume3: |
|||
image: chrislusf/seaweedfs:latest |
|||
container_name: seaweed-volume3 |
|||
image: chrislusf/seaweedfs:local |
|||
ports: |
|||
- "8082:8080" |
|||
- "18082:18080" |
|||
command: > |
|||
volume |
|||
-mserver=master:9333 |
|||
-ip=volume3 |
|||
-port=8080 |
|||
-dir=/data |
|||
-max=100 |
|||
-dataCenter=dc1 |
|||
-rack=rack2 |
|||
volumes: |
|||
- volume3_data:/data |
|||
command: "volume -mserver=master:9333 -ip=volume3 -dir=/data -max=10" |
|||
depends_on: |
|||
master: |
|||
condition: service_healthy |
|||
- master |
|||
volumes: |
|||
- ./data/volume3:/data |
|||
networks: |
|||
- seaweed_net |
|||
healthcheck: |
|||
test: ["CMD", "curl", "-f", "http://localhost:8080/status"] |
|||
interval: 10s |
|||
timeout: 5s |
|||
retries: 3 |
|||
|
|||
# Volume Server 4 |
|||
volume4: |
|||
image: chrislusf/seaweedfs:latest |
|||
container_name: seaweed-volume4 |
|||
image: chrislusf/seaweedfs:local |
|||
ports: |
|||
- "8083:8080" |
|||
- "18083:18080" |
|||
command: > |
|||
volume |
|||
-mserver=master:9333 |
|||
-ip=volume4 |
|||
-port=8080 |
|||
-dir=/data |
|||
-max=100 |
|||
-dataCenter=dc2 |
|||
-rack=rack1 |
|||
volumes: |
|||
- volume4_data:/data |
|||
command: "volume -mserver=master:9333 -ip=volume4 -dir=/data -max=10" |
|||
depends_on: |
|||
master: |
|||
condition: service_healthy |
|||
- master |
|||
volumes: |
|||
- ./data/volume4:/data |
|||
networks: |
|||
- seaweed_net |
|||
healthcheck: |
|||
test: ["CMD", "curl", "-f", "http://localhost:8080/status"] |
|||
interval: 10s |
|||
timeout: 5s |
|||
retries: 3 |
|||
|
|||
# Volume Server 5 |
|||
volume5: |
|||
image: chrislusf/seaweedfs:latest |
|||
container_name: seaweed-volume5 |
|||
image: chrislusf/seaweedfs:local |
|||
ports: |
|||
- "8084:8080" |
|||
- "18084:18080" |
|||
command: > |
|||
volume |
|||
-mserver=master:9333 |
|||
-ip=volume5 |
|||
-port=8080 |
|||
-dir=/data |
|||
-max=100 |
|||
-dataCenter=dc2 |
|||
-rack=rack2 |
|||
volumes: |
|||
- volume5_data:/data |
|||
command: "volume -mserver=master:9333 -ip=volume5 -dir=/data -max=10" |
|||
depends_on: |
|||
master: |
|||
condition: service_healthy |
|||
- master |
|||
volumes: |
|||
- ./data/volume5:/data |
|||
networks: |
|||
- seaweed_net |
|||
healthcheck: |
|||
test: ["CMD", "curl", "-f", "http://localhost:8080/status"] |
|||
interval: 10s |
|||
timeout: 5s |
|||
retries: 3 |
|||
|
|||
# Volume Server 6 |
|||
volume6: |
|||
image: chrislusf/seaweedfs:latest |
|||
container_name: seaweed-volume6 |
|||
image: chrislusf/seaweedfs:local |
|||
ports: |
|||
- "8085:8080" |
|||
- "18085:18080" |
|||
command: > |
|||
volume |
|||
-mserver=master:9333 |
|||
-ip=volume6 |
|||
-port=8080 |
|||
-dir=/data |
|||
-max=100 |
|||
-dataCenter=dc2 |
|||
-rack=rack3 |
|||
volumes: |
|||
- volume6_data:/data |
|||
command: "volume -mserver=master:9333 -ip=volume6 -dir=/data -max=10" |
|||
depends_on: |
|||
master: |
|||
condition: service_healthy |
|||
- master |
|||
volumes: |
|||
- ./data/volume6:/data |
|||
networks: |
|||
- seaweed_net |
|||
healthcheck: |
|||
test: ["CMD", "curl", "-f", "http://localhost:8080/status"] |
|||
interval: 10s |
|||
timeout: 5s |
|||
retries: 3 |
|||
|
|||
# Filer for easier data access |
|||
filer: |
|||
image: chrislusf/seaweedfs:latest |
|||
container_name: seaweed-filer |
|||
image: chrislusf/seaweedfs:local |
|||
ports: |
|||
- "8888:8888" |
|||
- "18888:18888" |
|||
command: > |
|||
filer |
|||
-master=master:9333 |
|||
-ip=filer |
|||
-port=8888 |
|||
command: "filer -master=master:9333 -ip=filer" |
|||
depends_on: |
|||
master: |
|||
condition: service_healthy |
|||
- master |
|||
volumes: |
|||
- ./data/filer:/data |
|||
networks: |
|||
- seaweed_net |
|||
healthcheck: |
|||
test: ["CMD", "curl", "-f", "http://localhost:8888/"] |
|||
interval: 10s |
|||
timeout: 5s |
|||
retries: 3 |
|||
|
|||
# Admin Server - manages EC tasks |
|||
admin: |
|||
build: |
|||
context: ../../ |
|||
dockerfile: docker/admin_integration/Dockerfile.admin |
|||
container_name: seaweed-admin |
|||
image: chrislusf/seaweedfs:local |
|||
ports: |
|||
- "9900:9900" |
|||
- "9901:9901" |
|||
environment: |
|||
- MASTER_ADDRESS=master:9333 |
|||
- ADMIN_PORT=9900 |
|||
- GRPC_PORT=9901 |
|||
- SCAN_INTERVAL=30s |
|||
- WORKER_TIMEOUT=5m |
|||
- TASK_TIMEOUT=30m |
|||
- MAX_RETRIES=3 |
|||
- MAX_CONCURRENT_TASKS=5 |
|||
volumes: |
|||
- admin_data:/data |
|||
- ./admin-config:/config |
|||
- "23646:23646" # HTTP admin interface (default port) |
|||
- "33646:33646" # gRPC worker communication (23646 + 10000) |
|||
command: "admin -port=23646 -masters=master:9333 -dataDir=/data" |
|||
depends_on: |
|||
master: |
|||
condition: service_healthy |
|||
filer: |
|||
condition: service_healthy |
|||
- master |
|||
- filer |
|||
volumes: |
|||
- ./data/admin:/data |
|||
networks: |
|||
- seaweed_net |
|||
healthcheck: |
|||
test: ["CMD", "curl", "-f", "http://localhost:9900/health"] |
|||
interval: 15s |
|||
timeout: 5s |
|||
retries: 3 |
|||
|
|||
# EC Worker 1 |
|||
worker1: |
|||
build: |
|||
context: ../../ |
|||
dockerfile: docker/admin_integration/Dockerfile.worker |
|||
container_name: seaweed-worker1 |
|||
environment: |
|||
- ADMIN_GRPC_ADDRESS=admin:9901 |
|||
- WORKER_ID=worker-1 |
|||
- WORKER_ADDRESS=worker1:9001 |
|||
- CAPABILITIES=erasure_coding |
|||
- MAX_CONCURRENT=2 |
|||
- WORK_DIR=/work |
|||
volumes: |
|||
- worker1_data:/work |
|||
image: chrislusf/seaweedfs:local |
|||
command: "worker -admin=admin:23646 -capabilities=ec,vacuum -maxConcurrent=2" |
|||
depends_on: |
|||
admin: |
|||
condition: service_healthy |
|||
- admin |
|||
volumes: |
|||
- ./data/worker1:/data |
|||
networks: |
|||
- seaweed_net |
|||
healthcheck: |
|||
test: ["CMD", "curl", "-f", "http://localhost:9001/health"] |
|||
interval: 15s |
|||
timeout: 5s |
|||
retries: 3 |
|||
environment: |
|||
- WORKER_ID=worker-1 |
|||
|
|||
# EC Worker 2 |
|||
worker2: |
|||
build: |
|||
context: ../../ |
|||
dockerfile: docker/admin_integration/Dockerfile.worker |
|||
container_name: seaweed-worker2 |
|||
environment: |
|||
- ADMIN_GRPC_ADDRESS=admin:9901 |
|||
- WORKER_ID=worker-2 |
|||
- WORKER_ADDRESS=worker2:9001 |
|||
- CAPABILITIES=erasure_coding,vacuum |
|||
- MAX_CONCURRENT=2 |
|||
- WORK_DIR=/work |
|||
volumes: |
|||
- worker2_data:/work |
|||
image: chrislusf/seaweedfs:local |
|||
command: "worker -admin=admin:23646 -capabilities=ec,vacuum -maxConcurrent=2" |
|||
depends_on: |
|||
admin: |
|||
condition: service_healthy |
|||
- admin |
|||
volumes: |
|||
- ./data/worker2:/data |
|||
networks: |
|||
- seaweed_net |
|||
healthcheck: |
|||
test: ["CMD", "curl", "-f", "http://localhost:9001/health"] |
|||
interval: 15s |
|||
timeout: 5s |
|||
retries: 3 |
|||
environment: |
|||
- WORKER_ID=worker-2 |
|||
|
|||
# EC Worker 3 |
|||
worker3: |
|||
build: |
|||
context: ../../ |
|||
dockerfile: docker/admin_integration/Dockerfile.worker |
|||
container_name: seaweed-worker3 |
|||
environment: |
|||
- ADMIN_GRPC_ADDRESS=admin:9901 |
|||
- WORKER_ID=worker-3 |
|||
- WORKER_ADDRESS=worker3:9001 |
|||
- CAPABILITIES=erasure_coding,vacuum |
|||
- MAX_CONCURRENT=1 |
|||
- WORK_DIR=/work |
|||
volumes: |
|||
- worker3_data:/work |
|||
image: chrislusf/seaweedfs:local |
|||
command: "worker -admin=admin:23646 -capabilities=ec,vacuum -maxConcurrent=2" |
|||
depends_on: |
|||
admin: |
|||
condition: service_healthy |
|||
- admin |
|||
volumes: |
|||
- ./data/worker3:/data |
|||
networks: |
|||
- seaweed_net |
|||
healthcheck: |
|||
test: ["CMD", "curl", "-f", "http://localhost:9001/health"] |
|||
interval: 15s |
|||
timeout: 5s |
|||
retries: 3 |
|||
environment: |
|||
- WORKER_ID=worker-3 |
|||
|
|||
# Continuous Load Generator |
|||
load_generator: |
|||
build: |
|||
context: ../../ |
|||
dockerfile: docker/admin_integration/Dockerfile.load |
|||
container_name: seaweed-load |
|||
environment: |
|||
- FILER_ADDRESS=filer:8888 |
|||
- MASTER_ADDRESS=master:9333 |
|||
- WRITE_RATE=10 # files per second |
|||
- DELETE_RATE=2 # files per second |
|||
- FILE_SIZE_MIN=1MB |
|||
- FILE_SIZE_MAX=5MB |
|||
- TEST_DURATION=3600 # 1 hour |
|||
image: chrislusf/seaweedfs:local |
|||
entrypoint: ["/bin/sh"] |
|||
command: > |
|||
-c " |
|||
echo 'Starting load generator...'; |
|||
sleep 30; |
|||
echo 'Generating continuous load with 50MB volume limit...'; |
|||
while true; do |
|||
echo 'Writing test files...'; |
|||
echo 'Test file content at $(date)' | /usr/bin/weed upload -server=master:9333; |
|||
sleep 5; |
|||
echo 'Deleting some files...'; |
|||
/usr/bin/weed shell -master=master:9333 <<< 'fs.rm /test_file_*' || true; |
|||
sleep 10; |
|||
done |
|||
" |
|||
depends_on: |
|||
filer: |
|||
condition: service_healthy |
|||
admin: |
|||
condition: service_healthy |
|||
- master |
|||
- filer |
|||
- admin |
|||
networks: |
|||
- seaweed_net |
|||
|
|||
# Monitoring and Health Check |
|||
monitor: |
|||
build: |
|||
context: ../../ |
|||
dockerfile: docker/admin_integration/Dockerfile.monitor |
|||
container_name: seaweed-monitor |
|||
ports: |
|||
- "9999:9999" |
|||
environment: |
|||
- MASTER_ADDRESS=master:9333 |
|||
- ADMIN_ADDRESS=admin:9900 |
|||
- FILER_ADDRESS=filer:8888 |
|||
- MONITOR_INTERVAL=10s |
|||
image: alpine:latest |
|||
entrypoint: ["/bin/sh"] |
|||
command: > |
|||
-c " |
|||
apk add --no-cache curl jq; |
|||
echo 'Starting cluster monitor...'; |
|||
sleep 30; |
|||
while true; do |
|||
echo '=== Cluster Status $(date) ==='; |
|||
echo 'Master status:'; |
|||
curl -s http://master:9333/cluster/status | jq '.IsLeader, .Peers' || echo 'Master not ready'; |
|||
echo; |
|||
echo 'Admin status:'; |
|||
curl -s http://admin:23646/ | grep -o 'Admin.*Interface' || echo 'Admin not ready'; |
|||
echo; |
|||
echo 'Volume count by server:'; |
|||
curl -s http://master:9333/vol/status | jq '.Volumes | length' || echo 'Volumes not ready'; |
|||
echo; |
|||
sleep 60; |
|||
done |
|||
" |
|||
depends_on: |
|||
admin: |
|||
condition: service_healthy |
|||
- master |
|||
- admin |
|||
- filer |
|||
networks: |
|||
- seaweed_net |
|||
volumes: |
|||
- ./monitor-data:/monitor-data |
|||
|
|||
volumes: |
|||
master_data: |
|||
volume1_data: |
|||
volume2_data: |
|||
volume3_data: |
|||
volume4_data: |
|||
volume5_data: |
|||
volume6_data: |
|||
admin_data: |
|||
worker1_data: |
|||
worker2_data: |
|||
worker3_data: |
|||
|
|||
networks: |
|||
seaweed_net: |
|||
driver: bridge |
|||
ipam: |
|||
config: |
|||
- subnet: 172.20.0.0/16 |
|||
- seaweed_net |
|||
@ -1,21 +0,0 @@ |
|||
#!/bin/sh |
|||
|
|||
set -e |
|||
|
|||
echo "Starting Load Generator..." |
|||
echo "Filer Address: $FILER_ADDRESS" |
|||
echo "Write Rate: $WRITE_RATE files/sec" |
|||
echo "Delete Rate: $DELETE_RATE files/sec" |
|||
echo "File Size Range: $FILE_SIZE_MIN - $FILE_SIZE_MAX" |
|||
echo "Test Duration: $TEST_DURATION seconds" |
|||
|
|||
# Wait for filer to be ready |
|||
echo "Waiting for filer to be ready..." |
|||
until curl -f http://$FILER_ADDRESS/ > /dev/null 2>&1; do |
|||
echo "Filer not ready, waiting..." |
|||
sleep 5 |
|||
done |
|||
echo "Filer is ready!" |
|||
|
|||
# Start the load generator |
|||
exec ./load-generator |
|||
@ -1,375 +0,0 @@ |
|||
package main |
|||
|
|||
import ( |
|||
"bytes" |
|||
"crypto/rand" |
|||
"fmt" |
|||
"io" |
|||
"log" |
|||
"mime/multipart" |
|||
"net/http" |
|||
"os" |
|||
"strconv" |
|||
"strings" |
|||
"sync" |
|||
"time" |
|||
) |
|||
|
|||
type LoadGenerator struct { |
|||
filerAddr string |
|||
masterAddr string |
|||
writeRate int |
|||
deleteRate int |
|||
fileSizeMin int64 |
|||
fileSizeMax int64 |
|||
testDuration int |
|||
collection string |
|||
|
|||
// State tracking
|
|||
createdFiles []string |
|||
mutex sync.RWMutex |
|||
stats LoadStats |
|||
} |
|||
|
|||
type LoadStats struct { |
|||
FilesWritten int64 |
|||
FilesDeleted int64 |
|||
BytesWritten int64 |
|||
Errors int64 |
|||
StartTime time.Time |
|||
LastOperation time.Time |
|||
} |
|||
|
|||
// parseSize converts size strings like "1MB", "5MB" to bytes
|
|||
func parseSize(sizeStr string) int64 { |
|||
sizeStr = strings.ToUpper(strings.TrimSpace(sizeStr)) |
|||
|
|||
var multiplier int64 = 1 |
|||
if strings.HasSuffix(sizeStr, "KB") { |
|||
multiplier = 1024 |
|||
sizeStr = strings.TrimSuffix(sizeStr, "KB") |
|||
} else if strings.HasSuffix(sizeStr, "MB") { |
|||
multiplier = 1024 * 1024 |
|||
sizeStr = strings.TrimSuffix(sizeStr, "MB") |
|||
} else if strings.HasSuffix(sizeStr, "GB") { |
|||
multiplier = 1024 * 1024 * 1024 |
|||
sizeStr = strings.TrimSuffix(sizeStr, "GB") |
|||
} |
|||
|
|||
size, err := strconv.ParseInt(sizeStr, 10, 64) |
|||
if err != nil { |
|||
return 1024 * 1024 // Default to 1MB
|
|||
} |
|||
|
|||
return size * multiplier |
|||
} |
|||
|
|||
// generateRandomData creates random data of specified size
|
|||
func (lg *LoadGenerator) generateRandomData(size int64) []byte { |
|||
data := make([]byte, size) |
|||
_, err := rand.Read(data) |
|||
if err != nil { |
|||
// Fallback to deterministic data
|
|||
for i := range data { |
|||
data[i] = byte(i % 256) |
|||
} |
|||
} |
|||
return data |
|||
} |
|||
|
|||
// uploadFile uploads a file to SeaweedFS via filer
|
|||
func (lg *LoadGenerator) uploadFile(filename string, data []byte) error { |
|||
url := fmt.Sprintf("http://%s/%s", lg.filerAddr, filename) |
|||
if lg.collection != "" { |
|||
url = fmt.Sprintf("http://%s/%s/%s", lg.filerAddr, lg.collection, filename) |
|||
} |
|||
|
|||
// Create multipart form data
|
|||
var b bytes.Buffer |
|||
writer := multipart.NewWriter(&b) |
|||
|
|||
// Create form file field
|
|||
part, err := writer.CreateFormFile("file", filename) |
|||
if err != nil { |
|||
return err |
|||
} |
|||
|
|||
// Write file data
|
|||
_, err = part.Write(data) |
|||
if err != nil { |
|||
return err |
|||
} |
|||
|
|||
// Close the multipart writer
|
|||
err = writer.Close() |
|||
if err != nil { |
|||
return err |
|||
} |
|||
|
|||
req, err := http.NewRequest("POST", url, &b) |
|||
if err != nil { |
|||
return err |
|||
} |
|||
|
|||
req.Header.Set("Content-Type", writer.FormDataContentType()) |
|||
|
|||
client := &http.Client{Timeout: 30 * time.Second} |
|||
resp, err := client.Do(req) |
|||
if err != nil { |
|||
return err |
|||
} |
|||
defer resp.Body.Close() |
|||
|
|||
if resp.StatusCode != http.StatusOK && resp.StatusCode != http.StatusCreated { |
|||
return fmt.Errorf("upload failed with status: %d", resp.StatusCode) |
|||
} |
|||
|
|||
return nil |
|||
} |
|||
|
|||
// deleteFile deletes a file from SeaweedFS via filer
|
|||
func (lg *LoadGenerator) deleteFile(filename string) error { |
|||
url := fmt.Sprintf("http://%s/%s", lg.filerAddr, filename) |
|||
if lg.collection != "" { |
|||
url = fmt.Sprintf("http://%s/%s/%s", lg.filerAddr, lg.collection, filename) |
|||
} |
|||
|
|||
req, err := http.NewRequest("DELETE", url, nil) |
|||
if err != nil { |
|||
return err |
|||
} |
|||
|
|||
client := &http.Client{Timeout: 10 * time.Second} |
|||
resp, err := client.Do(req) |
|||
if err != nil { |
|||
return err |
|||
} |
|||
defer resp.Body.Close() |
|||
|
|||
if resp.StatusCode != http.StatusOK && resp.StatusCode != http.StatusNoContent && resp.StatusCode != http.StatusNotFound { |
|||
return fmt.Errorf("delete failed with status: %d", resp.StatusCode) |
|||
} |
|||
|
|||
return nil |
|||
} |
|||
|
|||
// writeFiles continuously writes files at the specified rate
|
|||
func (lg *LoadGenerator) writeFiles() { |
|||
writeInterval := time.Second / time.Duration(lg.writeRate) |
|||
ticker := time.NewTicker(writeInterval) |
|||
defer ticker.Stop() |
|||
|
|||
fileCounter := 0 |
|||
|
|||
for range ticker.C { |
|||
fileCounter++ |
|||
|
|||
// Random file size between min and max
|
|||
sizeDiff := lg.fileSizeMax - lg.fileSizeMin |
|||
randomSize := lg.fileSizeMin |
|||
if sizeDiff > 0 { |
|||
randomSize += int64(time.Now().UnixNano()) % sizeDiff |
|||
} |
|||
|
|||
// Generate filename
|
|||
filename := fmt.Sprintf("test-data/file-%d-%d.bin", time.Now().Unix(), fileCounter) |
|||
|
|||
// Generate random data
|
|||
data := lg.generateRandomData(randomSize) |
|||
|
|||
// Upload file
|
|||
err := lg.uploadFile(filename, data) |
|||
if err != nil { |
|||
log.Printf("Error uploading file %s: %v", filename, err) |
|||
lg.stats.Errors++ |
|||
} else { |
|||
lg.mutex.Lock() |
|||
lg.createdFiles = append(lg.createdFiles, filename) |
|||
lg.stats.FilesWritten++ |
|||
lg.stats.BytesWritten += randomSize |
|||
lg.stats.LastOperation = time.Now() |
|||
lg.mutex.Unlock() |
|||
|
|||
log.Printf("Uploaded file: %s (size: %d bytes, total files: %d)", |
|||
filename, randomSize, lg.stats.FilesWritten) |
|||
} |
|||
} |
|||
} |
|||
|
|||
// deleteFiles continuously deletes files at the specified rate
|
|||
func (lg *LoadGenerator) deleteFiles() { |
|||
deleteInterval := time.Second / time.Duration(lg.deleteRate) |
|||
ticker := time.NewTicker(deleteInterval) |
|||
defer ticker.Stop() |
|||
|
|||
for range ticker.C { |
|||
lg.mutex.Lock() |
|||
if len(lg.createdFiles) == 0 { |
|||
lg.mutex.Unlock() |
|||
continue |
|||
} |
|||
|
|||
// Pick a random file to delete
|
|||
index := int(time.Now().UnixNano()) % len(lg.createdFiles) |
|||
filename := lg.createdFiles[index] |
|||
|
|||
// Remove from slice
|
|||
lg.createdFiles = append(lg.createdFiles[:index], lg.createdFiles[index+1:]...) |
|||
lg.mutex.Unlock() |
|||
|
|||
// Delete file
|
|||
err := lg.deleteFile(filename) |
|||
if err != nil { |
|||
log.Printf("Error deleting file %s: %v", filename, err) |
|||
lg.stats.Errors++ |
|||
} else { |
|||
lg.stats.FilesDeleted++ |
|||
lg.stats.LastOperation = time.Now() |
|||
log.Printf("Deleted file: %s (remaining files: %d)", filename, len(lg.createdFiles)) |
|||
} |
|||
} |
|||
} |
|||
|
|||
// printStats periodically prints load generation statistics
|
|||
func (lg *LoadGenerator) printStats() { |
|||
ticker := time.NewTicker(30 * time.Second) |
|||
defer ticker.Stop() |
|||
|
|||
for range ticker.C { |
|||
uptime := time.Since(lg.stats.StartTime) |
|||
writeRate := float64(lg.stats.FilesWritten) / uptime.Seconds() |
|||
deleteRate := float64(lg.stats.FilesDeleted) / uptime.Seconds() |
|||
|
|||
lg.mutex.RLock() |
|||
pendingFiles := len(lg.createdFiles) |
|||
lg.mutex.RUnlock() |
|||
|
|||
log.Printf("STATS: Files written=%d, deleted=%d, pending=%d, errors=%d", |
|||
lg.stats.FilesWritten, lg.stats.FilesDeleted, pendingFiles, lg.stats.Errors) |
|||
log.Printf("RATES: Write=%.2f/sec, Delete=%.2f/sec, Data=%.2f MB written", |
|||
writeRate, deleteRate, float64(lg.stats.BytesWritten)/(1024*1024)) |
|||
} |
|||
} |
|||
|
|||
// checkClusterHealth periodically checks cluster status
|
|||
func (lg *LoadGenerator) checkClusterHealth() { |
|||
ticker := time.NewTicker(1 * time.Minute) |
|||
defer ticker.Stop() |
|||
|
|||
for range ticker.C { |
|||
// Check master status
|
|||
resp, err := http.Get(fmt.Sprintf("http://%s/cluster/status", lg.masterAddr)) |
|||
if err != nil { |
|||
log.Printf("WARNING: Cannot reach master: %v", err) |
|||
continue |
|||
} |
|||
|
|||
body, err := io.ReadAll(resp.Body) |
|||
resp.Body.Close() |
|||
|
|||
if err != nil { |
|||
log.Printf("WARNING: Cannot read master response: %v", err) |
|||
continue |
|||
} |
|||
|
|||
if resp.StatusCode == http.StatusOK { |
|||
log.Printf("Cluster health check: OK (response size: %d bytes)", len(body)) |
|||
} else { |
|||
log.Printf("WARNING: Cluster health check failed with status: %d", resp.StatusCode) |
|||
} |
|||
} |
|||
} |
|||
|
|||
func main() { |
|||
filerAddr := os.Getenv("FILER_ADDRESS") |
|||
if filerAddr == "" { |
|||
filerAddr = "filer:8888" |
|||
} |
|||
|
|||
masterAddr := os.Getenv("MASTER_ADDRESS") |
|||
if masterAddr == "" { |
|||
masterAddr = "master:9333" |
|||
} |
|||
|
|||
writeRate, _ := strconv.Atoi(os.Getenv("WRITE_RATE")) |
|||
if writeRate <= 0 { |
|||
writeRate = 10 |
|||
} |
|||
|
|||
deleteRate, _ := strconv.Atoi(os.Getenv("DELETE_RATE")) |
|||
if deleteRate <= 0 { |
|||
deleteRate = 2 |
|||
} |
|||
|
|||
fileSizeMin := parseSize(os.Getenv("FILE_SIZE_MIN")) |
|||
if fileSizeMin <= 0 { |
|||
fileSizeMin = 1024 * 1024 // 1MB
|
|||
} |
|||
|
|||
fileSizeMax := parseSize(os.Getenv("FILE_SIZE_MAX")) |
|||
if fileSizeMax <= fileSizeMin { |
|||
fileSizeMax = 5 * 1024 * 1024 // 5MB
|
|||
} |
|||
|
|||
testDuration, _ := strconv.Atoi(os.Getenv("TEST_DURATION")) |
|||
if testDuration <= 0 { |
|||
testDuration = 3600 // 1 hour
|
|||
} |
|||
|
|||
collection := os.Getenv("COLLECTION") |
|||
|
|||
lg := &LoadGenerator{ |
|||
filerAddr: filerAddr, |
|||
masterAddr: masterAddr, |
|||
writeRate: writeRate, |
|||
deleteRate: deleteRate, |
|||
fileSizeMin: fileSizeMin, |
|||
fileSizeMax: fileSizeMax, |
|||
testDuration: testDuration, |
|||
collection: collection, |
|||
createdFiles: make([]string, 0), |
|||
stats: LoadStats{ |
|||
StartTime: time.Now(), |
|||
}, |
|||
} |
|||
|
|||
log.Printf("Starting load generator...") |
|||
log.Printf("Filer: %s", filerAddr) |
|||
log.Printf("Master: %s", masterAddr) |
|||
log.Printf("Write rate: %d files/sec", writeRate) |
|||
log.Printf("Delete rate: %d files/sec", deleteRate) |
|||
log.Printf("File size: %d - %d bytes", fileSizeMin, fileSizeMax) |
|||
log.Printf("Test duration: %d seconds", testDuration) |
|||
log.Printf("Collection: '%s'", collection) |
|||
|
|||
// Wait for filer to be ready
|
|||
log.Println("Waiting for filer to be ready...") |
|||
for { |
|||
resp, err := http.Get(fmt.Sprintf("http://%s/", filerAddr)) |
|||
if err == nil && resp.StatusCode == http.StatusOK { |
|||
resp.Body.Close() |
|||
break |
|||
} |
|||
if resp != nil { |
|||
resp.Body.Close() |
|||
} |
|||
log.Println("Filer not ready, waiting...") |
|||
time.Sleep(5 * time.Second) |
|||
} |
|||
log.Println("Filer is ready!") |
|||
|
|||
// Start background goroutines
|
|||
go lg.writeFiles() |
|||
go lg.deleteFiles() |
|||
go lg.printStats() |
|||
go lg.checkClusterHealth() |
|||
|
|||
// Run for specified duration
|
|||
log.Printf("Load test will run for %d seconds...", testDuration) |
|||
time.Sleep(time.Duration(testDuration) * time.Second) |
|||
|
|||
log.Println("Load test completed!") |
|||
log.Printf("Final stats: Files written=%d, deleted=%d, errors=%d, total data=%.2f MB", |
|||
lg.stats.FilesWritten, lg.stats.FilesDeleted, lg.stats.Errors, |
|||
float64(lg.stats.BytesWritten)/(1024*1024)) |
|||
} |
|||
@ -1,38 +0,0 @@ |
|||
#!/bin/sh |
|||
|
|||
set -e |
|||
|
|||
echo "Starting Cluster Monitor..." |
|||
echo "Master Address: $MASTER_ADDRESS" |
|||
echo "Admin Address: $ADMIN_ADDRESS" |
|||
echo "Filer Address: $FILER_ADDRESS" |
|||
echo "Monitor Interval: $MONITOR_INTERVAL" |
|||
|
|||
# Wait for core services to be ready |
|||
echo "Waiting for core services to be ready..." |
|||
|
|||
echo "Waiting for master..." |
|||
until curl -f http://$MASTER_ADDRESS/cluster/status > /dev/null 2>&1; do |
|||
echo "Master not ready, waiting..." |
|||
sleep 5 |
|||
done |
|||
echo "Master is ready!" |
|||
|
|||
echo "Waiting for admin..." |
|||
until curl -f http://$ADMIN_ADDRESS/health > /dev/null 2>&1; do |
|||
echo "Admin not ready, waiting..." |
|||
sleep 5 |
|||
done |
|||
echo "Admin is ready!" |
|||
|
|||
echo "Waiting for filer..." |
|||
until curl -f http://$FILER_ADDRESS/ > /dev/null 2>&1; do |
|||
echo "Filer not ready, waiting..." |
|||
sleep 5 |
|||
done |
|||
echo "Filer is ready!" |
|||
|
|||
echo "All services ready! Starting monitor..." |
|||
|
|||
# Start the monitor |
|||
exec ./monitor |
|||
@ -1,366 +0,0 @@ |
|||
package main |
|||
|
|||
import ( |
|||
"encoding/json" |
|||
"fmt" |
|||
"io" |
|||
"log" |
|||
"net/http" |
|||
"os" |
|||
"time" |
|||
) |
|||
|
|||
type Monitor struct { |
|||
masterAddr string |
|||
adminAddr string |
|||
filerAddr string |
|||
interval time.Duration |
|||
startTime time.Time |
|||
stats MonitorStats |
|||
} |
|||
|
|||
type MonitorStats struct { |
|||
TotalChecks int64 |
|||
MasterHealthy int64 |
|||
AdminHealthy int64 |
|||
FilerHealthy int64 |
|||
VolumeCount int64 |
|||
LastVolumeCheck time.Time |
|||
ECTasksDetected int64 |
|||
WorkersActive int64 |
|||
LastWorkerCheck time.Time |
|||
} |
|||
|
|||
type ClusterStatus struct { |
|||
IsLeader bool `json:"IsLeader"` |
|||
Leader string `json:"Leader"` |
|||
Peers []string `json:"Peers"` |
|||
} |
|||
|
|||
type VolumeStatus struct { |
|||
Volumes []VolumeInfo `json:"Volumes"` |
|||
} |
|||
|
|||
type VolumeInfo struct { |
|||
Id uint32 `json:"Id"` |
|||
Size uint64 `json:"Size"` |
|||
Collection string `json:"Collection"` |
|||
FileCount int64 `json:"FileCount"` |
|||
DeleteCount int64 `json:"DeleteCount"` |
|||
DeletedByteCount uint64 `json:"DeletedByteCount"` |
|||
ReadOnly bool `json:"ReadOnly"` |
|||
CompactRevision uint32 `json:"CompactRevision"` |
|||
Version uint32 `json:"Version"` |
|||
} |
|||
|
|||
type AdminStatus struct { |
|||
Status string `json:"status"` |
|||
Uptime string `json:"uptime"` |
|||
Tasks int `json:"tasks"` |
|||
Workers int `json:"workers"` |
|||
} |
|||
|
|||
// checkMasterHealth checks the master server health
|
|||
func (m *Monitor) checkMasterHealth() bool { |
|||
resp, err := http.Get(fmt.Sprintf("http://%s/cluster/status", m.masterAddr)) |
|||
if err != nil { |
|||
log.Printf("ERROR: Cannot reach master %s: %v", m.masterAddr, err) |
|||
return false |
|||
} |
|||
defer resp.Body.Close() |
|||
|
|||
if resp.StatusCode != http.StatusOK { |
|||
log.Printf("ERROR: Master returned status %d", resp.StatusCode) |
|||
return false |
|||
} |
|||
|
|||
var status ClusterStatus |
|||
body, err := io.ReadAll(resp.Body) |
|||
if err != nil { |
|||
log.Printf("ERROR: Cannot read master response: %v", err) |
|||
return false |
|||
} |
|||
|
|||
err = json.Unmarshal(body, &status) |
|||
if err != nil { |
|||
log.Printf("WARNING: Cannot parse master status: %v", err) |
|||
// Still consider it healthy if we got a response
|
|||
return true |
|||
} |
|||
|
|||
log.Printf("Master status: Leader=%s, IsLeader=%t, Peers=%d", |
|||
status.Leader, status.IsLeader, len(status.Peers)) |
|||
|
|||
m.stats.MasterHealthy++ |
|||
return true |
|||
} |
|||
|
|||
// checkAdminHealth checks the admin server health
|
|||
func (m *Monitor) checkAdminHealth() bool { |
|||
resp, err := http.Get(fmt.Sprintf("http://%s/health", m.adminAddr)) |
|||
if err != nil { |
|||
log.Printf("ERROR: Cannot reach admin %s: %v", m.adminAddr, err) |
|||
return false |
|||
} |
|||
defer resp.Body.Close() |
|||
|
|||
if resp.StatusCode != http.StatusOK { |
|||
log.Printf("ERROR: Admin returned status %d", resp.StatusCode) |
|||
return false |
|||
} |
|||
|
|||
var status AdminStatus |
|||
body, err := io.ReadAll(resp.Body) |
|||
if err != nil { |
|||
log.Printf("ERROR: Cannot read admin response: %v", err) |
|||
return false |
|||
} |
|||
|
|||
err = json.Unmarshal(body, &status) |
|||
if err != nil { |
|||
log.Printf("WARNING: Cannot parse admin status: %v", err) |
|||
return true |
|||
} |
|||
|
|||
log.Printf("Admin status: %s, Uptime=%s, Tasks=%d, Workers=%d", |
|||
status.Status, status.Uptime, status.Tasks, status.Workers) |
|||
|
|||
m.stats.AdminHealthy++ |
|||
m.stats.ECTasksDetected += int64(status.Tasks) |
|||
m.stats.WorkersActive = int64(status.Workers) |
|||
m.stats.LastWorkerCheck = time.Now() |
|||
|
|||
return true |
|||
} |
|||
|
|||
// checkFilerHealth checks the filer health
|
|||
func (m *Monitor) checkFilerHealth() bool { |
|||
resp, err := http.Get(fmt.Sprintf("http://%s/", m.filerAddr)) |
|||
if err != nil { |
|||
log.Printf("ERROR: Cannot reach filer %s: %v", m.filerAddr, err) |
|||
return false |
|||
} |
|||
defer resp.Body.Close() |
|||
|
|||
if resp.StatusCode != http.StatusOK { |
|||
log.Printf("ERROR: Filer returned status %d", resp.StatusCode) |
|||
return false |
|||
} |
|||
|
|||
m.stats.FilerHealthy++ |
|||
return true |
|||
} |
|||
|
|||
// checkVolumeStatus checks volume information from master
|
|||
func (m *Monitor) checkVolumeStatus() { |
|||
resp, err := http.Get(fmt.Sprintf("http://%s/vol/status", m.masterAddr)) |
|||
if err != nil { |
|||
log.Printf("ERROR: Cannot get volume status: %v", err) |
|||
return |
|||
} |
|||
defer resp.Body.Close() |
|||
|
|||
if resp.StatusCode != http.StatusOK { |
|||
log.Printf("ERROR: Volume status returned status %d", resp.StatusCode) |
|||
return |
|||
} |
|||
|
|||
body, err := io.ReadAll(resp.Body) |
|||
if err != nil { |
|||
log.Printf("ERROR: Cannot read volume status: %v", err) |
|||
return |
|||
} |
|||
|
|||
var volumeStatus VolumeStatus |
|||
err = json.Unmarshal(body, &volumeStatus) |
|||
if err != nil { |
|||
log.Printf("WARNING: Cannot parse volume status: %v", err) |
|||
return |
|||
} |
|||
|
|||
m.stats.VolumeCount = int64(len(volumeStatus.Volumes)) |
|||
m.stats.LastVolumeCheck = time.Now() |
|||
|
|||
// Analyze volumes
|
|||
var readOnlyCount, fullVolumeCount, ecCandidates int |
|||
var totalSize, totalFiles uint64 |
|||
|
|||
for _, vol := range volumeStatus.Volumes { |
|||
totalSize += vol.Size |
|||
totalFiles += uint64(vol.FileCount) |
|||
|
|||
if vol.ReadOnly { |
|||
readOnlyCount++ |
|||
} |
|||
|
|||
// Volume is close to full (>40MB for 50MB limit)
|
|||
if vol.Size > 40*1024*1024 { |
|||
fullVolumeCount++ |
|||
if !vol.ReadOnly { |
|||
ecCandidates++ |
|||
} |
|||
} |
|||
} |
|||
|
|||
log.Printf("Volume analysis: Total=%d, ReadOnly=%d, Full=%d, EC_Candidates=%d", |
|||
len(volumeStatus.Volumes), readOnlyCount, fullVolumeCount, ecCandidates) |
|||
log.Printf("Storage stats: Total_Size=%.2fMB, Total_Files=%d", |
|||
float64(totalSize)/(1024*1024), totalFiles) |
|||
|
|||
if ecCandidates > 0 { |
|||
log.Printf("⚠️ DETECTED %d volumes that should be EC'd!", ecCandidates) |
|||
} |
|||
} |
|||
|
|||
// healthHandler provides a health endpoint for the monitor itself
|
|||
func (m *Monitor) healthHandler(w http.ResponseWriter, r *http.Request) { |
|||
w.Header().Set("Content-Type", "application/json") |
|||
json.NewEncoder(w).Encode(map[string]interface{}{ |
|||
"status": "healthy", |
|||
"uptime": time.Since(m.startTime).String(), |
|||
"checks": m.stats.TotalChecks, |
|||
"last_check": m.stats.LastVolumeCheck.Format(time.RFC3339), |
|||
}) |
|||
} |
|||
|
|||
// statusHandler provides detailed monitoring status
|
|||
func (m *Monitor) statusHandler(w http.ResponseWriter, r *http.Request) { |
|||
w.Header().Set("Content-Type", "application/json") |
|||
json.NewEncoder(w).Encode(map[string]interface{}{ |
|||
"monitor": map[string]interface{}{ |
|||
"uptime": time.Since(m.startTime).String(), |
|||
"master_addr": m.masterAddr, |
|||
"admin_addr": m.adminAddr, |
|||
"filer_addr": m.filerAddr, |
|||
"interval": m.interval.String(), |
|||
}, |
|||
"stats": m.stats, |
|||
"health": map[string]interface{}{ |
|||
"master_healthy": m.stats.MasterHealthy > 0 && time.Since(m.stats.LastVolumeCheck) < 2*m.interval, |
|||
"admin_healthy": m.stats.AdminHealthy > 0 && time.Since(m.stats.LastWorkerCheck) < 2*m.interval, |
|||
"filer_healthy": m.stats.FilerHealthy > 0, |
|||
}, |
|||
}) |
|||
} |
|||
|
|||
// runMonitoring runs the main monitoring loop
|
|||
func (m *Monitor) runMonitoring() { |
|||
ticker := time.NewTicker(m.interval) |
|||
defer ticker.Stop() |
|||
|
|||
log.Printf("Starting monitoring loop every %v", m.interval) |
|||
|
|||
for { |
|||
m.stats.TotalChecks++ |
|||
|
|||
log.Printf("=== Monitoring Check #%d ===", m.stats.TotalChecks) |
|||
|
|||
// Check master health
|
|||
if m.checkMasterHealth() { |
|||
// If master is healthy, check volumes
|
|||
m.checkVolumeStatus() |
|||
} |
|||
|
|||
// Check admin health
|
|||
m.checkAdminHealth() |
|||
|
|||
// Check filer health
|
|||
m.checkFilerHealth() |
|||
|
|||
// Print summary
|
|||
log.Printf("Health Summary: Master=%t, Admin=%t, Filer=%t, Volumes=%d, Workers=%d", |
|||
m.stats.MasterHealthy > 0, |
|||
m.stats.AdminHealthy > 0, |
|||
m.stats.FilerHealthy > 0, |
|||
m.stats.VolumeCount, |
|||
m.stats.WorkersActive) |
|||
|
|||
log.Printf("=== End Check #%d ===", m.stats.TotalChecks) |
|||
|
|||
<-ticker.C |
|||
} |
|||
} |
|||
|
|||
func main() { |
|||
masterAddr := os.Getenv("MASTER_ADDRESS") |
|||
if masterAddr == "" { |
|||
masterAddr = "master:9333" |
|||
} |
|||
|
|||
adminAddr := os.Getenv("ADMIN_ADDRESS") |
|||
if adminAddr == "" { |
|||
adminAddr = "admin:9900" |
|||
} |
|||
|
|||
filerAddr := os.Getenv("FILER_ADDRESS") |
|||
if filerAddr == "" { |
|||
filerAddr = "filer:8888" |
|||
} |
|||
|
|||
intervalStr := os.Getenv("MONITOR_INTERVAL") |
|||
interval, err := time.ParseDuration(intervalStr) |
|||
if err != nil { |
|||
interval = 10 * time.Second |
|||
} |
|||
|
|||
monitor := &Monitor{ |
|||
masterAddr: masterAddr, |
|||
adminAddr: adminAddr, |
|||
filerAddr: filerAddr, |
|||
interval: interval, |
|||
startTime: time.Now(), |
|||
stats: MonitorStats{}, |
|||
} |
|||
|
|||
log.Printf("Starting SeaweedFS Cluster Monitor") |
|||
log.Printf("Master: %s", masterAddr) |
|||
log.Printf("Admin: %s", adminAddr) |
|||
log.Printf("Filer: %s", filerAddr) |
|||
log.Printf("Interval: %v", interval) |
|||
|
|||
// Setup HTTP endpoints
|
|||
http.HandleFunc("/health", monitor.healthHandler) |
|||
http.HandleFunc("/status", monitor.statusHandler) |
|||
|
|||
// Start HTTP server in background
|
|||
go func() { |
|||
log.Println("Monitor HTTP server starting on :9999") |
|||
if err := http.ListenAndServe(":9999", nil); err != nil { |
|||
log.Printf("Monitor HTTP server error: %v", err) |
|||
} |
|||
}() |
|||
|
|||
// Wait for services to be ready
|
|||
log.Println("Waiting for services to be ready...") |
|||
for { |
|||
masterOK := false |
|||
adminOK := false |
|||
filerOK := false |
|||
|
|||
if resp, err := http.Get(fmt.Sprintf("http://%s/cluster/status", masterAddr)); err == nil && resp.StatusCode == http.StatusOK { |
|||
masterOK = true |
|||
resp.Body.Close() |
|||
} |
|||
|
|||
if resp, err := http.Get(fmt.Sprintf("http://%s/health", adminAddr)); err == nil && resp.StatusCode == http.StatusOK { |
|||
adminOK = true |
|||
resp.Body.Close() |
|||
} |
|||
|
|||
if resp, err := http.Get(fmt.Sprintf("http://%s/", filerAddr)); err == nil && resp.StatusCode == http.StatusOK { |
|||
filerOK = true |
|||
resp.Body.Close() |
|||
} |
|||
|
|||
if masterOK && adminOK && filerOK { |
|||
log.Println("All services are ready!") |
|||
break |
|||
} |
|||
|
|||
log.Printf("Services ready: Master=%t, Admin=%t, Filer=%t", masterOK, adminOK, filerOK) |
|||
time.Sleep(5 * time.Second) |
|||
} |
|||
|
|||
// Start monitoring
|
|||
monitor.runMonitoring() |
|||
} |
|||
@ -1,106 +0,0 @@ |
|||
#!/bin/bash |
|||
|
|||
set -e |
|||
|
|||
# Colors for output |
|||
RED='\033[0;31m' |
|||
GREEN='\033[0;32m' |
|||
YELLOW='\033[1;33m' |
|||
BLUE='\033[0;34m' |
|||
NC='\033[0m' # No Color |
|||
|
|||
echo -e "${BLUE}🧪 SeaweedFS EC Worker Testing Environment${NC}" |
|||
echo -e "${BLUE}===========================================${NC}" |
|||
|
|||
# Check if docker-compose is available |
|||
if ! command -v docker-compose &> /dev/null; then |
|||
echo -e "${RED}❌ docker-compose is required but not installed${NC}" |
|||
exit 1 |
|||
fi |
|||
|
|||
# Create necessary directories |
|||
echo -e "${YELLOW}📁 Creating required directories...${NC}" |
|||
mkdir -p monitor-data admin-config |
|||
|
|||
# Make scripts executable |
|||
echo -e "${YELLOW}🔧 Making scripts executable...${NC}" |
|||
chmod +x *.sh |
|||
|
|||
# Stop any existing containers |
|||
echo -e "${YELLOW}🛑 Stopping any existing containers...${NC}" |
|||
docker-compose -f docker-compose-ec-test.yml down -v 2>/dev/null || true |
|||
|
|||
# Build and start the environment |
|||
echo -e "${GREEN}🚀 Starting SeaweedFS EC testing environment...${NC}" |
|||
echo -e "${BLUE}This will start:${NC}" |
|||
echo -e " • 1 Master server (port 9333)" |
|||
echo -e " • 6 Volume servers (ports 8080-8085) with 50MB volume limit" |
|||
echo -e " • 1 Filer (port 8888)" |
|||
echo -e " • 1 Admin server (port 9900)" |
|||
echo -e " • 3 EC Workers" |
|||
echo -e " • 1 Load generator (continuous read/write)" |
|||
echo -e " • 1 Monitor (port 9999)" |
|||
echo "" |
|||
|
|||
docker-compose -f docker-compose-ec-test.yml up --build -d |
|||
|
|||
echo -e "${GREEN}✅ Environment started successfully!${NC}" |
|||
echo "" |
|||
echo -e "${BLUE}📊 Monitoring URLs:${NC}" |
|||
echo -e " • Master UI: http://localhost:9333" |
|||
echo -e " • Filer: http://localhost:8888" |
|||
echo -e " • Admin Server: http://localhost:9900/status" |
|||
echo -e " • Monitor: http://localhost:9999/status" |
|||
echo "" |
|||
echo -e "${BLUE}📈 Volume Servers:${NC}" |
|||
echo -e " • Volume1: http://localhost:8080/status" |
|||
echo -e " • Volume2: http://localhost:8081/status" |
|||
echo -e " • Volume3: http://localhost:8082/status" |
|||
echo -e " • Volume4: http://localhost:8083/status" |
|||
echo -e " • Volume5: http://localhost:8084/status" |
|||
echo -e " • Volume6: http://localhost:8085/status" |
|||
echo "" |
|||
|
|||
echo -e "${YELLOW}⏳ Waiting for services to be ready...${NC}" |
|||
sleep 10 |
|||
|
|||
# Check service health |
|||
echo -e "${BLUE}🔍 Checking service health...${NC}" |
|||
|
|||
check_service() { |
|||
local name=$1 |
|||
local url=$2 |
|||
|
|||
if curl -s "$url" > /dev/null 2>&1; then |
|||
echo -e " ✅ $name: ${GREEN}Healthy${NC}" |
|||
return 0 |
|||
else |
|||
echo -e " ❌ $name: ${RED}Not responding${NC}" |
|||
return 1 |
|||
fi |
|||
} |
|||
|
|||
check_service "Master" "http://localhost:9333/cluster/status" |
|||
check_service "Filer" "http://localhost:8888/" |
|||
check_service "Admin" "http://localhost:9900/health" |
|||
check_service "Monitor" "http://localhost:9999/health" |
|||
|
|||
echo "" |
|||
echo -e "${GREEN}🎯 Test Environment is Ready!${NC}" |
|||
echo "" |
|||
echo -e "${BLUE}What's happening:${NC}" |
|||
echo -e " 1. 📝 Load generator continuously writes 1-5MB files at 10 files/sec" |
|||
echo -e " 2. 🗑️ Load generator deletes files at 2 files/sec" |
|||
echo -e " 3. 📊 Volumes fill up to 50MB limit and trigger EC conversion" |
|||
echo -e " 4. 🏭 Admin server detects volumes needing EC and assigns to workers" |
|||
echo -e " 5. ⚡ Workers perform comprehensive EC (copy→encode→distribute)" |
|||
echo -e " 6. 📈 Monitor tracks all activity and volume states" |
|||
echo "" |
|||
echo -e "${YELLOW}📋 Useful Commands:${NC}" |
|||
echo -e " • View logs: docker-compose -f docker-compose-ec-test.yml logs -f [service]" |
|||
echo -e " • Check worker status: docker-compose -f docker-compose-ec-test.yml logs worker1" |
|||
echo -e " • Stop environment: docker-compose -f docker-compose-ec-test.yml down -v" |
|||
echo -e " • Monitor logs: docker-compose -f docker-compose-ec-test.yml logs -f monitor" |
|||
echo "" |
|||
echo -e "${GREEN}🔥 The test will run for 1 hour by default${NC}" |
|||
echo -e "${BLUE}Monitor progress at: http://localhost:9999/status${NC}" |
|||
@ -0,0 +1,73 @@ |
|||
#!/bin/bash |
|||
|
|||
set -e |
|||
|
|||
echo "🧪 Testing SeaweedFS Admin-Worker Integration" |
|||
echo "=============================================" |
|||
|
|||
# Colors for output |
|||
RED='\033[0;31m' |
|||
GREEN='\033[0;32m' |
|||
YELLOW='\033[1;33m' |
|||
BLUE='\033[0;34m' |
|||
NC='\033[0m' # No Color |
|||
|
|||
cd "$(dirname "$0")" |
|||
|
|||
echo -e "${BLUE}1. Validating docker-compose configuration...${NC}" |
|||
if docker-compose -f docker-compose-ec-test.yml config > /dev/null; then |
|||
echo -e "${GREEN}✅ Docker compose configuration is valid${NC}" |
|||
else |
|||
echo -e "${RED}❌ Docker compose configuration is invalid${NC}" |
|||
exit 1 |
|||
fi |
|||
|
|||
echo -e "${BLUE}2. Checking if required ports are available...${NC}" |
|||
for port in 9333 8080 8081 8082 8083 8084 8085 8888 23646; do |
|||
if lsof -i :$port > /dev/null 2>&1; then |
|||
echo -e "${YELLOW}⚠️ Port $port is in use${NC}" |
|||
else |
|||
echo -e "${GREEN}✅ Port $port is available${NC}" |
|||
fi |
|||
done |
|||
|
|||
echo -e "${BLUE}3. Testing worker command syntax...${NC}" |
|||
# Test that the worker command in docker-compose has correct syntax |
|||
if docker-compose -f docker-compose-ec-test.yml config | grep -q "workingDir=/work"; then |
|||
echo -e "${GREEN}✅ Worker working directory option is properly configured${NC}" |
|||
else |
|||
echo -e "${RED}❌ Worker working directory option is missing${NC}" |
|||
exit 1 |
|||
fi |
|||
|
|||
echo -e "${BLUE}4. Verifying admin server configuration...${NC}" |
|||
if docker-compose -f docker-compose-ec-test.yml config | grep -q "admin:23646"; then |
|||
echo -e "${GREEN}✅ Admin server port configuration is correct${NC}" |
|||
else |
|||
echo -e "${RED}❌ Admin server port configuration is incorrect${NC}" |
|||
exit 1 |
|||
fi |
|||
|
|||
echo -e "${BLUE}5. Checking service dependencies...${NC}" |
|||
if docker-compose -f docker-compose-ec-test.yml config | grep -q "depends_on"; then |
|||
echo -e "${GREEN}✅ Service dependencies are configured${NC}" |
|||
else |
|||
echo -e "${YELLOW}⚠️ Service dependencies may not be configured${NC}" |
|||
fi |
|||
|
|||
echo "" |
|||
echo -e "${GREEN}🎉 Integration test configuration is ready!${NC}" |
|||
echo "" |
|||
echo -e "${BLUE}To start the integration test:${NC}" |
|||
echo " make start # Start all services" |
|||
echo " make health # Check service health" |
|||
echo " make logs # View logs" |
|||
echo " make stop # Stop all services" |
|||
echo "" |
|||
echo -e "${BLUE}Key features verified:${NC}" |
|||
echo " ✅ Official SeaweedFS images are used" |
|||
echo " ✅ Worker working directories are configured" |
|||
echo " ✅ Admin-worker communication on correct ports" |
|||
echo " ✅ Task-specific directories will be created" |
|||
echo " ✅ Load generator will trigger EC tasks" |
|||
echo " ✅ Monitor will track progress" |
|||
@ -1,230 +0,0 @@ |
|||
#!/bin/sh |
|||
|
|||
set -e |
|||
|
|||
echo "Starting SeaweedFS EC Worker..." |
|||
echo "Worker ID: $WORKER_ID" |
|||
echo "Admin Address: $ADMIN_ADDRESS" |
|||
echo "Capabilities: $CAPABILITIES" |
|||
|
|||
# Wait for admin server to be ready |
|||
echo "Waiting for admin server to be ready..." |
|||
until curl -f http://$ADMIN_ADDRESS/health > /dev/null 2>&1; do |
|||
echo "Admin server not ready, waiting..." |
|||
sleep 5 |
|||
done |
|||
echo "Admin server is ready!" |
|||
|
|||
# Create worker simulation |
|||
cat > /tmp/worker.go << 'EOF' |
|||
package main |
|||
|
|||
import ( |
|||
"encoding/json" |
|||
"fmt" |
|||
"log" |
|||
"net/http" |
|||
"os" |
|||
"strings" |
|||
"time" |
|||
) |
|||
|
|||
type Worker struct { |
|||
id string |
|||
adminAddr string |
|||
address string |
|||
capabilities []string |
|||
maxConcurrent int |
|||
workDir string |
|||
startTime time.Time |
|||
activeTasks map[string]*Task |
|||
} |
|||
|
|||
type Task struct { |
|||
ID string `json:"id"` |
|||
Type string `json:"type"` |
|||
VolumeID int `json:"volume_id"` |
|||
Status string `json:"status"` |
|||
Progress float64 `json:"progress"` |
|||
Started time.Time `json:"started"` |
|||
} |
|||
|
|||
func (w *Worker) healthHandler(res http.ResponseWriter, req *http.Request) { |
|||
res.Header().Set("Content-Type", "application/json") |
|||
json.NewEncoder(res).Encode(map[string]interface{}{ |
|||
"status": "healthy", |
|||
"worker_id": w.id, |
|||
"uptime": time.Since(w.startTime).String(), |
|||
"active_tasks": len(w.activeTasks), |
|||
"capabilities": w.capabilities, |
|||
}) |
|||
} |
|||
|
|||
func (w *Worker) statusHandler(res http.ResponseWriter, req *http.Request) { |
|||
res.Header().Set("Content-Type", "application/json") |
|||
json.NewEncoder(res).Encode(map[string]interface{}{ |
|||
"worker_id": w.id, |
|||
"admin_addr": w.adminAddr, |
|||
"capabilities": w.capabilities, |
|||
"max_concurrent": w.maxConcurrent, |
|||
"active_tasks": w.activeTasks, |
|||
"uptime": time.Since(w.startTime).String(), |
|||
}) |
|||
} |
|||
|
|||
func (w *Worker) simulateECTask(taskID string, volumeID int) { |
|||
log.Printf("Starting EC task %s for volume %d", taskID, volumeID) |
|||
|
|||
task := &Task{ |
|||
ID: taskID, |
|||
Type: "erasure_coding", |
|||
VolumeID: volumeID, |
|||
Status: "running", |
|||
Progress: 0.0, |
|||
Started: time.Now(), |
|||
} |
|||
|
|||
w.activeTasks[taskID] = task |
|||
|
|||
// Simulate EC process phases |
|||
phases := []struct { |
|||
progress float64 |
|||
phase string |
|||
duration time.Duration |
|||
}{ |
|||
{5.0, "Copying volume data locally", 10 * time.Second}, |
|||
{25.0, "Marking volume read-only", 2 * time.Second}, |
|||
{60.0, "Performing local EC encoding", 30 * time.Second}, |
|||
{70.0, "Calculating optimal shard placement", 5 * time.Second}, |
|||
{90.0, "Distributing shards to servers", 20 * time.Second}, |
|||
{100.0, "Verification and cleanup", 3 * time.Second}, |
|||
} |
|||
|
|||
go func() { |
|||
for _, phase := range phases { |
|||
if task.Status != "running" { |
|||
break |
|||
} |
|||
|
|||
time.Sleep(phase.duration) |
|||
task.Progress = phase.progress |
|||
log.Printf("Task %s: %.1f%% - %s", taskID, phase.progress, phase.phase) |
|||
} |
|||
|
|||
if task.Status == "running" { |
|||
task.Status = "completed" |
|||
task.Progress = 100.0 |
|||
log.Printf("Task %s completed successfully", taskID) |
|||
} |
|||
|
|||
// Remove from active tasks after completion |
|||
time.Sleep(5 * time.Second) |
|||
delete(w.activeTasks, taskID) |
|||
}() |
|||
} |
|||
|
|||
func (w *Worker) registerWithAdmin() { |
|||
ticker := time.NewTicker(30 * time.Second) |
|||
go func() { |
|||
for { |
|||
// Register/heartbeat with admin server |
|||
log.Printf("Sending heartbeat to admin server...") |
|||
|
|||
data := map[string]interface{}{ |
|||
"worker_id": w.id, |
|||
"address": w.address, |
|||
"capabilities": w.capabilities, |
|||
"max_concurrent": w.maxConcurrent, |
|||
"active_tasks": len(w.activeTasks), |
|||
"status": "active", |
|||
} |
|||
|
|||
jsonData, _ := json.Marshal(data) |
|||
|
|||
// In real implementation, this would be a proper gRPC call |
|||
resp, err := http.Post( |
|||
fmt.Sprintf("http://%s/register-worker", w.adminAddr), |
|||
"application/json", |
|||
strings.NewReader(string(jsonData)), |
|||
) |
|||
if err != nil { |
|||
log.Printf("Failed to register with admin: %v", err) |
|||
} else { |
|||
resp.Body.Close() |
|||
log.Printf("Successfully sent heartbeat to admin") |
|||
} |
|||
|
|||
// Simulate requesting new tasks |
|||
if len(w.activeTasks) < w.maxConcurrent { |
|||
// In real implementation, worker would request tasks from admin |
|||
// For simulation, we'll create some tasks periodically |
|||
if len(w.activeTasks) == 0 && time.Since(w.startTime) > 1*time.Minute { |
|||
taskID := fmt.Sprintf("%s-task-%d", w.id, time.Now().Unix()) |
|||
volumeID := 2000 + int(time.Now().Unix()%1000) |
|||
w.simulateECTask(taskID, volumeID) |
|||
} |
|||
} |
|||
|
|||
<-ticker.C |
|||
} |
|||
}() |
|||
} |
|||
|
|||
func main() { |
|||
workerID := os.Getenv("WORKER_ID") |
|||
if workerID == "" { |
|||
workerID = "worker-1" |
|||
} |
|||
|
|||
adminAddr := os.Getenv("ADMIN_ADDRESS") |
|||
if adminAddr == "" { |
|||
adminAddr = "admin:9900" |
|||
} |
|||
|
|||
address := os.Getenv("WORKER_ADDRESS") |
|||
if address == "" { |
|||
address = "worker:9001" |
|||
} |
|||
|
|||
capabilities := strings.Split(os.Getenv("CAPABILITIES"), ",") |
|||
if len(capabilities) == 0 || capabilities[0] == "" { |
|||
capabilities = []string{"erasure_coding"} |
|||
} |
|||
|
|||
worker := &Worker{ |
|||
id: workerID, |
|||
adminAddr: adminAddr, |
|||
address: address, |
|||
capabilities: capabilities, |
|||
maxConcurrent: 2, |
|||
workDir: "/work", |
|||
startTime: time.Now(), |
|||
activeTasks: make(map[string]*Task), |
|||
} |
|||
|
|||
http.HandleFunc("/health", worker.healthHandler) |
|||
http.HandleFunc("/status", worker.statusHandler) |
|||
|
|||
// Start registration and heartbeat |
|||
worker.registerWithAdmin() |
|||
|
|||
log.Printf("Worker %s starting on address %s", workerID, address) |
|||
log.Printf("Admin address: %s", adminAddr) |
|||
log.Printf("Capabilities: %v", capabilities) |
|||
|
|||
port := ":9001" |
|||
if strings.Contains(address, ":") { |
|||
parts := strings.Split(address, ":") |
|||
port = ":" + parts[1] |
|||
} |
|||
|
|||
if err := http.ListenAndServe(port, nil); err != nil { |
|||
log.Fatal("Worker failed to start:", err) |
|||
} |
|||
} |
|||
EOF |
|||
|
|||
# Compile and run the worker |
|||
cd /tmp |
|||
go mod init worker |
|||
go run worker.go |
|||
@ -1,67 +0,0 @@ |
|||
#!/bin/sh |
|||
|
|||
set -e |
|||
|
|||
echo "Starting SeaweedFS EC Worker (gRPC)..." |
|||
echo "Worker ID: $WORKER_ID" |
|||
echo "Admin gRPC Address: $ADMIN_GRPC_ADDRESS" |
|||
|
|||
# Wait for admin to be ready |
|||
echo "Waiting for admin to be ready..." |
|||
until curl -f http://admin:9900/health > /dev/null 2>&1; do |
|||
echo "Admin not ready, waiting..." |
|||
sleep 5 |
|||
done |
|||
echo "Admin is ready!" |
|||
|
|||
# Install protobuf compiler and Go protobuf plugins |
|||
apk add --no-cache protobuf protobuf-dev |
|||
|
|||
# Set up Go environment |
|||
export GOPATH=/tmp/go |
|||
export PATH=$PATH:$GOPATH/bin |
|||
mkdir -p $GOPATH/src $GOPATH/bin $GOPATH/pkg |
|||
|
|||
# Install Go protobuf plugins |
|||
cd /tmp |
|||
go mod init worker-client |
|||
|
|||
# Create a basic go.mod with required dependencies |
|||
cat > go.mod << 'EOF' |
|||
module worker-client |
|||
|
|||
go 1.24 |
|||
|
|||
require ( |
|||
google.golang.org/grpc v1.65.0 |
|||
google.golang.org/protobuf v1.34.2 |
|||
) |
|||
EOF |
|||
|
|||
go mod tidy |
|||
go install google.golang.org/protobuf/cmd/protoc-gen-go@latest |
|||
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@latest |
|||
|
|||
# Add Go bin to PATH |
|||
export PATH=$PATH:$(go env GOPATH)/bin |
|||
|
|||
# Create directory structure for protobuf |
|||
mkdir -p worker_pb |
|||
|
|||
# Copy the worker client source and existing worker protobuf file |
|||
cp /worker_grpc_client.go . |
|||
cp /worker.proto . |
|||
|
|||
# Generate Go code from the existing worker protobuf |
|||
echo "Generating gRPC code from worker.proto..." |
|||
protoc --go_out=. --go_opt=paths=source_relative \ |
|||
--go-grpc_out=. --go-grpc_opt=paths=source_relative \ |
|||
worker.proto |
|||
|
|||
# Build and run the worker |
|||
echo "Building worker..." |
|||
go mod tidy |
|||
go build -o worker-client worker_grpc_client.go |
|||
|
|||
echo "Starting worker..." |
|||
exec ./worker-client |
|||
@ -1,67 +0,0 @@ |
|||
package main |
|||
|
|||
import ( |
|||
"flag" |
|||
"fmt" |
|||
"os" |
|||
"os/signal" |
|||
"syscall" |
|||
"time" |
|||
|
|||
"github.com/seaweedfs/seaweedfs/weed/glog" |
|||
"github.com/seaweedfs/seaweedfs/weed/worker" |
|||
) |
|||
|
|||
var ( |
|||
workerID = flag.String("worker.id", "", "Worker ID (required)") |
|||
adminAddr = flag.String("admin.address", "localhost:9090", "Admin server address") |
|||
grpcAddr = flag.String("grpc.address", "localhost:18000", "Worker gRPC address") |
|||
logLevel = flag.Int("log.level", 1, "Log level (0-4)") |
|||
) |
|||
|
|||
func main() { |
|||
flag.Parse() |
|||
|
|||
// Validate required flags
|
|||
if *workerID == "" { |
|||
fmt.Fprintf(os.Stderr, "Error: worker.id is required\n") |
|||
flag.Usage() |
|||
os.Exit(1) |
|||
} |
|||
|
|||
// Set log level
|
|||
flag.Set("v", fmt.Sprintf("%d", *logLevel)) |
|||
|
|||
glog.Infof("Starting SeaweedFS EC Worker") |
|||
glog.Infof("Worker ID: %s", *workerID) |
|||
glog.Infof("Admin Address: %s", *adminAddr) |
|||
glog.Infof("gRPC Address: %s", *grpcAddr) |
|||
|
|||
// Create worker
|
|||
ecWorker := worker.NewECWorker(*workerID, *adminAddr, *grpcAddr) |
|||
|
|||
// Start worker
|
|||
err := ecWorker.Start() |
|||
if err != nil { |
|||
glog.Fatalf("Failed to start worker: %v", err) |
|||
} |
|||
|
|||
// Wait for shutdown signal
|
|||
waitForShutdown(ecWorker) |
|||
|
|||
glog.Infof("Worker %s shutdown complete", *workerID) |
|||
} |
|||
|
|||
// waitForShutdown waits for shutdown signal and gracefully stops the worker
|
|||
func waitForShutdown(worker *worker.ECWorker) { |
|||
sigCh := make(chan os.Signal, 1) |
|||
signal.Notify(sigCh, syscall.SIGINT, syscall.SIGTERM) |
|||
|
|||
<-sigCh |
|||
glog.Infof("Shutdown signal received, stopping worker...") |
|||
|
|||
worker.Stop() |
|||
|
|||
// Give a moment for cleanup
|
|||
time.Sleep(2 * time.Second) |
|||
} |
|||
Write
Preview
Loading…
Cancel
Save
Reference in new issue