23 changed files with 450 additions and 3313 deletions
 36  docker/admin_integration/Dockerfile.admin
 44  docker/admin_integration/Dockerfile.load
 18  docker/admin_integration/Dockerfile.local
 48  docker/admin_integration/Dockerfile.monitor
 35  docker/admin_integration/Dockerfile.worker
 43  docker/admin_integration/EC-TESTING-README.md
428  docker/admin_integration/Makefile
521  docker/admin_integration/admin-entrypoint.sh
 73  docker/admin_integration/admin-grpc-entrypoint.sh
663  docker/admin_integration/admin_grpc_server.go
421  docker/admin_integration/docker-compose-ec-test.yml
 21  docker/admin_integration/load-entrypoint.sh
375  docker/admin_integration/load-generator.go
 38  docker/admin_integration/monitor-entrypoint.sh
366  docker/admin_integration/monitor.go
106  docker/admin_integration/run-ec-test.sh
 73  docker/admin_integration/test-integration.sh
230  docker/admin_integration/worker-entrypoint.sh
 67  docker/admin_integration/worker-grpc-entrypoint.sh
 51  weed/admin/dash/admin_server.go
 20  weed/admin/handlers/maintenance_handlers.go
 11  weed/worker/ec_worker.go
 67  weed/worker/main.go
docker/admin_integration/Dockerfile.admin (deleted)
@@ -1,36 +0,0 @@
# Final stage
FROM alpine:latest

# Install dependencies including Go for the entrypoint script
RUN apk --no-cache add curl ca-certificates go

WORKDIR /root/

# Copy gRPC admin files
COPY ./docker/admin_integration/admin-grpc-entrypoint.sh /entrypoint.sh
COPY ./docker/admin_integration/admin_grpc_server.go /admin_grpc_server.go
COPY ./weed/pb/worker.proto /worker.proto
RUN chmod +x /entrypoint.sh

# Create directories
RUN mkdir -p /data /config /work

# Expose admin ports (HTTP and gRPC)
EXPOSE 9900 9901

# Set environment variables
ENV MASTER_ADDRESS="master:9333"
ENV ADMIN_PORT="9900"
ENV GRPC_PORT="9901"
ENV SCAN_INTERVAL="30s"
ENV WORKER_TIMEOUT="5m"
ENV TASK_TIMEOUT="30m"
ENV MAX_RETRIES="3"
ENV MAX_CONCURRENT_TASKS="5"

# Health check
HEALTHCHECK --interval=15s --timeout=5s --start-period=30s --retries=3 \
    CMD curl -f http://localhost:9900/health || exit 1

# Start admin server
ENTRYPOINT ["/entrypoint.sh"]
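For orientation, this removed admin image was normally built and wired to the other containers by docker-compose-ec-test.yml; a standalone equivalent would have looked roughly like the sketch below (the image tag is illustrative, not from the repo):

# Illustrative standalone build/run of the removed Dockerfile.admin.
docker build -f docker/admin_integration/Dockerfile.admin -t seaweedfs-ec-admin .
docker run -p 9900:9900 -p 9901:9901 \
    -e MASTER_ADDRESS=master:9333 \
    seaweedfs-ec-admin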
docker/admin_integration/Dockerfile.load (deleted)
@@ -1,44 +0,0 @@
FROM golang:1.24-alpine AS builder

# Install dependencies
RUN apk add --no-cache git build-base

# Set working directory
WORKDIR /app

# Copy and create load generator
COPY ./docker/admin_integration/load-generator.go .
COPY go.mod go.sum ./
RUN go mod download
RUN CGO_ENABLED=0 GOOS=linux go build -a -installsuffix cgo -o load-generator load-generator.go

# Final stage
FROM alpine:latest

# Install dependencies
RUN apk --no-cache add curl ca-certificates openssl

WORKDIR /root/

# Copy the binary
COPY --from=builder /app/load-generator .

# Copy load generator script
COPY ./docker/admin_integration/load-entrypoint.sh /entrypoint.sh
RUN chmod +x /entrypoint.sh

# Create directories for test data
RUN mkdir -p /test-data /temp

# Set environment variables
ENV FILER_ADDRESS="filer:8888"
ENV MASTER_ADDRESS="master:9333"
ENV WRITE_RATE="10"
ENV DELETE_RATE="2"
ENV FILE_SIZE_MIN="1MB"
ENV FILE_SIZE_MAX="5MB"
ENV TEST_DURATION="3600"
ENV COLLECTION=""

# Start load generator
ENTRYPOINT ["/entrypoint.sh"]
docker/admin_integration/Dockerfile.local (added)
@@ -0,0 +1,18 @@
FROM alpine:latest

# Install required packages
RUN apk add --no-cache \
    ca-certificates \
    fuse \
    curl \
    jq

# Copy our locally built binary
COPY weed-local /usr/bin/weed
RUN chmod +x /usr/bin/weed

# Create working directory
WORKDIR /data

# Default command
ENTRYPOINT ["/usr/bin/weed"]
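This new image expects a weed-local binary in the build context. A plausible build sequence is sketched below; the cross-compile command is an assumption, while the chrislusf/seaweedfs:local tag matches the image name the new Makefile's test target runs:

# Assumed workflow: cross-compile the weed binary for Linux, then bake it in.
CGO_ENABLED=0 GOOS=linux go build -o docker/admin_integration/weed-local ./weed
cd docker/admin_integration
docker build -f Dockerfile.local -t chrislusf/seaweedfs:local .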
docker/admin_integration/Dockerfile.monitor (deleted)
@@ -1,48 +0,0 @@
FROM golang:1.24-alpine AS builder

# Install dependencies
RUN apk add --no-cache git build-base

# Set working directory
WORKDIR /app

# Copy and create monitor
COPY ./docker/admin_integration/monitor.go .
COPY go.mod go.sum ./
RUN go mod download
RUN CGO_ENABLED=0 GOOS=linux go build -a -installsuffix cgo -o monitor monitor.go

# Final stage
FROM alpine:latest

# Install dependencies
RUN apk --no-cache add curl ca-certificates jq

WORKDIR /root/

# Copy the binary
COPY --from=builder /app/monitor .

# Copy monitor scripts
COPY ./docker/admin_integration/monitor-entrypoint.sh /entrypoint.sh
RUN chmod +x /entrypoint.sh

# Create monitoring directories
RUN mkdir -p /monitor-data /logs

# Expose monitor port
EXPOSE 9999

# Set environment variables
ENV MASTER_ADDRESS="master:9333"
ENV ADMIN_ADDRESS="admin:9900"
ENV FILER_ADDRESS="filer:8888"
ENV MONITOR_INTERVAL="10s"
ENV LOG_LEVEL="info"

# Health check
HEALTHCHECK --interval=30s --timeout=5s --start-period=30s --retries=3 \
    CMD curl -f http://localhost:9999/health || exit 1

# Start monitor
ENTRYPOINT ["/entrypoint.sh"]
docker/admin_integration/Dockerfile.worker (deleted)
@@ -1,35 +0,0 @@
# Final stage
FROM alpine:latest

# Install dependencies including Go for the entrypoint script
RUN apk --no-cache add curl ca-certificates go

WORKDIR /root/

# Copy gRPC worker files
COPY ./docker/admin_integration/worker-grpc-entrypoint.sh /entrypoint.sh
COPY ./docker/admin_integration/worker_grpc_client.go /worker_grpc_client.go
COPY ./weed/pb/worker.proto /worker.proto
RUN chmod +x /entrypoint.sh

# Create working directories
RUN mkdir -p /work /tmp/ec_work

# Expose worker port
EXPOSE 9001

# Set environment variables
ENV ADMIN_GRPC_ADDRESS="admin:9901"
ENV WORKER_ID="worker-1"
ENV WORKER_ADDRESS="worker:9001"
ENV CAPABILITIES="erasure_coding"
ENV MAX_CONCURRENT="2"
ENV WORK_DIR="/work"
ENV HEARTBEAT_INTERVAL="10s"

# Health check
HEALTHCHECK --interval=15s --timeout=5s --start-period=30s --retries=3 \
    CMD curl -f http://localhost:9001/health || exit 1

# Start worker
ENTRYPOINT ["/entrypoint.sh"]
docker/admin_integration/Makefile (rewritten; `-` lines removed, `+` lines added)
@@ -1,301 +1,195 @@
-# SeaweedFS EC Worker Testing Environment Makefile
-# Usage: make <target>
-
-.PHONY: help start stop clean logs status monitor health up down restart scale docs test
-
-# Default target
-.DEFAULT_GOAL := help
-
-# Docker compose file
-COMPOSE_FILE := docker-compose-ec-test.yml
-
-# Color codes for output
-GREEN := \033[32m
-YELLOW := \033[33m
-BLUE := \033[34m
-RED := \033[31m
-NC := \033[0m # No Color
-
-help: ## Show this help message
-    @echo "$(BLUE)🧪 SeaweedFS EC Worker Testing Environment$(NC)"
-    @echo "$(BLUE)===========================================$(NC)"
-    @echo ""
-    @echo "$(YELLOW)Available targets:$(NC)"
-    @awk 'BEGIN {FS = ":.*?## "} /^[a-zA-Z_-]+:.*?## / {printf "  $(GREEN)%-15s$(NC) %s\n", $$1, $$2}' $(MAKEFILE_LIST)
-    @echo ""
-    @echo "$(YELLOW)Quick start:$(NC) make start"
-    @echo "$(YELLOW)Monitor:$(NC)     make monitor"
-    @echo "$(YELLOW)Cleanup:$(NC)     make clean"
-
-start: ## Start the complete EC testing environment
-    @echo "$(GREEN)🚀 Starting SeaweedFS EC testing environment...$(NC)"
-    @echo "$(BLUE)This will start:$(NC)"
-    @echo "  • 1 Master server (port 9333)"
-    @echo "  • 6 Volume servers (ports 8080-8085) with 50MB volume limit"
-    @echo "  • 1 Filer (port 8888)"
-    @echo "  • 1 Admin server (port 9900)"
-    @echo "  • 3 EC Workers"
-    @echo "  • 1 Load generator (continuous read/write)"
-    @echo "  • 1 Monitor (port 9999)"
-    @echo ""
-    @mkdir -p monitor-data admin-config
-    @chmod +x *.sh 2>/dev/null || true
-    @docker-compose -f $(COMPOSE_FILE) down -v 2>/dev/null || true
-    @docker-compose -f $(COMPOSE_FILE) up --build -d
-    @echo ""
-    @echo "$(GREEN)✅ Environment started successfully!$(NC)"
-    @echo ""
-    @$(MAKE) urls
-    @echo ""
-    @echo "$(YELLOW)⏳ Waiting for services to be ready...$(NC)"
-    @sleep 10
-    @$(MAKE) health
-
-stop: ## Stop all services
-    @echo "$(YELLOW)🛑 Stopping all services...$(NC)"
-    @docker-compose -f $(COMPOSE_FILE) stop
-    @echo "$(GREEN)✅ All services stopped$(NC)"
-
-down: ## Stop and remove all containers
-    @echo "$(YELLOW)🛑 Stopping and removing containers...$(NC)"
-    @docker-compose -f $(COMPOSE_FILE) down
-    @echo "$(GREEN)✅ Containers stopped and removed$(NC)"
-
-clean: ## Stop and remove all containers, networks, volumes, and images
-    @echo "$(RED)🧹 Cleaning up entire environment...$(NC)"
-    @docker-compose -f $(COMPOSE_FILE) down -v --rmi all 2>/dev/null || true
-    @docker system prune -f
-    @echo "$(GREEN)✅ Environment cleaned up$(NC)"
-
-restart: ## Restart all services
-    @echo "$(YELLOW)🔄 Restarting all services...$(NC)"
-    @docker-compose -f $(COMPOSE_FILE) restart
-    @echo "$(GREEN)✅ All services restarted$(NC)"
-
-up: start ## Alias for start
-
-status: ## Show status of all services
-    @echo "$(BLUE)📊 Service Status:$(NC)"
-    @docker-compose -f $(COMPOSE_FILE) ps
-
-logs: ## Show logs from all services
-    @echo "$(BLUE)📋 Showing logs from all services (Ctrl+C to exit):$(NC)"
-    @docker-compose -f $(COMPOSE_FILE) logs -f
-
-logs-admin: ## Show admin server logs
-    @echo "$(BLUE)📋 Admin Server Logs:$(NC)"
-    @docker-compose -f $(COMPOSE_FILE) logs -f admin
-
-logs-workers: ## Show all worker logs
-    @echo "$(BLUE)📋 Worker Logs:$(NC)"
-    @docker-compose -f $(COMPOSE_FILE) logs -f worker1 worker2 worker3
-
-logs-worker1: ## Show worker1 logs
-    @docker-compose -f $(COMPOSE_FILE) logs -f worker1
-
-logs-worker2: ## Show worker2 logs
-    @docker-compose -f $(COMPOSE_FILE) logs -f worker2
-
-logs-worker3: ## Show worker3 logs
-    @docker-compose -f $(COMPOSE_FILE) logs -f worker3
-
-logs-load: ## Show load generator logs
-    @echo "$(BLUE)📋 Load Generator Logs:$(NC)"
-    @docker-compose -f $(COMPOSE_FILE) logs -f load_generator
-
-logs-monitor: ## Show monitor logs
-    @echo "$(BLUE)📋 Monitor Logs:$(NC)"
-    @docker-compose -f $(COMPOSE_FILE) logs -f monitor
-
-logs-master: ## Show master logs
-    @docker-compose -f $(COMPOSE_FILE) logs -f master
-
-logs-volumes: ## Show all volume server logs
-    @echo "$(BLUE)📋 Volume Server Logs:$(NC)"
-    @docker-compose -f $(COMPOSE_FILE) logs -f volume1 volume2 volume3 volume4 volume5 volume6
-
-urls: ## Show monitoring URLs
-    @echo "$(BLUE)📊 Monitoring URLs:$(NC)"
-    @echo "  • Master UI:     http://localhost:9333"
-    @echo "  • Filer:         http://localhost:8888"
-    @echo "  • Admin Server:  http://localhost:9900/status"
-    @echo "  • Monitor:       http://localhost:9999/status"
-    @echo ""
-    @echo "$(BLUE)📈 Volume Servers:$(NC)"
-    @echo "  • Volume1: http://localhost:8080/status"
-    @echo "  • Volume2: http://localhost:8081/status"
-    @echo "  • Volume3: http://localhost:8082/status"
-    @echo "  • Volume4: http://localhost:8083/status"
-    @echo "  • Volume5: http://localhost:8084/status"
-    @echo "  • Volume6: http://localhost:8085/status"
-
-health: ## Check health of all services
-    @echo "$(BLUE)🔍 Checking service health...$(NC)"
-    @echo -n "  Master:  "; \
-    if curl -s http://localhost:9333/cluster/status > /dev/null 2>&1; then \
-        echo "$(GREEN)✅ Healthy$(NC)"; \
-    else \
-        echo "$(RED)❌ Not responding$(NC)"; \
-    fi
-    @echo -n "  Filer:   "; \
-    if curl -s http://localhost:8888/ > /dev/null 2>&1; then \
-        echo "$(GREEN)✅ Healthy$(NC)"; \
-    else \
-        echo "$(RED)❌ Not responding$(NC)"; \
-    fi
-    @echo -n "  Admin:   "; \
-    if curl -s http://localhost:9900/health > /dev/null 2>&1; then \
-        echo "$(GREEN)✅ Healthy$(NC)"; \
-    else \
-        echo "$(RED)❌ Not responding$(NC)"; \
-    fi
-    @echo -n "  Monitor: "; \
-    if curl -s http://localhost:9999/health > /dev/null 2>&1; then \
-        echo "$(GREEN)✅ Healthy$(NC)"; \
-    else \
-        echo "$(RED)❌ Not responding$(NC)"; \
-    fi
-
-monitor: ## Open monitor dashboard in browser
-    @echo "$(BLUE)📊 Opening monitor dashboard...$(NC)"
-    @echo "Monitor URL: http://localhost:9999/status"
-    @command -v open >/dev/null 2>&1 && open http://localhost:9999/status || \
-    command -v xdg-open >/dev/null 2>&1 && xdg-open http://localhost:9999/status || \
-    echo "Please open http://localhost:9999/status in your browser"
-
-monitor-status: ## Show current monitoring status via API
-    @echo "$(BLUE)📊 Current Monitor Status:$(NC)"
-    @curl -s http://localhost:9999/status | jq . 2>/dev/null || \
-    curl -s http://localhost:9999/status 2>/dev/null || \
-    echo "Monitor not available"
-
-volume-status: ## Show volume status from master
-    @echo "$(BLUE)💾 Volume Status:$(NC)"
-    @curl -s http://localhost:9333/vol/status | jq . 2>/dev/null || \
-    curl -s http://localhost:9333/vol/status 2>/dev/null || \
-    echo "Master not available"
-
-admin-status: ## Show admin server status
-    @echo "$(BLUE)🏭 Admin Server Status:$(NC)"
-    @curl -s http://localhost:9900/status | jq . 2>/dev/null || \
-    curl -s http://localhost:9900/status 2>/dev/null || \
-    echo "Admin server not available"
-
-cluster-status: ## Show complete cluster status
-    @echo "$(BLUE)🌐 Cluster Status:$(NC)"
-    @curl -s http://localhost:9333/cluster/status | jq . 2>/dev/null || \
-    curl -s http://localhost:9333/cluster/status 2>/dev/null || \
-    echo "Master not available"
-
-scale-workers: ## Scale workers (usage: make scale-workers WORKERS=5)
-    @echo "$(YELLOW)⚖️ Scaling workers to $(or $(WORKERS),3)...$(NC)"
-    @docker-compose -f $(COMPOSE_FILE) up -d --scale worker2=$(or $(WORKERS),3)
-
-scale-load: ## Restart load generator with higher rate (usage: make scale-load RATE=20)
-    @echo "$(YELLOW)📈 Scaling load generation to $(or $(RATE),20) files/sec...$(NC)"
-    @docker-compose -f $(COMPOSE_FILE) stop load_generator
-    @docker-compose -f $(COMPOSE_FILE) run -d --name temp_load_generator \
-        -e WRITE_RATE=$(or $(RATE),20) -e DELETE_RATE=$(or $(shell expr $(or $(RATE),20) / 4),5) \
-        load_generator
-    @echo "$(GREEN)✅ Load generator restarted with higher rate$(NC)"
-
-test-ec: ## Run a focused EC test scenario
-    @echo "$(YELLOW)🧪 Running focused EC test...$(NC)"
-    @$(MAKE) scale-load RATE=25
-    @echo "$(BLUE)Monitoring EC detection...$(NC)"
-    @echo "Watch for volumes >40MB that trigger EC conversion"
-    @echo "Monitor at: http://localhost:9999/status"
-
-shell-admin: ## Open shell in admin container
-    @docker-compose -f $(COMPOSE_FILE) exec admin /bin/sh
-
-shell-worker1: ## Open shell in worker1 container
-    @docker-compose -f $(COMPOSE_FILE) exec worker1 /bin/sh
-
-shell-master: ## Open shell in master container
-    @docker-compose -f $(COMPOSE_FILE) exec master /bin/sh
-
-docs: ## Show documentation
-    @echo "$(BLUE)📖 EC Testing Documentation:$(NC)"
-    @echo ""
-    @cat EC-TESTING-README.md
-
-build: ## Build all Docker images without starting
-    @echo "$(YELLOW)🔨 Building all Docker images...$(NC)"
-    @docker-compose -f $(COMPOSE_FILE) build
-    @echo "$(GREEN)✅ All images built$(NC)"
-
-pull: ## Pull latest SeaweedFS image
-    @echo "$(YELLOW)📥 Pulling latest SeaweedFS image...$(NC)"
-    @docker pull chrislusf/seaweedfs:latest
-    @echo "$(GREEN)✅ Latest image pulled$(NC)"
-
-debug: ## Show debug information
-    @echo "$(BLUE)🔍 Debug Information:$(NC)"
-    @echo ""
-    @echo "$(YELLOW)Docker Compose Version:$(NC)"
-    @docker-compose --version
-    @echo ""
-    @echo "$(YELLOW)Docker Version:$(NC)"
-    @docker --version
-    @echo ""
-    @echo "$(YELLOW)Current Directory:$(NC)"
-    @pwd
-    @echo ""
-    @echo "$(YELLOW)Available Files:$(NC)"
-    @ls -la *.yml *.sh *.md 2>/dev/null || echo "No config files found"
-    @echo ""
-    @echo "$(YELLOW)Running Containers:$(NC)"
-    @docker ps --format "table {{.Names}}\t{{.Status}}\t{{.Ports}}"
-
-# Targets for development and testing
-dev-start: ## Start with development settings (faster iteration)
-    @echo "$(YELLOW)🛠️ Starting development environment...$(NC)"
-    @mkdir -p monitor-data admin-config
-    @WRITE_RATE=50 DELETE_RATE=10 docker-compose -f $(COMPOSE_FILE) up --build -d
-    @echo "$(GREEN)✅ Development environment started with high load$(NC)"
-
-dev-stop: stop ## Stop development environment
-
-# Clean specific components
-clean-volumes: ## Remove only data volumes
-    @echo "$(YELLOW)🗄️ Removing data volumes...$(NC)"
-    @docker-compose -f $(COMPOSE_FILE) down -v
-    @echo "$(GREEN)✅ Data volumes removed$(NC)"
-
-clean-images: ## Remove built images
-    @echo "$(YELLOW)🖼️ Removing built images...$(NC)"
-    @docker-compose -f $(COMPOSE_FILE) down --rmi local
-    @echo "$(GREEN)✅ Built images removed$(NC)"
-
-# Backup and restore
-backup-logs: ## Backup all service logs
-    @echo "$(YELLOW)💾 Backing up service logs...$(NC)"
-    @mkdir -p logs-backup
-    @docker-compose -f $(COMPOSE_FILE) logs admin > logs-backup/admin.log 2>&1
-    @docker-compose -f $(COMPOSE_FILE) logs worker1 > logs-backup/worker1.log 2>&1
-    @docker-compose -f $(COMPOSE_FILE) logs worker2 > logs-backup/worker2.log 2>&1
-    @docker-compose -f $(COMPOSE_FILE) logs worker3 > logs-backup/worker3.log 2>&1
-    @docker-compose -f $(COMPOSE_FILE) logs monitor > logs-backup/monitor.log 2>&1
-    @docker-compose -f $(COMPOSE_FILE) logs load_generator > logs-backup/load_generator.log 2>&1
-    @echo "$(GREEN)✅ Logs backed up to logs-backup/$(NC)"
-
-# Quick troubleshooting
-troubleshoot: ## Run troubleshooting checks
-    @echo "$(BLUE)🔧 Running troubleshooting checks...$(NC)"
-    @echo ""
-    @echo "$(YELLOW)1. Checking required ports:$(NC)"
-    @for port in 9333 8888 9900 9999 8080 8081 8082 8083 8084 8085; do \
-        echo -n "  Port $$port: "; \
-        if lsof -i :$$port >/dev/null 2>&1; then \
-            echo "$(RED)❌ In use$(NC)"; \
-        else \
-            echo "$(GREEN)✅ Available$(NC)"; \
-        fi; \
-    done
-    @echo ""
-    @echo "$(YELLOW)2. Docker resources:$(NC)"
-    @docker system df
-    @echo ""
-    @echo "$(YELLOW)3. Service health:$(NC)"
-    @$(MAKE) health
+# SeaweedFS Admin Integration Test Makefile
+# Tests the admin server and worker functionality using official weed commands
+
+.PHONY: help start stop restart logs clean status test admin-ui worker-logs master-logs admin-logs
+.DEFAULT_GOAL := help
+
+COMPOSE_FILE := docker-compose-ec-test.yml
+PROJECT_NAME := admin_integration
+
+help: ## Show this help message
+    @echo "SeaweedFS Admin Integration Test"
+    @echo "================================"
+    @echo "Tests admin server task distribution to workers using official weed commands"
+    @echo ""
+    @echo "Available targets:"
+    @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "  %-15s %s\n", $$1, $$2}'
+
+start: ## Start the complete SeaweedFS cluster with admin and workers
+    @echo "🚀 Starting SeaweedFS cluster with admin and workers..."
+    @docker-compose -f $(COMPOSE_FILE) up -d
+    @echo "✅ Cluster started!"
+    @echo ""
+    @echo "📊 Access points:"
+    @echo "  • Admin UI:  http://localhost:23646/"
+    @echo "  • Master UI: http://localhost:9333/"
+    @echo "  • Filer:     http://localhost:8888/"
+    @echo ""
+    @echo "📈 Services starting up..."
+    @echo "  • Master server: ✓"
+    @echo "  • Volume servers: Starting (6 servers)..."
+    @echo "  • Filer: Starting..."
+    @echo "  • Admin server: Starting..."
+    @echo "  • Workers: Starting (3 workers)..."
+    @echo ""
+    @echo "⏳ Use 'make status' to check startup progress"
+    @echo "💡 Use 'make logs' to watch the startup process"
+
+start-staged: ## Start services in proper order with delays
+    @echo "🚀 Starting SeaweedFS cluster in stages..."
+    @echo ""
+    @echo "Stage 1: Starting Master server..."
+    @docker-compose -f $(COMPOSE_FILE) up -d master
+    @sleep 10
+    @echo ""
+    @echo "Stage 2: Starting Volume servers..."
+    @docker-compose -f $(COMPOSE_FILE) up -d volume1 volume2 volume3 volume4 volume5 volume6
+    @sleep 15
+    @echo ""
+    @echo "Stage 3: Starting Filer..."
+    @docker-compose -f $(COMPOSE_FILE) up -d filer
+    @sleep 10
+    @echo ""
+    @echo "Stage 4: Starting Admin server..."
+    @docker-compose -f $(COMPOSE_FILE) up -d admin
+    @sleep 15
+    @echo ""
+    @echo "Stage 5: Starting Workers..."
+    @docker-compose -f $(COMPOSE_FILE) up -d worker1 worker2 worker3
+    @sleep 10
+    @echo ""
+    @echo "Stage 6: Starting Load generator and Monitor..."
+    @docker-compose -f $(COMPOSE_FILE) up -d load_generator monitor
+    @echo ""
+    @echo "✅ All services started!"
+    @echo ""
+    @echo "📊 Access points:"
+    @echo "  • Admin UI:  http://localhost:23646/"
+    @echo "  • Master UI: http://localhost:9333/"
+    @echo "  • Filer:     http://localhost:8888/"
+    @echo ""
+    @echo "⏳ Services are initializing... Use 'make status' to check progress"
+
+stop: ## Stop all services
+    @echo "🛑 Stopping SeaweedFS cluster..."
+    @docker-compose -f $(COMPOSE_FILE) down
+    @echo "✅ Cluster stopped"
+
+restart: stop start ## Restart the entire cluster
+
+clean: ## Stop and remove all containers, networks, and volumes
+    @echo "🧹 Cleaning up SeaweedFS test environment..."
+    @docker-compose -f $(COMPOSE_FILE) down -v --remove-orphans
+    @docker system prune -f
+    @rm -rf data/
+    @echo "✅ Environment cleaned"
+
+status: ## Check the status of all services
+    @echo "📊 SeaweedFS Cluster Status"
+    @echo "=========================="
+    @docker-compose -f $(COMPOSE_FILE) ps
+    @echo ""
+    @echo "📋 Service Health:"
+    @echo "Master:"
+    @curl -s http://localhost:9333/cluster/status | jq '.IsLeader' 2>/dev/null || echo "  ❌ Master not ready"
+    @echo "Admin:"
+    @curl -s http://localhost:23646/ | grep -q "Admin" && echo "  ✅ Admin ready" || echo "  ❌ Admin not ready"
+
+logs: ## Show logs from all services
+    @echo "📜 Following logs from all services..."
+    @echo "💡 Press Ctrl+C to stop following logs"
+    @docker-compose -f $(COMPOSE_FILE) logs -f
+
+admin-logs: ## Show logs from admin server only
+    @echo "📜 Admin server logs:"
+    @docker-compose -f $(COMPOSE_FILE) logs -f admin
+
+worker-logs: ## Show logs from all workers
+    @echo "📜 Worker logs:"
+    @docker-compose -f $(COMPOSE_FILE) logs -f worker1 worker2 worker3
+
+master-logs: ## Show logs from master server
+    @echo "📜 Master server logs:"
+    @docker-compose -f $(COMPOSE_FILE) logs -f master
+
+admin-ui: ## Open admin UI in browser (macOS)
+    @echo "🌐 Opening admin UI in browser..."
+    @open http://localhost:23646/ || echo "💡 Manually open: http://localhost:23646/"
+
+test: ## Run integration test to verify task assignment and completion
+    @echo "🧪 Running Admin-Worker Integration Test"
+    @echo "========================================"
+    @echo ""
+    @echo "1️⃣  Checking cluster health..."
+    @sleep 5
+    @curl -s http://localhost:9333/cluster/status | jq '.IsLeader' > /dev/null && echo "✅ Master healthy" || echo "❌ Master not ready"
+    @curl -s http://localhost:23646/ | grep -q "Admin" && echo "✅ Admin healthy" || echo "❌ Admin not ready"
+    @echo ""
+    @echo "2️⃣  Checking worker registration..."
+    @sleep 10
+    @echo "💡 Check admin UI for connected workers: http://localhost:23646/"
+    @echo ""
+    @echo "3️⃣  Generating load to trigger EC tasks..."
+    @echo "📝 Creating test files to fill volumes..."
+    @echo "Creating large files with random data to trigger EC (targeting ~60MB total to exceed 50MB limit)..."
+    @for i in {1..12}; do \
+        echo "Creating 5MB random file $$i..."; \
+        docker run --rm --network admin_integration_seaweed_net -v /tmp:/tmp --entrypoint sh chrislusf/seaweedfs:local -c "dd if=/dev/urandom of=/tmp/largefile$$i.dat bs=1M count=5 2>/dev/null && weed upload -master=master:9333 /tmp/largefile$$i.dat && rm /tmp/largefile$$i.dat"; \
+        sleep 3; \
+    done
+    @echo ""
+    @echo "4️⃣  Waiting for volumes to process large files and reach 50MB limit..."
+    @echo "This may take a few minutes as we're uploading 60MB of data..."
+    @sleep 60
+    @echo ""
+    @echo "5️⃣  Checking for EC task creation and assignment..."
+    @echo "💡 Monitor the admin UI to see:"
+    @echo "  • Tasks being created for volumes needing EC"
+    @echo "  • Workers picking up tasks"
+    @echo "  • Task progress (pending → running → completed)"
+    @echo "  • EC shards being distributed"
+    @echo ""
+    @echo "✅ Integration test setup complete!"
+    @echo "📊 Monitor progress at: http://localhost:23646/"
+
+quick-test: ## Quick verification that core services are running
+    @echo "⚡ Quick Health Check"
+    @echo "===================="
+    @echo "Master: $$(curl -s http://localhost:9333/cluster/status | jq -r '.IsLeader // "not ready"')"
+    @echo "Admin: $$(curl -s http://localhost:23646/ | grep -q "Admin" && echo "ready" || echo "not ready")"
+    @echo "Workers: $$(docker-compose -f $(COMPOSE_FILE) ps worker1 worker2 worker3 | grep -c Up) running"
+
+validate: ## Validate integration test configuration
+    @echo "🔍 Validating Integration Test Configuration"
+    @echo "==========================================="
+    @chmod +x test-integration.sh
+    @./test-integration.sh
+
+demo: start ## Start cluster and run demonstration
+    @echo "🎭 SeaweedFS Admin-Worker Demo"
+    @echo "============================="
+    @echo ""
+    @echo "⏳ Waiting for services to start..."
+    @sleep 45
+    @echo ""
+    @echo "🎯 Demo Overview:"
+    @echo "  • 1 Master server (coordinates cluster)"
+    @echo "  • 6 Volume servers (50MB volume limit)"
+    @echo "  • 1 Admin server (task management)"
+    @echo "  • 3 Workers (execute EC tasks)"
+    @echo "  • Load generator (creates files continuously)"
+    @echo ""
+    @echo "📊 Watch the process:"
+    @echo "  1. Visit: http://localhost:23646/"
+    @echo "  2. Observe workers connecting"
+    @echo "  3. Watch tasks being created and assigned"
+    @echo "  4. See tasks progress from pending → completed"
+    @echo ""
+    @echo "🔄 The demo will:"
+    @echo "  • Fill volumes to 50MB limit"
+    @echo "  • Admin detects volumes needing EC"
+    @echo "  • Workers receive and execute EC tasks"
+    @echo "  • Tasks complete with shard distribution"
+    @echo ""
+    @echo "💡 Use 'make worker-logs' to see worker activity"
+    @echo "💡 Use 'make admin-logs' to see admin task management"
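Taken together, a typical session with the new Makefile looks like the sketch below (run from docker/admin_integration/, which is implied by the file paths above):

make start        # bring up master, 6 volume servers, filer, admin, 3 workers
make status       # container list plus master/admin health probes
make test         # upload ~60MB of random data to push volumes past the 50MB limit
make worker-logs  # watch workers pick up and execute EC tasks
make clean        # tear down containers, networks, and volumes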
docker/admin_integration/admin-entrypoint.sh (deleted)
@@ -1,521 +0,0 @@
#!/bin/sh

set -e

echo "Starting SeaweedFS Admin Server..."
echo "Master Address: $MASTER_ADDRESS"
echo "Admin Port: $ADMIN_PORT"
echo "Scan Interval: $SCAN_INTERVAL"

# Wait for master to be ready
echo "Waiting for master to be ready..."
until curl -f http://$MASTER_ADDRESS/cluster/status > /dev/null 2>&1; do
    echo "Master not ready, waiting..."
    sleep 5
done
echo "Master is ready!"

# For now, use a simple HTTP server to simulate admin functionality
# In a real implementation, this would start the actual admin server
cat > /tmp/admin_server.go << 'EOF'
package main

import (
    "encoding/json"
    "fmt"
    "log"
    "net/http"
    "os"
    "time"
)

type AdminServer struct {
    masterAddr string
    port       string
    startTime  time.Time
    tasks      []Task
    workers    []Worker
}

type Task struct {
    ID       string    `json:"id"`
    Type     string    `json:"type"`
    VolumeID int       `json:"volume_id"`
    Status   string    `json:"status"`
    Progress float64   `json:"progress"`
    Created  time.Time `json:"created"`
}

type Worker struct {
    ID           string    `json:"id"`
    Address      string    `json:"address"`
    Capabilities []string  `json:"capabilities"`
    Status       string    `json:"status"`
    LastSeen     time.Time `json:"last_seen"`
}

func (s *AdminServer) healthHandler(w http.ResponseWriter, r *http.Request) {
    w.Header().Set("Content-Type", "application/json")
    json.NewEncoder(w).Encode(map[string]interface{}{
        "status":  "healthy",
        "uptime":  time.Since(s.startTime).String(),
        "tasks":   len(s.tasks),
        "workers": len(s.workers),
    })
}

func (s *AdminServer) statusHandler(w http.ResponseWriter, r *http.Request) {
    w.Header().Set("Content-Type", "application/json")
    json.NewEncoder(w).Encode(map[string]interface{}{
        "admin_server": "running",
        "master_addr":  s.masterAddr,
        "tasks":        s.tasks,
        "workers":      s.workers,
        "uptime":       time.Since(s.startTime).String(),
    })
}

func (s *AdminServer) registerWorkerHandler(w http.ResponseWriter, r *http.Request) {
    if r.Method != "POST" {
        http.Error(w, "Method not allowed", http.StatusMethodNotAllowed)
        return
    }

    var worker Worker
    if err := json.NewDecoder(r.Body).Decode(&worker); err != nil {
        http.Error(w, "Invalid JSON", http.StatusBadRequest)
        return
    }

    worker.LastSeen = time.Now()
    worker.Status = "active"

    // Check if worker already exists, update if so
    found := false
    for i, w := range s.workers {
        if w.ID == worker.ID {
            s.workers[i] = worker
            found = true
            break
        }
    }

    if !found {
        s.workers = append(s.workers, worker)
        log.Printf("Registered new worker: %s with capabilities: %v", worker.ID, worker.Capabilities)
    } else {
        log.Printf("Updated worker: %s", worker.ID)
    }

    w.Header().Set("Content-Type", "application/json")
    json.NewEncoder(w).Encode(map[string]string{"status": "registered"})
}

func (s *AdminServer) heartbeatHandler(w http.ResponseWriter, r *http.Request) {
    if r.Method != "POST" {
        http.Error(w, "Method not allowed", http.StatusMethodNotAllowed)
        return
    }

    var heartbeat struct {
        WorkerID string `json:"worker_id"`
        Status   string `json:"status"`
    }

    if err := json.NewDecoder(r.Body).Decode(&heartbeat); err != nil {
        http.Error(w, "Invalid JSON", http.StatusBadRequest)
        return
    }

    // Update worker last seen time
    for i, w := range s.workers {
        if w.ID == heartbeat.WorkerID {
            s.workers[i].LastSeen = time.Now()
            s.workers[i].Status = heartbeat.Status
            break
        }
    }

    w.Header().Set("Content-Type", "application/json")
    json.NewEncoder(w).Encode(map[string]string{"status": "ok"})
}

func (s *AdminServer) assignTaskHandler(w http.ResponseWriter, r *http.Request) {
    if r.Method != "POST" {
        http.Error(w, "Method not allowed", http.StatusMethodNotAllowed)
        return
    }

    var request struct {
        WorkerID     string   `json:"worker_id"`
        Capabilities []string `json:"capabilities"`
    }

    if err := json.NewDecoder(r.Body).Decode(&request); err != nil {
        http.Error(w, "Invalid JSON", http.StatusBadRequest)
        return
    }

    // Find a pending task that matches worker capabilities
    for i, task := range s.tasks {
        if task.Status == "pending" {
            // Assign task to worker
            s.tasks[i].Status = "assigned"

            w.Header().Set("Content-Type", "application/json")
            json.NewEncoder(w).Encode(map[string]interface{}{
                "task_id":   task.ID,
                "type":      task.Type,
                "volume_id": task.VolumeID,
                "parameters": map[string]interface{}{
                    "server": "volume1:8080", // Simplified assignment
                },
            })

            log.Printf("Assigned task %s to worker %s", task.ID, request.WorkerID)
            return
        }
    }

    // No tasks available
    w.Header().Set("Content-Type", "application/json")
    json.NewEncoder(w).Encode(map[string]string{"status": "no_tasks"})
}

func (s *AdminServer) taskProgressHandler(w http.ResponseWriter, r *http.Request) {
    if r.Method != "POST" {
        http.Error(w, "Method not allowed", http.StatusMethodNotAllowed)
        return
    }

    var progress struct {
        TaskID   string  `json:"task_id"`
        Progress float64 `json:"progress"`
        Status   string  `json:"status"`
        Message  string  `json:"message"`
    }

    if err := json.NewDecoder(r.Body).Decode(&progress); err != nil {
        http.Error(w, "Invalid JSON", http.StatusBadRequest)
        return
    }

    // Update task progress
    for i, task := range s.tasks {
        if task.ID == progress.TaskID {
            s.tasks[i].Progress = progress.Progress
            s.tasks[i].Status = progress.Status

            log.Printf("Task %s: %.1f%% - %s", progress.TaskID, progress.Progress, progress.Message)
            break
        }
    }

    w.Header().Set("Content-Type", "application/json")
    json.NewEncoder(w).Encode(map[string]string{"status": "updated"})
}

func (s *AdminServer) webUIHandler(w http.ResponseWriter, r *http.Request) {
    w.Header().Set("Content-Type", "text/html")

    html := `<!DOCTYPE html>
<html>
<head>
    <title>SeaweedFS Admin - EC Task Monitor</title>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <style>
        body {
            font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
            margin: 0; padding: 20px; background: #f5f5f5;
        }
        .container { max-width: 1200px; margin: 0 auto; }
        .header {
            background: #2c3e50; color: white; padding: 20px; border-radius: 8px;
            margin-bottom: 20px; text-align: center;
        }
        .stats {
            display: grid; grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
            gap: 20px; margin-bottom: 20px;
        }
        .stat-card {
            background: white; padding: 20px; border-radius: 8px;
            box-shadow: 0 2px 4px rgba(0,0,0,0.1); text-align: center;
        }
        .stat-number { font-size: 2em; font-weight: bold; color: #3498db; }
        .stat-label { color: #7f8c8d; margin-top: 5px; }
        .section {
            background: white; border-radius: 8px; margin-bottom: 20px;
            box-shadow: 0 2px 4px rgba(0,0,0,0.1); overflow: hidden;
        }
        .section-header {
            background: #34495e; color: white; padding: 15px 20px;
            font-weight: bold; font-size: 1.1em;
        }
        .section-content { padding: 20px; }
        table { width: 100%; border-collapse: collapse; }
        th, td { padding: 12px; text-align: left; border-bottom: 1px solid #ecf0f1; }
        th { background: #f8f9fa; font-weight: 600; }
        .status-pending { color: #f39c12; font-weight: bold; }
        .status-assigned { color: #3498db; font-weight: bold; }
        .status-running { color: #e67e22; font-weight: bold; }
        .status-completed { color: #27ae60; font-weight: bold; }
        .status-failed { color: #e74c3c; font-weight: bold; }
        .progress-bar {
            background: #ecf0f1; height: 20px; border-radius: 10px;
            overflow: hidden; position: relative;
        }
        .progress-fill {
            height: 100%; background: linear-gradient(90deg, #3498db, #2ecc71);
            transition: width 0.3s ease;
        }
        .progress-text {
            position: absolute; top: 50%; left: 50%; transform: translate(-50%, -50%);
            font-size: 0.8em; font-weight: bold; color: white; text-shadow: 1px 1px 2px rgba(0,0,0,0.5);
        }
        .worker-online { color: #27ae60; }
        .worker-offline { color: #e74c3c; }
        .refresh-btn {
            background: #3498db; color: white; padding: 10px 20px;
            border: none; border-radius: 5px; cursor: pointer; margin-bottom: 20px;
        }
        .refresh-btn:hover { background: #2980b9; }
        .last-updated { color: #7f8c8d; font-size: 0.9em; text-align: center; margin-top: 20px; }
    </style>
</head>
<body>
    <div class="container">
        <div class="header">
            <h1>🧪 SeaweedFS EC Task Monitor</h1>
            <p>Real-time Erasure Coding Task Management Dashboard</p>
        </div>

        <button class="refresh-btn" onclick="location.reload()">🔄 Refresh</button>

        <div class="stats" id="stats">
            <div class="stat-card">
                <div class="stat-number" id="total-tasks">0</div>
                <div class="stat-label">Total Tasks</div>
            </div>
            <div class="stat-card">
                <div class="stat-number" id="active-workers">0</div>
                <div class="stat-label">Active Workers</div>
            </div>
            <div class="stat-card">
                <div class="stat-number" id="completed-tasks">0</div>
                <div class="stat-label">Completed</div>
            </div>
            <div class="stat-card">
                <div class="stat-number" id="uptime">--</div>
                <div class="stat-label">Uptime</div>
            </div>
        </div>

        <div class="section">
            <div class="section-header">📋 EC Tasks</div>
            <div class="section-content">
                <table>
                    <thead>
                        <tr>
                            <th>Task ID</th>
                            <th>Type</th>
                            <th>Volume ID</th>
                            <th>Status</th>
                            <th>Progress</th>
                            <th>Created</th>
                        </tr>
                    </thead>
                    <tbody id="tasks-table">
                        <tr><td colspan="6" style="text-align: center; color: #7f8c8d;">Loading tasks...</td></tr>
                    </tbody>
                </table>
            </div>
        </div>

        <div class="section">
            <div class="section-header">⚙️ Workers</div>
            <div class="section-content">
                <table>
                    <thead>
                        <tr>
                            <th>Worker ID</th>
                            <th>Address</th>
                            <th>Status</th>
                            <th>Capabilities</th>
                            <th>Last Seen</th>
                        </tr>
                    </thead>
                    <tbody id="workers-table">
                        <tr><td colspan="5" style="text-align: center; color: #7f8c8d;">Loading workers...</td></tr>
                    </tbody>
                </table>
            </div>
        </div>

        <div class="last-updated">
            Last updated: <span id="last-updated">--</span> |
            Auto-refresh every 5 seconds
        </div>
    </div>

    <script>
        function formatTime(timestamp) {
            return new Date(timestamp).toLocaleString();
        }

        function formatUptime(uptime) {
            if (!uptime) return '--';
            const matches = uptime.match(/(\d+h)?(\d+m)?(\d+\.?\d*s)?/);
            if (!matches) return uptime;

            let parts = [];
            if (matches[1]) parts.push(matches[1]);
            if (matches[2]) parts.push(matches[2]);
            if (matches[3] && !matches[1]) parts.push(matches[3]);

            return parts.join(' ') || uptime;
        }

        function updateData() {
            fetch('/status')
                .then(response => response.json())
                .then(data => {
                    // Update stats
                    document.getElementById('total-tasks').textContent = data.tasks.length;
                    document.getElementById('active-workers').textContent = data.workers.length;
                    document.getElementById('completed-tasks').textContent =
                        data.tasks.filter(function(t) { return t.status === 'completed'; }).length;
                    document.getElementById('uptime').textContent = formatUptime(data.uptime);

                    // Update tasks table
                    const tasksTable = document.getElementById('tasks-table');
                    if (data.tasks.length === 0) {
                        tasksTable.innerHTML = '<tr><td colspan="6" style="text-align: center; color: #7f8c8d;">No tasks available</td></tr>';
                    } else {
                        tasksTable.innerHTML = data.tasks.map(task => '<tr>' +
                            '<td><code>' + task.id + '</code></td>' +
                            '<td>' + task.type + '</td>' +
                            '<td>' + task.volume_id + '</td>' +
                            '<td><span class="status-' + task.status + '">' + task.status.toUpperCase() + '</span></td>' +
                            '<td><div class="progress-bar">' +
                            '<div class="progress-fill" style="width: ' + task.progress + '%"></div>' +
                            '<div class="progress-text">' + task.progress.toFixed(1) + '%</div>' +
                            '</div></td>' +
                            '<td>' + formatTime(task.created) + '</td>' +
                            '</tr>').join('');
                    }

                    // Update workers table
                    const workersTable = document.getElementById('workers-table');
                    if (data.workers.length === 0) {
                        workersTable.innerHTML = '<tr><td colspan="5" style="text-align: center; color: #7f8c8d;">No workers registered</td></tr>';
                    } else {
                        workersTable.innerHTML = data.workers.map(worker => '<tr>' +
                            '<td><strong>' + worker.id + '</strong></td>' +
                            '<td>' + (worker.address || 'N/A') + '</td>' +
                            '<td><span class="worker-' + (worker.status === 'active' ? 'online' : 'offline') + '">' + worker.status.toUpperCase() + '</span></td>' +
                            '<td>' + worker.capabilities.join(', ') + '</td>' +
                            '<td>' + formatTime(worker.last_seen) + '</td>' +
                            '</tr>').join('');
                    }

                    document.getElementById('last-updated').textContent = new Date().toLocaleTimeString();
                })
                .catch(error => {
                    console.error('Failed to fetch data:', error);
                });
        }

        // Initial load
        updateData();

        // Auto-refresh every 5 seconds
        setInterval(updateData, 5000);
    </script>
</body>
</html>`

    fmt.Fprint(w, html)
}

func (s *AdminServer) detectVolumesForEC() {
    // Simulate volume detection logic
    // In real implementation, this would query the master for volume status
    ticker := time.NewTicker(30 * time.Second)
    go func() {
        for range ticker.C {
            log.Println("Scanning for volumes requiring EC...")

            // Check master for volume status
            resp, err := http.Get(fmt.Sprintf("http://%s/vol/status", s.masterAddr))
            if err != nil {
                log.Printf("Error checking master: %v", err)
                continue
            }
            resp.Body.Close()

            // Simulate detecting a volume that needs EC
            if len(s.tasks) < 5 { // Don't create too many tasks
                taskID := fmt.Sprintf("ec-task-%d", len(s.tasks)+1)
                volumeID := 1000 + len(s.tasks)

                task := Task{
                    ID:       taskID,
                    Type:     "erasure_coding",
                    VolumeID: volumeID,
                    Status:   "pending",
                    Progress: 0.0,
                    Created:  time.Now(),
                }

                s.tasks = append(s.tasks, task)
                log.Printf("Created EC task %s for volume %d", taskID, volumeID)
            }
        }
    }()
}

func main() {
    masterAddr := os.Getenv("MASTER_ADDRESS")
    if masterAddr == "" {
        masterAddr = "master:9333"
    }

    port := os.Getenv("ADMIN_PORT")
    if port == "" {
        port = "9900"
    }

    server := &AdminServer{
        masterAddr: masterAddr,
        port:       port,
        startTime:  time.Now(),
        tasks:      make([]Task, 0),
        workers:    make([]Worker, 0),
    }

    http.HandleFunc("/health", server.healthHandler)
    http.HandleFunc("/status", server.statusHandler)
    http.HandleFunc("/register", server.registerWorkerHandler)
    http.HandleFunc("/register-worker", server.registerWorkerHandler) // Worker compatibility
    http.HandleFunc("/heartbeat", server.heartbeatHandler)
    http.HandleFunc("/assign-task", server.assignTaskHandler)
    http.HandleFunc("/task-progress", server.taskProgressHandler)
    http.HandleFunc("/", server.webUIHandler) // Web UI

    // Start volume detection
    server.detectVolumesForEC()

    log.Printf("Admin server starting on port %s", port)
    log.Printf("Master address: %s", masterAddr)

    if err := http.ListenAndServe(":"+port, nil); err != nil {
        log.Fatal("Server failed to start:", err)
    }
}
EOF

# Compile and run the admin server
cd /tmp
go mod init admin-server
go run admin_server.go
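A quick smoke test of this simulated HTTP API would look roughly like the sketch below; the JSON fields mirror the Worker struct and the anonymous request structs in the handlers above, and port 9900 is the default from Dockerfile.admin (worker-1 and its values are hypothetical):

curl -s http://localhost:9900/health
curl -s -X POST http://localhost:9900/register \
    -d '{"id":"worker-1","address":"worker:9001","capabilities":["erasure_coding"]}'
curl -s -X POST http://localhost:9900/heartbeat \
    -d '{"worker_id":"worker-1","status":"active"}'
curl -s -X POST http://localhost:9900/assign-task \
    -d '{"worker_id":"worker-1","capabilities":["erasure_coding"]}'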
docker/admin_integration/admin-grpc-entrypoint.sh (deleted)
@@ -1,73 +0,0 @@
#!/bin/sh

set -e

echo "Starting SeaweedFS Admin Server (gRPC)..."
echo "Master Address: $MASTER_ADDRESS"
echo "Admin HTTP Port: $ADMIN_PORT"
echo "Admin gRPC Port: $GRPC_PORT"

# Wait for master to be ready
echo "Waiting for master to be ready..."
until wget --quiet --tries=1 --spider http://$MASTER_ADDRESS/cluster/status > /dev/null 2>&1; do
    echo "Master not ready, waiting..."
    sleep 5
done
echo "Master is ready!"

# Install protobuf compiler and Go protobuf plugins
apk add --no-cache protobuf protobuf-dev

# Set up Go environment
export GOPATH=/tmp/go
export PATH=$PATH:$GOPATH/bin
mkdir -p $GOPATH/src $GOPATH/bin $GOPATH/pkg

# Install Go protobuf plugins globally first
export GOPATH=/tmp/go
mkdir -p $GOPATH
go install google.golang.org/protobuf/cmd/protoc-gen-go@latest
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@latest

# Set up working directory for compilation
cd /tmp
mkdir -p admin-project
cd admin-project

# Create a basic go.mod with required dependencies
cat > go.mod << 'EOF'
module admin-server

go 1.24

require (
    google.golang.org/grpc v1.65.0
    google.golang.org/protobuf v1.34.2
)
EOF

go mod tidy

# Add Go bin to PATH
export PATH=$PATH:$(go env GOPATH)/bin

# Create directory structure for protobuf
mkdir -p worker_pb

# Copy the admin server source and existing worker protobuf file
cp /admin_grpc_server.go .
cp /worker.proto .

# Generate Go code from the existing worker protobuf
echo "Generating gRPC code from worker.proto..."
protoc --go_out=. --go_opt=paths=source_relative \
    --go-grpc_out=. --go-grpc_opt=paths=source_relative \
    worker.proto

# Build and run the admin server
echo "Building admin server..."
go mod tidy
go build -o admin-server admin_grpc_server.go

echo "Starting admin server..."
exec ./admin-server
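Because admin_grpc_server.go (next hunk) registers gRPC server reflection, a running instance could in principle be probed as sketched below; grpcurl is not part of these images and the exact service path depends on worker.proto's package declaration, so both lines are illustrative assumptions:

grpcurl -plaintext localhost:9901 list
grpcurl -plaintext localhost:9901 describe worker_pb.WorkerService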
@ -1,663 +0,0 @@ |
|||||
package main |
|
||||
|
|
||||
import ( |
|
||||
"encoding/json" |
|
||||
"fmt" |
|
||||
"io" |
|
||||
"log" |
|
||||
"net" |
|
||||
"net/http" |
|
||||
"os" |
|
||||
"sync" |
|
||||
"time" |
|
||||
|
|
||||
pb "github.com/seaweedfs/seaweedfs/weed/pb/worker_pb" |
|
||||
"google.golang.org/grpc" |
|
||||
"google.golang.org/grpc/reflection" |
|
||||
) |
|
||||
|
|
||||
type AdminServer struct { |
|
||||
pb.UnimplementedWorkerServiceServer |
|
||||
|
|
||||
// Server configuration
|
|
||||
grpcPort string |
|
||||
httpPort string |
|
||||
masterAddr string |
|
||||
startTime time.Time |
|
||||
|
|
||||
// Data storage
|
|
||||
workers map[string]*WorkerInfo |
|
||||
tasks map[string]*TaskInfo |
|
||||
taskQueue []string |
|
||||
|
|
||||
// Synchronization
|
|
||||
mu sync.RWMutex |
|
||||
|
|
||||
// Active streams for workers
|
|
||||
workerStreams map[string]pb.WorkerService_WorkerStreamServer |
|
||||
streamMu sync.RWMutex |
|
||||
} |
|
||||
|
|
||||
type WorkerInfo struct { |
|
||||
ID string |
|
||||
Address string |
|
||||
Capabilities []string |
|
||||
MaxConcurrent int32 |
|
||||
Status string |
|
||||
CurrentLoad int32 |
|
||||
LastSeen time.Time |
|
||||
TasksCompleted int32 |
|
||||
TasksFailed int32 |
|
||||
UptimeSeconds int64 |
|
||||
Stream pb.WorkerService_WorkerStreamServer |
|
||||
} |
|
||||
|
|
||||
type TaskInfo struct { |
|
||||
ID string |
|
||||
Type string |
|
||||
VolumeID uint32 |
|
||||
Status string |
|
||||
Progress float32 |
|
||||
AssignedTo string |
|
||||
Created time.Time |
|
||||
Updated time.Time |
|
||||
Server string |
|
||||
Collection string |
|
||||
DataCenter string |
|
||||
Rack string |
|
||||
Parameters map[string]string |
|
||||
} |
|
||||
|
|
||||
// gRPC service implementation
|
|
||||
func (s *AdminServer) WorkerStream(stream pb.WorkerService_WorkerStreamServer) error { |
|
||||
var workerID string |
|
||||
|
|
||||
for { |
|
||||
req, err := stream.Recv() |
|
||||
if err == io.EOF { |
|
||||
log.Printf("Worker %s disconnected", workerID) |
|
||||
s.removeWorkerStream(workerID) |
|
||||
return nil |
|
||||
} |
|
||||
if err != nil { |
|
||||
log.Printf("Stream error from worker %s: %v", workerID, err) |
|
||||
s.removeWorkerStream(workerID) |
|
||||
return err |
|
||||
} |
|
||||
|
|
||||
// Handle different message types
|
|
||||
switch msg := req.Message.(type) { |
|
||||
case *pb.WorkerMessage_Registration: |
|
||||
workerID = msg.Registration.WorkerId |
|
||||
s.handleWorkerRegistration(workerID, msg.Registration, stream) |
|
||||
|
|
||||
case *pb.WorkerMessage_Heartbeat: |
|
||||
s.handleWorkerHeartbeat(msg.Heartbeat, stream) |
|
||||
|
|
||||
case *pb.WorkerMessage_TaskRequest: |
|
||||
s.handleTaskRequest(msg.TaskRequest, stream) |
|
||||
|
|
||||
case *pb.WorkerMessage_TaskUpdate: |
|
||||
s.handleTaskUpdate(msg.TaskUpdate) |
|
||||
|
|
||||
case *pb.WorkerMessage_TaskComplete: |
|
||||
s.handleTaskComplete(msg.TaskComplete) |
|
||||
|
|
||||
case *pb.WorkerMessage_Shutdown: |
|
||||
log.Printf("Worker %s shutting down: %s", msg.Shutdown.WorkerId, msg.Shutdown.Reason) |
|
||||
s.removeWorkerStream(msg.Shutdown.WorkerId) |
|
||||
return nil |
|
||||
} |
|
||||
} |
|
||||
} |
|
||||
|
|
||||
func (s *AdminServer) handleWorkerRegistration(workerID string, reg *pb.WorkerRegistration, stream pb.WorkerService_WorkerStreamServer) { |
|
||||
s.mu.Lock() |
|
||||
defer s.mu.Unlock() |
|
||||
|
|
||||
worker := &WorkerInfo{ |
|
||||
ID: reg.WorkerId, |
|
||||
Address: reg.Address, |
|
||||
Capabilities: reg.Capabilities, |
|
||||
MaxConcurrent: reg.MaxConcurrent, |
|
||||
Status: "active", |
|
||||
CurrentLoad: 0, |
|
||||
LastSeen: time.Now(), |
|
||||
TasksCompleted: 0, |
|
||||
TasksFailed: 0, |
|
||||
UptimeSeconds: 0, |
|
||||
Stream: stream, |
|
||||
} |
|
||||
|
|
||||
s.workers[reg.WorkerId] = worker |
|
||||
s.addWorkerStream(reg.WorkerId, stream) |
|
||||
|
|
||||
log.Printf("Registered worker %s with capabilities: %v", reg.WorkerId, reg.Capabilities) |
|
||||
|
|
||||
// Send registration response
|
|
||||
response := &pb.AdminMessage{ |
|
||||
AdminId: "admin-server", |
|
||||
Timestamp: time.Now().Unix(), |
|
||||
Message: &pb.AdminMessage_RegistrationResponse{ |
|
||||
RegistrationResponse: &pb.RegistrationResponse{ |
|
||||
Success: true, |
|
||||
Message: "Worker registered successfully", |
|
||||
AssignedWorkerId: reg.WorkerId, |
|
||||
}, |
|
||||
}, |
|
||||
} |
|
||||
|
|
||||
if err := stream.Send(response); err != nil { |
|
||||
log.Printf("Failed to send registration response to worker %s: %v", reg.WorkerId, err) |
|
||||
} |
|
||||
} |
|
||||
|
|
||||
func (s *AdminServer) handleWorkerHeartbeat(heartbeat *pb.WorkerHeartbeat, stream pb.WorkerService_WorkerStreamServer) { |
|
||||
s.mu.Lock() |
|
||||
defer s.mu.Unlock() |
|
||||
|
|
||||
if worker, exists := s.workers[heartbeat.WorkerId]; exists { |
|
||||
worker.Status = heartbeat.Status |
|
||||
worker.CurrentLoad = heartbeat.CurrentLoad |
|
||||
worker.LastSeen = time.Now() |
|
||||
worker.TasksCompleted = heartbeat.TasksCompleted |
|
||||
worker.TasksFailed = heartbeat.TasksFailed |
|
||||
worker.UptimeSeconds = heartbeat.UptimeSeconds |
|
||||
|
|
||||
log.Printf("Heartbeat from worker %s: status=%s, load=%d/%d", |
|
||||
heartbeat.WorkerId, heartbeat.Status, heartbeat.CurrentLoad, heartbeat.MaxConcurrent) |
|
||||
} |
|
||||
|
|
||||
// Send heartbeat response
|
|
||||
response := &pb.AdminMessage{ |
|
||||
AdminId: "admin-server", |
|
||||
Timestamp: time.Now().Unix(), |
|
||||
Message: &pb.AdminMessage_HeartbeatResponse{ |
|
||||
HeartbeatResponse: &pb.HeartbeatResponse{ |
|
||||
Success: true, |
|
||||
Message: "Heartbeat received", |
|
||||
}, |
|
||||
}, |
|
||||
} |
|
||||
|
|
||||
if err := stream.Send(response); err != nil { |
|
||||
log.Printf("Failed to send heartbeat response to worker %s: %v", heartbeat.WorkerId, err) |
|
||||
} |
|
||||
} |
|
||||
|
|
||||
func (s *AdminServer) handleTaskRequest(taskReq *pb.TaskRequest, stream pb.WorkerService_WorkerStreamServer) {
	s.mu.Lock()
	defer s.mu.Unlock()

	// Find a pending task that matches the worker's capabilities
	for taskID, task := range s.tasks {
		if task.Status == "pending" {
			// Check if the worker has the required capability
			hasCapability := false
			for _, capability := range taskReq.Capabilities {
				if capability == task.Type {
					hasCapability = true
					break
				}
			}

			if hasCapability && taskReq.AvailableSlots > 0 {
				// Assign the task to the worker
				task.Status = "assigned"
				task.AssignedTo = taskReq.WorkerId
				task.Updated = time.Now()

				log.Printf("Assigned task %s (volume %d) to worker %s", taskID, task.VolumeID, taskReq.WorkerId)

				// Send the task assignment
				response := &pb.AdminMessage{
					AdminId:   "admin-server",
					Timestamp: time.Now().Unix(),
					Message: &pb.AdminMessage_TaskAssignment{
						TaskAssignment: &pb.TaskAssignment{
							TaskId:      taskID,
							TaskType:    task.Type,
							Priority:    1,
							CreatedTime: task.Created.Unix(),
							Params: &pb.TaskParams{
								VolumeId:   task.VolumeID,
								Server:     task.Server,
								Collection: task.Collection,
								DataCenter: task.DataCenter,
								Rack:       task.Rack,
								Parameters: task.Parameters,
							},
						},
					},
				}

				if err := stream.Send(response); err != nil {
					log.Printf("Failed to send task assignment to worker %s: %v", taskReq.WorkerId, err)
				}
				return
			}
		}
	}

	log.Printf("No suitable tasks available for worker %s", taskReq.WorkerId)
}
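// Scheduling above is deliberately simple: the first pending task whose type matches a
// reported capability wins, and Go's randomized map iteration makes the pick effectively
// arbitrary among eligible tasks.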
func (s *AdminServer) handleTaskUpdate(update *pb.TaskUpdate) {
	s.mu.Lock()
	defer s.mu.Unlock()

	if task, exists := s.tasks[update.TaskId]; exists {
		task.Progress = update.Progress
		task.Status = update.Status
		task.Updated = time.Now()

		log.Printf("Task %s progress: %.1f%% - %s", update.TaskId, update.Progress, update.Message)
	}
}
func (s *AdminServer) handleTaskComplete(complete *pb.TaskComplete) {
	s.mu.Lock()
	defer s.mu.Unlock()

	if task, exists := s.tasks[complete.TaskId]; exists {
		if complete.Success {
			task.Status = "completed"
			task.Progress = 100.0
		} else {
			task.Status = "failed"
		}
		task.Updated = time.Now()

		// Update worker stats
		if worker, workerExists := s.workers[complete.WorkerId]; workerExists {
			if complete.Success {
				worker.TasksCompleted++
			} else {
				worker.TasksFailed++
			}
			if worker.CurrentLoad > 0 {
				worker.CurrentLoad--
			}
		}

		log.Printf("Task %s completed by worker %s: success=%v", complete.TaskId, complete.WorkerId, complete.Success)
	}
}
func (s *AdminServer) addWorkerStream(workerID string, stream pb.WorkerService_WorkerStreamServer) {
	s.streamMu.Lock()
	defer s.streamMu.Unlock()
	s.workerStreams[workerID] = stream
}

func (s *AdminServer) removeWorkerStream(workerID string) {
	s.streamMu.Lock()
	defer s.streamMu.Unlock()
	delete(s.workerStreams, workerID)

	s.mu.Lock()
	defer s.mu.Unlock()
	delete(s.workers, workerID)
}
// HTTP handlers for web UI
func (s *AdminServer) statusHandler(w http.ResponseWriter, r *http.Request) {
	s.mu.RLock()
	defer s.mu.RUnlock()

	// Convert internal data to a JSON-friendly format
	taskList := make([]map[string]interface{}, 0, len(s.tasks))
	for _, task := range s.tasks {
		taskList = append(taskList, map[string]interface{}{
			"id":        task.ID,
			"type":      task.Type,
			"volume_id": task.VolumeID,
			"status":    task.Status,
			"progress":  task.Progress,
			"created":   task.Created,
		})
	}

	workerList := make([]map[string]interface{}, 0, len(s.workers))
	for _, worker := range s.workers {
		workerList = append(workerList, map[string]interface{}{
			"id":           worker.ID,
			"address":      worker.Address,
			"capabilities": worker.Capabilities,
			"status":       worker.Status,
			"last_seen":    worker.LastSeen,
		})
	}

	w.Header().Set("Content-Type", "application/json")
	json.NewEncoder(w).Encode(map[string]interface{}{
		"admin_server": "running",
		"master_addr":  s.masterAddr,
		"tasks":        taskList,
		"workers":      workerList,
		"uptime":       time.Since(s.startTime).String(),
	})
}

func (s *AdminServer) healthHandler(w http.ResponseWriter, r *http.Request) {
	s.mu.RLock()
	defer s.mu.RUnlock()

	w.Header().Set("Content-Type", "application/json")
	json.NewEncoder(w).Encode(map[string]interface{}{
		"status":  "healthy",
		"uptime":  time.Since(s.startTime).String(),
		"tasks":   len(s.tasks),
		"workers": len(s.workers),
	})
}
func (s *AdminServer) webUIHandler(w http.ResponseWriter, r *http.Request) {
	w.Header().Set("Content-Type", "text/html")

	html := `<!DOCTYPE html>
<html>
<head>
    <title>SeaweedFS Admin - EC Task Monitor</title>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <style>
        body {
            font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
            margin: 0; padding: 20px; background: #f5f5f5;
        }
        .container { max-width: 1200px; margin: 0 auto; }
        .header {
            background: #2c3e50; color: white; padding: 20px; border-radius: 8px;
            margin-bottom: 20px; text-align: center;
        }
        .grpc-badge {
            background: #9b59b6; color: white; padding: 4px 8px;
            border-radius: 12px; font-size: 0.8em; margin-left: 10px;
        }
        .worker-badge {
            background: #27ae60; color: white; padding: 4px 8px;
            border-radius: 12px; font-size: 0.8em; margin-left: 10px;
        }
        .stats {
            display: grid; grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
            gap: 20px; margin-bottom: 20px;
        }
        .stat-card {
            background: white; padding: 20px; border-radius: 8px;
            box-shadow: 0 2px 4px rgba(0,0,0,0.1); text-align: center;
        }
        .stat-number { font-size: 2em; font-weight: bold; color: #3498db; }
        .stat-label { color: #7f8c8d; margin-top: 5px; }
        .section {
            background: white; border-radius: 8px; margin-bottom: 20px;
            box-shadow: 0 2px 4px rgba(0,0,0,0.1); overflow: hidden;
        }
        .section-header {
            background: #34495e; color: white; padding: 15px 20px;
            font-weight: bold; font-size: 1.1em;
        }
        .section-content { padding: 20px; }
        table { width: 100%; border-collapse: collapse; }
        th, td { padding: 12px; text-align: left; border-bottom: 1px solid #ecf0f1; }
        th { background: #f8f9fa; font-weight: 600; }
        .status-pending { color: #f39c12; font-weight: bold; }
        .status-assigned { color: #3498db; font-weight: bold; }
        .status-running { color: #e67e22; font-weight: bold; }
        .status-completed { color: #27ae60; font-weight: bold; }
        .status-failed { color: #e74c3c; font-weight: bold; }
        .worker-online { color: #27ae60; }
        .refresh-btn {
            background: #3498db; color: white; padding: 10px 20px;
            border: none; border-radius: 5px; cursor: pointer; margin-bottom: 20px;
        }
        .refresh-btn:hover { background: #2980b9; }
    </style>
</head>
<body>
    <div class="container">
        <div class="header">
            <h1>🧪 SeaweedFS EC Task Monitor
                <span class="grpc-badge">gRPC Streaming</span>
                <span class="worker-badge">worker.proto</span>
            </h1>
            <p>Real-time Erasure Coding Task Management Dashboard</p>
        </div>

        <button class="refresh-btn" onclick="location.reload()">🔄 Refresh</button>

        <div class="stats" id="stats">
            <div class="stat-card">
                <div class="stat-number" id="total-tasks">0</div>
                <div class="stat-label">Total Tasks</div>
            </div>
            <div class="stat-card">
                <div class="stat-number" id="active-workers">0</div>
                <div class="stat-label">Active Workers</div>
            </div>
            <div class="stat-card">
                <div class="stat-number" id="completed-tasks">0</div>
                <div class="stat-label">Completed</div>
            </div>
            <div class="stat-card">
                <div class="stat-number" id="uptime">--</div>
                <div class="stat-label">Uptime</div>
            </div>
        </div>

        <div class="section">
            <div class="section-header">📋 EC Tasks</div>
            <div class="section-content">
                <table>
                    <thead>
                        <tr>
                            <th>Task ID</th>
                            <th>Type</th>
                            <th>Volume ID</th>
                            <th>Status</th>
                            <th>Progress</th>
                            <th>Created</th>
                        </tr>
                    </thead>
                    <tbody id="tasks-table">
                        <tr><td colspan="6" style="text-align: center; color: #7f8c8d;">Loading tasks...</td></tr>
                    </tbody>
                </table>
            </div>
        </div>

        <div class="section">
            <div class="section-header">⚙️ Workers (via gRPC Streaming)</div>
            <div class="section-content">
                <table>
                    <thead>
                        <tr>
                            <th>Worker ID</th>
                            <th>Address</th>
                            <th>Status</th>
                            <th>Capabilities</th>
                            <th>Last Seen</th>
                        </tr>
                    </thead>
                    <tbody id="workers-table">
                        <tr><td colspan="5" style="text-align: center; color: #7f8c8d;">Loading workers...</td></tr>
                    </tbody>
                </table>
            </div>
        </div>
    </div>

    <script>
        function formatTime(timestamp) {
            return new Date(timestamp).toLocaleString();
        }

        function formatUptime(uptime) {
            if (!uptime) return '--';
            const matches = uptime.match(/(\d+h)?(\d+m)?(\d+\.?\d*s)?/);
            if (!matches) return uptime;

            let parts = [];
            if (matches[1]) parts.push(matches[1]);
            if (matches[2]) parts.push(matches[2]);
            if (matches[3] && !matches[1]) parts.push(matches[3]);

            return parts.join(' ') || uptime;
        }

        function updateData() {
            fetch('/status')
                .then(response => response.json())
                .then(data => {
                    document.getElementById('total-tasks').textContent = data.tasks.length;
                    document.getElementById('active-workers').textContent = data.workers.length;
                    document.getElementById('completed-tasks').textContent =
                        data.tasks.filter(function(t) { return t.status === 'completed'; }).length;
                    document.getElementById('uptime').textContent = formatUptime(data.uptime);

                    const tasksTable = document.getElementById('tasks-table');
                    if (data.tasks.length === 0) {
                        tasksTable.innerHTML = '<tr><td colspan="6" style="text-align: center; color: #7f8c8d;">No tasks available</td></tr>';
                    } else {
                        tasksTable.innerHTML = data.tasks.map(task => '<tr>' +
                            '<td><code>' + task.id + '</code></td>' +
                            '<td>' + task.type + '</td>' +
                            '<td>' + task.volume_id + '</td>' +
                            '<td><span class="status-' + task.status + '">' + task.status.toUpperCase() + '</span></td>' +
                            '<td>' + task.progress.toFixed(1) + '%</td>' +
                            '<td>' + formatTime(task.created) + '</td>' +
                            '</tr>').join('');
                    }

                    const workersTable = document.getElementById('workers-table');
                    if (data.workers.length === 0) {
                        workersTable.innerHTML = '<tr><td colspan="5" style="text-align: center; color: #7f8c8d;">No workers registered</td></tr>';
                    } else {
                        workersTable.innerHTML = data.workers.map(worker => '<tr>' +
                            '<td><strong>' + worker.id + '</strong></td>' +
                            '<td>' + (worker.address || 'N/A') + '</td>' +
                            '<td><span class="worker-online">' + worker.status.toUpperCase() + '</span></td>' +
                            '<td>' + worker.capabilities.join(', ') + '</td>' +
                            '<td>' + formatTime(worker.last_seen) + '</td>' +
                            '</tr>').join('');
                    }
                })
                .catch(error => {
                    console.error('Failed to fetch data:', error);
                });
        }

        updateData();
        setInterval(updateData, 5000);
    </script>
</body>
</html>`

	fmt.Fprint(w, html)
}
// Task detection and creation
func (s *AdminServer) detectVolumesForEC() {
	ticker := time.NewTicker(30 * time.Second)
	go func() {
		for range ticker.C {
			s.mu.Lock()

			log.Println("Scanning for volumes requiring EC...")

			// Simulate volume detection - in a real implementation, query the master
			if len(s.tasks) < 5 { // Don't create too many tasks
				taskID := fmt.Sprintf("ec-task-%d", time.Now().Unix())
				volumeID := uint32(1000 + len(s.tasks))

				task := &TaskInfo{
					ID:         taskID,
					Type:       "erasure_coding",
					VolumeID:   volumeID,
					Status:     "pending",
					Progress:   0.0,
					Created:    time.Now(),
					Updated:    time.Now(),
					Server:     "volume1:8080", // Simplified
					Collection: "",
					DataCenter: "dc1",
					Rack:       "rack1",
					Parameters: map[string]string{
						"data_shards":   "10",
						"parity_shards": "4",
					},
				}

				s.tasks[taskID] = task
				log.Printf("Created EC task %s for volume %d", taskID, volumeID)
			}

			s.mu.Unlock()
		}
	}()
}
func main() {
	grpcPort := os.Getenv("GRPC_PORT")
	if grpcPort == "" {
		grpcPort = "9901"
	}

	httpPort := os.Getenv("ADMIN_PORT")
	if httpPort == "" {
		httpPort = "9900"
	}

	masterAddr := os.Getenv("MASTER_ADDRESS")
	if masterAddr == "" {
		masterAddr = "master:9333"
	}

	server := &AdminServer{
		grpcPort:      grpcPort,
		httpPort:      httpPort,
		masterAddr:    masterAddr,
		startTime:     time.Now(),
		workers:       make(map[string]*WorkerInfo),
		tasks:         make(map[string]*TaskInfo),
		taskQueue:     make([]string, 0),
		workerStreams: make(map[string]pb.WorkerService_WorkerStreamServer),
	}

	// Start gRPC server
	go func() {
		lis, err := net.Listen("tcp", ":"+grpcPort)
		if err != nil {
			log.Fatalf("Failed to listen on gRPC port %s: %v", grpcPort, err)
		}

		grpcServer := grpc.NewServer()
		pb.RegisterWorkerServiceServer(grpcServer, server)
		reflection.Register(grpcServer)

		log.Printf("gRPC server starting on port %s", grpcPort)
		if err := grpcServer.Serve(lis); err != nil {
			log.Fatalf("Failed to serve gRPC: %v", err)
		}
	}()

	// Start HTTP server for the web UI
	go func() {
		http.HandleFunc("/", server.webUIHandler)
		http.HandleFunc("/status", server.statusHandler)
		http.HandleFunc("/health", server.healthHandler)

		log.Printf("HTTP server starting on port %s", httpPort)
		if err := http.ListenAndServe(":"+httpPort, nil); err != nil {
			log.Fatalf("Failed to serve HTTP: %v", err)
		}
	}()

	// Start task detection
	server.detectVolumesForEC()

	log.Printf("Admin server started - gRPC port: %s, HTTP port: %s, Master: %s", grpcPort, httpPort, masterAddr)

	// Keep the main goroutine running
	select {}
}
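For context, the worker side of this stream protocol is small. The following is a minimal sketch, not the shipped worker: it assumes the same generated pb package produced from worker.proto above (the import path is hypothetical), the admin's gRPC port 9901, and a plaintext connection; only fields the server above actually reads are set.

package main

import (
	"context"
	"log"
	"time"

	"google.golang.org/grpc"
	"google.golang.org/grpc/credentials/insecure"

	pb "example.com/admin_integration/worker_pb" // hypothetical path for the worker.proto stubs
)

func main() {
	// Dial the admin's gRPC port without TLS (matches the test environment).
	conn, err := grpc.Dial("admin:9901", grpc.WithTransportCredentials(insecure.NewCredentials()))
	if err != nil {
		log.Fatalf("dial admin: %v", err)
	}
	defer conn.Close()

	stream, err := pb.NewWorkerServiceClient(conn).WorkerStream(context.Background())
	if err != nil {
		log.Fatalf("open stream: %v", err)
	}

	// Register, then ask for work; the admin answers on the same stream.
	_ = stream.Send(&pb.WorkerMessage{Message: &pb.WorkerMessage_Registration{
		Registration: &pb.WorkerRegistration{
			WorkerId:      "worker-demo",
			Address:       "worker-demo:9001",
			Capabilities:  []string{"erasure_coding"},
			MaxConcurrent: 1,
		},
	}})
	_ = stream.Send(&pb.WorkerMessage{Message: &pb.WorkerMessage_TaskRequest{
		TaskRequest: &pb.TaskRequest{
			WorkerId:       "worker-demo",
			Capabilities:   []string{"erasure_coding"},
			AvailableSlots: 1,
		},
	}})

	// Print whatever the admin sends back: a RegistrationResponse, then possibly
	// a TaskAssignment produced by handleTaskRequest above.
	for {
		msg, err := stream.Recv()
		if err != nil {
			log.Fatalf("stream closed: %v", err)
		}
		log.Printf("admin message at %v: %+v", time.Unix(msg.Timestamp, 0), msg.Message)
	}
}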
@ -1,395 +1,216 @@
name: admin_integration

networks:
  seaweed_net:
    driver: bridge

services:
  master:
    image: chrislusf/seaweedfs:local
    ports:
      - "9333:9333"
      - "19333:19333"
    command: "master -ip=master -mdir=/data -volumeSizeLimitMB=50"
    environment:
      - WEED_MASTER_VOLUME_GROWTH_COPY_1=1
      - WEED_MASTER_VOLUME_GROWTH_COPY_2=2
      - WEED_MASTER_VOLUME_GROWTH_COPY_OTHER=1
    volumes:
      - ./data/master:/data
    networks:
      - seaweed_net

  volume1:
    image: chrislusf/seaweedfs:local
    ports:
      - "8080:8080"
      - "18080:18080"
    command: "volume -mserver=master:9333 -ip=volume1 -dir=/data -max=10"
    depends_on:
      - master
    volumes:
      - ./data/volume1:/data
    networks:
      - seaweed_net

  volume2:
    image: chrislusf/seaweedfs:local
    ports:
      - "8081:8080"
      - "18081:18080"
    command: "volume -mserver=master:9333 -ip=volume2 -dir=/data -max=10"
    depends_on:
      - master
    volumes:
      - ./data/volume2:/data
    networks:
      - seaweed_net

  volume3:
    image: chrislusf/seaweedfs:local
    ports:
      - "8082:8080"
      - "18082:18080"
    command: "volume -mserver=master:9333 -ip=volume3 -dir=/data -max=10"
    depends_on:
      - master
    volumes:
      - ./data/volume3:/data
    networks:
      - seaweed_net

  volume4:
    image: chrislusf/seaweedfs:local
    ports:
      - "8083:8080"
      - "18083:18080"
    command: "volume -mserver=master:9333 -ip=volume4 -dir=/data -max=10"
    depends_on:
      - master
    volumes:
      - ./data/volume4:/data
    networks:
      - seaweed_net

  volume5:
    image: chrislusf/seaweedfs:local
    ports:
      - "8084:8080"
      - "18084:18080"
    command: "volume -mserver=master:9333 -ip=volume5 -dir=/data -max=10"
    depends_on:
      - master
    volumes:
      - ./data/volume5:/data
    networks:
      - seaweed_net

  volume6:
    image: chrislusf/seaweedfs:local
    ports:
      - "8085:8080"
      - "18085:18080"
    command: "volume -mserver=master:9333 -ip=volume6 -dir=/data -max=10"
    depends_on:
      - master
    volumes:
      - ./data/volume6:/data
    networks:
      - seaweed_net

  filer:
    image: chrislusf/seaweedfs:local
    ports:
      - "8888:8888"
      - "18888:18888"
    command: "filer -master=master:9333 -ip=filer"
    depends_on:
      - master
    volumes:
      - ./data/filer:/data
    networks:
      - seaweed_net

  admin:
    image: chrislusf/seaweedfs:local
    ports:
      - "23646:23646" # HTTP admin interface (default port)
      - "33646:33646" # gRPC worker communication (23646 + 10000)
    command: "admin -port=23646 -masters=master:9333 -dataDir=/data"
    depends_on:
      - master
      - filer
    volumes:
      - ./data/admin:/data
    networks:
      - seaweed_net

  worker1:
    image: chrislusf/seaweedfs:local
    command: "worker -admin=admin:23646 -capabilities=ec,vacuum -maxConcurrent=2"
    depends_on:
      - admin
    volumes:
      - ./data/worker1:/data
    networks:
      - seaweed_net
    environment:
      - WORKER_ID=worker-1

  worker2:
    image: chrislusf/seaweedfs:local
    command: "worker -admin=admin:23646 -capabilities=ec,vacuum -maxConcurrent=2"
    depends_on:
      - admin
    volumes:
      - ./data/worker2:/data
    networks:
      - seaweed_net
    environment:
      - WORKER_ID=worker-2

  worker3:
    image: chrislusf/seaweedfs:local
    command: "worker -admin=admin:23646 -capabilities=ec,vacuum -maxConcurrent=2"
    depends_on:
      - admin
    volumes:
      - ./data/worker3:/data
    networks:
      - seaweed_net
    environment:
      - WORKER_ID=worker-3

  load_generator:
    image: chrislusf/seaweedfs:local
    entrypoint: ["/bin/sh"]
    command: >
      -c "
      echo 'Starting load generator...';
      sleep 30;
      echo 'Generating continuous load with 50MB volume limit...';
      while true; do
        echo 'Writing test files...';
        echo 'Test file content at $(date)' | /usr/bin/weed upload -server=master:9333;
        sleep 5;
        echo 'Deleting some files...';
        /usr/bin/weed shell -master=master:9333 <<< 'fs.rm /test_file_*' || true;
        sleep 10;
      done
      "
    depends_on:
      - master
      - filer
      - admin
    networks:
      - seaweed_net

  monitor:
    image: alpine:latest
    entrypoint: ["/bin/sh"]
    command: >
      -c "
      apk add --no-cache curl jq;
      echo 'Starting cluster monitor...';
      sleep 30;
      while true; do
        echo '=== Cluster Status $(date) ===';
        echo 'Master status:';
        curl -s http://master:9333/cluster/status | jq '.IsLeader, .Peers' || echo 'Master not ready';
        echo;
        echo 'Admin status:';
        curl -s http://admin:23646/ | grep -o 'Admin.*Interface' || echo 'Admin not ready';
        echo;
        echo 'Volume count by server:';
        curl -s http://master:9333/vol/status | jq '.Volumes | length' || echo 'Volumes not ready';
        echo;
        sleep 60;
      done
      "
    depends_on:
      - master
      - admin
      - filer
    networks:
      - seaweed_net
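Assuming the Docker Compose v2 CLI and the file above saved as docker-compose-ec-test.yml, the cluster can be brought up and smoke-tested with a few commands (ports as mapped above):

docker compose -f docker-compose-ec-test.yml up -d
curl -s http://localhost:9333/cluster/status          # master is up
curl -s http://localhost:23646/ | head -n 5           # admin UI responds on the default port
docker compose -f docker-compose-ec-test.yml logs -f worker1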
@ -1,21 +0,0 @@
#!/bin/sh

set -e

echo "Starting Load Generator..."
echo "Filer Address: $FILER_ADDRESS"
echo "Write Rate: $WRITE_RATE files/sec"
echo "Delete Rate: $DELETE_RATE files/sec"
echo "File Size Range: $FILE_SIZE_MIN - $FILE_SIZE_MAX"
echo "Test Duration: $TEST_DURATION seconds"

# Wait for filer to be ready
echo "Waiting for filer to be ready..."
until curl -f http://$FILER_ADDRESS/ > /dev/null 2>&1; do
    echo "Filer not ready, waiting..."
    sleep 5
done
echo "Filer is ready!"

# Start the load generator
exec ./load-generator
@ -1,375 +0,0 @@
package main

import (
	"bytes"
	"crypto/rand"
	"fmt"
	"io"
	"log"
	"mime/multipart"
	"net/http"
	"os"
	"strconv"
	"strings"
	"sync"
	"time"
)

type LoadGenerator struct {
	filerAddr    string
	masterAddr   string
	writeRate    int
	deleteRate   int
	fileSizeMin  int64
	fileSizeMax  int64
	testDuration int
	collection   string

	// State tracking
	createdFiles []string
	mutex        sync.RWMutex
	stats        LoadStats
}

type LoadStats struct {
	FilesWritten  int64
	FilesDeleted  int64
	BytesWritten  int64
	Errors        int64
	StartTime     time.Time
	LastOperation time.Time
}

// parseSize converts size strings like "1MB", "5MB" to bytes
func parseSize(sizeStr string) int64 {
	sizeStr = strings.ToUpper(strings.TrimSpace(sizeStr))

	var multiplier int64 = 1
	if strings.HasSuffix(sizeStr, "KB") {
		multiplier = 1024
		sizeStr = strings.TrimSuffix(sizeStr, "KB")
	} else if strings.HasSuffix(sizeStr, "MB") {
		multiplier = 1024 * 1024
		sizeStr = strings.TrimSuffix(sizeStr, "MB")
	} else if strings.HasSuffix(sizeStr, "GB") {
		multiplier = 1024 * 1024 * 1024
		sizeStr = strings.TrimSuffix(sizeStr, "GB")
	}

	size, err := strconv.ParseInt(sizeStr, 10, 64)
	if err != nil {
		return 1024 * 1024 // Default to 1MB
	}

	return size * multiplier
}

// generateRandomData creates random data of the specified size
func (lg *LoadGenerator) generateRandomData(size int64) []byte {
	data := make([]byte, size)
	_, err := rand.Read(data)
	if err != nil {
		// Fall back to deterministic data
		for i := range data {
			data[i] = byte(i % 256)
		}
	}
	return data
}

// uploadFile uploads a file to SeaweedFS via the filer
func (lg *LoadGenerator) uploadFile(filename string, data []byte) error {
	url := fmt.Sprintf("http://%s/%s", lg.filerAddr, filename)
	if lg.collection != "" {
		url = fmt.Sprintf("http://%s/%s/%s", lg.filerAddr, lg.collection, filename)
	}

	// Create multipart form data
	var b bytes.Buffer
	writer := multipart.NewWriter(&b)

	// Create the form file field
	part, err := writer.CreateFormFile("file", filename)
	if err != nil {
		return err
	}

	// Write the file data
	_, err = part.Write(data)
	if err != nil {
		return err
	}

	// Close the multipart writer
	err = writer.Close()
	if err != nil {
		return err
	}

	req, err := http.NewRequest("POST", url, &b)
	if err != nil {
		return err
	}

	req.Header.Set("Content-Type", writer.FormDataContentType())

	client := &http.Client{Timeout: 30 * time.Second}
	resp, err := client.Do(req)
	if err != nil {
		return err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK && resp.StatusCode != http.StatusCreated {
		return fmt.Errorf("upload failed with status: %d", resp.StatusCode)
	}

	return nil
}

// deleteFile deletes a file from SeaweedFS via the filer
func (lg *LoadGenerator) deleteFile(filename string) error {
	url := fmt.Sprintf("http://%s/%s", lg.filerAddr, filename)
	if lg.collection != "" {
		url = fmt.Sprintf("http://%s/%s/%s", lg.filerAddr, lg.collection, filename)
	}

	req, err := http.NewRequest("DELETE", url, nil)
	if err != nil {
		return err
	}

	client := &http.Client{Timeout: 10 * time.Second}
	resp, err := client.Do(req)
	if err != nil {
		return err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK && resp.StatusCode != http.StatusNoContent && resp.StatusCode != http.StatusNotFound {
		return fmt.Errorf("delete failed with status: %d", resp.StatusCode)
	}

	return nil
}

// writeFiles continuously writes files at the specified rate
func (lg *LoadGenerator) writeFiles() {
	writeInterval := time.Second / time.Duration(lg.writeRate)
	ticker := time.NewTicker(writeInterval)
	defer ticker.Stop()

	fileCounter := 0

	for range ticker.C {
		fileCounter++

		// Random file size between min and max
		sizeDiff := lg.fileSizeMax - lg.fileSizeMin
		randomSize := lg.fileSizeMin
		if sizeDiff > 0 {
			randomSize += int64(time.Now().UnixNano()) % sizeDiff
		}

		// Generate the filename
		filename := fmt.Sprintf("test-data/file-%d-%d.bin", time.Now().Unix(), fileCounter)

		// Generate random data
		data := lg.generateRandomData(randomSize)

		// Upload the file
		err := lg.uploadFile(filename, data)
		if err != nil {
			log.Printf("Error uploading file %s: %v", filename, err)
			lg.stats.Errors++
		} else {
			lg.mutex.Lock()
			lg.createdFiles = append(lg.createdFiles, filename)
			lg.stats.FilesWritten++
			lg.stats.BytesWritten += randomSize
			lg.stats.LastOperation = time.Now()
			lg.mutex.Unlock()

			log.Printf("Uploaded file: %s (size: %d bytes, total files: %d)",
				filename, randomSize, lg.stats.FilesWritten)
		}
	}
}

// deleteFiles continuously deletes files at the specified rate
func (lg *LoadGenerator) deleteFiles() {
	deleteInterval := time.Second / time.Duration(lg.deleteRate)
	ticker := time.NewTicker(deleteInterval)
	defer ticker.Stop()

	for range ticker.C {
		lg.mutex.Lock()
		if len(lg.createdFiles) == 0 {
			lg.mutex.Unlock()
			continue
		}

		// Pick a random file to delete
		index := int(time.Now().UnixNano()) % len(lg.createdFiles)
		filename := lg.createdFiles[index]

		// Remove it from the slice
		lg.createdFiles = append(lg.createdFiles[:index], lg.createdFiles[index+1:]...)
		lg.mutex.Unlock()

		// Delete the file
		err := lg.deleteFile(filename)
		if err != nil {
			log.Printf("Error deleting file %s: %v", filename, err)
			lg.stats.Errors++
		} else {
			lg.stats.FilesDeleted++
			lg.stats.LastOperation = time.Now()
			log.Printf("Deleted file: %s (remaining files: %d)", filename, len(lg.createdFiles))
		}
	}
}

// printStats periodically prints load generation statistics
func (lg *LoadGenerator) printStats() {
	ticker := time.NewTicker(30 * time.Second)
	defer ticker.Stop()

	for range ticker.C {
		uptime := time.Since(lg.stats.StartTime)
		writeRate := float64(lg.stats.FilesWritten) / uptime.Seconds()
		deleteRate := float64(lg.stats.FilesDeleted) / uptime.Seconds()

		lg.mutex.RLock()
		pendingFiles := len(lg.createdFiles)
		lg.mutex.RUnlock()

		log.Printf("STATS: Files written=%d, deleted=%d, pending=%d, errors=%d",
			lg.stats.FilesWritten, lg.stats.FilesDeleted, pendingFiles, lg.stats.Errors)
		log.Printf("RATES: Write=%.2f/sec, Delete=%.2f/sec, Data=%.2f MB written",
			writeRate, deleteRate, float64(lg.stats.BytesWritten)/(1024*1024))
	}
}

// checkClusterHealth periodically checks cluster status
func (lg *LoadGenerator) checkClusterHealth() {
	ticker := time.NewTicker(1 * time.Minute)
	defer ticker.Stop()

	for range ticker.C {
		// Check master status
		resp, err := http.Get(fmt.Sprintf("http://%s/cluster/status", lg.masterAddr))
		if err != nil {
			log.Printf("WARNING: Cannot reach master: %v", err)
			continue
		}

		body, err := io.ReadAll(resp.Body)
		resp.Body.Close()

		if err != nil {
			log.Printf("WARNING: Cannot read master response: %v", err)
			continue
		}

		if resp.StatusCode == http.StatusOK {
			log.Printf("Cluster health check: OK (response size: %d bytes)", len(body))
		} else {
			log.Printf("WARNING: Cluster health check failed with status: %d", resp.StatusCode)
		}
	}
}

func main() {
	filerAddr := os.Getenv("FILER_ADDRESS")
	if filerAddr == "" {
		filerAddr = "filer:8888"
	}

	masterAddr := os.Getenv("MASTER_ADDRESS")
	if masterAddr == "" {
		masterAddr = "master:9333"
	}

	writeRate, _ := strconv.Atoi(os.Getenv("WRITE_RATE"))
	if writeRate <= 0 {
		writeRate = 10
	}

	deleteRate, _ := strconv.Atoi(os.Getenv("DELETE_RATE"))
	if deleteRate <= 0 {
		deleteRate = 2
	}

	fileSizeMin := parseSize(os.Getenv("FILE_SIZE_MIN"))
	if fileSizeMin <= 0 {
		fileSizeMin = 1024 * 1024 // 1MB
	}

	fileSizeMax := parseSize(os.Getenv("FILE_SIZE_MAX"))
	if fileSizeMax <= fileSizeMin {
		fileSizeMax = 5 * 1024 * 1024 // 5MB
	}

	testDuration, _ := strconv.Atoi(os.Getenv("TEST_DURATION"))
	if testDuration <= 0 {
		testDuration = 3600 // 1 hour
	}

	collection := os.Getenv("COLLECTION")

	lg := &LoadGenerator{
		filerAddr:    filerAddr,
		masterAddr:   masterAddr,
		writeRate:    writeRate,
		deleteRate:   deleteRate,
		fileSizeMin:  fileSizeMin,
		fileSizeMax:  fileSizeMax,
		testDuration: testDuration,
		collection:   collection,
		createdFiles: make([]string, 0),
		stats: LoadStats{
			StartTime: time.Now(),
		},
	}

	log.Printf("Starting load generator...")
	log.Printf("Filer: %s", filerAddr)
	log.Printf("Master: %s", masterAddr)
	log.Printf("Write rate: %d files/sec", writeRate)
	log.Printf("Delete rate: %d files/sec", deleteRate)
	log.Printf("File size: %d - %d bytes", fileSizeMin, fileSizeMax)
	log.Printf("Test duration: %d seconds", testDuration)
	log.Printf("Collection: '%s'", collection)

	// Wait for the filer to be ready
	log.Println("Waiting for filer to be ready...")
	for {
		resp, err := http.Get(fmt.Sprintf("http://%s/", filerAddr))
		if err == nil && resp.StatusCode == http.StatusOK {
			resp.Body.Close()
			break
		}
		if resp != nil {
			resp.Body.Close()
		}
		log.Println("Filer not ready, waiting...")
		time.Sleep(5 * time.Second)
	}
	log.Println("Filer is ready!")

	// Start background goroutines
	go lg.writeFiles()
	go lg.deleteFiles()
	go lg.printStats()
	go lg.checkClusterHealth()

	// Run for the specified duration
	log.Printf("Load test will run for %d seconds...", testDuration)
	time.Sleep(time.Duration(testDuration) * time.Second)

	log.Println("Load test completed!")
	log.Printf("Final stats: Files written=%d, deleted=%d, errors=%d, total data=%.2f MB",
		lg.stats.FilesWritten, lg.stats.FilesDeleted, lg.stats.Errors,
		float64(lg.stats.BytesWritten)/(1024*1024))
}
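The uploadFile/deleteFile pair above maps directly onto the filer's HTTP API: a multipart POST to a path stores a file, a DELETE to the same path removes it. A rough curl equivalent, assuming the filer port mapping from the compose file above:

head -c 1048576 /dev/urandom > /tmp/file-1.bin
curl -F "file=@/tmp/file-1.bin" "http://localhost:8888/test-data/file-1.bin"   # what uploadFile sends
curl -X DELETE "http://localhost:8888/test-data/file-1.bin"                    # what deleteFile sends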
@ -1,38 +0,0 @@
#!/bin/sh

set -e

echo "Starting Cluster Monitor..."
echo "Master Address: $MASTER_ADDRESS"
echo "Admin Address: $ADMIN_ADDRESS"
echo "Filer Address: $FILER_ADDRESS"
echo "Monitor Interval: $MONITOR_INTERVAL"

# Wait for core services to be ready
echo "Waiting for core services to be ready..."

echo "Waiting for master..."
until curl -f http://$MASTER_ADDRESS/cluster/status > /dev/null 2>&1; do
    echo "Master not ready, waiting..."
    sleep 5
done
echo "Master is ready!"

echo "Waiting for admin..."
until curl -f http://$ADMIN_ADDRESS/health > /dev/null 2>&1; do
    echo "Admin not ready, waiting..."
    sleep 5
done
echo "Admin is ready!"

echo "Waiting for filer..."
until curl -f http://$FILER_ADDRESS/ > /dev/null 2>&1; do
    echo "Filer not ready, waiting..."
    sleep 5
done
echo "Filer is ready!"

echo "All services ready! Starting monitor..."

# Start the monitor
exec ./monitor
@ -1,366 +0,0 @@ |
|||||
package main |
|
||||
|
|
||||
import ( |
|
||||
"encoding/json" |
|
||||
"fmt" |
|
||||
"io" |
|
||||
"log" |
|
||||
"net/http" |
|
||||
"os" |
|
||||
"time" |
|
||||
) |
|
||||
|
|
||||
type Monitor struct { |
|
||||
masterAddr string |
|
||||
adminAddr string |
|
||||
filerAddr string |
|
||||
interval time.Duration |
|
||||
startTime time.Time |
|
||||
stats MonitorStats |
|
||||
} |
|
||||
|
|
||||
type MonitorStats struct { |
|
||||
TotalChecks int64 |
|
||||
MasterHealthy int64 |
|
||||
AdminHealthy int64 |
|
||||
FilerHealthy int64 |
|
||||
VolumeCount int64 |
|
||||
LastVolumeCheck time.Time |
|
||||
ECTasksDetected int64 |
|
||||
WorkersActive int64 |
|
||||
LastWorkerCheck time.Time |
|
||||
} |
|
||||
|
|
||||
type ClusterStatus struct { |
|
||||
IsLeader bool `json:"IsLeader"` |
|
||||
Leader string `json:"Leader"` |
|
||||
Peers []string `json:"Peers"` |
|
||||
} |
|
||||
|
|
||||
type VolumeStatus struct { |
|
||||
Volumes []VolumeInfo `json:"Volumes"` |
|
||||
} |
|
||||
|
|
||||
type VolumeInfo struct { |
|
||||
Id uint32 `json:"Id"` |
|
||||
Size uint64 `json:"Size"` |
|
||||
Collection string `json:"Collection"` |
|
||||
FileCount int64 `json:"FileCount"` |
|
||||
DeleteCount int64 `json:"DeleteCount"` |
|
||||
DeletedByteCount uint64 `json:"DeletedByteCount"` |
|
||||
ReadOnly bool `json:"ReadOnly"` |
|
||||
CompactRevision uint32 `json:"CompactRevision"` |
|
||||
Version uint32 `json:"Version"` |
|
||||
} |
|
||||
|
|
||||
type AdminStatus struct { |
|
||||
Status string `json:"status"` |
|
||||
Uptime string `json:"uptime"` |
|
||||
Tasks int `json:"tasks"` |
|
||||
Workers int `json:"workers"` |
|
||||
} |
|
||||
|
|
||||
// checkMasterHealth checks the master server health
|
|
||||
func (m *Monitor) checkMasterHealth() bool { |
|
||||
resp, err := http.Get(fmt.Sprintf("http://%s/cluster/status", m.masterAddr)) |
|
||||
if err != nil { |
|
||||
log.Printf("ERROR: Cannot reach master %s: %v", m.masterAddr, err) |
|
||||
return false |
|
||||
} |
|
||||
defer resp.Body.Close() |
|
||||
|
|
||||
if resp.StatusCode != http.StatusOK { |
|
||||
log.Printf("ERROR: Master returned status %d", resp.StatusCode) |
|
||||
return false |
|
||||
} |
|
||||
|
|
||||
var status ClusterStatus |
|
||||
body, err := io.ReadAll(resp.Body) |
|
||||
if err != nil { |
|
||||
log.Printf("ERROR: Cannot read master response: %v", err) |
|
||||
return false |
|
||||
} |
|
||||
|
|
||||
err = json.Unmarshal(body, &status) |
|
||||
if err != nil { |
|
||||
log.Printf("WARNING: Cannot parse master status: %v", err) |
|
||||
// Still consider it healthy if we got a response
|
|
||||
return true |
|
||||
} |
|
||||
|
|
||||
log.Printf("Master status: Leader=%s, IsLeader=%t, Peers=%d", |
|
||||
status.Leader, status.IsLeader, len(status.Peers)) |
|
||||
|
|
||||
m.stats.MasterHealthy++ |
|
||||
return true |
|
||||
} |
|
||||
|
|
||||
// checkAdminHealth checks the admin server health
|
|
||||
func (m *Monitor) checkAdminHealth() bool { |
|
||||
resp, err := http.Get(fmt.Sprintf("http://%s/health", m.adminAddr)) |
|
||||
if err != nil { |
|
||||
log.Printf("ERROR: Cannot reach admin %s: %v", m.adminAddr, err) |
|
||||
return false |
|
||||
} |
|
||||
defer resp.Body.Close() |
|
||||
|
|
||||
if resp.StatusCode != http.StatusOK { |
|
||||
log.Printf("ERROR: Admin returned status %d", resp.StatusCode) |
|
||||
return false |
|
||||
} |
|
||||
|
|
||||
var status AdminStatus |
|
||||
body, err := io.ReadAll(resp.Body) |
|
||||
if err != nil { |
|
||||
log.Printf("ERROR: Cannot read admin response: %v", err) |
|
||||
return false |
|
||||
} |
|
||||
|
|
||||
err = json.Unmarshal(body, &status) |
|
||||
if err != nil { |
|
||||
log.Printf("WARNING: Cannot parse admin status: %v", err) |
|
||||
return true |
|
||||
} |
|
||||
|
|
||||
log.Printf("Admin status: %s, Uptime=%s, Tasks=%d, Workers=%d", |
|
||||
status.Status, status.Uptime, status.Tasks, status.Workers) |
|
||||
|
|
||||
m.stats.AdminHealthy++ |
|
||||
m.stats.ECTasksDetected += int64(status.Tasks) |
|
||||
m.stats.WorkersActive = int64(status.Workers) |
|
||||
m.stats.LastWorkerCheck = time.Now() |
|
||||
|
|
||||
return true |
|
||||
} |
|
||||
|
|
||||
// checkFilerHealth checks the filer health
|
|
||||
func (m *Monitor) checkFilerHealth() bool { |
|
||||
resp, err := http.Get(fmt.Sprintf("http://%s/", m.filerAddr)) |
|
||||
if err != nil { |
|
||||
log.Printf("ERROR: Cannot reach filer %s: %v", m.filerAddr, err) |
|
||||
return false |
|
||||
} |
|
||||
defer resp.Body.Close() |
|
||||
|
|
||||
if resp.StatusCode != http.StatusOK { |
|
||||
log.Printf("ERROR: Filer returned status %d", resp.StatusCode) |
|
||||
return false |
|
||||
} |
|
||||
|
|
||||
m.stats.FilerHealthy++ |
|
||||
return true |
|
||||
} |
|
||||
|
|
||||
// checkVolumeStatus checks volume information from master
|
|
||||
func (m *Monitor) checkVolumeStatus() { |
|
||||
resp, err := http.Get(fmt.Sprintf("http://%s/vol/status", m.masterAddr)) |
|
||||
if err != nil { |
|
||||
log.Printf("ERROR: Cannot get volume status: %v", err) |
|
||||
return |
|
||||
} |
|
||||
defer resp.Body.Close() |
|
||||
|
|
||||
if resp.StatusCode != http.StatusOK { |
|
||||
log.Printf("ERROR: Volume status returned status %d", resp.StatusCode) |
|
||||
return |
|
||||
} |
|
||||
|
|
||||
body, err := io.ReadAll(resp.Body) |
|
||||
if err != nil { |
|
||||
log.Printf("ERROR: Cannot read volume status: %v", err) |
|
||||
return |
|
||||
} |
|
||||
|
|
||||
var volumeStatus VolumeStatus |
|
||||
err = json.Unmarshal(body, &volumeStatus) |
|
||||
if err != nil { |
|
||||
log.Printf("WARNING: Cannot parse volume status: %v", err) |
|
||||
return |
|
||||
} |
|
||||
|
|
||||
m.stats.VolumeCount = int64(len(volumeStatus.Volumes)) |
|
||||
m.stats.LastVolumeCheck = time.Now() |
|
||||
|
|
||||
// Analyze volumes
|
|
||||
var readOnlyCount, fullVolumeCount, ecCandidates int |
|
||||
var totalSize, totalFiles uint64 |
|
||||
|
|
||||
for _, vol := range volumeStatus.Volumes { |
|
||||
totalSize += vol.Size |
|
||||
totalFiles += uint64(vol.FileCount) |
|
||||
|
|
||||
if vol.ReadOnly { |
|
||||
readOnlyCount++ |
|
||||
} |
|
||||
|
|
||||
// Volume is close to full (>40MB for 50MB limit)
|
|
||||
if vol.Size > 40*1024*1024 { |
|
||||
fullVolumeCount++ |
|
||||
if !vol.ReadOnly { |
|
||||
ecCandidates++ |
|
||||
} |
|
||||
} |
|
||||
} |
|
||||
|
|
||||
log.Printf("Volume analysis: Total=%d, ReadOnly=%d, Full=%d, EC_Candidates=%d", |
|
||||
len(volumeStatus.Volumes), readOnlyCount, fullVolumeCount, ecCandidates) |
|
||||
log.Printf("Storage stats: Total_Size=%.2fMB, Total_Files=%d", |
|
||||
float64(totalSize)/(1024*1024), totalFiles) |
|
||||
|
|
||||
if ecCandidates > 0 { |
|
||||
log.Printf("⚠️ DETECTED %d volumes that should be EC'd!", ecCandidates) |
|
||||
} |
|
||||
} |
|
||||
|
|
||||
// healthHandler provides a health endpoint for the monitor itself
|
|
||||
func (m *Monitor) healthHandler(w http.ResponseWriter, r *http.Request) { |
|
||||
w.Header().Set("Content-Type", "application/json") |
|
||||
json.NewEncoder(w).Encode(map[string]interface{}{ |
|
||||
"status": "healthy", |
|
||||
"uptime": time.Since(m.startTime).String(), |
|
||||
"checks": m.stats.TotalChecks, |
|
||||
"last_check": m.stats.LastVolumeCheck.Format(time.RFC3339), |
|
||||
}) |
|
||||
} |
|
||||
|
|
||||
// statusHandler provides detailed monitoring status
|
|
||||
func (m *Monitor) statusHandler(w http.ResponseWriter, r *http.Request) { |
|
||||
w.Header().Set("Content-Type", "application/json") |
|
||||
json.NewEncoder(w).Encode(map[string]interface{}{ |
|
||||
"monitor": map[string]interface{}{ |
|
||||
"uptime": time.Since(m.startTime).String(), |
|
||||
"master_addr": m.masterAddr, |
|
||||
"admin_addr": m.adminAddr, |
|
||||
"filer_addr": m.filerAddr, |
|
||||
"interval": m.interval.String(), |
|
||||
}, |
|
||||
"stats": m.stats, |
|
||||
"health": map[string]interface{}{ |
|
||||
"master_healthy": m.stats.MasterHealthy > 0 && time.Since(m.stats.LastVolumeCheck) < 2*m.interval, |
|
||||
"admin_healthy": m.stats.AdminHealthy > 0 && time.Since(m.stats.LastWorkerCheck) < 2*m.interval, |
|
||||
"filer_healthy": m.stats.FilerHealthy > 0, |
|
||||
}, |
|
||||
}) |
|
||||
} |
|
||||
|
|
||||
// runMonitoring runs the main monitoring loop
|
|
||||
func (m *Monitor) runMonitoring() { |
|
||||
ticker := time.NewTicker(m.interval) |
|
||||
defer ticker.Stop() |
|
||||
|
|
||||
log.Printf("Starting monitoring loop every %v", m.interval) |
|
||||
|
|
||||
for { |
|
||||
m.stats.TotalChecks++ |
|
||||
|
|
||||
log.Printf("=== Monitoring Check #%d ===", m.stats.TotalChecks) |
|
||||
|
|
||||
// Check master health
|
|
||||
if m.checkMasterHealth() { |
|
||||
// If master is healthy, check volumes
|
|
||||
m.checkVolumeStatus() |
|
||||
} |
|
||||
|
|
||||
// Check admin health
|
|
||||
m.checkAdminHealth() |
|
||||
|
|
||||
// Check filer health
|
|
||||
m.checkFilerHealth() |
|
||||
|
|
||||
// Print summary
|
|
||||
log.Printf("Health Summary: Master=%t, Admin=%t, Filer=%t, Volumes=%d, Workers=%d", |
|
||||
m.stats.MasterHealthy > 0, |
|
||||
m.stats.AdminHealthy > 0, |
|
||||
m.stats.FilerHealthy > 0, |
|
||||
m.stats.VolumeCount, |
|
||||
m.stats.WorkersActive) |
|
||||
|
|
||||
log.Printf("=== End Check #%d ===", m.stats.TotalChecks) |
|
||||
|
|
||||
<-ticker.C |
|
||||
} |
|
||||
} |

func main() {
	masterAddr := os.Getenv("MASTER_ADDRESS")
	if masterAddr == "" {
		masterAddr = "master:9333"
	}

	adminAddr := os.Getenv("ADMIN_ADDRESS")
	if adminAddr == "" {
		adminAddr = "admin:9900"
	}

	filerAddr := os.Getenv("FILER_ADDRESS")
	if filerAddr == "" {
		filerAddr = "filer:8888"
	}

	intervalStr := os.Getenv("MONITOR_INTERVAL")
	interval, err := time.ParseDuration(intervalStr)
	if err != nil {
		interval = 10 * time.Second
	}

	monitor := &Monitor{
		masterAddr: masterAddr,
		adminAddr:  adminAddr,
		filerAddr:  filerAddr,
		interval:   interval,
		startTime:  time.Now(),
		stats:      MonitorStats{},
	}

	log.Printf("Starting SeaweedFS Cluster Monitor")
	log.Printf("Master: %s", masterAddr)
	log.Printf("Admin: %s", adminAddr)
	log.Printf("Filer: %s", filerAddr)
	log.Printf("Interval: %v", interval)

	// Setup HTTP endpoints
	http.HandleFunc("/health", monitor.healthHandler)
	http.HandleFunc("/status", monitor.statusHandler)

	// Start HTTP server in background
	go func() {
		log.Println("Monitor HTTP server starting on :9999")
		if err := http.ListenAndServe(":9999", nil); err != nil {
			log.Printf("Monitor HTTP server error: %v", err)
		}
	}()

	// Wait for services to be ready
	log.Println("Waiting for services to be ready...")
	for {
		masterOK := false
		adminOK := false
		filerOK := false

		if resp, err := http.Get(fmt.Sprintf("http://%s/cluster/status", masterAddr)); err == nil {
			masterOK = resp.StatusCode == http.StatusOK
			resp.Body.Close() // close even on non-200 so connections are not leaked
		}

		if resp, err := http.Get(fmt.Sprintf("http://%s/health", adminAddr)); err == nil {
			adminOK = resp.StatusCode == http.StatusOK
			resp.Body.Close()
		}

		if resp, err := http.Get(fmt.Sprintf("http://%s/", filerAddr)); err == nil {
			filerOK = resp.StatusCode == http.StatusOK
			resp.Body.Close()
		}

		if masterOK && adminOK && filerOK {
			log.Println("All services are ready!")
			break
		}

		log.Printf("Services ready: Master=%t, Admin=%t, Filer=%t", masterOK, adminOK, filerOK)
		time.Sleep(5 * time.Second)
	}

	// Start monitoring
	monitor.runMonitoring()
}
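
One caveat in monitor.go as written: runMonitoring mutates m.stats while the HTTP handlers read it from other goroutines, which is a data race under the Go memory model. A minimal sketch of a mutex-guarded wrapper, assuming a simplified stand-in for the MonitorStats fields:

package monitor

import (
	"sync"
	"time"
)

// MonitorStats here is a simplified stand-in for the struct used above.
type MonitorStats struct {
	TotalChecks     int64
	LastVolumeCheck time.Time
}

// guardedStats serializes access between the monitoring loop (writer)
// and the HTTP handlers (readers).
type guardedStats struct {
	mu sync.Mutex
	s  MonitorStats
}

// update applies a mutation while holding the lock.
func (g *guardedStats) update(f func(*MonitorStats)) {
	g.mu.Lock()
	defer g.mu.Unlock()
	f(&g.s)
}

// snapshot returns a copy that a handler can encode without holding the lock.
func (g *guardedStats) snapshot() MonitorStats {
	g.mu.Lock()
	defer g.mu.Unlock()
	return g.s
}

The loop would call update and the handlers snapshot; for a test harness the race is mostly harmless, but the fix is cheap.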
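Relatedly, the readiness probes use http.Get, which goes through http.DefaultClient and therefore has no timeout: a service that accepts connections but never answers can stall the wait loop indefinitely. A sketch of the same probe with an explicit client timeout (the endpoint is the master check from the loop above; the 3-second value is an assumption):

package main

import (
	"fmt"
	"net/http"
	"time"
)

// probe performs one readiness check that cannot hang forever.
func probe(url string) bool {
	client := &http.Client{Timeout: 3 * time.Second}
	resp, err := client.Get(url)
	if err != nil {
		return false
	}
	defer resp.Body.Close()
	return resp.StatusCode == http.StatusOK
}

func main() {
	fmt.Println(probe("http://master:9333/cluster/status"))
}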
@ -1,106 +0,0 @@
#!/bin/bash

set -e

# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color

echo -e "${BLUE}🧪 SeaweedFS EC Worker Testing Environment${NC}"
echo -e "${BLUE}===========================================${NC}"

# Check if docker-compose is available
if ! command -v docker-compose &> /dev/null; then
    echo -e "${RED}❌ docker-compose is required but not installed${NC}"
    exit 1
fi

# Create necessary directories
echo -e "${YELLOW}📁 Creating required directories...${NC}"
mkdir -p monitor-data admin-config

# Make scripts executable
echo -e "${YELLOW}🔧 Making scripts executable...${NC}"
chmod +x *.sh

# Stop any existing containers
echo -e "${YELLOW}🛑 Stopping any existing containers...${NC}"
docker-compose -f docker-compose-ec-test.yml down -v 2>/dev/null || true

# Build and start the environment
echo -e "${GREEN}🚀 Starting SeaweedFS EC testing environment...${NC}"
echo -e "${BLUE}This will start:${NC}"
echo -e "  • 1 Master server (port 9333)"
echo -e "  • 6 Volume servers (ports 8080-8085) with 50MB volume limit"
echo -e "  • 1 Filer (port 8888)"
echo -e "  • 1 Admin server (port 9900)"
echo -e "  • 3 EC Workers"
echo -e "  • 1 Load generator (continuous read/write)"
echo -e "  • 1 Monitor (port 9999)"
echo ""

docker-compose -f docker-compose-ec-test.yml up --build -d

echo -e "${GREEN}✅ Environment started successfully!${NC}"
echo ""
echo -e "${BLUE}📊 Monitoring URLs:${NC}"
echo -e "  • Master UI:    http://localhost:9333"
echo -e "  • Filer:        http://localhost:8888"
echo -e "  • Admin Server: http://localhost:9900/status"
echo -e "  • Monitor:      http://localhost:9999/status"
echo ""
echo -e "${BLUE}📈 Volume Servers:${NC}"
echo -e "  • Volume1: http://localhost:8080/status"
echo -e "  • Volume2: http://localhost:8081/status"
echo -e "  • Volume3: http://localhost:8082/status"
echo -e "  • Volume4: http://localhost:8083/status"
echo -e "  • Volume5: http://localhost:8084/status"
echo -e "  • Volume6: http://localhost:8085/status"
echo ""

echo -e "${YELLOW}⏳ Waiting for services to be ready...${NC}"
sleep 10

# Check service health
echo -e "${BLUE}🔍 Checking service health...${NC}"

check_service() {
    local name=$1
    local url=$2

    if curl -s "$url" > /dev/null 2>&1; then
        echo -e "  ✅ $name: ${GREEN}Healthy${NC}"
        return 0
    else
        echo -e "  ❌ $name: ${RED}Not responding${NC}"
        return 1
    fi
}

check_service "Master" "http://localhost:9333/cluster/status"
check_service "Filer" "http://localhost:8888/"
check_service "Admin" "http://localhost:9900/health"
check_service "Monitor" "http://localhost:9999/health"

echo ""
echo -e "${GREEN}🎯 Test Environment is Ready!${NC}"
echo ""
echo -e "${BLUE}What's happening:${NC}"
echo -e "  1. 📝 Load generator continuously writes 1-5MB files at 10 files/sec"
echo -e "  2. 🗑️  Load generator deletes files at 2 files/sec"
echo -e "  3. 📊 Volumes fill up to 50MB limit and trigger EC conversion"
echo -e "  4. 🏭 Admin server detects volumes needing EC and assigns to workers"
echo -e "  5. ⚡ Workers perform comprehensive EC (copy→encode→distribute)"
echo -e "  6. 📈 Monitor tracks all activity and volume states"
echo ""
echo -e "${YELLOW}📋 Useful Commands:${NC}"
echo -e "  • View logs:           docker-compose -f docker-compose-ec-test.yml logs -f [service]"
echo -e "  • Check worker status: docker-compose -f docker-compose-ec-test.yml logs worker1"
echo -e "  • Stop environment:    docker-compose -f docker-compose-ec-test.yml down -v"
echo -e "  • Monitor logs:        docker-compose -f docker-compose-ec-test.yml logs -f monitor"
echo ""
echo -e "${GREEN}🔥 The test will run for 1 hour by default${NC}"
echo -e "${BLUE}Monitor progress at: http://localhost:9999/status${NC}"
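
For scripted checks beyond opening the URLs above, the monitor's /status endpoint can be consumed programmatically. A small Go sketch that decodes just the health map emitted by statusHandler (the JSON shape follows the monitor code earlier in this diff):

package main

import (
	"encoding/json"
	"fmt"
	"log"
	"net/http"
)

func main() {
	resp, err := http.Get("http://localhost:9999/status")
	if err != nil {
		log.Fatalf("monitor not reachable: %v", err)
	}
	defer resp.Body.Close()

	// Decode only the "health" sub-object; other fields are ignored.
	var status struct {
		Health map[string]bool `json:"health"`
	}
	if err := json.NewDecoder(resp.Body).Decode(&status); err != nil {
		log.Fatalf("decode status: %v", err)
	}
	for component, healthy := range status.Health {
		fmt.Printf("%-16s healthy=%t\n", component, healthy)
	}
}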
@ -0,0 +1,73 @@
#!/bin/bash

set -e

echo "🧪 Testing SeaweedFS Admin-Worker Integration"
echo "============================================="

# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color

cd "$(dirname "$0")"

echo -e "${BLUE}1. Validating docker-compose configuration...${NC}"
if docker-compose -f docker-compose-ec-test.yml config > /dev/null; then
    echo -e "${GREEN}✅ Docker compose configuration is valid${NC}"
else
    echo -e "${RED}❌ Docker compose configuration is invalid${NC}"
    exit 1
fi

echo -e "${BLUE}2. Checking if required ports are available...${NC}"
for port in 9333 8080 8081 8082 8083 8084 8085 8888 23646; do
    if lsof -i :$port > /dev/null 2>&1; then
        echo -e "${YELLOW}⚠️  Port $port is in use${NC}"
    else
        echo -e "${GREEN}✅ Port $port is available${NC}"
    fi
done

echo -e "${BLUE}3. Testing worker command syntax...${NC}"
# Test that the worker command in docker-compose has correct syntax
if docker-compose -f docker-compose-ec-test.yml config | grep -q "workingDir=/work"; then
    echo -e "${GREEN}✅ Worker working directory option is properly configured${NC}"
else
    echo -e "${RED}❌ Worker working directory option is missing${NC}"
    exit 1
fi

echo -e "${BLUE}4. Verifying admin server configuration...${NC}"
if docker-compose -f docker-compose-ec-test.yml config | grep -q "admin:23646"; then
    echo -e "${GREEN}✅ Admin server port configuration is correct${NC}"
else
    echo -e "${RED}❌ Admin server port configuration is incorrect${NC}"
    exit 1
fi

echo -e "${BLUE}5. Checking service dependencies...${NC}"
if docker-compose -f docker-compose-ec-test.yml config | grep -q "depends_on"; then
    echo -e "${GREEN}✅ Service dependencies are configured${NC}"
else
    echo -e "${YELLOW}⚠️  Service dependencies may not be configured${NC}"
fi

echo ""
echo -e "${GREEN}🎉 Integration test configuration is ready!${NC}"
echo ""
echo -e "${BLUE}To start the integration test:${NC}"
echo "  make start   # Start all services"
echo "  make health  # Check service health"
echo "  make logs    # View logs"
echo "  make stop    # Stop all services"
echo ""
echo -e "${BLUE}Key features verified:${NC}"
echo "  ✅ Official SeaweedFS images are used"
echo "  ✅ Worker working directories are configured"
echo "  ✅ Admin-worker communication on correct ports"
echo "  ✅ Task-specific directories will be created"
echo "  ✅ Load generator will trigger EC tasks"
echo "  ✅ Monitor will track progress"
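
The lsof check above only reports what is bound at that instant; an alternative programmatic probe is to try binding each port yourself. A minimal Go sketch of that idea (port list taken from the script; this is an illustration, not part of the test suite):

package main

import (
	"fmt"
	"net"
)

// portFree reports whether we can bind the TCP port right now.
func portFree(port int) bool {
	ln, err := net.Listen("tcp", fmt.Sprintf(":%d", port))
	if err != nil {
		return false // something else already holds the port
	}
	ln.Close()
	return true
}

func main() {
	for _, p := range []int{9333, 8080, 8081, 8082, 8083, 8084, 8085, 8888, 23646} {
		fmt.Printf("port %d free: %t\n", p, portFree(p))
	}
}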
@ -1,230 +0,0 @@
#!/bin/sh

set -e

echo "Starting SeaweedFS EC Worker..."
echo "Worker ID: $WORKER_ID"
echo "Admin Address: $ADMIN_ADDRESS"
echo "Capabilities: $CAPABILITIES"

# Wait for admin server to be ready
echo "Waiting for admin server to be ready..."
until curl -f http://$ADMIN_ADDRESS/health > /dev/null 2>&1; do
    echo "Admin server not ready, waiting..."
    sleep 5
done
echo "Admin server is ready!"

# Create worker simulation
cat > /tmp/worker.go << 'EOF'
package main

import (
	"encoding/json"
	"fmt"
	"log"
	"net/http"
	"os"
	"strings"
	"time"
)

type Worker struct {
	id            string
	adminAddr     string
	address       string
	capabilities  []string
	maxConcurrent int
	workDir       string
	startTime     time.Time
	activeTasks   map[string]*Task
}

type Task struct {
	ID       string    `json:"id"`
	Type     string    `json:"type"`
	VolumeID int       `json:"volume_id"`
	Status   string    `json:"status"`
	Progress float64   `json:"progress"`
	Started  time.Time `json:"started"`
}

func (w *Worker) healthHandler(res http.ResponseWriter, req *http.Request) {
	res.Header().Set("Content-Type", "application/json")
	json.NewEncoder(res).Encode(map[string]interface{}{
		"status":       "healthy",
		"worker_id":    w.id,
		"uptime":       time.Since(w.startTime).String(),
		"active_tasks": len(w.activeTasks),
		"capabilities": w.capabilities,
	})
}

func (w *Worker) statusHandler(res http.ResponseWriter, req *http.Request) {
	res.Header().Set("Content-Type", "application/json")
	json.NewEncoder(res).Encode(map[string]interface{}{
		"worker_id":      w.id,
		"admin_addr":     w.adminAddr,
		"capabilities":   w.capabilities,
		"max_concurrent": w.maxConcurrent,
		"active_tasks":   w.activeTasks,
		"uptime":         time.Since(w.startTime).String(),
	})
}

func (w *Worker) simulateECTask(taskID string, volumeID int) {
	log.Printf("Starting EC task %s for volume %d", taskID, volumeID)

	task := &Task{
		ID:       taskID,
		Type:     "erasure_coding",
		VolumeID: volumeID,
		Status:   "running",
		Progress: 0.0,
		Started:  time.Now(),
	}

	w.activeTasks[taskID] = task

	// Simulate EC process phases
	phases := []struct {
		progress float64
		phase    string
		duration time.Duration
	}{
		{5.0, "Copying volume data locally", 10 * time.Second},
		{25.0, "Marking volume read-only", 2 * time.Second},
		{60.0, "Performing local EC encoding", 30 * time.Second},
		{70.0, "Calculating optimal shard placement", 5 * time.Second},
		{90.0, "Distributing shards to servers", 20 * time.Second},
		{100.0, "Verification and cleanup", 3 * time.Second},
	}

	go func() {
		for _, phase := range phases {
			if task.Status != "running" {
				break
			}

			time.Sleep(phase.duration)
			task.Progress = phase.progress
			log.Printf("Task %s: %.1f%% - %s", taskID, phase.progress, phase.phase)
		}

		if task.Status == "running" {
			task.Status = "completed"
			task.Progress = 100.0
			log.Printf("Task %s completed successfully", taskID)
		}

		// Remove from active tasks after completion
		time.Sleep(5 * time.Second)
		delete(w.activeTasks, taskID)
	}()
}

func (w *Worker) registerWithAdmin() {
	ticker := time.NewTicker(30 * time.Second)
	go func() {
		for {
			// Register/heartbeat with admin server
			log.Printf("Sending heartbeat to admin server...")

			data := map[string]interface{}{
				"worker_id":      w.id,
				"address":        w.address,
				"capabilities":   w.capabilities,
				"max_concurrent": w.maxConcurrent,
				"active_tasks":   len(w.activeTasks),
				"status":         "active",
			}

			jsonData, _ := json.Marshal(data)

			// In a real implementation, this would be a proper gRPC call
			resp, err := http.Post(
				fmt.Sprintf("http://%s/register-worker", w.adminAddr),
				"application/json",
				strings.NewReader(string(jsonData)),
			)
			if err != nil {
				log.Printf("Failed to register with admin: %v", err)
			} else {
				resp.Body.Close()
				log.Printf("Successfully sent heartbeat to admin")
			}

			// Simulate requesting new tasks
			if len(w.activeTasks) < w.maxConcurrent {
				// In a real implementation, the worker would request tasks from the admin.
				// For the simulation, create a task periodically.
				if len(w.activeTasks) == 0 && time.Since(w.startTime) > 1*time.Minute {
					taskID := fmt.Sprintf("%s-task-%d", w.id, time.Now().Unix())
					volumeID := 2000 + int(time.Now().Unix()%1000)
					w.simulateECTask(taskID, volumeID)
				}
			}

			<-ticker.C
		}
	}()
}

func main() {
	workerID := os.Getenv("WORKER_ID")
	if workerID == "" {
		workerID = "worker-1"
	}

	adminAddr := os.Getenv("ADMIN_ADDRESS")
	if adminAddr == "" {
		adminAddr = "admin:9900"
	}

	address := os.Getenv("WORKER_ADDRESS")
	if address == "" {
		address = "worker:9001"
	}

	capabilities := strings.Split(os.Getenv("CAPABILITIES"), ",")
	if len(capabilities) == 0 || capabilities[0] == "" {
		capabilities = []string{"erasure_coding"}
	}

	worker := &Worker{
		id:            workerID,
		adminAddr:     adminAddr,
		address:       address,
		capabilities:  capabilities,
		maxConcurrent: 2,
		workDir:       "/work",
		startTime:     time.Now(),
		activeTasks:   make(map[string]*Task),
	}

	http.HandleFunc("/health", worker.healthHandler)
	http.HandleFunc("/status", worker.statusHandler)

	// Start registration and heartbeat
	worker.registerWithAdmin()

	log.Printf("Worker %s starting on address %s", workerID, address)
	log.Printf("Admin address: %s", adminAddr)
	log.Printf("Capabilities: %v", capabilities)

	port := ":9001"
	if strings.Contains(address, ":") {
		parts := strings.Split(address, ":")
		port = ":" + parts[1]
	}

	if err := http.ListenAndServe(port, nil); err != nil {
		log.Fatal("Worker failed to start:", err)
	}
}
EOF

# Compile and run the worker
cd /tmp
go mod init worker
go run worker.go
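
The simulated worker retries its admin registration at a fixed 30-second tick and just logs failures. When the admin is down for a while, a capped exponential backoff is the usual shape; a minimal self-contained sketch, where registerOnce stands in for the HTTP POST in the script above:

package main

import (
	"errors"
	"fmt"
	"time"
)

// registerWithBackoff retries fn until it succeeds, doubling the delay
// between attempts up to a 30-second cap.
func registerWithBackoff(fn func() error) {
	delay := time.Second
	for {
		if err := fn(); err == nil {
			return
		}
		time.Sleep(delay)
		if delay < 30*time.Second {
			delay *= 2
		}
	}
}

func main() {
	attempts := 0
	registerOnce := func() error {
		attempts++
		if attempts < 3 {
			return errors.New("admin not ready")
		}
		return nil
	}
	registerWithBackoff(registerOnce)
	fmt.Printf("registered after %d attempts\n", attempts)
}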
@ -1,67 +0,0 @@
#!/bin/sh

set -e

echo "Starting SeaweedFS EC Worker (gRPC)..."
echo "Worker ID: $WORKER_ID"
echo "Admin gRPC Address: $ADMIN_GRPC_ADDRESS"

# Wait for admin to be ready
echo "Waiting for admin to be ready..."
until curl -f http://admin:9900/health > /dev/null 2>&1; do
    echo "Admin not ready, waiting..."
    sleep 5
done
echo "Admin is ready!"

# Install protobuf compiler and Go protobuf plugins
apk add --no-cache protobuf protobuf-dev

# Set up Go environment
export GOPATH=/tmp/go
export PATH=$PATH:$GOPATH/bin
mkdir -p $GOPATH/src $GOPATH/bin $GOPATH/pkg

# Install Go protobuf plugins
cd /tmp
go mod init worker-client

# Create a basic go.mod with required dependencies
cat > go.mod << 'EOF'
module worker-client

go 1.24

require (
    google.golang.org/grpc v1.65.0
    google.golang.org/protobuf v1.34.2
)
EOF

go mod tidy
go install google.golang.org/protobuf/cmd/protoc-gen-go@latest
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@latest

# Add Go bin to PATH
export PATH=$PATH:$(go env GOPATH)/bin

# Create directory structure for protobuf
mkdir -p worker_pb

# Copy the worker client source and existing worker protobuf file
cp /worker_grpc_client.go .
cp /worker.proto .

# Generate Go code from the existing worker protobuf
echo "Generating gRPC code from worker.proto..."
protoc --go_out=. --go_opt=paths=source_relative \
       --go-grpc_out=. --go-grpc_opt=paths=source_relative \
       worker.proto

# Build and run the worker
echo "Building worker..."
go mod tidy
go build -o worker-client worker_grpc_client.go

echo "Starting worker..."
exec ./worker-client
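
Once protoc has produced the Go stubs, the client side boils down to dialing the admin's gRPC address and calling the generated methods. The sketch below is only illustrative: NewWorkerServiceClient and HeartbeatRequest are hypothetical placeholder names, since the actual service and message names are whatever worker.proto defines, and the import path depends on the generated package.

package main

import (
	"context"
	"log"
	"os"
	"time"

	"google.golang.org/grpc"
	"google.golang.org/grpc/credentials/insecure"

	// Placeholder import: the real path and package come from the protoc output.
	pb "worker-client/worker_pb"
)

func main() {
	target := os.Getenv("ADMIN_GRPC_ADDRESS")

	// Plaintext gRPC inside the compose network; TLS is out of scope here.
	conn, err := grpc.Dial(target, grpc.WithTransportCredentials(insecure.NewCredentials()))
	if err != nil {
		log.Fatalf("dial admin: %v", err)
	}
	defer conn.Close()

	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()

	// Hypothetical client and message names; substitute whatever worker.proto defines.
	client := pb.NewWorkerServiceClient(conn)
	if _, err := client.Heartbeat(ctx, &pb.HeartbeatRequest{WorkerId: os.Getenv("WORKER_ID")}); err != nil {
		log.Printf("heartbeat failed: %v", err)
	}
}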
@ -1,67 +0,0 @@
package main

import (
	"flag"
	"fmt"
	"os"
	"os/signal"
	"syscall"
	"time"

	"github.com/seaweedfs/seaweedfs/weed/glog"
	"github.com/seaweedfs/seaweedfs/weed/worker"
)

var (
	workerID  = flag.String("worker.id", "", "Worker ID (required)")
	adminAddr = flag.String("admin.address", "localhost:9090", "Admin server address")
	grpcAddr  = flag.String("grpc.address", "localhost:18000", "Worker gRPC address")
	logLevel  = flag.Int("log.level", 1, "Log level (0-4)")
)

func main() {
	flag.Parse()

	// Validate required flags
	if *workerID == "" {
		fmt.Fprintf(os.Stderr, "Error: worker.id is required\n")
		flag.Usage()
		os.Exit(1)
	}

	// Set log level
	flag.Set("v", fmt.Sprintf("%d", *logLevel))

	glog.Infof("Starting SeaweedFS EC Worker")
	glog.Infof("Worker ID: %s", *workerID)
	glog.Infof("Admin Address: %s", *adminAddr)
	glog.Infof("gRPC Address: %s", *grpcAddr)

	// Create worker
	ecWorker := worker.NewECWorker(*workerID, *adminAddr, *grpcAddr)

	// Start worker
	err := ecWorker.Start()
	if err != nil {
		glog.Fatalf("Failed to start worker: %v", err)
	}

	// Wait for shutdown signal
	waitForShutdown(ecWorker)

	glog.Infof("Worker %s shutdown complete", *workerID)
}

// waitForShutdown waits for shutdown signal and gracefully stops the worker
func waitForShutdown(worker *worker.ECWorker) {
	sigCh := make(chan os.Signal, 1)
	signal.Notify(sigCh, syscall.SIGINT, syscall.SIGTERM)

	<-sigCh
	glog.Infof("Shutdown signal received, stopping worker...")

	worker.Stop()

	// Give a moment for cleanup
	time.Sleep(2 * time.Second)
}
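
The channel-based signal handling above is the classic pattern; since Go 1.16 the same behavior can be expressed with signal.NotifyContext, which additionally propagates cancellation to any context-aware work. A minimal equivalent sketch (the worker start/stop calls are elided placeholders):

package main

import (
	"context"
	"log"
	"os/signal"
	"syscall"
	"time"
)

func main() {
	// ctx is canceled on SIGINT or SIGTERM; stop releases the signal hook.
	ctx, stop := signal.NotifyContext(context.Background(), syscall.SIGINT, syscall.SIGTERM)
	defer stop()

	// ... start the worker here ...

	<-ctx.Done()
	log.Println("shutdown signal received, stopping worker...")

	// ... worker.Stop() ...
	time.Sleep(2 * time.Second) // give cleanup a moment, mirroring the code above
}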