You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 
 

465 lines
12 KiB

package main
import (
"bytes"
"context"
"encoding/json"
"flag"
"fmt"
"io"
"log"
"net/http"
"os"
"os/signal"
"strings"
"sync"
"syscall"
"time"
"github.com/prometheus/client_golang/prometheus/promhttp"
"github.com/seaweedfs/seaweedfs/test/kafka/kafka-client-loadtest/internal/config"
"github.com/seaweedfs/seaweedfs/test/kafka/kafka-client-loadtest/internal/consumer"
"github.com/seaweedfs/seaweedfs/test/kafka/kafka-client-loadtest/internal/metrics"
"github.com/seaweedfs/seaweedfs/test/kafka/kafka-client-loadtest/internal/producer"
"github.com/seaweedfs/seaweedfs/test/kafka/kafka-client-loadtest/internal/schema"
)
// Command-line flags. They are registered at package initialization and read
// in main after flag.Parse has run.
var (
// configFile is the path of the configuration file passed to config.Load.
configFile = flag.String("config", "/config/loadtest.yaml", "Path to configuration file")
// testMode is handed to cfg.ApplyOverrides; it defaults to the empty string.
testMode = flag.String("mode", "", "Test mode override (producer|consumer|comprehensive)")
// duration is handed to cfg.ApplyOverrides; it defaults to 0.
duration = flag.Duration("duration", 0, "Test duration override")
// help makes main print the usage text and return without running a test.
help = flag.Bool("help", false, "Show help")
)
// main is the entry point of the load-test client. It parses the flags, loads
// the configuration, starts the metrics HTTP server on :8080, optionally
// registers schemas, starts the workers for the configured test mode and then
// waits until either all workers finish or SIGINT/SIGTERM is received.
//
// The process exits with a non-zero status when the configuration cannot be
// loaded, schema registration fails, the test mode is unknown, or the test
// start-up reports an error.
func main() {
	flag.Parse()
	if *help {
		printHelp()
		return
	}

	// Load configuration
	cfg, err := config.Load(*configFile)
	if err != nil {
		log.Fatalf("Failed to load configuration: %v", err)
	}

	// Apply the -mode and -duration overrides on top of the loaded configuration.
	cfg.ApplyOverrides(*testMode, *duration)

	// Initialize metrics
	metricsCollector := metrics.NewCollector()

	// Start metrics HTTP server. Handlers are registered on the default mux,
	// which the server below uses because its Handler field is left nil.
	go func() {
		http.Handle("/metrics", promhttp.Handler())
		http.HandleFunc("/health", healthCheck)
		http.HandleFunc("/stats", func(w http.ResponseWriter, r *http.Request) {
			metricsCollector.WriteStats(w)
		})

		log.Printf("Starting metrics server on :8080")
		srv := &http.Server{
			Addr: ":8080",
			// Bound the time a client may take to send request headers so
			// that stalled connections cannot accumulate indefinitely.
			ReadHeaderTimeout: 10 * time.Second,
		}
		if err := srv.ListenAndServe(); err != nil {
			log.Printf("Metrics server error: %v", err)
		}
	}()

	// Set up signal handling
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	sigCh := make(chan os.Signal, 1)
	signal.Notify(sigCh, syscall.SIGINT, syscall.SIGTERM)

	log.Printf("Starting Kafka Client Load Test")
	log.Printf("Mode: %s, Duration: %v", cfg.TestMode, cfg.Duration)
	log.Printf("Kafka Brokers: %v", cfg.Kafka.BootstrapServers)
	log.Printf("Schema Registry: %s", cfg.SchemaRegistry.URL)
	log.Printf("Schemas Enabled: %v", cfg.Schemas.Enabled)

	// Register schemas if enabled
	if cfg.Schemas.Enabled {
		log.Printf("Registering schemas with Schema Registry...")
		if err := registerSchemas(cfg); err != nil {
			log.Fatalf("Failed to register schemas: %v", err)
		}
		log.Printf("Schemas registered successfully")
	}

	var wg sync.WaitGroup

	// Start test based on mode
	var testErr error
	switch cfg.TestMode {
	case "producer":
		testErr = runProducerTest(ctx, cfg, metricsCollector, &wg)
	case "consumer":
		testErr = runConsumerTest(ctx, cfg, metricsCollector, &wg)
	case "comprehensive":
		testErr = runComprehensiveTest(ctx, cancel, cfg, metricsCollector, &wg)
	default:
		log.Fatalf("Unknown test mode: %s", cfg.TestMode)
	}

	// If test returned an error (e.g., circuit breaker), exit with a failure
	// status, consistent with the other fatal paths above.
	if testErr != nil {
		log.Printf("Test failed with error: %v", testErr)
		// os.Exit does not run deferred functions, so cancel explicitly to
		// signal any remaining goroutines before terminating.
		cancel()
		os.Exit(1)
	}

	// Wait for completion or signal
	done := make(chan struct{})
	go func() {
		wg.Wait()
		close(done)
	}()

	select {
	case <-sigCh:
		log.Printf("Received shutdown signal, stopping tests...")
		cancel()

		// Wait for graceful shutdown with timeout
		shutdownCtx, shutdownCancel := context.WithTimeout(context.Background(), 30*time.Second)
		defer shutdownCancel()

		select {
		case <-done:
			log.Printf("All tests completed gracefully")
		case <-shutdownCtx.Done():
			log.Printf("Shutdown timeout, forcing exit")
		}
	case <-done:
		log.Printf("All tests completed")
	}

	// Print final statistics
	log.Printf("Final Test Statistics:")
	metricsCollector.PrintSummary()
}
// runProducerTest launches cfg.Producers.Count producer goroutines, each
// tracked by wg, and returns without waiting for them to finish.
//
// Every goroutine creates its own producer and runs it with ctx. A creation
// or run failure is logged and pushed onto a buffered error channel. After
// launching, the function polls that channel exactly once without blocking:
// an error already queued at that moment is returned, otherwise nil is
// returned. Failures that happen later are only logged by the goroutine.
func runProducerTest(ctx context.Context, cfg *config.Config, collector *metrics.Collector, wg *sync.WaitGroup) error {
	total := cfg.Producers.Count
	log.Printf("Starting producer-only test with %d producers", total)

	// One slot per producer: each goroutine sends at most one error, so a
	// send never blocks.
	failures := make(chan error, total)

	for n := 0; n < total; n++ {
		wg.Add(1)
		go func(producerID int) {
			defer wg.Done()

			p, createErr := producer.New(cfg, collector, producerID)
			if createErr != nil {
				log.Printf("Failed to create producer %d: %v", producerID, createErr)
				failures <- createErr
				return
			}
			defer p.Close()

			runErr := p.Run(ctx)
			if runErr == nil {
				return
			}
			log.Printf("Producer %d failed: %v", producerID, runErr)
			failures <- runErr
		}(n)
	}

	// Single non-blocking poll for an error that has already been reported.
	select {
	case firstErr := <-failures:
		log.Printf("Producer test failed: %v", firstErr)
		return firstErr
	default:
	}
	return nil
}
// runConsumerTest launches cfg.Consumers.Count consumer goroutines, each
// tracked by wg, and returns immediately.
//
// Every goroutine creates its own consumer, runs it with ctx and closes it
// when Run returns. A consumer that cannot be created is logged and skipped.
// The function always returns nil: consumer failures are reported through
// the log only, so no error channel is kept.
func runConsumerTest(ctx context.Context, cfg *config.Config, collector *metrics.Collector, wg *sync.WaitGroup) error {
	log.Printf("Starting consumer-only test with %d consumers", cfg.Consumers.Count)

	for i := 0; i < cfg.Consumers.Count; i++ {
		wg.Add(1)
		go func(id int) {
			defer wg.Done()

			cons, err := consumer.New(cfg, collector, id)
			if err != nil {
				log.Printf("Failed to create consumer %d: %v", id, err)
				return
			}
			defer cons.Close()

			cons.Run(ctx)
		}(i)
	}

	// Consumers don't typically return errors in the same way, so just return nil
	return nil
}
// runComprehensiveTest starts cfg.Producers.Count producers and, two seconds
// later, cfg.Consumers.Count consumers. All goroutines are tracked by wg and
// the function returns without waiting for them.
//
// Producers and consumers run under separate child contexts of ctx so that
// producers can be stopped first. If a producer error is already queued when
// the consumers have been started, both groups are cancelled and that error
// is returned; otherwise nil is returned and a background goroutine manages
// shutdown:
//
//   - cfg.Duration > 0: after the duration, producers are cancelled, the
//     consumers get a drain period of max(60s, cfg.Duration), and then the
//     consumers and the parent context (via cancel) are cancelled. If ctx is
//     cancelled at any point before that, both groups are cancelled at once.
//   - otherwise: both groups are cancelled when ctx is cancelled.
func runComprehensiveTest(ctx context.Context, cancel context.CancelFunc, cfg *config.Config, collector *metrics.Collector, wg *sync.WaitGroup) error {
	log.Printf("Starting comprehensive test with %d producers and %d consumers",
		cfg.Producers.Count, cfg.Consumers.Count)

	// Each producer sends at most one error, so sends never block.
	errChan := make(chan error, cfg.Producers.Count)

	// Create separate contexts for producers and consumers
	producerCtx, producerCancel := context.WithCancel(ctx)
	consumerCtx, consumerCancel := context.WithCancel(ctx)

	// Start producers
	for i := 0; i < cfg.Producers.Count; i++ {
		wg.Add(1)
		go func(id int) {
			defer wg.Done()

			prod, err := producer.New(cfg, collector, id)
			if err != nil {
				log.Printf("Failed to create producer %d: %v", id, err)
				errChan <- err
				return
			}
			defer prod.Close()

			if err := prod.Run(producerCtx); err != nil {
				log.Printf("Producer %d failed: %v", id, err)
				errChan <- err
				return
			}
		}(i)
	}

	// Wait briefly for producers to start producing messages
	// Reduced from 5s to 2s to minimize message backlog
	time.Sleep(2 * time.Second)

	// Start consumers
	for i := 0; i < cfg.Consumers.Count; i++ {
		wg.Add(1)
		go func(id int) {
			defer wg.Done()

			cons, err := consumer.New(cfg, collector, id)
			if err != nil {
				log.Printf("Failed to create consumer %d: %v", id, err)
				return
			}
			defer cons.Close()

			cons.Run(consumerCtx)
		}(i)
	}

	// Check (without blocking) for a producer error reported so far.
	select {
	case err := <-errChan:
		log.Printf("Comprehensive test failed due to producer error: %v", err)
		producerCancel()
		consumerCancel()
		return err
	default:
		// No immediate error, continue
	}

	if cfg.Duration <= 0 {
		// No duration set, wait for cancellation and ensure cleanup
		go func() {
			<-ctx.Done()
			producerCancel()
			consumerCancel()
		}()
		return nil
	}

	// Duration is set: stop producers first, then allow consumers extra time to drain.
	go func() {
		runTimer := time.NewTimer(cfg.Duration)
		defer runTimer.Stop()

		select {
		case <-runTimer.C:
		case <-ctx.Done():
			// Context already cancelled
			producerCancel()
			consumerCancel()
			return
		}

		log.Printf("Test duration (%v) reached, stopping producers", cfg.Duration)
		producerCancel()

		// Allow consumers extra time to drain remaining messages
		// Calculate drain time based on test duration (minimum 60s, up to test duration)
		drainTime := 60 * time.Second
		if cfg.Duration > drainTime {
			drainTime = cfg.Duration // Match test duration for longer tests
		}
		log.Printf("Allowing %v for consumers to drain remaining messages...", drainTime)

		// Wait on a timer instead of sleeping so that a shutdown requested
		// during the drain period ends this goroutine promptly.
		drainTimer := time.NewTimer(drainTime)
		defer drainTimer.Stop()

		select {
		case <-drainTimer.C:
			log.Printf("Stopping consumers after drain period")
			consumerCancel()
			cancel()
		case <-ctx.Done():
			// Parent context cancelled while draining; consumerCtx is already
			// done as a child of ctx, this only releases its resources.
			consumerCancel()
		}
	}()

	return nil
}
// healthCheck is the /health handler: it answers every request with
// status 200 and the body "OK". The request itself is not inspected.
func healthCheck(w http.ResponseWriter, _ *http.Request) {
	const healthyBody = "OK"

	w.WriteHeader(http.StatusOK)
	fmt.Fprint(w, healthyBody)
}
// printHelp writes the usage text to standard output. The program name
// (os.Args[0]) is substituted into the usage line and into the example.
func printHelp() {
	const usageFormat = `Kafka Client Load Test for SeaweedFS
Usage: %s [options]
Options:
-config string
Path to configuration file (default "/config/loadtest.yaml")
-mode string
Test mode override (producer|consumer|comprehensive)
-duration duration
Test duration override
-help
Show this help message
Environment Variables:
KAFKA_BOOTSTRAP_SERVERS Comma-separated list of Kafka brokers
SCHEMA_REGISTRY_URL URL of the Schema Registry
TEST_DURATION Test duration (e.g., "5m", "300s")
TEST_MODE Test mode (producer|consumer|comprehensive)
PRODUCER_COUNT Number of producer instances
CONSUMER_COUNT Number of consumer instances
MESSAGE_RATE Messages per second per producer
MESSAGE_SIZE Message size in bytes
TOPIC_COUNT Number of topics to create
PARTITIONS_PER_TOPIC Number of partitions per topic
VALUE_TYPE Message value type (json/avro/binary)
Test Modes:
producer - Run only producers (generate load)
consumer - Run only consumers (consume existing messages)
comprehensive - Run both producers and consumers simultaneously
Example:
%s -config ./config/loadtest.yaml -mode comprehensive -duration 10m
`

	program := os.Args[0]
	fmt.Fprintf(os.Stdout, usageFormat, program, program)
}
// registerSchemas waits until the Schema Registry at cfg.SchemaRegistry.URL
// responds and then registers a schema for every topic returned by
// cfg.GetTopicNames.
//
// Topics are spread round-robin over AVRO, JSON and PROTOBUF by their index,
// unless cfg.Producers.SchemaFormat is non-empty, in which case that format
// is used for every topic. Registration stops at the first failure and the
// wrapped error names the topic and format involved.
func registerSchemas(cfg *config.Config) error {
	registryURL := cfg.SchemaRegistry.URL

	// Wait for Schema Registry to be ready
	if err := waitForSchemaRegistry(registryURL); err != nil {
		return fmt.Errorf("schema registry not ready: %w", err)
	}

	// Index 0: AVRO, index 1: JSON, index 2: PROTOBUF, then the cycle repeats.
	rotation := [...]string{"AVRO", "JSON", "PROTOBUF"}

	for idx, topic := range cfg.GetTopicNames() {
		format := rotation[idx%len(rotation)]

		// A format configured for the producers overrides the rotation.
		if configured := cfg.Producers.SchemaFormat; configured != "" {
			format = configured
		}

		err := registerTopicSchema(registryURL, topic, format)
		if err != nil {
			return fmt.Errorf("failed to register schema for topic %s (format: %s): %w", topic, format, err)
		}
		log.Printf("Schema registered for topic %s with format: %s", topic, format)
	}

	return nil
}
// waitForSchemaRegistry polls GET <url>/subjects until it answers with
// HTTP 200, making up to 30 attempts with a 2 second pause between attempts.
//
// Each request is bounded by a client timeout so that a registry which
// accepts the connection but never replies cannot block start-up forever.
// It returns nil as soon as one attempt succeeds and an error once all
// attempts have failed.
func waitForSchemaRegistry(url string) error {
	const (
		maxRetries     = 30
		retryInterval  = 2 * time.Second
		requestTimeout = 10 * time.Second
	)

	client := &http.Client{Timeout: requestTimeout}

	for i := 0; i < maxRetries; i++ {
		// Pause only between attempts; there is nothing to wait for after
		// the final one.
		if i > 0 {
			time.Sleep(retryInterval)
		}

		resp, err := client.Get(url + "/subjects")
		if err != nil {
			continue
		}
		resp.Body.Close()

		if resp.StatusCode == http.StatusOK {
			return nil
		}
	}

	return fmt.Errorf("schema registry not ready after %d retries", maxRetries)
}
// registerTopicSchema registers the value schema for topicName by POSTing to
// <registryURL>/subjects/<topicName>-value/versions.
//
// schemaFormat is matched case-insensitively: "AVRO" (also used when the
// argument is empty), "JSON" or "JSON_SCHEMA", and "PROTOBUF" are accepted;
// any other value yields an error before a request is made. The request uses
// a 10 second client timeout, and any response status other than 200 is
// returned as an error that includes the response body.
func registerTopicSchema(registryURL, topicName, schemaFormat string) error {
	// Request payload; field order matches the sorted key order of the
	// equivalent map, so the encoded JSON is the same.
	var payload struct {
		Schema     string `json:"schema"`
		SchemaType string `json:"schemaType"`
	}

	switch strings.ToUpper(schemaFormat) {
	case "", "AVRO": // an empty format defaults to AVRO
		payload.Schema = schema.GetAvroSchema()
		payload.SchemaType = "AVRO"
	case "JSON", "JSON_SCHEMA":
		payload.Schema = schema.GetJSONSchema()
		payload.SchemaType = "JSON"
	case "PROTOBUF":
		payload.Schema = schema.GetProtobufSchema()
		payload.SchemaType = "PROTOBUF"
	default:
		return fmt.Errorf("unsupported schema format: %s", schemaFormat)
	}

	encoded, err := json.Marshal(payload)
	if err != nil {
		return err
	}

	// Register schema for topic value
	endpoint := fmt.Sprintf("%s/subjects/%s/versions", registryURL, topicName+"-value")

	httpClient := &http.Client{Timeout: 10 * time.Second}
	resp, err := httpClient.Post(endpoint, "application/vnd.schemaregistry.v1+json", bytes.NewReader(encoded))
	if err != nil {
		return err
	}
	defer resp.Body.Close()

	if resp.StatusCode == http.StatusOK {
		log.Printf("Schema registered for topic %s (format: %s)", topicName, payload.SchemaType)
		return nil
	}

	// A failure to read the body is ignored; the status code alone is still reported.
	respBody, _ := io.ReadAll(resp.Body)
	return fmt.Errorf("schema registration failed: status=%d, body=%s", resp.StatusCode, string(respBody))
}