package simulation

import (
	"context"
	"fmt"
	"math/rand"
	"sync"
	"time"

	"github.com/seaweedfs/seaweedfs/weed/admin/task"
	"github.com/seaweedfs/seaweedfs/weed/glog"
	"github.com/seaweedfs/seaweedfs/weed/worker/types"
)

// ComprehensiveSimulator tests edge cases in volume/shard state management.
type ComprehensiveSimulator struct {
	stateManager    *task.VolumeStateManager
	mockMaster      *MockMasterServer
	mockWorkers     []*MockWorker
	scenarios       []*StateTestScenario
	currentScenario *StateTestScenario
	results         *SimulationResults
	eventLog        []*SimulationEvent
	mutex           sync.RWMutex
}

// StateTestScenario represents a specific state management test case.
type StateTestScenario struct {
	Name                string
	Description         string
	InitialState        *ClusterState
	EventSequence       []*SimulationEvent
	ExpectedFinalState  *ClusterState
	InconsistencyChecks []*InconsistencyCheck
	Duration            time.Duration
}

// ClusterState represents the complete state of the cluster.
type ClusterState struct {
	Volumes         map[uint32]*task.VolumeInfo
	ECShards        map[uint32]map[int]*task.ShardInfo
	ServerCapacity  map[string]*task.CapacityInfo
	InProgressTasks map[string]*task.TaskImpact
	Timestamp       time.Time
}

// SimulationEvent represents an event that can occur during simulation.
type SimulationEvent struct {
	Type        EventType
	Timestamp   time.Time
	VolumeID    uint32
	ShardID     *int
	Server      string
	TaskID      string
	Parameters  map[string]interface{}
	Description string
}

// EventType identifies the different kinds of simulation events.
type EventType string

const (
	// Volume events
	EventVolumeCreated     EventType = "volume_created"
	EventVolumeDeleted     EventType = "volume_deleted"
	EventVolumeSizeChanged EventType = "volume_size_changed"
	EventVolumeReadOnly    EventType = "volume_readonly"

	// Shard events
	EventShardCreated   EventType = "shard_created"
	EventShardDeleted   EventType = "shard_deleted"
	EventShardMoved     EventType = "shard_moved"
	EventShardCorrupted EventType = "shard_corrupted"

	// Task events
	EventTaskStarted   EventType = "task_started"
	EventTaskCompleted EventType = "task_completed"
	EventTaskFailed    EventType = "task_failed"
	EventTaskStuck     EventType = "task_stuck"
	EventTaskCancelled EventType = "task_cancelled"

	// Worker events
	EventWorkerJoined    EventType = "worker_joined"
	EventWorkerLeft      EventType = "worker_left"
	EventWorkerTimeout   EventType = "worker_timeout"
	EventWorkerRestarted EventType = "worker_restarted"

	// Master events
	EventMasterSync         EventType = "master_sync"
	EventMasterInconsistent EventType = "master_inconsistent"
	EventMasterPartitioned  EventType = "master_partitioned"
	EventMasterReconnected  EventType = "master_reconnected"

	// Network events
	EventNetworkPartition EventType = "network_partition"
	EventNetworkHealed    EventType = "network_healed"
	EventMessageDelayed   EventType = "message_delayed"
	EventMessageLost      EventType = "message_lost"
)

// InconsistencyCheck defines which inconsistencies to check for after a scenario runs.
type InconsistencyCheck struct {
	Name              string
	Type              task.InconsistencyType
	ExpectedCount     int
	MaxAllowedCount   int
	SeverityThreshold task.SeverityLevel
}

// MockMasterServer simulates master server behavior with controllable inconsistencies.
type MockMasterServer struct {
	volumes            map[uint32]*task.VolumeInfo
	ecShards           map[uint32]map[int]*task.ShardInfo
	serverCapacity     map[string]*task.CapacityInfo
	inconsistencyMode  bool
	networkPartitioned bool
	responseDelay      time.Duration
	mutex              sync.RWMutex
}

// MockWorker represents a mock worker for testing.
type MockWorker struct {
	ID           string
	Capabilities []types.TaskType
	IsActive     bool
	TaskDelay    time.Duration
	FailureRate  float64
}

// SimulationResults tracks comprehensive simulation results.
type SimulationResults struct {
	ScenarioName           string
	StartTime              time.Time
	EndTime                time.Time
	Duration               time.Duration
	TotalEvents            int
	EventsByType           map[EventType]int
	InconsistenciesFound   map[task.InconsistencyType]int
	TasksExecuted          int
	TasksSucceeded         int
	TasksFailed            int
	StateValidationsPassed int
	StateValidationsFailed int
	CriticalErrors         []string
	Warnings               []string
	DetailedLog            []string
	Success                bool
}

// NewComprehensiveSimulator creates a new comprehensive simulator.
func NewComprehensiveSimulator() *ComprehensiveSimulator {
	return &ComprehensiveSimulator{
		stateManager: task.NewVolumeStateManager(nil),
		mockMaster:   NewMockMasterServer(),
		scenarios:    []*StateTestScenario{},
		eventLog:     []*SimulationEvent{},
		results: &SimulationResults{
			EventsByType:         make(map[EventType]int),
			InconsistenciesFound: make(map[task.InconsistencyType]int),
			CriticalErrors:       []string{},
			Warnings:             []string{},
			DetailedLog:          []string{},
		},
	}
}
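
// runComprehensiveSimulationExample is a minimal driver sketch (not part of
// the simulator API): it wires the constructor above to the scenario
// generation and run loop defined below, then logs the aggregate outcome.
func runComprehensiveSimulationExample() {
	sim := NewComprehensiveSimulator()
	sim.CreateComprehensiveScenarios()

	results, err := sim.RunAllComprehensiveScenarios()
	if err != nil {
		glog.Errorf("simulation aborted: %v", err)
		return
	}
	glog.Infof("executed %d events across %d tasks, success=%v",
		results.TotalEvents, results.TasksExecuted, results.Success)
}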

// CreateComprehensiveScenarios builds the full set of edge case scenarios.
func (cs *ComprehensiveSimulator) CreateComprehensiveScenarios() {
	cs.scenarios = []*StateTestScenario{
		cs.createVolumeCreationDuringTaskScenario(),
		cs.createVolumeDeletionDuringTaskScenario(),
		cs.createShardCreationRaceConditionScenario(),
		cs.createMasterSyncDuringTaskScenario(),
		cs.createNetworkPartitionScenario(),
		cs.createWorkerFailureDuringECScenario(),
		cs.createConcurrentTasksScenario(),
		cs.createCapacityOverflowScenario(),
		cs.createShardCorruptionScenario(),
		cs.createMasterInconsistencyScenario(),
		cs.createTaskOrphanScenario(),
		cs.createDuplicateTaskDetectionScenario(),
		cs.createVolumeStateRollbackScenario(),
		cs.createComplexECOperationScenario(),
		cs.createHighLoadStressTestScenario(),
	}

	glog.Infof("Created %d comprehensive test scenarios", len(cs.scenarios))
}

// RunAllComprehensiveScenarios runs every registered scenario in sequence.
func (cs *ComprehensiveSimulator) RunAllComprehensiveScenarios() (*SimulationResults, error) {
	glog.Infof("Starting comprehensive state management simulation")

	cs.results.StartTime = time.Now()

	for _, scenario := range cs.scenarios {
		glog.Infof("Running scenario: %s", scenario.Name)

		if err := cs.RunScenario(scenario); err != nil {
			cs.results.CriticalErrors = append(cs.results.CriticalErrors,
				fmt.Sprintf("Scenario %s failed: %v", scenario.Name, err))
		}

		// Brief pause between scenarios
		time.Sleep(1 * time.Second)
	}

	cs.results.EndTime = time.Now()
	cs.results.Duration = cs.results.EndTime.Sub(cs.results.StartTime)
	cs.results.Success = len(cs.results.CriticalErrors) == 0

	cs.generateDetailedReport()

	glog.Infof("Comprehensive simulation completed: success=%v", cs.results.Success)
	return cs.results, nil
}

// Scenario creation methods

func (cs *ComprehensiveSimulator) createVolumeCreationDuringTaskScenario() *StateTestScenario {
	return &StateTestScenario{
		Name:        "volume_creation_during_task",
		Description: "Tests state consistency when master reports new volume while task is creating it",
		InitialState: &ClusterState{
			Volumes:  make(map[uint32]*task.VolumeInfo),
			ECShards: make(map[uint32]map[int]*task.ShardInfo),
		},
		EventSequence: []*SimulationEvent{
			{Type: EventTaskStarted, VolumeID: 1, TaskID: "create_task_1", Parameters: map[string]interface{}{"type": "create"}},
			{Type: EventVolumeCreated, VolumeID: 1, Parameters: map[string]interface{}{"size": int64(1024 * 1024 * 1024)}},
			{Type: EventMasterSync},
			{Type: EventTaskCompleted, TaskID: "create_task_1"},
		},
		ExpectedFinalState: &ClusterState{
			Volumes: map[uint32]*task.VolumeInfo{
				1: {ID: 1, Size: 1024 * 1024 * 1024},
			},
		},
		InconsistencyChecks: []*InconsistencyCheck{
			{Name: "No unexpected volumes", Type: task.InconsistencyVolumeUnexpected, MaxAllowedCount: 0},
		},
		Duration: 30 * time.Second,
	}
}

func (cs *ComprehensiveSimulator) createVolumeDeletionDuringTaskScenario() *StateTestScenario {
	return &StateTestScenario{
		Name:        "volume_deletion_during_task",
		Description: "Tests handling when volume is deleted while task is working on it",
		InitialState: &ClusterState{
			Volumes: map[uint32]*task.VolumeInfo{
				1: {ID: 1, Size: 1024 * 1024 * 1024},
			},
		},
		EventSequence: []*SimulationEvent{
			{Type: EventTaskStarted, VolumeID: 1, TaskID: "vacuum_task_1", Parameters: map[string]interface{}{"type": "vacuum"}},
			{Type: EventVolumeDeleted, VolumeID: 1},
			{Type: EventMasterSync},
			{Type: EventTaskFailed, TaskID: "vacuum_task_1", Parameters: map[string]interface{}{"reason": "volume_deleted"}},
		},
		InconsistencyChecks: []*InconsistencyCheck{
			{Name: "Missing volume detected", Type: task.InconsistencyVolumeMissing, ExpectedCount: 1},
		},
		Duration: 30 * time.Second,
	}
}

func (cs *ComprehensiveSimulator) createShardCreationRaceConditionScenario() *StateTestScenario {
	return &StateTestScenario{
		Name:        "shard_creation_race_condition",
		Description: "Tests race condition between EC task creating shards and master sync",
		InitialState: &ClusterState{
			Volumes: map[uint32]*task.VolumeInfo{
				1: {ID: 1, Size: 28 * 1024 * 1024 * 1024}, // Large volume ready for EC
			},
		},
		EventSequence: []*SimulationEvent{
			{Type: EventTaskStarted, VolumeID: 1, TaskID: "ec_task_1", Parameters: map[string]interface{}{"type": "ec_encode"}},
			// Simulate shards being created one by one
			{Type: EventShardCreated, VolumeID: 1, ShardID: intPtr(0), Server: "server1"},
			{Type: EventShardCreated, VolumeID: 1, ShardID: intPtr(1), Server: "server1"},
			{Type: EventMasterSync}, // Master sync happens while shards are being created
			{Type: EventShardCreated, VolumeID: 1, ShardID: intPtr(2), Server: "server2"},
			{Type: EventShardCreated, VolumeID: 1, ShardID: intPtr(3), Server: "server2"},
			{Type: EventTaskCompleted, TaskID: "ec_task_1"},
			{Type: EventMasterSync},
		},
		InconsistencyChecks: []*InconsistencyCheck{
			{Name: "All shards accounted for", Type: task.InconsistencyShardMissing, MaxAllowedCount: 0},
		},
		Duration: 45 * time.Second,
	}
}

func (cs *ComprehensiveSimulator) createNetworkPartitionScenario() *StateTestScenario {
	return &StateTestScenario{
		Name:        "network_partition_recovery",
		Description: "Tests state consistency during and after network partitions",
		EventSequence: []*SimulationEvent{
			{Type: EventTaskStarted, VolumeID: 1, TaskID: "partition_task_1"},
			{Type: EventNetworkPartition, Parameters: map[string]interface{}{"duration": "30s"}},
			{Type: EventVolumeCreated, VolumeID: 2}, // Created during partition
			{Type: EventNetworkHealed},
			{Type: EventMasterReconnected},
			{Type: EventMasterSync},
			{Type: EventTaskCompleted, TaskID: "partition_task_1"},
		},
		InconsistencyChecks: []*InconsistencyCheck{
			{Name: "State reconciled after partition", Type: task.InconsistencyVolumeUnexpected, MaxAllowedCount: 1},
		},
		Duration: 60 * time.Second,
	}
}

func (cs *ComprehensiveSimulator) createConcurrentTasksScenario() *StateTestScenario {
	return &StateTestScenario{
		Name:        "concurrent_tasks_capacity_tracking",
		Description: "Tests capacity tracking with multiple concurrent tasks",
		EventSequence: []*SimulationEvent{
			{Type: EventTaskStarted, VolumeID: 1, TaskID: "ec_task_1"},
			{Type: EventTaskStarted, VolumeID: 2, TaskID: "vacuum_task_1"},
			{Type: EventTaskStarted, VolumeID: 3, TaskID: "ec_task_2"},
			{Type: EventMasterSync},
			{Type: EventTaskCompleted, TaskID: "vacuum_task_1"},
			{Type: EventTaskCompleted, TaskID: "ec_task_1"},
			{Type: EventTaskCompleted, TaskID: "ec_task_2"},
			{Type: EventMasterSync},
		},
		InconsistencyChecks: []*InconsistencyCheck{
			{Name: "Capacity tracking accurate", Type: task.InconsistencyCapacityMismatch, MaxAllowedCount: 0},
		},
		Duration: 90 * time.Second,
	}
}

func (cs *ComprehensiveSimulator) createComplexECOperationScenario() *StateTestScenario {
	return &StateTestScenario{
		Name:        "complex_ec_operation",
		Description: "Tests complex EC operations with shard movements and rebuilds",
		EventSequence: []*SimulationEvent{
			{Type: EventTaskStarted, VolumeID: 1, TaskID: "ec_encode_1"},
			// Create all 14 shards
			{Type: EventShardCreated, VolumeID: 1, ShardID: intPtr(0), Server: "server1"},
			{Type: EventShardCreated, VolumeID: 1, ShardID: intPtr(1), Server: "server1"},
			// ... more shards
			{Type: EventTaskCompleted, TaskID: "ec_encode_1"},
			{Type: EventShardCorrupted, VolumeID: 1, ShardID: intPtr(2)},
			{Type: EventTaskStarted, VolumeID: 1, TaskID: "ec_rebuild_1"},
			{Type: EventShardCreated, VolumeID: 1, ShardID: intPtr(2), Server: "server3"}, // Rebuilt
			{Type: EventTaskCompleted, TaskID: "ec_rebuild_1"},
			{Type: EventMasterSync},
		},
		Duration: 120 * time.Second,
	}
}

func (cs *ComprehensiveSimulator) createHighLoadStressTestScenario() *StateTestScenario {
	events := []*SimulationEvent{}

	// Create 100 concurrent tasks
	for i := 0; i < 100; i++ {
		events = append(events, &SimulationEvent{
			Type:     EventTaskStarted,
			VolumeID: uint32(i + 1),
			TaskID:   fmt.Sprintf("stress_task_%d", i),
		})
	}

	// Add master syncs throughout
	for i := 0; i < 10; i++ {
		events = append(events, &SimulationEvent{
			Type: EventMasterSync,
		})
	}

	// Complete all tasks
	for i := 0; i < 100; i++ {
		events = append(events, &SimulationEvent{
			Type:   EventTaskCompleted,
			TaskID: fmt.Sprintf("stress_task_%d", i),
		})
	}

	return &StateTestScenario{
		Name:          "high_load_stress_test",
		Description:   "Tests system under high load with many concurrent operations",
		EventSequence: events,
		Duration:      5 * time.Minute,
	}
}

// The remaining scenario constructors are placeholders; their event sequences
// and inconsistency checks are still to be filled in.

func (cs *ComprehensiveSimulator) createMasterSyncDuringTaskScenario() *StateTestScenario {
	return &StateTestScenario{Name: "master_sync_during_task", Description: "Test", Duration: 30 * time.Second}
}

func (cs *ComprehensiveSimulator) createWorkerFailureDuringECScenario() *StateTestScenario {
	return &StateTestScenario{Name: "worker_failure_during_ec", Description: "Test", Duration: 30 * time.Second}
}

func (cs *ComprehensiveSimulator) createCapacityOverflowScenario() *StateTestScenario {
	return &StateTestScenario{Name: "capacity_overflow", Description: "Test", Duration: 30 * time.Second}
}

func (cs *ComprehensiveSimulator) createShardCorruptionScenario() *StateTestScenario {
	return &StateTestScenario{Name: "shard_corruption", Description: "Test", Duration: 30 * time.Second}
}

func (cs *ComprehensiveSimulator) createMasterInconsistencyScenario() *StateTestScenario {
	return &StateTestScenario{Name: "master_inconsistency", Description: "Test", Duration: 30 * time.Second}
}

func (cs *ComprehensiveSimulator) createTaskOrphanScenario() *StateTestScenario {
	return &StateTestScenario{Name: "task_orphan", Description: "Test", Duration: 30 * time.Second}
}

func (cs *ComprehensiveSimulator) createDuplicateTaskDetectionScenario() *StateTestScenario {
	return &StateTestScenario{Name: "duplicate_task_detection", Description: "Test", Duration: 30 * time.Second}
}

func (cs *ComprehensiveSimulator) createVolumeStateRollbackScenario() *StateTestScenario {
	return &StateTestScenario{Name: "volume_state_rollback", Description: "Test", Duration: 30 * time.Second}
}
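
// buildCustomScenarioExample is an illustrative sketch of how an additional
// scenario can be assembled from the event vocabulary above; the volume ID,
// task ID, and size below are arbitrary example values, and note that
// executeEvent currently treats EventVolumeSizeChanged as a counted no-op.
// A caller inside this package could run it directly:
//
//	sim := NewComprehensiveSimulator()
//	err := sim.RunScenario(buildCustomScenarioExample())
func buildCustomScenarioExample() *StateTestScenario {
	return &StateTestScenario{
		Name:        "custom_example",
		Description: "Volume grows while a vacuum task is running",
		EventSequence: []*SimulationEvent{
			{Type: EventTaskStarted, VolumeID: 7, TaskID: "vacuum_7", Parameters: map[string]interface{}{"type": "vacuum"}},
			{Type: EventVolumeSizeChanged, VolumeID: 7, Parameters: map[string]interface{}{"size": int64(2 * 1024 * 1024 * 1024)}},
			{Type: EventMasterSync},
			{Type: EventTaskCompleted, TaskID: "vacuum_7"},
		},
		Duration: 30 * time.Second,
	}
}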

// RunScenario executes a single test scenario.
func (cs *ComprehensiveSimulator) RunScenario(scenario *StateTestScenario) error {
	cs.mutex.Lock()
	cs.currentScenario = scenario
	cs.mutex.Unlock()

	glog.V(1).Infof("Setting up scenario: %s", scenario.Name)

	// Set up the initial state
	if err := cs.setupInitialState(scenario.InitialState); err != nil {
		return fmt.Errorf("failed to set up initial state: %v", err)
	}

	// Execute the event sequence
	ctx, cancel := context.WithTimeout(context.Background(), scenario.Duration)
	defer cancel()

	for _, event := range scenario.EventSequence {
		select {
		case <-ctx.Done():
			return fmt.Errorf("scenario timed out")
		default:
			if err := cs.executeEvent(event); err != nil {
				cs.results.Warnings = append(cs.results.Warnings,
					fmt.Sprintf("Event execution warning in %s: %v", scenario.Name, err))
			}
			cs.logEvent(event)
		}

		// Small delay between events
		time.Sleep(100 * time.Millisecond)
	}

	// Validate the final state
	if err := cs.validateFinalState(scenario); err != nil {
		cs.results.StateValidationsFailed++
		return fmt.Errorf("final state validation failed: %v", err)
	}
	cs.results.StateValidationsPassed++

	glog.V(1).Infof("Scenario %s completed successfully", scenario.Name)
	return nil
}

// executeEvent executes a single simulation event.
func (cs *ComprehensiveSimulator) executeEvent(event *SimulationEvent) error {
	cs.results.TotalEvents++
	cs.results.EventsByType[event.Type]++

	switch event.Type {
	case EventTaskStarted:
		return cs.simulateTaskStart(event)
	case EventTaskCompleted:
		return cs.simulateTaskCompletion(event)
	case EventTaskFailed:
		return cs.simulateTaskFailure(event)
	case EventVolumeCreated:
		return cs.simulateVolumeCreation(event)
	case EventVolumeDeleted:
		return cs.simulateVolumeDeletion(event)
	case EventShardCreated:
		return cs.simulateShardCreation(event)
	case EventMasterSync:
		return cs.simulateMasterSync(event)
	case EventNetworkPartition:
		return cs.simulateNetworkPartition(event)
	default:
		return nil // Event types without side effects are counted but otherwise ignored
	}
}

// Event simulation methods

func (cs *ComprehensiveSimulator) simulateTaskStart(event *SimulationEvent) error {
	taskType, _ := event.Parameters["type"].(string)

	impact := &task.TaskImpact{
		TaskID:        event.TaskID,
		TaskType:      types.TaskType(taskType),
		VolumeID:      event.VolumeID,
		StartedAt:     time.Now(),
		EstimatedEnd:  time.Now().Add(30 * time.Second),
		VolumeChanges: &task.VolumeChanges{},
		ShardChanges:  make(map[int]*task.ShardChange),
		CapacityDelta: make(map[string]int64),
	}

	cs.stateManager.RegisterTaskImpact(event.TaskID, impact)
	cs.results.TasksExecuted++

	return nil
}

func (cs *ComprehensiveSimulator) simulateTaskCompletion(event *SimulationEvent) error {
	cs.stateManager.UnregisterTaskImpact(event.TaskID)
	cs.results.TasksSucceeded++
	return nil
}

// simulateTaskFailure mirrors completion: a failed task must still release its
// registered impact so that capacity projections do not leak.
func (cs *ComprehensiveSimulator) simulateTaskFailure(event *SimulationEvent) error {
	cs.stateManager.UnregisterTaskImpact(event.TaskID)
	cs.results.TasksFailed++
	return nil
}

func (cs *ComprehensiveSimulator) simulateVolumeCreation(event *SimulationEvent) error {
	size, _ := event.Parameters["size"].(int64)
	cs.mockMaster.CreateVolume(event.VolumeID, size)
	return nil
}

func (cs *ComprehensiveSimulator) simulateVolumeDeletion(event *SimulationEvent) error {
	cs.mockMaster.DeleteVolume(event.VolumeID)
	return nil
}

func (cs *ComprehensiveSimulator) simulateShardCreation(event *SimulationEvent) error {
	if event.ShardID != nil {
		cs.mockMaster.CreateShard(event.VolumeID, *event.ShardID, event.Server)
	}
	return nil
}

func (cs *ComprehensiveSimulator) simulateMasterSync(event *SimulationEvent) error {
	return cs.stateManager.SyncWithMaster()
}

func (cs *ComprehensiveSimulator) simulateNetworkPartition(event *SimulationEvent) error {
	cs.mockMaster.SetNetworkPartitioned(true)

	// Auto-heal after the configured duration
	if durationStr, ok := event.Parameters["duration"].(string); ok {
		if duration, err := time.ParseDuration(durationStr); err == nil {
			time.AfterFunc(duration, func() {
				cs.mockMaster.SetNetworkPartitioned(false)
			})
		}
	}

	return nil
}

// Helper methods

func (cs *ComprehensiveSimulator) setupInitialState(initialState *ClusterState) error {
	if initialState == nil {
		return nil
	}

	// Seed the mock master with the scenario's initial state
	for volumeID, volume := range initialState.Volumes {
		cs.mockMaster.CreateVolume(volumeID, int64(volume.Size))
	}

	for volumeID, shards := range initialState.ECShards {
		for shardID, shard := range shards {
			cs.mockMaster.CreateShard(volumeID, shardID, shard.Server)
		}
	}

	return nil
}

func (cs *ComprehensiveSimulator) validateFinalState(scenario *StateTestScenario) error {
	// Run the scenario's inconsistency checks
	for _, check := range scenario.InconsistencyChecks {
		if err := cs.validateInconsistencyCheck(check); err != nil {
			return err
		}
	}

	return nil
}

func (cs *ComprehensiveSimulator) validateInconsistencyCheck(check *InconsistencyCheck) error {
	// Placeholder: a real implementation would query the state manager for the
	// actual inconsistency count (and compare against ExpectedCount, which is
	// not yet consulted). The random stand-in below always stays within bounds,
	// so the failure branch only fires once a real check replaces it.
	found := rand.Intn(check.MaxAllowedCount + 1)

	if found > check.MaxAllowedCount {
		return fmt.Errorf("inconsistency check %s failed: found %d, max allowed %d",
			check.Name, found, check.MaxAllowedCount)
	}

	cs.results.InconsistenciesFound[check.Type] += found
	return nil
}

func (cs *ComprehensiveSimulator) logEvent(event *SimulationEvent) {
	cs.mutex.Lock()
	defer cs.mutex.Unlock()

	cs.eventLog = append(cs.eventLog, event)
	logMsg := fmt.Sprintf("Event: %s, Volume: %d, Task: %s", event.Type, event.VolumeID, event.TaskID)
	cs.results.DetailedLog = append(cs.results.DetailedLog, logMsg)
}

func (cs *ComprehensiveSimulator) generateDetailedReport() {
	glog.Infof("=== COMPREHENSIVE SIMULATION REPORT ===")
	glog.Infof("Duration: %v", cs.results.Duration)
	glog.Infof("Total Events: %d", cs.results.TotalEvents)
	glog.Infof("Tasks Executed: %d", cs.results.TasksExecuted)
	glog.Infof("Tasks Succeeded: %d", cs.results.TasksSucceeded)
	glog.Infof("Tasks Failed: %d", cs.results.TasksFailed)
	glog.Infof("State Validations Passed: %d", cs.results.StateValidationsPassed)
	glog.Infof("State Validations Failed: %d", cs.results.StateValidationsFailed)

	glog.Infof("Events by Type:")
	for eventType, count := range cs.results.EventsByType {
		glog.Infof("  %s: %d", eventType, count)
	}

	glog.Infof("Inconsistencies Found:")
	for incType, count := range cs.results.InconsistenciesFound {
		glog.Infof("  %s: %d", incType, count)
	}

	if len(cs.results.CriticalErrors) > 0 {
		glog.Errorf("Critical Errors:")
		for _, err := range cs.results.CriticalErrors {
			glog.Errorf("  %s", err)
		}
	}

	glog.Infof("Overall Success: %v", cs.results.Success)
	glog.Infof("========================================")
}

// Mock master server implementation

func NewMockMasterServer() *MockMasterServer {
	return &MockMasterServer{
		volumes:        make(map[uint32]*task.VolumeInfo),
		ecShards:       make(map[uint32]map[int]*task.ShardInfo),
		serverCapacity: make(map[string]*task.CapacityInfo),
	}
}

func (mms *MockMasterServer) CreateVolume(volumeID uint32, size int64) {
	mms.mutex.Lock()
	defer mms.mutex.Unlock()

	mms.volumes[volumeID] = &task.VolumeInfo{
		ID:   volumeID,
		Size: uint64(size),
	}
}

func (mms *MockMasterServer) DeleteVolume(volumeID uint32) {
	mms.mutex.Lock()
	defer mms.mutex.Unlock()

	delete(mms.volumes, volumeID)
	delete(mms.ecShards, volumeID)
}

func (mms *MockMasterServer) CreateShard(volumeID uint32, shardID int, server string) {
	mms.mutex.Lock()
	defer mms.mutex.Unlock()

	if mms.ecShards[volumeID] == nil {
		mms.ecShards[volumeID] = make(map[int]*task.ShardInfo)
	}

	mms.ecShards[volumeID][shardID] = &task.ShardInfo{
		ShardID: shardID,
		Server:  server,
		Status:  task.ShardStatusExists,
	}
}

func (mms *MockMasterServer) SetNetworkPartitioned(partitioned bool) {
	mms.mutex.Lock()
	defer mms.mutex.Unlock()

	mms.networkPartitioned = partitioned
}

// intPtr returns a pointer to the given int, for optional shard IDs.
func intPtr(i int) *int {
	return &i
}