From 45ce18266a322c08b4d851770bd6d7d90d20ed47 Mon Sep 17 00:00:00 2001 From: Chris Lu Date: Wed, 4 Mar 2026 00:40:40 -0800 Subject: [PATCH] Disable master maintenance scripts when admin server runs (#8499) * Disable master maintenance scripts when admin server runs * Stop defaulting master maintenance scripts * Apply suggestion from @gemini-code-assist[bot] Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> * Apply suggestion from @gemini-code-assist[bot] Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> * Clarify master scripts are disabled by default * Skip master maintenance scripts when admin server is connected * Restore default master maintenance scripts * Document admin server skip for master maintenance scripts --------- Co-authored-by: Copilot Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> --- docker/compose/master-cloud.toml | 1 + weed/admin/dash/admin_lock_manager.go | 3 +- weed/admin/dash/admin_presence_lock.go | 61 ++++++++++++++++++++++++++ weed/admin/dash/admin_server.go | 9 ++++ weed/cluster/admin_locks.go | 6 +++ weed/command/scaffold/master.toml | 1 + weed/server/master_server.go | 12 +++++ 7 files changed, 92 insertions(+), 1 deletion(-) create mode 100644 weed/admin/dash/admin_presence_lock.go create mode 100644 weed/cluster/admin_locks.go diff --git a/docker/compose/master-cloud.toml b/docker/compose/master-cloud.toml index 74c22d26e..29324772c 100644 --- a/docker/compose/master-cloud.toml +++ b/docker/compose/master-cloud.toml @@ -7,6 +7,7 @@ [master.maintenance] # periodically run these scripts are the same as running them from 'weed shell' +# Scripts are skipped while an admin server is connected. scripts = """ lock ec.encode -fullPercent=95 -quietFor=1h diff --git a/weed/admin/dash/admin_lock_manager.go b/weed/admin/dash/admin_lock_manager.go index 42ebd9c85..41ec0ed1b 100644 --- a/weed/admin/dash/admin_lock_manager.go +++ b/weed/admin/dash/admin_lock_manager.go @@ -4,12 +4,13 @@ import ( "sync" "time" + "github.com/seaweedfs/seaweedfs/weed/cluster" "github.com/seaweedfs/seaweedfs/weed/wdclient" "github.com/seaweedfs/seaweedfs/weed/wdclient/exclusive_locks" ) const ( - adminLockName = "shell" + adminLockName = cluster.AdminShellLockName adminLockClientName = "admin-plugin" ) diff --git a/weed/admin/dash/admin_presence_lock.go b/weed/admin/dash/admin_presence_lock.go new file mode 100644 index 000000000..0e3fa3fe1 --- /dev/null +++ b/weed/admin/dash/admin_presence_lock.go @@ -0,0 +1,61 @@ +package dash + +import ( + "time" + + "github.com/seaweedfs/seaweedfs/weed/cluster" + "github.com/seaweedfs/seaweedfs/weed/wdclient" + "github.com/seaweedfs/seaweedfs/weed/wdclient/exclusive_locks" +) + +const adminPresenceClientName = "admin-server" + +type adminPresenceLock struct { + locker *exclusive_locks.ExclusiveLocker + stopCh chan struct{} +} + +func newAdminPresenceLock(masterClient *wdclient.MasterClient) *adminPresenceLock { + if masterClient == nil { + return nil + } + return &adminPresenceLock{ + locker: exclusive_locks.NewExclusiveLocker(masterClient, cluster.AdminServerPresenceLockName), + stopCh: make(chan struct{}), + } +} + +func (l *adminPresenceLock) Start() { + if l == nil || l.locker == nil { + return + } + l.locker.SetMessage("admin server connected") + go func() { + ticker := time.NewTicker(5 * time.Second) + defer ticker.Stop() + for { + if !l.locker.IsLocked() { + l.locker.RequestLock(adminPresenceClientName) + } + select { + case <-l.stopCh: + return + case <-ticker.C: + } + } + }() +} + +func (l *adminPresenceLock) Stop() { + if l == nil { + return + } + select { + case <-l.stopCh: + default: + close(l.stopCh) + } + if l.locker != nil { + l.locker.ReleaseLock() + } +} diff --git a/weed/admin/dash/admin_server.go b/weed/admin/dash/admin_server.go index 2bade9dec..2ecadaec4 100644 --- a/weed/admin/dash/admin_server.go +++ b/weed/admin/dash/admin_server.go @@ -99,6 +99,7 @@ type AdminServer struct { maintenanceManager *maintenance.MaintenanceManager plugin *adminplugin.Plugin pluginLock *AdminLockManager + adminPresenceLock *adminPresenceLock expireJobHandler func(jobID string, reason string) (*adminplugin.TrackedJob, bool, error) // Topic retention purger @@ -137,6 +138,10 @@ func NewAdminServer(masters string, templateFS http.FileSystem, dataDir string, go masterClient.KeepConnectedToMaster(ctx) lockManager := NewAdminLockManager(masterClient, adminLockClientName) + presenceLock := newAdminPresenceLock(masterClient) + if presenceLock != nil { + presenceLock.Start() + } server := &AdminServer{ masterClient: masterClient, @@ -150,6 +155,7 @@ func NewAdminServer(masters string, templateFS http.FileSystem, dataDir string, s3TablesManager: newS3TablesManager(), icebergPort: icebergPort, pluginLock: lockManager, + adminPresenceLock: presenceLock, } // Initialize topic retention purger @@ -1286,6 +1292,9 @@ func (s *AdminServer) Shutdown() { // Stop maintenance manager s.StopMaintenanceManager() + if s.adminPresenceLock != nil { + s.adminPresenceLock.Stop() + } if s.plugin != nil { s.plugin.Shutdown() diff --git a/weed/cluster/admin_locks.go b/weed/cluster/admin_locks.go new file mode 100644 index 000000000..b368d3ae8 --- /dev/null +++ b/weed/cluster/admin_locks.go @@ -0,0 +1,6 @@ +package cluster + +const ( + AdminShellLockName = "shell" + AdminServerPresenceLockName = "admin-server" +) diff --git a/weed/command/scaffold/master.toml b/weed/command/scaffold/master.toml index 2a24cdea7..38443410b 100644 --- a/weed/command/scaffold/master.toml +++ b/weed/command/scaffold/master.toml @@ -6,6 +6,7 @@ [master.maintenance] # periodically run these scripts are the same as running them from 'weed shell' +# Scripts are skipped while an admin server is connected. scripts = """{{DEFAULT_MAINTENANCE_SCRIPTS}}""" sleep_minutes = 17 # sleep minutes between each script execution diff --git a/weed/server/master_server.go b/weed/server/master_server.go index c4ddc7aa2..429458c5b 100644 --- a/weed/server/master_server.go +++ b/weed/server/master_server.go @@ -333,6 +333,14 @@ func (ms *MasterServer) proxyToLeader(f http.HandlerFunc) http.HandlerFunc { } } +func (ms *MasterServer) isAdminServerConnected() bool { + if ms == nil || ms.adminLocks == nil { + return false + } + _, _, isLocked := ms.adminLocks.isLocked(cluster.AdminServerPresenceLockName) + return isLocked +} + func (ms *MasterServer) startAdminScripts() { v := util.GetViper() v.SetDefault("master.maintenance.scripts", maintenance.DefaultMasterMaintenanceScripts) @@ -371,6 +379,10 @@ func (ms *MasterServer) startAdminScripts() { for { time.Sleep(time.Duration(sleepMinutes) * time.Minute) if ms.Topo.IsLeader() && ms.MasterClient.GetMaster(context.Background()) != "" { + if ms.isAdminServerConnected() { + glog.V(1).Infof("Skipping master maintenance scripts because admin server is connected") + continue + } shellOptions.FilerAddress = ms.GetOneFiler(cluster.FilerGroupName(*shellOptions.FilerGroup)) if shellOptions.FilerAddress == "" { continue