diff --git a/.github/workflows/telemetry-integration.yml b/.github/workflows/telemetry-integration.yml new file mode 100644 index 000000000..1540256f1 --- /dev/null +++ b/.github/workflows/telemetry-integration.yml @@ -0,0 +1,46 @@ +name: Telemetry Integration Tests + +on: + push: + branches: [ master ] + paths: + - 'telemetry/**' + - 'weed/telemetry/**' + - '.github/workflows/telemetry-integration.yml' + pull_request: + branches: [ master ] + paths: + - 'telemetry/**' + - 'weed/telemetry/**' + - '.github/workflows/telemetry-integration.yml' + +permissions: + contents: read + +jobs: + telemetry-integration-test: + runs-on: ubuntu-latest + timeout-minutes: 5 + + steps: + - name: Checkout code + uses: actions/checkout@v6 + + - name: Set up Go + uses: actions/setup-go@v6 + with: + go-version-file: 'go.mod' + + - name: Build telemetry server + run: cd telemetry/server && go build -o telemetry-server . + + - name: Run telemetry integration test + run: go run telemetry/test/integration.go + + - name: Upload test logs on failure + if: failure() + uses: actions/upload-artifact@v7 + with: + name: telemetry-test-logs + path: telemetry-server-test.log + retention-days: 7 diff --git a/.gitignore b/.gitignore index 0ea9a06b0..a3ea87971 100644 --- a/.gitignore +++ b/.gitignore @@ -141,4 +141,5 @@ test/s3/iam/.test_env /test/erasure_coding/admin_dockertest/tmp /test/erasure_coding/admin_dockertest/task_logs weed_bin +telemetry/server/telemetry-server .aider* diff --git a/telemetry/server/api/handlers.go b/telemetry/server/api/handlers.go index 0ff00330b..c480a9771 100644 --- a/telemetry/server/api/handlers.go +++ b/telemetry/server/api/handlers.go @@ -54,7 +54,7 @@ func (h *Handler) CollectTelemetry(w http.ResponseWriter, r *http.Request) { } // Validate required fields - if data.ClusterId == "" || data.Version == "" || data.Os == "" { + if data.TopologyId == "" || data.Version == "" || data.Os == "" { http.Error(w, "Missing required fields", http.StatusBadRequest) return } diff --git a/telemetry/server/go.mod b/telemetry/server/go.mod index f555d0bba..01f46902c 100644 --- a/telemetry/server/go.mod +++ b/telemetry/server/go.mod @@ -1,8 +1,6 @@ module github.com/seaweedfs/seaweedfs/telemetry/server -go 1.25 - -toolchain go1.25.0 +go 1.25.0 require ( github.com/prometheus/client_golang v1.23.2 @@ -15,10 +13,10 @@ require ( github.com/cespare/xxhash/v2 v2.3.0 // indirect github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect github.com/prometheus/client_model v0.6.2 // indirect - github.com/prometheus/common v0.66.1 // indirect - github.com/prometheus/procfs v0.19.2 // indirect - go.yaml.in/yaml/v2 v2.4.2 // indirect - golang.org/x/sys v0.39.0 // indirect + github.com/prometheus/common v0.67.2 // indirect + github.com/prometheus/procfs v0.20.1 // indirect + go.yaml.in/yaml/v2 v2.4.3 // indirect + golang.org/x/sys v0.42.0 // indirect ) replace github.com/seaweedfs/seaweedfs => ../.. diff --git a/telemetry/server/go.sum b/telemetry/server/go.sum index b9e086f24..ee6dedcc2 100644 --- a/telemetry/server/go.sum +++ b/telemetry/server/go.sum @@ -6,8 +6,8 @@ github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1 github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= -github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo= -github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ= +github.com/klauspost/compress v1.18.4 h1:RPhnKRAQ4Fh8zU2FY/6ZFDwTVTxgJ/EMydqSTzE9a2c= +github.com/klauspost/compress v1.18.4/go.mod h1:R0h/fSBs8DE4ENlcrlib3PsXS61voFxhIs2DeRhCvJ4= github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= @@ -22,24 +22,21 @@ github.com/prometheus/client_golang v1.23.2 h1:Je96obch5RDVy3FDMndoUsjAhG5Edi49h github.com/prometheus/client_golang v1.23.2/go.mod h1:Tb1a6LWHB3/SPIzCoaDXI4I8UHKeFTEQ1YCr+0Gyqmg= github.com/prometheus/client_model v0.6.2 h1:oBsgwpGs7iVziMvrGhE53c/GrLUsZdHnqNwqPLxwZyk= github.com/prometheus/client_model v0.6.2/go.mod h1:y3m2F6Gdpfy6Ut/GBsUqTWZqCUvMVzSfMLjcu6wAwpE= -github.com/prometheus/common v0.66.1 h1:h5E0h5/Y8niHc5DlaLlWLArTQI7tMrsfQjHV+d9ZoGs= -github.com/prometheus/common v0.66.1/go.mod h1:gcaUsgf3KfRSwHY4dIMXLPV0K/Wg1oZ8+SbZk/HH/dA= -github.com/prometheus/procfs v0.17.0 h1:FuLQ+05u4ZI+SS/w9+BWEM2TXiHKsUQ9TADiRH7DuK0= -github.com/prometheus/procfs v0.17.0/go.mod h1:oPQLaDAMRbA+u8H5Pbfq+dl3VDAvHxMUOVhe0wYB2zw= -github.com/prometheus/procfs v0.19.2/go.mod h1:M0aotyiemPhBCM0z5w87kL22CxfcH05ZpYlu+b4J7mw= +github.com/prometheus/common v0.67.2 h1:PcBAckGFTIHt2+L3I33uNRTlKTplNzFctXcWhPyAEN8= +github.com/prometheus/common v0.67.2/go.mod h1:63W3KZb1JOKgcjlIr64WW/LvFGAqKPj0atm+knVGEko= +github.com/prometheus/procfs v0.20.1 h1:XwbrGOIplXW/AU3YhIhLODXMJYyC1isLFfYCsTEycfc= +github.com/prometheus/procfs v0.20.1/go.mod h1:o9EMBZGRyvDrSPH1RqdxhojkuXstoe4UlK79eF5TGGo= github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ= github.com/rogpeppe/go-internal v1.14.1/go.mod h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7so1lCWt35ZSgc= github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U= github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= -go.yaml.in/yaml/v2 v2.4.2 h1:DzmwEr2rDGHl7lsFgAHxmNz/1NlQ7xLIrlN2h5d1eGI= -go.yaml.in/yaml/v2 v2.4.2/go.mod h1:081UH+NErpNdqlCXm3TtEran0rJZGxAYx9hb/ELlsPU= -golang.org/x/sys v0.36.0 h1:KVRy2GtZBrk1cBYA7MKu5bEZFxQk4NIDV6RLVcC8o0k= -golang.org/x/sys v0.36.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= -golang.org/x/sys v0.39.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= -google.golang.org/protobuf v1.36.8 h1:xHScyCOEuuwZEc6UtSOvPbAT4zRh0xcNRYekJwfqyMc= -google.golang.org/protobuf v1.36.8/go.mod h1:fuxRtAxBytpl4zzqUh6/eyUujkJdNiuEkXntxiD/uRU= +go.yaml.in/yaml/v2 v2.4.3 h1:6gvOSjQoTB3vt1l+CU+tSyi/HOjfOjRLJ4YwYZGwRO0= +go.yaml.in/yaml/v2 v2.4.3/go.mod h1:zSxWcmIDjOzPXpjlTTbAsKokqkDNAVtZO0WOMiT90s8= +golang.org/x/sys v0.42.0 h1:omrd2nAlyT5ESRdCLYdm3+fMfNFE/+Rf4bDIQImRJeo= +golang.org/x/sys v0.42.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw= +google.golang.org/protobuf v1.36.11 h1:fV6ZwhNocDyBLK0dj+fg8ektcVegBBuEolpbTQyBNVE= google.golang.org/protobuf v1.36.11/go.mod h1:HTf+CrKn2C3g5S8VImy6tdcUvCska2kB7j23XfzDpco= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= diff --git a/telemetry/server/storage/prometheus.go b/telemetry/server/storage/prometheus.go index 0b911227a..7e62ed8ad 100644 --- a/telemetry/server/storage/prometheus.go +++ b/telemetry/server/storage/prometheus.go @@ -82,7 +82,7 @@ func (s *PrometheusStorage) StoreTelemetry(data *proto.TelemetryData) error { // Update Prometheus metrics labels := prometheus.Labels{ - "cluster_id": data.ClusterId, + "cluster_id": data.TopologyId, "version": data.Version, "os": data.Os, } @@ -94,7 +94,7 @@ func (s *PrometheusStorage) StoreTelemetry(data *proto.TelemetryData) error { s.brokerCount.With(labels).Set(float64(data.BrokerCount)) infoLabels := prometheus.Labels{ - "cluster_id": data.ClusterId, + "cluster_id": data.TopologyId, "version": data.Version, "os": data.Os, } @@ -103,7 +103,7 @@ func (s *PrometheusStorage) StoreTelemetry(data *proto.TelemetryData) error { s.telemetryReceived.Inc() // Store in memory for API endpoints - s.instances[data.ClusterId] = &telemetryData{ + s.instances[data.TopologyId] = &telemetryData{ TelemetryData: data, ReceivedAt: time.Now().UTC(), } @@ -219,7 +219,7 @@ func (s *PrometheusStorage) CleanupOldInstances(maxAge time.Duration) { // Remove from Prometheus metrics labels := prometheus.Labels{ - "cluster_id": instance.TelemetryData.ClusterId, + "cluster_id": instance.TelemetryData.TopologyId, "version": instance.TelemetryData.Version, "os": instance.TelemetryData.Os, } @@ -228,6 +228,7 @@ func (s *PrometheusStorage) CleanupOldInstances(maxAge time.Duration) { s.totalVolumeCount.Delete(labels) s.filerCount.Delete(labels) s.brokerCount.Delete(labels) + s.clusterInfo.Delete(labels) } } diff --git a/telemetry/test/integration.go b/telemetry/test/integration.go index 463806f15..f75a3ae89 100644 --- a/telemetry/test/integration.go +++ b/telemetry/test/integration.go @@ -85,16 +85,25 @@ func startTelemetryServer() (*exec.Cmd, error) { return nil, fmt.Errorf("failed to get working directory: %v", err) } - // Navigate to the server directory (from main seaweedfs directory) - serverDir := filepath.Join(testDir, "telemetry", "server") - - cmd := exec.Command("go", "run", ".", - "-port="+serverPort, + // Use pre-built binary if available (faster in CI), otherwise fall back to go run + args := []string{ + "-port=" + serverPort, "-dashboard=false", "-cleanup=1m", - "-max-age=1h") + "-max-age=1h", + } - cmd.Dir = serverDir + serverBin := filepath.Join(testDir, "telemetry", "server", "telemetry-server") + var cmd *exec.Cmd + if _, err := os.Stat(serverBin); err == nil { + fmt.Printf("Using pre-built binary: %s\n", serverBin) + cmd = exec.Command(serverBin, args...) + } else { + fmt.Println("No pre-built binary found, using go run") + serverDir := filepath.Join(testDir, "telemetry", "server") + cmd = exec.Command("go", append([]string{"run", "."}, args...)...) + cmd.Dir = serverDir + } // Create log files for server output logFile, err := os.Create("telemetry-server-test.log") @@ -174,9 +183,9 @@ func testProtobufMarshaling() error { } // Verify data - if testData2.ClusterId != testData.ClusterId { - return fmt.Errorf("protobuf data mismatch: expected %s, got %s", - testData.ClusterId, testData2.ClusterId) + if testData2.TopologyId != testData.TopologyId { + return fmt.Errorf("TopologyId mismatch: expected %s, got %s", + testData.TopologyId, testData2.TopologyId) } if testData2.VolumeServerCount != testData.VolumeServerCount { @@ -190,6 +199,7 @@ func testProtobufMarshaling() error { func testTelemetryClient() error { // Create telemetry client client := telemetry.NewClient(serverURL+"/api/collect", true) + client.SetTopologyId("test-topology-12345") // Create test data using protobuf format testData := &proto.TelemetryData{