Browse Source

fix(telemetry): use correct TopologyId field in integration test (#8714)

* fix(telemetry): use correct TopologyId field in integration test

The proto field was renamed from `cluster_id` to `topology_id`, but the
integration test was not updated, causing a compilation error.

* ci: add telemetry integration test workflow

Runs the telemetry integration test (server startup, protobuf
marshaling, client send, metrics/stats/instances API checks) on
changes to telemetry/ or weed/telemetry/.

* fix(telemetry): improve error message specificity in integration test

* fix(ci): pre-build telemetry server binary for integration test

`go run` compiles the server on the fly, which exceeds the 15s startup
timeout in CI. Build the binary first so the test starts instantly.

* fix(telemetry): fix ClusterId references in server and CI build path

- Replace ClusterId with TopologyId in server storage and API handler
  (same rename as the integration test fix)
- Fix CI build: the telemetry server has its own `go.mod`, so build from
  within its directory

* ci(telemetry): add least-privilege permissions to workflow

Scope the workflow token to read-only repository contents, matching
the convention used in go.yml.

* fix(telemetry): set TopologyId in client integration test

The client only populates TopologyId when SetTopologyId has been
called. The test was missing this call, causing the server to reject
the request with 400 (missing required field).

* fix(telemetry): delete clusterInfo metric on instance cleanup

The cleanup loop removed all per-instance metrics except clusterInfo,
leaking that label set after eviction.
pull/8716/head
Chris Lu 1 day ago
committed by GitHub
parent
commit
ba855f9962
No known key found for this signature in database GPG Key ID: B5690EEEBB952194
  1. 46
      .github/workflows/telemetry-integration.yml
  2. 1
      .gitignore
  3. 2
      telemetry/server/api/handlers.go
  4. 12
      telemetry/server/go.mod
  5. 25
      telemetry/server/go.sum
  6. 9
      telemetry/server/storage/prometheus.go
  7. 30
      telemetry/test/integration.go

46
.github/workflows/telemetry-integration.yml

@ -0,0 +1,46 @@
# Runs the telemetry integration test when telemetry code or this workflow changes.
name: Telemetry Integration Tests

on:
  push:
    branches: [ master ]
    paths:
      - 'telemetry/**'
      - 'weed/telemetry/**'
      - '.github/workflows/telemetry-integration.yml'
  pull_request:
    branches: [ master ]
    paths:
      - 'telemetry/**'
      - 'weed/telemetry/**'
      - '.github/workflows/telemetry-integration.yml'

# Least-privilege token: the job only needs to read repository contents.
permissions:
  contents: read

jobs:
  telemetry-integration-test:
    runs-on: ubuntu-latest
    timeout-minutes: 5
    steps:
      - name: Checkout code
        uses: actions/checkout@v6

      - name: Set up Go
        uses: actions/setup-go@v6
        with:
          go-version-file: 'go.mod'

      # Pre-build the server so the test does not pay `go run` compile time
      # during its startup timeout. The server has its own go.mod, so the
      # build must run from inside its directory.
      - name: Build telemetry server
        run: cd telemetry/server && go build -o telemetry-server .

      - name: Run telemetry integration test
        run: go run telemetry/test/integration.go

      # Keep the server log for debugging when any earlier step failed.
      - name: Upload test logs on failure
        if: failure()
        uses: actions/upload-artifact@v7
        with:
          name: telemetry-test-logs
          path: telemetry-server-test.log
          retention-days: 7

1
.gitignore

@ -141,4 +141,5 @@ test/s3/iam/.test_env
/test/erasure_coding/admin_dockertest/tmp
/test/erasure_coding/admin_dockertest/task_logs
weed_bin
telemetry/server/telemetry-server
.aider*

2
telemetry/server/api/handlers.go

@ -54,7 +54,7 @@ func (h *Handler) CollectTelemetry(w http.ResponseWriter, r *http.Request) {
}
// Validate required fields
if data.ClusterId == "" || data.Version == "" || data.Os == "" {
if data.TopologyId == "" || data.Version == "" || data.Os == "" {
http.Error(w, "Missing required fields", http.StatusBadRequest)
return
}

12
telemetry/server/go.mod

@ -1,8 +1,6 @@
module github.com/seaweedfs/seaweedfs/telemetry/server
go 1.25
toolchain go1.25.0
go 1.25.0
require (
github.com/prometheus/client_golang v1.23.2
@ -15,10 +13,10 @@ require (
github.com/cespare/xxhash/v2 v2.3.0 // indirect
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
github.com/prometheus/client_model v0.6.2 // indirect
github.com/prometheus/common v0.66.1 // indirect
github.com/prometheus/procfs v0.19.2 // indirect
go.yaml.in/yaml/v2 v2.4.2 // indirect
golang.org/x/sys v0.39.0 // indirect
github.com/prometheus/common v0.67.2 // indirect
github.com/prometheus/procfs v0.20.1 // indirect
go.yaml.in/yaml/v2 v2.4.3 // indirect
golang.org/x/sys v0.42.0 // indirect
)
replace github.com/seaweedfs/seaweedfs => ../..

25
telemetry/server/go.sum

@ -6,8 +6,8 @@ github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU=
github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo=
github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ=
github.com/klauspost/compress v1.18.4 h1:RPhnKRAQ4Fh8zU2FY/6ZFDwTVTxgJ/EMydqSTzE9a2c=
github.com/klauspost/compress v1.18.4/go.mod h1:R0h/fSBs8DE4ENlcrlib3PsXS61voFxhIs2DeRhCvJ4=
github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
@ -22,24 +22,21 @@ github.com/prometheus/client_golang v1.23.2 h1:Je96obch5RDVy3FDMndoUsjAhG5Edi49h
github.com/prometheus/client_golang v1.23.2/go.mod h1:Tb1a6LWHB3/SPIzCoaDXI4I8UHKeFTEQ1YCr+0Gyqmg=
github.com/prometheus/client_model v0.6.2 h1:oBsgwpGs7iVziMvrGhE53c/GrLUsZdHnqNwqPLxwZyk=
github.com/prometheus/client_model v0.6.2/go.mod h1:y3m2F6Gdpfy6Ut/GBsUqTWZqCUvMVzSfMLjcu6wAwpE=
github.com/prometheus/common v0.66.1 h1:h5E0h5/Y8niHc5DlaLlWLArTQI7tMrsfQjHV+d9ZoGs=
github.com/prometheus/common v0.66.1/go.mod h1:gcaUsgf3KfRSwHY4dIMXLPV0K/Wg1oZ8+SbZk/HH/dA=
github.com/prometheus/procfs v0.17.0 h1:FuLQ+05u4ZI+SS/w9+BWEM2TXiHKsUQ9TADiRH7DuK0=
github.com/prometheus/procfs v0.17.0/go.mod h1:oPQLaDAMRbA+u8H5Pbfq+dl3VDAvHxMUOVhe0wYB2zw=
github.com/prometheus/procfs v0.19.2/go.mod h1:M0aotyiemPhBCM0z5w87kL22CxfcH05ZpYlu+b4J7mw=
github.com/prometheus/common v0.67.2 h1:PcBAckGFTIHt2+L3I33uNRTlKTplNzFctXcWhPyAEN8=
github.com/prometheus/common v0.67.2/go.mod h1:63W3KZb1JOKgcjlIr64WW/LvFGAqKPj0atm+knVGEko=
github.com/prometheus/procfs v0.20.1 h1:XwbrGOIplXW/AU3YhIhLODXMJYyC1isLFfYCsTEycfc=
github.com/prometheus/procfs v0.20.1/go.mod h1:o9EMBZGRyvDrSPH1RqdxhojkuXstoe4UlK79eF5TGGo=
github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ=
github.com/rogpeppe/go-internal v1.14.1/go.mod h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7so1lCWt35ZSgc=
github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto=
go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE=
go.yaml.in/yaml/v2 v2.4.2 h1:DzmwEr2rDGHl7lsFgAHxmNz/1NlQ7xLIrlN2h5d1eGI=
go.yaml.in/yaml/v2 v2.4.2/go.mod h1:081UH+NErpNdqlCXm3TtEran0rJZGxAYx9hb/ELlsPU=
golang.org/x/sys v0.36.0 h1:KVRy2GtZBrk1cBYA7MKu5bEZFxQk4NIDV6RLVcC8o0k=
golang.org/x/sys v0.36.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
golang.org/x/sys v0.39.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
google.golang.org/protobuf v1.36.8 h1:xHScyCOEuuwZEc6UtSOvPbAT4zRh0xcNRYekJwfqyMc=
google.golang.org/protobuf v1.36.8/go.mod h1:fuxRtAxBytpl4zzqUh6/eyUujkJdNiuEkXntxiD/uRU=
go.yaml.in/yaml/v2 v2.4.3 h1:6gvOSjQoTB3vt1l+CU+tSyi/HOjfOjRLJ4YwYZGwRO0=
go.yaml.in/yaml/v2 v2.4.3/go.mod h1:zSxWcmIDjOzPXpjlTTbAsKokqkDNAVtZO0WOMiT90s8=
golang.org/x/sys v0.42.0 h1:omrd2nAlyT5ESRdCLYdm3+fMfNFE/+Rf4bDIQImRJeo=
golang.org/x/sys v0.42.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw=
google.golang.org/protobuf v1.36.11 h1:fV6ZwhNocDyBLK0dj+fg8ektcVegBBuEolpbTQyBNVE=
google.golang.org/protobuf v1.36.11/go.mod h1:HTf+CrKn2C3g5S8VImy6tdcUvCska2kB7j23XfzDpco=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=

9
telemetry/server/storage/prometheus.go

@ -82,7 +82,7 @@ func (s *PrometheusStorage) StoreTelemetry(data *proto.TelemetryData) error {
// Update Prometheus metrics
labels := prometheus.Labels{
"cluster_id": data.ClusterId,
"cluster_id": data.TopologyId,
"version": data.Version,
"os": data.Os,
}
@ -94,7 +94,7 @@ func (s *PrometheusStorage) StoreTelemetry(data *proto.TelemetryData) error {
s.brokerCount.With(labels).Set(float64(data.BrokerCount))
infoLabels := prometheus.Labels{
"cluster_id": data.ClusterId,
"cluster_id": data.TopologyId,
"version": data.Version,
"os": data.Os,
}
@ -103,7 +103,7 @@ func (s *PrometheusStorage) StoreTelemetry(data *proto.TelemetryData) error {
s.telemetryReceived.Inc()
// Store in memory for API endpoints
s.instances[data.ClusterId] = &telemetryData{
s.instances[data.TopologyId] = &telemetryData{
TelemetryData: data,
ReceivedAt: time.Now().UTC(),
}
@ -219,7 +219,7 @@ func (s *PrometheusStorage) CleanupOldInstances(maxAge time.Duration) {
// Remove from Prometheus metrics
labels := prometheus.Labels{
"cluster_id": instance.TelemetryData.ClusterId,
"cluster_id": instance.TelemetryData.TopologyId,
"version": instance.TelemetryData.Version,
"os": instance.TelemetryData.Os,
}
@ -228,6 +228,7 @@ func (s *PrometheusStorage) CleanupOldInstances(maxAge time.Duration) {
s.totalVolumeCount.Delete(labels)
s.filerCount.Delete(labels)
s.brokerCount.Delete(labels)
s.clusterInfo.Delete(labels)
}
}

30
telemetry/test/integration.go

@ -85,16 +85,25 @@ func startTelemetryServer() (*exec.Cmd, error) {
return nil, fmt.Errorf("failed to get working directory: %v", err)
}
// Navigate to the server directory (from main seaweedfs directory)
serverDir := filepath.Join(testDir, "telemetry", "server")
cmd := exec.Command("go", "run", ".",
"-port="+serverPort,
// Use pre-built binary if available (faster in CI), otherwise fall back to go run
args := []string{
"-port=" + serverPort,
"-dashboard=false",
"-cleanup=1m",
"-max-age=1h")
"-max-age=1h",
}
cmd.Dir = serverDir
serverBin := filepath.Join(testDir, "telemetry", "server", "telemetry-server")
var cmd *exec.Cmd
if _, err := os.Stat(serverBin); err == nil {
fmt.Printf("Using pre-built binary: %s\n", serverBin)
cmd = exec.Command(serverBin, args...)
} else {
fmt.Println("No pre-built binary found, using go run")
serverDir := filepath.Join(testDir, "telemetry", "server")
cmd = exec.Command("go", append([]string{"run", "."}, args...)...)
cmd.Dir = serverDir
}
// Create log files for server output
logFile, err := os.Create("telemetry-server-test.log")
@ -174,9 +183,9 @@ func testProtobufMarshaling() error {
}
// Verify data
if testData2.ClusterId != testData.ClusterId {
return fmt.Errorf("protobuf data mismatch: expected %s, got %s",
testData.ClusterId, testData2.ClusterId)
if testData2.TopologyId != testData.TopologyId {
return fmt.Errorf("TopologyId mismatch: expected %s, got %s",
testData.TopologyId, testData2.TopologyId)
}
if testData2.VolumeServerCount != testData.VolumeServerCount {
@ -190,6 +199,7 @@ func testProtobufMarshaling() error {
func testTelemetryClient() error {
// Create telemetry client
client := telemetry.NewClient(serverURL+"/api/collect", true)
client.SetTopologyId("test-topology-12345")
// Create test data using protobuf format
testData := &proto.TelemetryData{

Loading…
Cancel
Save