add telemetry (#6926)

* add telemetry
* fix go mod
* add default telemetry server url
* Update README.md
* replace with broker count instead of s3 count
* Update telemetry.pb.go
* github action to deploy

committed by GitHub
23 changed files with 3657 additions and 0 deletions
+157  .github/workflows/deploy_telemetry.yml
+271  telemetry/DEPLOYMENT.md
+351  telemetry/README.md
+55   telemetry/docker-compose.yml
+734  telemetry/grafana-dashboard.json
+12   telemetry/grafana-provisioning/dashboards/dashboards.yml
+9    telemetry/grafana-provisioning/datasources/prometheus.yml
+15   telemetry/prometheus.yml
+398  telemetry/proto/telemetry.pb.go
+52   telemetry/proto/telemetry.proto
+18   telemetry/server/Dockerfile
+97   telemetry/server/Makefile
+152  telemetry/server/api/handlers.go
+278  telemetry/server/dashboard/dashboard.go
+31   telemetry/server/go.sum
+111  telemetry/server/main.go
+245  telemetry/server/storage/prometheus.go
+315  telemetry/test/integration.go
+6    weed/command/master.go
+2    weed/command/server.go
+30   weed/server/master_server.go
+100  weed/telemetry/client.go
+218  weed/telemetry/collector.go
.github/workflows/deploy_telemetry.yml
@@ -0,0 +1,157 @@
# This workflow will build and deploy the SeaweedFS telemetry server
# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-go

name: Deploy Telemetry Server

on:
  push:
    branches: [ "master" ]
    paths:
      - 'telemetry/**'
  workflow_dispatch:
    inputs:
      setup:
        description: 'Run first-time server setup'
        required: true
        type: boolean
        default: false
      deploy:
        description: 'Deploy telemetry server to remote server'
        required: true
        type: boolean
        default: false

jobs:
  deploy:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Set up Go
        uses: actions/setup-go@v4
        with:
          go-version: '1.24'

      - name: Build Telemetry Server
        run: |
          go mod tidy
          cd telemetry/server
          GOOS=linux GOARCH=amd64 go build -o telemetry-server main.go

      - name: First-time Server Setup
        if: github.event_name == 'workflow_dispatch' && inputs.setup
        env:
          SSH_PRIVATE_KEY: ${{ secrets.TELEMETRY_SSH_PRIVATE_KEY }}
          REMOTE_HOST: ${{ secrets.TELEMETRY_HOST }}
          REMOTE_USER: ${{ secrets.TELEMETRY_USER }}
        run: |
          mkdir -p ~/.ssh
          echo "$SSH_PRIVATE_KEY" > ~/.ssh/deploy_key
          chmod 600 ~/.ssh/deploy_key
          echo "Host *" > ~/.ssh/config
          echo "  StrictHostKeyChecking no" >> ~/.ssh/config

          # Create all required directories with proper permissions
          ssh -i ~/.ssh/deploy_key $REMOTE_USER@$REMOTE_HOST "
            mkdir -p ~/seaweedfs-telemetry/bin ~/seaweedfs-telemetry/logs ~/seaweedfs-telemetry/data ~/seaweedfs-telemetry/tmp && \
            chmod 755 ~/seaweedfs-telemetry/logs && \
            chmod 755 ~/seaweedfs-telemetry/data && \
            touch ~/seaweedfs-telemetry/logs/telemetry.log ~/seaweedfs-telemetry/logs/telemetry.error.log && \
            chmod 644 ~/seaweedfs-telemetry/logs/*.log"

          # Create systemd service file
          echo "
          [Unit]
          Description=SeaweedFS Telemetry Server
          After=network.target

          [Service]
          Type=simple
          User=$REMOTE_USER
          WorkingDirectory=/home/$REMOTE_USER/seaweedfs-telemetry
          ExecStart=/home/$REMOTE_USER/seaweedfs-telemetry/bin/telemetry-server -port=8353
          Restart=always
          RestartSec=5
          StandardOutput=append:/home/$REMOTE_USER/seaweedfs-telemetry/logs/telemetry.log
          StandardError=append:/home/$REMOTE_USER/seaweedfs-telemetry/logs/telemetry.error.log

          [Install]
          WantedBy=multi-user.target" > telemetry.service

          # Setup logrotate configuration
          echo "# SeaweedFS Telemetry service log rotation
          /home/$REMOTE_USER/seaweedfs-telemetry/logs/*.log {
              daily
              rotate 30
              compress
              delaycompress
              missingok
              notifempty
              create 644 $REMOTE_USER $REMOTE_USER
              postrotate
                  systemctl restart telemetry.service
              endscript
          }" > telemetry_logrotate

          # Copy Grafana dashboard and Prometheus config
          scp -i ~/.ssh/deploy_key telemetry/grafana-dashboard.json $REMOTE_USER@$REMOTE_HOST:~/seaweedfs-telemetry/
          scp -i ~/.ssh/deploy_key telemetry/prometheus.yml $REMOTE_USER@$REMOTE_HOST:~/seaweedfs-telemetry/

          # Copy and install service and logrotate files
          scp -i ~/.ssh/deploy_key telemetry.service telemetry_logrotate $REMOTE_USER@$REMOTE_HOST:~/seaweedfs-telemetry/
          ssh -i ~/.ssh/deploy_key $REMOTE_USER@$REMOTE_HOST "
            sudo mv ~/seaweedfs-telemetry/telemetry.service /etc/systemd/system/ && \
            sudo mv ~/seaweedfs-telemetry/telemetry_logrotate /etc/logrotate.d/seaweedfs-telemetry && \
            sudo systemctl daemon-reload && \
            sudo systemctl enable telemetry.service"

          rm -f ~/.ssh/deploy_key

      - name: Deploy Telemetry Server to Remote Server
        if: (github.event_name == 'push' && contains(github.ref, 'refs/heads/master')) || (github.event_name == 'workflow_dispatch' && inputs.deploy)
        env:
          SSH_PRIVATE_KEY: ${{ secrets.TELEMETRY_SSH_PRIVATE_KEY }}
          REMOTE_HOST: ${{ secrets.TELEMETRY_HOST }}
          REMOTE_USER: ${{ secrets.TELEMETRY_USER }}
        run: |
          mkdir -p ~/.ssh
          echo "$SSH_PRIVATE_KEY" > ~/.ssh/deploy_key
          chmod 600 ~/.ssh/deploy_key
          echo "Host *" > ~/.ssh/config
          echo "  StrictHostKeyChecking no" >> ~/.ssh/config

          # Create temp directory and copy binary
          ssh -i ~/.ssh/deploy_key $REMOTE_USER@$REMOTE_HOST "mkdir -p ~/seaweedfs-telemetry/tmp"
          scp -i ~/.ssh/deploy_key telemetry/server/telemetry-server $REMOTE_USER@$REMOTE_HOST:~/seaweedfs-telemetry/tmp/

          # Copy updated configuration files
          scp -i ~/.ssh/deploy_key telemetry/grafana-dashboard.json $REMOTE_USER@$REMOTE_HOST:~/seaweedfs-telemetry/
          scp -i ~/.ssh/deploy_key telemetry/prometheus.yml $REMOTE_USER@$REMOTE_HOST:~/seaweedfs-telemetry/

          # Stop service, move binary, and restart
          ssh -i ~/.ssh/deploy_key $REMOTE_USER@$REMOTE_HOST "
            sudo systemctl stop telemetry.service || true && \
            mkdir -p ~/seaweedfs-telemetry/bin && \
            mv ~/seaweedfs-telemetry/tmp/telemetry-server ~/seaweedfs-telemetry/bin/ && \
            chmod +x ~/seaweedfs-telemetry/bin/telemetry-server && \
            sudo systemctl start telemetry.service && \
            sudo systemctl status telemetry.service"

          # Verify deployment
          ssh -i ~/.ssh/deploy_key $REMOTE_USER@$REMOTE_HOST "
            echo 'Waiting for service to start...'
            sleep 5
            curl -f http://localhost:8353/health || echo 'Health check failed'"

          rm -f ~/.ssh/deploy_key

      - name: Notify Deployment Status
        if: always()
        run: |
          if [ "${{ job.status }}" == "success" ]; then
            echo "✅ Telemetry server deployment successful"
            echo "Dashboard: http://${{ secrets.TELEMETRY_HOST }}:8353"
            echo "Metrics: http://${{ secrets.TELEMETRY_HOST }}:8353/metrics"
          else
            echo "❌ Telemetry server deployment failed"
          fi
telemetry/DEPLOYMENT.md
@@ -0,0 +1,271 @@
# SeaweedFS Telemetry Server Deployment

This document describes how to deploy the SeaweedFS telemetry server to a remote server using GitHub Actions.

## Prerequisites

1. A remote Linux server with:
   - SSH access
   - systemd (for service management)
   - Optional: Prometheus and Grafana (for monitoring)

2. GitHub repository secrets configured (see [Setup GitHub Secrets](#setup-github-secrets) below):
   - `TELEMETRY_SSH_PRIVATE_KEY`: SSH private key for accessing the remote server
   - `TELEMETRY_HOST`: Remote server hostname or IP address
   - `TELEMETRY_USER`: Username for SSH access

## Setup GitHub Secrets

Before using the deployment workflow, configure the required secrets in your GitHub repository.

### Step 1: Generate SSH Key Pair

On your local machine, generate a new SSH key pair specifically for deployment:

```bash
# Generate a new SSH key pair
ssh-keygen -t ed25519 -C "seaweedfs-telemetry-deploy" -f ~/.ssh/seaweedfs_telemetry_deploy

# This creates two files:
# ~/.ssh/seaweedfs_telemetry_deploy      (private key)
# ~/.ssh/seaweedfs_telemetry_deploy.pub  (public key)
```

### Step 2: Configure Remote Server

Copy the public key to your remote server:

```bash
# Copy public key to remote server
ssh-copy-id -i ~/.ssh/seaweedfs_telemetry_deploy.pub user@your-server.com

# Or manually append to authorized_keys
cat ~/.ssh/seaweedfs_telemetry_deploy.pub | ssh user@your-server.com "mkdir -p ~/.ssh && cat >> ~/.ssh/authorized_keys"
```

Test the SSH connection:

```bash
# Test SSH connection with the new key
ssh -i ~/.ssh/seaweedfs_telemetry_deploy user@your-server.com "echo 'SSH connection successful'"
```

### Step 3: Add Secrets to GitHub Repository

1. Go to your GitHub repository
2. Click on the **Settings** tab
3. In the sidebar, click **Secrets and variables** → **Actions**
4. Click **New repository secret** for each of the following:

#### TELEMETRY_SSH_PRIVATE_KEY

```bash
# Display the private key content
cat ~/.ssh/seaweedfs_telemetry_deploy
```

- **Name**: `TELEMETRY_SSH_PRIVATE_KEY`
- **Value**: Copy the entire private key content, including the `-----BEGIN OPENSSH PRIVATE KEY-----` and `-----END OPENSSH PRIVATE KEY-----` lines

#### TELEMETRY_HOST

- **Name**: `TELEMETRY_HOST`
- **Value**: Your server's hostname or IP address (e.g., `telemetry.example.com` or `192.168.1.100`)

#### TELEMETRY_USER

- **Name**: `TELEMETRY_USER`
- **Value**: The username on the remote server (e.g., `ubuntu`, `deploy`, or your username)

### Step 4: Verify Configuration

Create a simple test workflow or manually trigger the deployment to verify the secrets are working correctly.

### Security Best Practices

1. **Dedicated SSH Key**: Use a separate SSH key only for deployment
2. **Limited Permissions**: Create a dedicated user on the remote server with minimal required permissions
3. **Key Rotation**: Regularly rotate SSH keys
4. **Server Access**: Restrict SSH access to specific IP ranges if possible

### Example Server Setup

If you're setting up a new server, here's a basic configuration:

```bash
# On the remote server, create a dedicated user for deployment
sudo useradd -m -s /bin/bash seaweedfs-deploy
sudo usermod -aG sudo seaweedfs-deploy  # Only if sudo access is needed

# Switch to the deployment user
sudo su - seaweedfs-deploy

# Create SSH directory
mkdir -p ~/.ssh
chmod 700 ~/.ssh

# Add your public key (paste the content of seaweedfs_telemetry_deploy.pub)
nano ~/.ssh/authorized_keys
chmod 600 ~/.ssh/authorized_keys
```

### Troubleshooting

#### SSH Connection Issues

```bash
# Test SSH connection manually
ssh -i ~/.ssh/seaweedfs_telemetry_deploy -v user@your-server.com

# Check SSH key permissions
ls -la ~/.ssh/seaweedfs_telemetry_deploy*
# Should show: -rw------- for the private key, -rw-r--r-- for the public key
```

#### GitHub Actions Fails

1. **Check secrets**: Ensure all three secrets are properly set in GitHub
2. **Verify SSH key**: Make sure the entire private key (including headers/footers) is copied
3. **Test connectivity**: Manually SSH to the server from your local machine
4. **Check user permissions**: Ensure the remote user has the necessary permissions

## GitHub Actions Workflow

The deployment workflow (`.github/workflows/deploy_telemetry.yml`) provides two main operations:

### 1. First-time Setup

Run this once to set up the remote server:

1. Go to GitHub Actions in your repository
2. Select the "Deploy Telemetry Server" workflow
3. Click "Run workflow"
4. Check "Run first-time server setup"
5. Click "Run workflow"

This will:
- Create necessary directories on the remote server
- Set up the systemd service configuration
- Configure log rotation
- Upload the Grafana dashboard and Prometheus configuration

### 2. Deploy Updates

Deployments happen automatically when code is pushed to the `master` branch with changes in the `telemetry/` directory.

To trigger a deployment manually:

1. Go to GitHub Actions in your repository
2. Select the "Deploy Telemetry Server" workflow
3. Click "Run workflow"
4. Check "Deploy telemetry server to remote server"
5. Click "Run workflow"

## Server Directory Structure

After setup, the remote server will have:

```
~/seaweedfs-telemetry/
├── bin/
│   └── telemetry-server       # Binary executable
├── logs/
│   ├── telemetry.log          # Application logs
│   └── telemetry.error.log    # Error logs
├── data/                      # Data directory (if needed)
├── grafana-dashboard.json     # Grafana dashboard configuration
└── prometheus.yml             # Prometheus configuration
```

## Service Management

The telemetry server runs as a systemd service:

```bash
# Check service status
sudo systemctl status telemetry.service

# View logs
sudo journalctl -u telemetry.service -f

# Restart service
sudo systemctl restart telemetry.service

# Stop/start service
sudo systemctl stop telemetry.service
sudo systemctl start telemetry.service
```

## Accessing the Service

After deployment, the telemetry server will be available at:

- **Dashboard**: `http://your-server:8353`
- **API**: `http://your-server:8353/api/*`
- **Metrics**: `http://your-server:8353/metrics`
- **Health Check**: `http://your-server:8353/health`

## Optional: Prometheus and Grafana Integration

### Prometheus Setup

1. Install Prometheus on your server
2. Update `/etc/prometheus/prometheus.yml` to include:

```yaml
scrape_configs:
  - job_name: 'seaweedfs-telemetry'
    static_configs:
      - targets: ['localhost:8353']
    metrics_path: '/metrics'
```

### Grafana Setup

1. Install Grafana on your server
2. Import the dashboard from `~/seaweedfs-telemetry/grafana-dashboard.json`
3. Configure Prometheus as a data source pointing to your Prometheus instance

## Troubleshooting

### Deployment Fails

1. Check GitHub Actions logs for detailed error messages
2. Verify SSH connectivity: `ssh user@host`
3. Ensure all required secrets are configured in GitHub

### Service Won't Start

1. Check service logs: `sudo journalctl -u telemetry.service`
2. Verify binary permissions: `ls -la ~/seaweedfs-telemetry/bin/`
3. Test the binary manually: `~/seaweedfs-telemetry/bin/telemetry-server -help`

### Port Conflicts

If port 8353 is already in use:

1. Edit the systemd service: `sudo systemctl edit telemetry.service`
2. Add an override configuration:

```ini
[Service]
ExecStart=
ExecStart=/home/user/seaweedfs-telemetry/bin/telemetry-server -port=8354
```

3. Reload and restart: `sudo systemctl daemon-reload && sudo systemctl restart telemetry.service`

## Security Considerations

1. **Firewall**: Consider restricting access to telemetry ports
2. **SSH Keys**: Use dedicated SSH keys with minimal permissions
3. **User Permissions**: Run the service as a non-privileged user
4. **Network**: Consider running on internal networks only

## Monitoring

Monitor the deployment and service health:

- **GitHub Actions**: Check workflow runs for deployment status
- **System Logs**: `sudo journalctl -u telemetry.service`
- **Application Logs**: `tail -f ~/seaweedfs-telemetry/logs/telemetry.log`
- **Health Endpoint**: `curl http://localhost:8353/health`
- **Metrics**: `curl http://localhost:8353/metrics`
telemetry/README.md
@@ -0,0 +1,351 @@
# SeaweedFS Telemetry System

A privacy-respecting telemetry system for SeaweedFS that collects cluster-level usage statistics and provides visualization through Prometheus and Grafana.

## Features

- **Privacy-First Design**: Uses in-memory cluster IDs (regenerated on restart); no personal data collection
- **Prometheus Integration**: Native Prometheus metrics for monitoring and alerting
- **Grafana Dashboards**: Pre-built dashboards for data visualization
- **Protocol Buffers**: Efficient binary data transmission for optimal performance
- **Opt-in Only**: Disabled by default, requires explicit configuration
- **Docker Compose**: Complete monitoring stack deployment
- **Automatic Cleanup**: Configurable data retention policies

## Architecture

```
SeaweedFS Cluster → Telemetry Client → Telemetry Server → Prometheus → Grafana
                       (protobuf)         (metrics)         (queries)
```

## Data Transmission

The telemetry system uses **Protocol Buffers exclusively** for efficient binary data transmission:

- **Compact Format**: 30-50% smaller than JSON
- **Fast Serialization**: Better performance than text-based formats
- **Type Safety**: Strong typing with generated Go structs
- **Schema Evolution**: Built-in versioning support

### Protobuf Schema

```protobuf
message TelemetryData {
  string cluster_id = 1;          // In-memory generated UUID
  string version = 2;             // SeaweedFS version
  string os = 3;                  // Operating system
  repeated string features = 4;   // Enabled features
  string deployment = 5;          // Deployment type
  int32 volume_server_count = 6;  // Number of volume servers
  uint64 total_disk_bytes = 7;    // Total disk usage
  int32 total_volume_count = 8;   // Total volume count
  int32 filer_count = 9;          // Number of filer servers
  int32 broker_count = 10;        // Number of broker servers
  int64 timestamp = 11;           // Collection timestamp
}
```
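
To make the wire format concrete, here is a minimal Go sketch that fills in the generated struct and serializes it with the protobuf runtime (the import path follows the `go_package` option; all field values are illustrative):

```go
package main

import (
	"fmt"
	"time"

	"google.golang.org/protobuf/proto"

	telemetry "github.com/seaweedfs/seaweedfs/telemetry/proto"
)

func main() {
	data := &telemetry.TelemetryData{
		ClusterId:         "a1b2c3d4-...", // in-memory UUID, regenerated on restart
		Version:           "3.45",
		Os:                "linux/amd64",
		Features:          []string{"filer", "s3api"},
		Deployment:        "cluster",
		VolumeServerCount: 5,
		TotalDiskBytes:    1073741824,
		TotalVolumeCount:  120,
		FilerCount:        2,
		BrokerCount:       1,
		Timestamp:         time.Now().Unix(),
	}

	// Wrap the data in the request envelope and serialize to the
	// compact binary wire format.
	payload, err := proto.Marshal(&telemetry.TelemetryRequest{Data: data})
	if err != nil {
		panic(err)
	}
	fmt.Printf("serialized telemetry payload: %d bytes\n", len(payload))
}
```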

## Privacy Approach

- **No Personal Data**: No hostnames, IP addresses, or user information
- **In-Memory IDs**: Cluster IDs are generated in-memory and change on restart
- **Aggregated Data**: Only cluster-level statistics, no individual file/user data
- **Opt-in Only**: Telemetry is disabled by default
- **Transparent**: Open source implementation, clear data collection policy

## Collected Data

| Field | Description | Example |
|-------|-------------|---------|
| `cluster_id` | In-memory UUID (changes on restart) | `a1b2c3d4-...` |
| `version` | SeaweedFS version | `3.45` |
| `os` | Operating system and architecture | `linux/amd64` |
| `features` | Enabled components | `["filer", "s3api"]` |
| `deployment` | Deployment type | `cluster` |
| `volume_server_count` | Number of volume servers | `5` |
| `total_disk_bytes` | Total disk usage across cluster | `1073741824` |
| `total_volume_count` | Total number of volumes | `120` |
| `filer_count` | Number of filer servers | `2` |
| `broker_count` | Number of broker servers | `1` |
| `timestamp` | When data was collected | `1640995200` |

## Quick Start

### 1. Deploy Telemetry Server

```bash
# Clone and start the complete monitoring stack
git clone https://github.com/seaweedfs/seaweedfs.git
cd seaweedfs/telemetry
docker-compose up -d

# Or run the server directly
cd server
go run . -port=8080 -dashboard=true
```

### 2. Configure SeaweedFS

```bash
# Enable telemetry in the SeaweedFS master (uses the default telemetry.seaweedfs.com:3091)
weed master -telemetry=true

# Or in server mode
weed server -telemetry=true

# Or specify a custom telemetry server
weed master -telemetry=true -telemetry.url=http://localhost:8080/api/collect
```

### 3. Access Dashboards

- **Telemetry Server**: http://localhost:8080
- **Prometheus**: http://localhost:9090
- **Grafana**: http://localhost:3000 (admin/admin)

## Configuration

### SeaweedFS Master/Server

```bash
# Enable telemetry
-telemetry=true

# Set a custom telemetry server URL (optional, defaults to telemetry.seaweedfs.com:3091)
-telemetry.url=http://your-telemetry-server:8080/api/collect
```

### Telemetry Server

```bash
# Server configuration
-port=8080        # Server port
-dashboard=true   # Enable built-in dashboard
-cleanup=24h      # Cleanup interval
-max-age=720h     # Maximum data retention (30 days)

# Example
./telemetry-server -port=8080 -dashboard=true -cleanup=24h -max-age=720h
```

## Prometheus Metrics

The telemetry server exposes these Prometheus metrics:

### Cluster Metrics

- `seaweedfs_telemetry_total_clusters`: Total unique clusters (30 days)
- `seaweedfs_telemetry_active_clusters`: Active clusters (7 days)

### Per-Cluster Metrics

- `seaweedfs_telemetry_volume_servers{cluster_id, version, os, deployment}`: Volume servers per cluster
- `seaweedfs_telemetry_disk_bytes{cluster_id, version, os, deployment}`: Disk usage per cluster
- `seaweedfs_telemetry_volume_count{cluster_id, version, os, deployment}`: Volume count per cluster
- `seaweedfs_telemetry_filer_count{cluster_id, version, os, deployment}`: Filer servers per cluster
- `seaweedfs_telemetry_broker_count{cluster_id, version, os, deployment}`: Broker servers per cluster
- `seaweedfs_telemetry_cluster_info{cluster_id, version, os, deployment, features}`: Cluster metadata

### Server Metrics

- `seaweedfs_telemetry_reports_received_total`: Total telemetry reports received

## API Endpoints

### Data Collection

```bash
# Submit telemetry data (protobuf only)
POST /api/collect
Content-Type: application/x-protobuf

[TelemetryRequest protobuf data]
```
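
For illustration, a hedged Go sketch of a client call against this endpoint (the URL is the local Quick Start server, and `payload` stands in for a marshaled `TelemetryRequest` from the sketch above):

```go
package main

import (
	"bytes"
	"log"
	"net/http"
)

func main() {
	var payload []byte // a proto.Marshal'ed TelemetryRequest (see the earlier sketch)

	// POST the binary payload with the protobuf content type.
	resp, err := http.Post(
		"http://localhost:8080/api/collect",
		"application/x-protobuf",
		bytes.NewReader(payload),
	)
	if err != nil {
		log.Fatalf("telemetry POST failed: %v", err)
	}
	defer resp.Body.Close()
	log.Printf("telemetry server responded with HTTP %d", resp.StatusCode)
}
```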

### Statistics (JSON for dashboard/debugging)

```bash
# Get aggregated statistics
GET /api/stats

# Get recent cluster instances
GET /api/instances?limit=100

# Get metrics over time
GET /api/metrics?days=30
```
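
For quick debugging from Go rather than curl, a small sketch that fetches the aggregated statistics (the response is JSON, but its exact shape is server-defined, so the sketch just relays the body):

```go
package main

import (
	"fmt"
	"io"
	"log"
	"net/http"
)

func main() {
	resp, err := http.Get("http://localhost:8080/api/stats")
	if err != nil {
		log.Fatalf("stats request failed: %v", err)
	}
	defer resp.Body.Close()

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		log.Fatalf("reading stats body: %v", err)
	}
	fmt.Println(string(body)) // raw JSON from the telemetry server
}
```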

### Monitoring

```bash
# Prometheus metrics
GET /metrics
```

## Docker Deployment

### Complete Stack (Recommended)

```yaml
# docker-compose.yml
version: '3.8'
services:
  telemetry-server:
    build: ./server
    ports:
      - "8080:8080"
    command: ["-port=8080", "-dashboard=true", "-cleanup=24h"]

  prometheus:
    image: prom/prometheus:latest
    ports:
      - "9090:9090"
    volumes:
      - ./prometheus.yml:/etc/prometheus/prometheus.yml

  grafana:
    image: grafana/grafana:latest
    ports:
      - "3000:3000"
    environment:
      - GF_SECURITY_ADMIN_PASSWORD=admin
    volumes:
      - ./grafana-provisioning:/etc/grafana/provisioning
      - ./grafana-dashboard.json:/var/lib/grafana/dashboards/seaweedfs.json
```

```bash
# Deploy the stack
docker-compose up -d

# Scale the telemetry server if needed
docker-compose up -d --scale telemetry-server=3
```

### Server Only

```bash
# Build and run the telemetry server
cd server
docker build -t seaweedfs-telemetry .
docker run -p 8080:8080 seaweedfs-telemetry -port=8080 -dashboard=true
```

## Development

### Protocol Buffer Development

```bash
# Generate protobuf code
cd telemetry
protoc --go_out=. --go_opt=paths=source_relative proto/telemetry.proto

# The generated code is already included in the repository
```

### Build from Source

```bash
# Build the telemetry server
cd telemetry/server
go build -o telemetry-server .

# Build SeaweedFS with telemetry support
cd ../..
go build -o weed ./weed
```

### Testing

```bash
# Test the telemetry server
cd telemetry/server
go test ./...

# Test protobuf communication (requires protobuf tools);
# a round-trip sketch is shown below
```
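
As a starting point for such a test, a round-trip sketch that exercises the generated types without a live server (assuming it lives next to the generated code in `telemetry/proto`):

```go
package proto

import (
	"testing"

	"google.golang.org/protobuf/proto"
)

// TestTelemetryRoundTrip checks that a TelemetryRequest survives
// marshaling to and unmarshaling from the protobuf wire format.
func TestTelemetryRoundTrip(t *testing.T) {
	in := &TelemetryRequest{
		Data: &TelemetryData{
			ClusterId:         "test-cluster",
			Version:           "3.45",
			VolumeServerCount: 5,
			BrokerCount:       1,
		},
	}

	raw, err := proto.Marshal(in)
	if err != nil {
		t.Fatalf("marshal: %v", err)
	}

	out := &TelemetryRequest{}
	if err := proto.Unmarshal(raw, out); err != nil {
		t.Fatalf("unmarshal: %v", err)
	}
	if out.GetData().GetClusterId() != "test-cluster" {
		t.Errorf("cluster_id mismatch: got %q", out.GetData().GetClusterId())
	}
}
```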

## Grafana Dashboard

The included Grafana dashboard provides:

- **Overview**: Total and active clusters, version distribution
- **Resource Usage**: Volume servers and disk usage over time
- **Deployments**: Deployment type and OS distribution
- **Growth Trends**: Historical growth patterns

### Custom Queries

```promql
# Total active clusters
seaweedfs_telemetry_active_clusters

# Disk usage by version
sum by (version) (seaweedfs_telemetry_disk_bytes)

# Volume servers by deployment type
sum by (deployment) (seaweedfs_telemetry_volume_servers)

# Filer servers by version
sum by (version) (seaweedfs_telemetry_filer_count)

# Broker servers across all clusters
sum(seaweedfs_telemetry_broker_count)

# Growth rate (weekly)
increase(seaweedfs_telemetry_total_clusters[7d])
```

## Security Considerations

- **Network Security**: Use HTTPS in production environments
- **Access Control**: Implement authentication for Grafana and Prometheus
- **Data Retention**: Configure appropriate retention policies
- **Monitoring**: Monitor the telemetry infrastructure itself

## Troubleshooting

### Common Issues

**SeaweedFS not sending data:**

```bash
# Check telemetry configuration
weed master -h | grep telemetry

# Verify connectivity
curl -v http://your-telemetry-server:8080/api/collect
```

**Server not receiving data:**

```bash
# Check server logs
docker-compose logs telemetry-server

# Verify the metrics endpoint
curl http://localhost:8080/metrics
```

**Prometheus not scraping:**

```bash
# Check Prometheus targets
curl http://localhost:9090/api/v1/targets

# Verify the configuration
docker-compose logs prometheus
```

### Debugging

```bash
# Enable verbose logging in SeaweedFS
weed master -v=2 -telemetry=true

# Check telemetry server metrics
curl http://localhost:8080/metrics | grep seaweedfs_telemetry

# Test data flow
curl http://localhost:8080/api/stats
```

## Contributing

1. Fork the repository
2. Create a feature branch
3. Make your changes
4. Add tests if applicable
5. Submit a pull request

## License

This telemetry system is part of SeaweedFS and follows the same Apache 2.0 license.
telemetry/docker-compose.yml
@@ -0,0 +1,55 @@
version: '3.8'

services:
  telemetry-server:
    build: ./server
    ports:
      - "8080:8080"
    command: [
      "./telemetry-server",
      "-port=8080",
      "-dashboard=false",  # Disable built-in dashboard, use Grafana
      "-log=true",
      "-cors=true"
    ]
    networks:
      - telemetry

  prometheus:
    image: prom/prometheus:latest
    ports:
      - "9090:9090"
    volumes:
      - ./prometheus.yml:/etc/prometheus/prometheus.yml
      - prometheus_data:/prometheus
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
      - '--storage.tsdb.path=/prometheus'
      - '--web.console.libraries=/etc/prometheus/console_libraries'
      - '--web.console.templates=/etc/prometheus/consoles'
      - '--storage.tsdb.retention.time=200h'
      - '--web.enable-lifecycle'
    networks:
      - telemetry

  grafana:
    image: grafana/grafana:latest
    ports:
      - "3000:3000"
    environment:
      - GF_SECURITY_ADMIN_PASSWORD=admin
      - GF_USERS_ALLOW_SIGN_UP=false
    volumes:
      - grafana_data:/var/lib/grafana
      - ./grafana-dashboard.json:/var/lib/grafana/dashboards/seaweedfs-telemetry.json
      - ./grafana-provisioning:/etc/grafana/provisioning
    networks:
      - telemetry

volumes:
  prometheus_data:
  grafana_data:

networks:
  telemetry:
    driver: bridge
telemetry/grafana-dashboard.json
@@ -0,0 +1,734 @@
{
  "annotations": {
    "list": [
      {
        "builtIn": 1,
        "datasource": { "type": "grafana", "uid": "-- Grafana --" },
        "enable": true,
        "hide": true,
        "iconColor": "rgba(0, 211, 255, 1)",
        "name": "Annotations & Alerts",
        "type": "dashboard"
      }
    ]
  },
  "editable": true,
  "fiscalYearStartMonth": 0,
  "graphTooltip": 0,
  "id": null,
  "links": [],
  "liveNow": false,
  "panels": [
    {
      "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
      "fieldConfig": {
        "defaults": {
          "color": { "mode": "thresholds" },
          "custom": { "align": "auto", "cellOptions": { "type": "auto" }, "inspect": false },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              { "color": "green", "value": null },
              { "color": "red", "value": 80 }
            ]
          }
        },
        "overrides": []
      },
      "gridPos": { "h": 8, "w": 12, "x": 0, "y": 0 },
      "id": 1,
      "options": { "showHeader": true },
      "pluginVersion": "10.0.0",
      "targets": [
        {
          "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
          "expr": "seaweedfs_telemetry_total_clusters",
          "format": "time_series",
          "refId": "A"
        }
      ],
      "title": "Total SeaweedFS Clusters",
      "type": "stat"
    },
    {
      "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
      "fieldConfig": {
        "defaults": {
          "color": { "mode": "thresholds" },
          "custom": { "align": "auto", "cellOptions": { "type": "auto" }, "inspect": false },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              { "color": "green", "value": null },
              { "color": "red", "value": 80 }
            ]
          }
        },
        "overrides": []
      },
      "gridPos": { "h": 8, "w": 12, "x": 12, "y": 0 },
      "id": 2,
      "options": { "showHeader": true },
      "pluginVersion": "10.0.0",
      "targets": [
        {
          "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
          "expr": "seaweedfs_telemetry_active_clusters",
          "format": "time_series",
          "refId": "A"
        }
      ],
      "title": "Active Clusters (7 days)",
      "type": "stat"
    },
    {
      "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
      "fieldConfig": {
        "defaults": {
          "color": { "mode": "palette-classic" },
          "custom": { "hideFrom": { "legend": false, "tooltip": false, "vis": false } },
          "mappings": []
        },
        "overrides": []
      },
      "gridPos": { "h": 8, "w": 12, "x": 0, "y": 8 },
      "id": 3,
      "options": {
        "legend": { "displayMode": "visible", "placement": "bottom", "showLegend": true },
        "pieType": "pie",
        "reduceOptions": { "values": false, "calcs": ["lastNotNull"], "fields": "" },
        "tooltip": { "mode": "single", "sort": "none" }
      },
      "targets": [
        {
          "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
          "expr": "count by (version) (seaweedfs_telemetry_cluster_info)",
          "format": "time_series",
          "legendFormat": "{{version}}",
          "refId": "A"
        }
      ],
      "title": "SeaweedFS Version Distribution",
      "type": "piechart"
    },
    {
      "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
      "fieldConfig": {
        "defaults": {
          "color": { "mode": "palette-classic" },
          "custom": { "hideFrom": { "legend": false, "tooltip": false, "vis": false } },
          "mappings": []
        },
        "overrides": []
      },
      "gridPos": { "h": 8, "w": 12, "x": 12, "y": 8 },
      "id": 4,
      "options": {
        "legend": { "displayMode": "visible", "placement": "bottom", "showLegend": true },
        "pieType": "pie",
        "reduceOptions": { "values": false, "calcs": ["lastNotNull"], "fields": "" },
        "tooltip": { "mode": "single", "sort": "none" }
      },
      "targets": [
        {
          "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
          "expr": "count by (os) (seaweedfs_telemetry_cluster_info)",
          "format": "time_series",
          "legendFormat": "{{os}}",
          "refId": "A"
        }
      ],
      "title": "Operating System Distribution",
      "type": "piechart"
    },
    {
      "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
      "fieldConfig": {
        "defaults": {
          "color": { "mode": "palette-classic" },
          "custom": {
            "axisLabel": "",
            "axisPlacement": "auto",
            "barAlignment": 0,
            "drawStyle": "line",
            "fillOpacity": 0,
            "gradientMode": "none",
            "hideFrom": { "legend": false, "tooltip": false, "vis": false },
            "lineInterpolation": "linear",
            "lineWidth": 1,
            "pointSize": 5,
            "scaleDistribution": { "type": "linear" },
            "showPoints": "auto",
            "spanNulls": false,
            "stacking": { "group": "A", "mode": "none" },
            "thresholdsStyle": { "mode": "off" }
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              { "color": "green", "value": null },
              { "color": "red", "value": 80 }
            ]
          }
        },
        "overrides": []
      },
      "gridPos": { "h": 8, "w": 24, "x": 0, "y": 16 },
      "id": 5,
      "options": {
        "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true },
        "tooltip": { "mode": "single", "sort": "none" }
      },
      "targets": [
        {
          "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
          "expr": "sum(seaweedfs_telemetry_volume_servers)",
          "format": "time_series",
          "legendFormat": "Total Volume Servers",
          "refId": "A"
        }
      ],
      "title": "Total Volume Servers Over Time",
      "type": "timeseries"
    },
    {
      "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
      "fieldConfig": {
        "defaults": {
          "color": { "mode": "palette-classic" },
          "custom": {
            "axisLabel": "",
            "axisPlacement": "auto",
            "barAlignment": 0,
            "drawStyle": "line",
            "fillOpacity": 0,
            "gradientMode": "none",
            "hideFrom": { "legend": false, "tooltip": false, "vis": false },
            "lineInterpolation": "linear",
            "lineWidth": 1,
            "pointSize": 5,
            "scaleDistribution": { "type": "linear" },
            "showPoints": "auto",
            "spanNulls": false,
            "stacking": { "group": "A", "mode": "none" },
            "thresholdsStyle": { "mode": "off" }
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              { "color": "green", "value": null },
              { "color": "red", "value": 80 }
            ]
          },
          "unit": "bytes"
        },
        "overrides": []
      },
      "gridPos": { "h": 8, "w": 12, "x": 0, "y": 24 },
      "id": 6,
      "options": {
        "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true },
        "tooltip": { "mode": "single", "sort": "none" }
      },
      "targets": [
        {
          "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
          "expr": "sum(seaweedfs_telemetry_disk_bytes)",
          "format": "time_series",
          "legendFormat": "Total Disk Usage",
          "refId": "A"
        }
      ],
      "title": "Total Disk Usage Over Time",
      "type": "timeseries"
    },
    {
      "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
      "fieldConfig": {
        "defaults": {
          "color": { "mode": "palette-classic" },
          "custom": {
            "axisLabel": "",
            "axisPlacement": "auto",
            "barAlignment": 0,
            "drawStyle": "line",
            "fillOpacity": 0,
            "gradientMode": "none",
            "hideFrom": { "legend": false, "tooltip": false, "vis": false },
            "lineInterpolation": "linear",
            "lineWidth": 1,
            "pointSize": 5,
            "scaleDistribution": { "type": "linear" },
            "showPoints": "auto",
            "spanNulls": false,
            "stacking": { "group": "A", "mode": "none" },
            "thresholdsStyle": { "mode": "off" }
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              { "color": "green", "value": null },
              { "color": "red", "value": 80 }
            ]
          }
        },
        "overrides": []
      },
      "gridPos": { "h": 8, "w": 12, "x": 12, "y": 24 },
      "id": 7,
      "options": {
        "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true },
        "tooltip": { "mode": "single", "sort": "none" }
      },
      "targets": [
        {
          "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
          "expr": "sum(seaweedfs_telemetry_volume_count)",
          "format": "time_series",
          "legendFormat": "Total Volume Count",
          "refId": "A"
        }
      ],
      "title": "Total Volume Count Over Time",
      "type": "timeseries"
    },
    {
      "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
      "fieldConfig": {
        "defaults": {
          "color": { "mode": "palette-classic" },
          "custom": {
            "axisLabel": "",
            "axisPlacement": "auto",
            "barAlignment": 0,
            "drawStyle": "line",
            "fillOpacity": 0,
            "gradientMode": "none",
            "hideFrom": { "legend": false, "tooltip": false, "vis": false },
            "lineInterpolation": "linear",
            "lineWidth": 1,
            "pointSize": 5,
            "scaleDistribution": { "type": "linear" },
            "showPoints": "auto",
            "spanNulls": false,
            "stacking": { "group": "A", "mode": "none" },
            "thresholdsStyle": { "mode": "off" }
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              { "color": "green", "value": null },
              { "color": "red", "value": 80 }
            ]
          }
        },
        "overrides": []
      },
      "gridPos": { "h": 8, "w": 12, "x": 0, "y": 32 },
      "id": 8,
      "options": {
        "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true },
        "tooltip": { "mode": "single", "sort": "none" }
      },
      "targets": [
        {
          "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
          "expr": "sum(seaweedfs_telemetry_filer_count)",
          "format": "time_series",
          "legendFormat": "Total Filer Count",
          "refId": "A"
        }
      ],
      "title": "Total Filer Servers Over Time",
      "type": "timeseries"
    },
    {
      "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
      "fieldConfig": {
        "defaults": {
          "color": { "mode": "palette-classic" },
          "custom": {
            "axisLabel": "",
            "axisPlacement": "auto",
            "barAlignment": 0,
            "drawStyle": "line",
            "fillOpacity": 0,
            "gradientMode": "none",
            "hideFrom": { "legend": false, "tooltip": false, "vis": false },
            "lineInterpolation": "linear",
            "lineWidth": 1,
            "pointSize": 5,
            "scaleDistribution": { "type": "linear" },
            "showPoints": "auto",
            "spanNulls": false,
            "stacking": { "group": "A", "mode": "none" },
            "thresholdsStyle": { "mode": "off" }
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              { "color": "green", "value": null },
              { "color": "red", "value": 80 }
            ]
          }
        },
        "overrides": []
      },
      "gridPos": { "h": 8, "w": 12, "x": 12, "y": 32 },
      "id": 9,
      "options": {
        "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true },
        "tooltip": { "mode": "single", "sort": "none" }
      },
      "targets": [
        {
          "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
          "expr": "sum(seaweedfs_telemetry_broker_count)",
          "format": "time_series",
          "legendFormat": "Total Broker Count",
          "refId": "A"
        }
      ],
      "title": "Total Broker Servers Over Time",
      "type": "timeseries"
    }
  ],
  "refresh": "5m",
  "schemaVersion": 38,
  "style": "dark",
  "tags": ["seaweedfs", "telemetry"],
  "templating": { "list": [] },
  "time": { "from": "now-24h", "to": "now" },
  "timepicker": {},
  "timezone": "",
  "title": "SeaweedFS Telemetry Dashboard",
  "uid": "seaweedfs-telemetry",
  "version": 1,
  "weekStart": ""
}
telemetry/grafana-provisioning/dashboards/dashboards.yml
@@ -0,0 +1,12 @@
apiVersion: 1

providers:
  - name: 'seaweedfs'
    orgId: 1
    folder: ''
    type: file
    disableDeletion: false
    updateIntervalSeconds: 10
    allowUiUpdates: true
    options:
      path: /var/lib/grafana/dashboards
telemetry/grafana-provisioning/datasources/prometheus.yml
@@ -0,0 +1,9 @@
apiVersion: 1

datasources:
  - name: Prometheus
    type: prometheus
    access: proxy
    url: http://prometheus:9090
    isDefault: true
    editable: true
telemetry/prometheus.yml
@@ -0,0 +1,15 @@
global:
  scrape_interval: 15s
  evaluation_interval: 15s

rule_files:
  # - "first_rules.yml"
  # - "second_rules.yml"

scrape_configs:
  - job_name: 'seaweedfs-telemetry'
    static_configs:
      - targets: ['telemetry-server:8080']
    scrape_interval: 30s
    metrics_path: '/metrics'
    scrape_timeout: 10s
telemetry/proto/telemetry.pb.go
@@ -0,0 +1,398 @@
// Code generated by protoc-gen-go. DO NOT EDIT.
// versions:
// 	protoc-gen-go v1.34.2
// 	protoc        v5.29.3
// source: proto/telemetry.proto

package proto

import (
	protoreflect "google.golang.org/protobuf/reflect/protoreflect"
	protoimpl "google.golang.org/protobuf/runtime/protoimpl"
	reflect "reflect"
	sync "sync"
)

const (
	// Verify that this generated code is sufficiently up-to-date.
	_ = protoimpl.EnforceVersion(20 - protoimpl.MinVersion)
	// Verify that runtime/protoimpl is sufficiently up-to-date.
	_ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20)
)

// TelemetryData represents cluster-level telemetry information
type TelemetryData struct {
	state         protoimpl.MessageState
	sizeCache     protoimpl.SizeCache
	unknownFields protoimpl.UnknownFields

	// Unique cluster identifier (generated in-memory)
	ClusterId string `protobuf:"bytes,1,opt,name=cluster_id,json=clusterId,proto3" json:"cluster_id,omitempty"`
	// SeaweedFS version
	Version string `protobuf:"bytes,2,opt,name=version,proto3" json:"version,omitempty"`
	// Operating system (e.g., "linux/amd64")
	Os string `protobuf:"bytes,3,opt,name=os,proto3" json:"os,omitempty"`
	// Enabled features (e.g., ["filer", "s3api", "mq"])
	Features []string `protobuf:"bytes,4,rep,name=features,proto3" json:"features,omitempty"`
	// Deployment type ("standalone", "cluster", "master-only", "volume-only")
	Deployment string `protobuf:"bytes,5,opt,name=deployment,proto3" json:"deployment,omitempty"`
	// Number of volume servers in the cluster
	VolumeServerCount int32 `protobuf:"varint,6,opt,name=volume_server_count,json=volumeServerCount,proto3" json:"volume_server_count,omitempty"`
	// Total disk usage across all volume servers (in bytes)
	TotalDiskBytes uint64 `protobuf:"varint,7,opt,name=total_disk_bytes,json=totalDiskBytes,proto3" json:"total_disk_bytes,omitempty"`
	// Total number of volumes in the cluster
	TotalVolumeCount int32 `protobuf:"varint,8,opt,name=total_volume_count,json=totalVolumeCount,proto3" json:"total_volume_count,omitempty"`
	// Number of filer servers in the cluster
	FilerCount int32 `protobuf:"varint,9,opt,name=filer_count,json=filerCount,proto3" json:"filer_count,omitempty"`
	// Number of broker servers in the cluster
	BrokerCount int32 `protobuf:"varint,10,opt,name=broker_count,json=brokerCount,proto3" json:"broker_count,omitempty"`
	// Unix timestamp when the data was collected
	Timestamp int64 `protobuf:"varint,11,opt,name=timestamp,proto3" json:"timestamp,omitempty"`
}

func (x *TelemetryData) Reset() {
	*x = TelemetryData{}
	if protoimpl.UnsafeEnabled {
		mi := &file_proto_telemetry_proto_msgTypes[0]
		ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
		ms.StoreMessageInfo(mi)
	}
}

func (x *TelemetryData) String() string {
	return protoimpl.X.MessageStringOf(x)
}

func (*TelemetryData) ProtoMessage() {}

func (x *TelemetryData) ProtoReflect() protoreflect.Message {
	mi := &file_proto_telemetry_proto_msgTypes[0]
	if protoimpl.UnsafeEnabled && x != nil {
		ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
		if ms.LoadMessageInfo() == nil {
			ms.StoreMessageInfo(mi)
		}
		return ms
	}
	return mi.MessageOf(x)
}

// Deprecated: Use TelemetryData.ProtoReflect.Descriptor instead.
func (*TelemetryData) Descriptor() ([]byte, []int) {
	return file_proto_telemetry_proto_rawDescGZIP(), []int{0}
}

func (x *TelemetryData) GetClusterId() string {
	if x != nil {
		return x.ClusterId
	}
	return ""
}

func (x *TelemetryData) GetVersion() string {
	if x != nil {
		return x.Version
	}
	return ""
}

func (x *TelemetryData) GetOs() string {
	if x != nil {
		return x.Os
	}
	return ""
}

func (x *TelemetryData) GetFeatures() []string {
	if x != nil {
		return x.Features
	}
	return nil
}

func (x *TelemetryData) GetDeployment() string {
	if x != nil {
		return x.Deployment
	}
	return ""
}

func (x *TelemetryData) GetVolumeServerCount() int32 {
	if x != nil {
		return x.VolumeServerCount
	}
	return 0
}

func (x *TelemetryData) GetTotalDiskBytes() uint64 {
	if x != nil {
		return x.TotalDiskBytes
	}
	return 0
}

func (x *TelemetryData) GetTotalVolumeCount() int32 {
	if x != nil {
		return x.TotalVolumeCount
	}
	return 0
}

func (x *TelemetryData) GetFilerCount() int32 {
	if x != nil {
		return x.FilerCount
	}
	return 0
}

func (x *TelemetryData) GetBrokerCount() int32 {
	if x != nil {
		return x.BrokerCount
	}
	return 0
}

func (x *TelemetryData) GetTimestamp() int64 {
	if x != nil {
		return x.Timestamp
	}
	return 0
}

// TelemetryRequest is sent from SeaweedFS clusters to the telemetry server
type TelemetryRequest struct {
	state         protoimpl.MessageState
	sizeCache     protoimpl.SizeCache
	unknownFields protoimpl.UnknownFields

	Data *TelemetryData `protobuf:"bytes,1,opt,name=data,proto3" json:"data,omitempty"`
}

func (x *TelemetryRequest) Reset() {
	*x = TelemetryRequest{}
	if protoimpl.UnsafeEnabled {
		mi := &file_proto_telemetry_proto_msgTypes[1]
		ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
		ms.StoreMessageInfo(mi)
	}
}

func (x *TelemetryRequest) String() string {
	return protoimpl.X.MessageStringOf(x)
}

func (*TelemetryRequest) ProtoMessage() {}

func (x *TelemetryRequest) ProtoReflect() protoreflect.Message {
	mi := &file_proto_telemetry_proto_msgTypes[1]
	if protoimpl.UnsafeEnabled && x != nil {
		ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
		if ms.LoadMessageInfo() == nil {
			ms.StoreMessageInfo(mi)
		}
		return ms
	}
	return mi.MessageOf(x)
}

// Deprecated: Use TelemetryRequest.ProtoReflect.Descriptor instead.
func (*TelemetryRequest) Descriptor() ([]byte, []int) {
	return file_proto_telemetry_proto_rawDescGZIP(), []int{1}
}

func (x *TelemetryRequest) GetData() *TelemetryData {
	if x != nil {
		return x.Data
	}
	return nil
}

// TelemetryResponse is returned by the telemetry server
type TelemetryResponse struct {
	state         protoimpl.MessageState
	sizeCache     protoimpl.SizeCache
	unknownFields protoimpl.UnknownFields

	Success bool   `protobuf:"varint,1,opt,name=success,proto3" json:"success,omitempty"`
	Message string `protobuf:"bytes,2,opt,name=message,proto3" json:"message,omitempty"`
}

func (x *TelemetryResponse) Reset() {
	*x = TelemetryResponse{}
	if protoimpl.UnsafeEnabled {
		mi := &file_proto_telemetry_proto_msgTypes[2]
		ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
		ms.StoreMessageInfo(mi)
	}
}

func (x *TelemetryResponse) String() string {
	return protoimpl.X.MessageStringOf(x)
}

func (*TelemetryResponse) ProtoMessage() {}

func (x *TelemetryResponse) ProtoReflect() protoreflect.Message {
	mi := &file_proto_telemetry_proto_msgTypes[2]
	if protoimpl.UnsafeEnabled && x != nil {
		ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
		if ms.LoadMessageInfo() == nil {
			ms.StoreMessageInfo(mi)
		}
		return ms
	}
	return mi.MessageOf(x)
}

// Deprecated: Use TelemetryResponse.ProtoReflect.Descriptor instead.
func (*TelemetryResponse) Descriptor() ([]byte, []int) {
	return file_proto_telemetry_proto_rawDescGZIP(), []int{2}
}

func (x *TelemetryResponse) GetSuccess() bool {
	if x != nil {
		return x.Success
	}
	return false
}

func (x *TelemetryResponse) GetMessage() string {
	if x != nil {
		return x.Message
	}
	return ""
}

var File_proto_telemetry_proto protoreflect.FileDescriptor

var file_proto_telemetry_proto_rawDesc = []byte{
	0x0a, 0x15, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2f, 0x74, 0x65, 0x6c, 0x65, 0x6d, 0x65, 0x74, 0x72,
	0x79, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x12, 0x09, 0x74, 0x65, 0x6c, 0x65, 0x6d, 0x65, 0x74,
	0x72, 0x79, 0x22, 0xfe, 0x02, 0x0a, 0x0d, 0x54, 0x65, 0x6c, 0x65, 0x6d, 0x65, 0x74, 0x72, 0x79,
	0x44, 0x61, 0x74, 0x61, 0x12, 0x1d, 0x0a, 0x0a, 0x63, 0x6c, 0x75, 0x73, 0x74, 0x65, 0x72, 0x5f,
	0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x09, 0x63, 0x6c, 0x75, 0x73, 0x74, 0x65,
	0x72, 0x49, 0x64, 0x12, 0x18, 0x0a, 0x07, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x18, 0x02,
	0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x12, 0x0e, 0x0a,
	0x02, 0x6f, 0x73, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x02, 0x6f, 0x73, 0x12, 0x1a, 0x0a,
	0x08, 0x66, 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, 0x73, 0x18, 0x04, 0x20, 0x03, 0x28, 0x09, 0x52,
	0x08, 0x66, 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, 0x73, 0x12, 0x1e, 0x0a, 0x0a, 0x64, 0x65, 0x70,
	0x6c, 0x6f, 0x79, 0x6d, 0x65, 0x6e, 0x74, 0x18, 0x05, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0a, 0x64,
	0x65, 0x70, 0x6c, 0x6f, 0x79, 0x6d, 0x65, 0x6e, 0x74, 0x12, 0x2e, 0x0a, 0x13, 0x76, 0x6f, 0x6c,
	0x75, 0x6d, 0x65, 0x5f, 0x73, 0x65, 0x72, 0x76, 0x65, 0x72, 0x5f, 0x63, 0x6f, 0x75, 0x6e, 0x74,
	0x18, 0x06, 0x20, 0x01, 0x28, 0x05, 0x52, 0x11, 0x76, 0x6f, 0x6c, 0x75, 0x6d, 0x65, 0x53, 0x65,
	0x72, 0x76, 0x65, 0x72, 0x43, 0x6f, 0x75, 0x6e, 0x74, 0x12, 0x28, 0x0a, 0x10, 0x74, 0x6f, 0x74,
	0x61, 0x6c, 0x5f, 0x64, 0x69, 0x73, 0x6b, 0x5f, 0x62, 0x79, 0x74, 0x65, 0x73, 0x18, 0x07, 0x20,
	0x01, 0x28, 0x04, 0x52, 0x0e, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x44, 0x69, 0x73, 0x6b, 0x42, 0x79,
	0x74, 0x65, 0x73, 0x12, 0x2c, 0x0a, 0x12, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x5f, 0x76, 0x6f, 0x6c,
	0x75, 0x6d, 0x65, 0x5f, 0x63, 0x6f, 0x75, 0x6e, 0x74, 0x18, 0x08, 0x20, 0x01, 0x28, 0x05, 0x52,
	0x10, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x56, 0x6f, 0x6c, 0x75, 0x6d, 0x65, 0x43, 0x6f, 0x75, 0x6e,
	0x74, 0x12, 0x1f, 0x0a, 0x0b, 0x66, 0x69, 0x6c, 0x65, 0x72, 0x5f, 0x63, 0x6f, 0x75, 0x6e, 0x74,
	0x18, 0x09, 0x20, 0x01, 0x28, 0x05, 0x52, 0x0a, 0x66, 0x69, 0x6c, 0x65, 0x72, 0x43, 0x6f, 0x75,
	0x6e, 0x74, 0x12, 0x21, 0x0a, 0x0c, 0x62, 0x72, 0x6f, 0x6b, 0x65, 0x72, 0x5f, 0x63, 0x6f, 0x75,
	0x6e, 0x74, 0x18, 0x0a, 0x20, 0x01, 0x28, 0x05, 0x52, 0x0b, 0x62, 0x72, 0x6f, 0x6b, 0x65, 0x72,
	0x43, 0x6f, 0x75, 0x6e, 0x74, 0x12, 0x1c, 0x0a, 0x09, 0x74, 0x69, 0x6d, 0x65, 0x73, 0x74, 0x61,
	0x6d, 0x70, 0x18, 0x0b, 0x20, 0x01, 0x28, 0x03, 0x52, 0x09, 0x74, 0x69, 0x6d, 0x65, 0x73, 0x74,
	0x61, 0x6d, 0x70, 0x22, 0x40, 0x0a, 0x10, 0x54, 0x65, 0x6c, 0x65, 0x6d, 0x65, 0x74, 0x72, 0x79,
	0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x2c, 0x0a, 0x04, 0x64, 0x61, 0x74, 0x61, 0x18,
	0x01, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x18, 0x2e, 0x74, 0x65, 0x6c, 0x65, 0x6d, 0x65, 0x74, 0x72,
	0x79, 0x2e, 0x54, 0x65, 0x6c, 0x65, 0x6d, 0x65, 0x74, 0x72, 0x79, 0x44, 0x61, 0x74, 0x61, 0x52,
	0x04, 0x64, 0x61, 0x74, 0x61, 0x22, 0x47, 0x0a, 0x11, 0x54, 0x65, 0x6c, 0x65, 0x6d, 0x65, 0x74,
	0x72, 0x79, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x18, 0x0a, 0x07, 0x73, 0x75,
	0x63, 0x63, 0x65, 0x73, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x08, 0x52, 0x07, 0x73, 0x75, 0x63,
	0x63, 0x65, 0x73, 0x73, 0x12, 0x18, 0x0a, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x18,
	0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x42, 0x30,
	0x5a, 0x2e, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x73, 0x65, 0x61,
	0x77, 0x65, 0x65, 0x64, 0x66, 0x73, 0x2f, 0x73, 0x65, 0x61, 0x77, 0x65, 0x65, 0x64, 0x66, 0x73,
	0x2f, 0x74, 0x65, 0x6c, 0x65, 0x6d, 0x65, 0x74, 0x72, 0x79, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f,
	0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33,
}

var (
	file_proto_telemetry_proto_rawDescOnce sync.Once
	file_proto_telemetry_proto_rawDescData = file_proto_telemetry_proto_rawDesc
)

func file_proto_telemetry_proto_rawDescGZIP() []byte {
	file_proto_telemetry_proto_rawDescOnce.Do(func() {
		file_proto_telemetry_proto_rawDescData = protoimpl.X.CompressGZIP(file_proto_telemetry_proto_rawDescData)
	})
	return file_proto_telemetry_proto_rawDescData
}

var file_proto_telemetry_proto_msgTypes = make([]protoimpl.MessageInfo, 3)
var file_proto_telemetry_proto_goTypes = []any{
	(*TelemetryData)(nil),     // 0: telemetry.TelemetryData
	(*TelemetryRequest)(nil),  // 1: telemetry.TelemetryRequest
	(*TelemetryResponse)(nil), // 2: telemetry.TelemetryResponse
}
var file_proto_telemetry_proto_depIdxs = []int32{
	0, // 0: telemetry.TelemetryRequest.data:type_name -> telemetry.TelemetryData
	1, // [1:1] is the sub-list for method output_type
	1, // [1:1] is the sub-list for method input_type
	1, // [1:1] is the sub-list for extension type_name
	1, // [1:1] is the sub-list for extension extendee
	0, // [0:1] is the sub-list for field type_name
}

func init() { file_proto_telemetry_proto_init() }
func file_proto_telemetry_proto_init() {
	if File_proto_telemetry_proto != nil {
		return
	}
	if !protoimpl.UnsafeEnabled {
		file_proto_telemetry_proto_msgTypes[0].Exporter = func(v any, i int) any {
			switch v := v.(*TelemetryData); i {
			case 0:
				return &v.state
			case 1:
				return &v.sizeCache
			case 2:
				return &v.unknownFields
			default:
				return nil
			}
		}
		file_proto_telemetry_proto_msgTypes[1].Exporter = func(v any, i int) any {
			switch v := v.(*TelemetryRequest); i {
			case 0:
				return &v.state
			case 1:
				return &v.sizeCache
			case 2:
				return &v.unknownFields
			default:
				return nil
			}
		}
		file_proto_telemetry_proto_msgTypes[2].Exporter = func(v any, i int) any {
			switch v := v.(*TelemetryResponse); i {
			case 0:
				return &v.state
			case 1:
				return &v.sizeCache
			case 2:
				return &v.unknownFields
			default:
				return nil
			}
		}
	}
	type x struct{}
	out := protoimpl.TypeBuilder{
		File: protoimpl.DescBuilder{
			GoPackagePath: reflect.TypeOf(x{}).PkgPath(),
			RawDescriptor: file_proto_telemetry_proto_rawDesc,
			NumEnums:      0,
			NumMessages:   3,
			NumExtensions: 0,
			NumServices:   0,
		},
		GoTypes:           file_proto_telemetry_proto_goTypes,
		DependencyIndexes: file_proto_telemetry_proto_depIdxs,
		MessageInfos:      file_proto_telemetry_proto_msgTypes,
	}.Build()
	File_proto_telemetry_proto = out.File
	file_proto_telemetry_proto_rawDesc = nil
	file_proto_telemetry_proto_goTypes = nil
	file_proto_telemetry_proto_depIdxs = nil
}
@@ -0,0 +1,52 @@
syntax = "proto3";

package telemetry;

option go_package = "github.com/seaweedfs/seaweedfs/telemetry/proto";

// TelemetryData represents cluster-level telemetry information
message TelemetryData {
  // Unique cluster identifier (generated in-memory)
  string cluster_id = 1;

  // SeaweedFS version
  string version = 2;

  // Operating system (e.g., "linux/amd64")
  string os = 3;

  // Enabled features (e.g., ["filer", "s3api", "mq"])
  repeated string features = 4;

  // Deployment type ("standalone", "cluster", "master-only", "volume-only")
  string deployment = 5;

  // Number of volume servers in the cluster
  int32 volume_server_count = 6;

  // Total disk usage across all volume servers (in bytes)
  uint64 total_disk_bytes = 7;

  // Total number of volumes in the cluster
  int32 total_volume_count = 8;

  // Number of filer servers in the cluster
  int32 filer_count = 9;

  // Number of broker servers in the cluster
  int32 broker_count = 10;

  // Unix timestamp when the data was collected
  int64 timestamp = 11;
}

// TelemetryRequest is sent from SeaweedFS clusters to the telemetry server
message TelemetryRequest {
  TelemetryData data = 1;
}

// TelemetryResponse is returned by the telemetry server
message TelemetryResponse {
  bool success = 1;
  string message = 2;
}
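For reference, a minimal sketch of what one report looks like on the wire, built from the generated Go types in `telemetry/proto` (the field values here are made up for illustration):

```go
package main

import (
	"fmt"
	"log"

	"github.com/seaweedfs/seaweedfs/telemetry/proto"
	protobuf "google.golang.org/protobuf/proto"
)

func main() {
	req := &proto.TelemetryRequest{
		Data: &proto.TelemetryData{
			ClusterId:         "example-cluster", // normally a per-process UUID
			Version:           "3.45",
			Os:                "linux/amd64",
			VolumeServerCount: 3,
		},
	}
	body, err := protobuf.Marshal(req)
	if err != nil {
		log.Fatal(err)
	}
	// This compact binary payload is what gets POSTed to /api/collect.
	fmt.Printf("wire size: %d bytes\n", len(body))
}
```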
@@ -0,0 +1,18 @@
FROM golang:1.21-alpine AS builder

WORKDIR /app
COPY go.mod go.sum ./
RUN go mod download

COPY . .
RUN CGO_ENABLED=0 GOOS=linux go build -a -installsuffix cgo -ldflags '-extldflags "-static"' -o telemetry-server .

FROM alpine:latest
RUN apk --no-cache add ca-certificates
WORKDIR /root/

COPY --from=builder /app/telemetry-server .

EXPOSE 8080

CMD ["./telemetry-server"]
@@ -0,0 +1,97 @@
.PHONY: build run clean test deps proto integration-test test-all

# Build the telemetry server
build:
	go build -o telemetry-server .

# Run the server in development mode
run:
	go run . -port=8080 -dashboard=true -cleanup=1h -max-age=24h

# Run the server in production mode
run-prod:
	./telemetry-server -port=8080 -dashboard=true -cleanup=24h -max-age=720h

# Clean build artifacts
clean:
	rm -f telemetry-server
	rm -f ../test/telemetry-server-test.log
	go clean

# Run unit tests
test:
	go test ./...

# Run integration tests
integration-test:
	@echo "🧪 Running telemetry integration tests..."
	cd ../../ && go run telemetry/test/integration.go

# Run all tests (unit + integration)
test-all: test integration-test

# Install dependencies
deps:
	go mod download
	go mod tidy

# Generate protobuf code (requires protoc)
proto:
	cd .. && protoc --go_out=. --go_opt=paths=source_relative proto/telemetry.proto

# Build Docker image
docker-build:
	docker build -t seaweedfs-telemetry .

# Run with Docker
docker-run:
	docker run -p 8080:8080 seaweedfs-telemetry -port=8080 -dashboard=true

# Development with auto-reload (requires air: go install github.com/cosmtrek/air@latest)
dev:
	air

# Check if protoc is available
check-protoc:
	@which protoc > /dev/null || (echo "protoc is required for proto generation. Install from https://grpc.io/docs/protoc-installation/" && exit 1)

# Full development setup
setup: check-protoc deps proto build

# Run a quick smoke test
smoke-test: build
	@echo "🔥 Running smoke test..."
	@timeout 10s ./telemetry-server -port=18081 > /dev/null 2>&1 & \
	SERVER_PID=$$!; \
	sleep 2; \
	if curl -s http://localhost:18081/health > /dev/null; then \
		echo "✅ Smoke test passed - server responds to health check"; \
	else \
		echo "❌ Smoke test failed - server not responding"; \
		exit 1; \
	fi; \
	kill $$SERVER_PID 2>/dev/null || true

# Continuous integration target
ci: deps proto build test integration-test
	@echo "🎉 All CI tests passed!"

# Help
help:
	@echo "Available targets:"
	@echo "  build            - Build the telemetry server binary"
	@echo "  run              - Run server in development mode"
	@echo "  run-prod         - Run server in production mode"
	@echo "  clean            - Clean build artifacts"
	@echo "  test             - Run unit tests"
	@echo "  integration-test - Run integration tests"
	@echo "  test-all         - Run all tests (unit + integration)"
	@echo "  deps             - Install Go dependencies"
	@echo "  proto            - Generate protobuf code"
	@echo "  docker-build     - Build Docker image"
	@echo "  docker-run       - Run with Docker"
	@echo "  dev              - Run with auto-reload (requires air)"
	@echo "  smoke-test       - Quick server health check"
	@echo "  setup            - Full development setup"
	@echo "  ci               - Continuous integration (all tests)"
	@echo "  help             - Show this help"
@@ -0,0 +1,152 @@
package api

import (
	"encoding/json"
	"io"
	"net/http"
	"strconv"
	"time"

	"github.com/seaweedfs/seaweedfs/telemetry/proto"
	"github.com/seaweedfs/seaweedfs/telemetry/server/storage"
	protobuf "google.golang.org/protobuf/proto"
)

type Handler struct {
	storage *storage.PrometheusStorage
}

func NewHandler(storage *storage.PrometheusStorage) *Handler {
	return &Handler{storage: storage}
}

func (h *Handler) CollectTelemetry(w http.ResponseWriter, r *http.Request) {
	if r.Method != http.MethodPost {
		http.Error(w, "Method not allowed", http.StatusMethodNotAllowed)
		return
	}

	contentType := r.Header.Get("Content-Type")

	// Only accept protobuf content type
	if contentType != "application/x-protobuf" && contentType != "application/protobuf" {
		http.Error(w, "Content-Type must be application/x-protobuf", http.StatusUnsupportedMediaType)
		return
	}

	// Read protobuf request
	body, err := io.ReadAll(r.Body)
	if err != nil {
		http.Error(w, "Failed to read request body", http.StatusBadRequest)
		return
	}

	req := &proto.TelemetryRequest{}
	if err := protobuf.Unmarshal(body, req); err != nil {
		http.Error(w, "Invalid protobuf data", http.StatusBadRequest)
		return
	}

	data := req.Data
	if data == nil {
		http.Error(w, "Missing telemetry data", http.StatusBadRequest)
		return
	}

	// Validate required fields
	if data.ClusterId == "" || data.Version == "" || data.Os == "" {
		http.Error(w, "Missing required fields", http.StatusBadRequest)
		return
	}

	// Set timestamp if not provided
	if data.Timestamp == 0 {
		data.Timestamp = time.Now().Unix()
	}

	// Store the telemetry data
	if err := h.storage.StoreTelemetry(data); err != nil {
		http.Error(w, "Failed to store data", http.StatusInternalServerError)
		return
	}

	// Return protobuf response
	resp := &proto.TelemetryResponse{
		Success: true,
		Message: "Telemetry data received",
	}

	respData, err := protobuf.Marshal(resp)
	if err != nil {
		http.Error(w, "Failed to marshal response", http.StatusInternalServerError)
		return
	}

	w.Header().Set("Content-Type", "application/x-protobuf")
	w.WriteHeader(http.StatusOK)
	w.Write(respData)
}

func (h *Handler) GetStats(w http.ResponseWriter, r *http.Request) {
	if r.Method != http.MethodGet {
		http.Error(w, "Method not allowed", http.StatusMethodNotAllowed)
		return
	}

	stats, err := h.storage.GetStats()
	if err != nil {
		http.Error(w, "Failed to get stats", http.StatusInternalServerError)
		return
	}

	w.Header().Set("Content-Type", "application/json")
	json.NewEncoder(w).Encode(stats)
}

func (h *Handler) GetInstances(w http.ResponseWriter, r *http.Request) {
	if r.Method != http.MethodGet {
		http.Error(w, "Method not allowed", http.StatusMethodNotAllowed)
		return
	}

	limitStr := r.URL.Query().Get("limit")
	limit := 100 // default
	if limitStr != "" {
		if l, err := strconv.Atoi(limitStr); err == nil && l > 0 && l <= 1000 {
			limit = l
		}
	}

	instances, err := h.storage.GetInstances(limit)
	if err != nil {
		http.Error(w, "Failed to get instances", http.StatusInternalServerError)
		return
	}

	w.Header().Set("Content-Type", "application/json")
	json.NewEncoder(w).Encode(instances)
}

func (h *Handler) GetMetrics(w http.ResponseWriter, r *http.Request) {
	if r.Method != http.MethodGet {
		http.Error(w, "Method not allowed", http.StatusMethodNotAllowed)
		return
	}

	daysStr := r.URL.Query().Get("days")
	days := 30 // default
	if daysStr != "" {
		if d, err := strconv.Atoi(daysStr); err == nil && d > 0 && d <= 365 {
			days = d
		}
	}

	metrics, err := h.storage.GetMetrics(days)
	if err != nil {
		http.Error(w, "Failed to get metrics", http.StatusInternalServerError)
		return
	}

	w.Header().Set("Content-Type", "application/json")
	json.NewEncoder(w).Encode(metrics)
}
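A quick sanity check on the handler's content-type gate and protobuf round-trip can be written as an in-package test; the sketch below only uses the `NewHandler`/`storage.NewPrometheusStorage` wiring shown above (the test name and file are hypothetical, not part of this change):

```go
package api

import (
	"bytes"
	"net/http"
	"net/http/httptest"
	"testing"

	"github.com/seaweedfs/seaweedfs/telemetry/proto"
	"github.com/seaweedfs/seaweedfs/telemetry/server/storage"
	protobuf "google.golang.org/protobuf/proto"
)

func TestCollectTelemetryContentType(t *testing.T) {
	h := NewHandler(storage.NewPrometheusStorage())

	body, _ := protobuf.Marshal(&proto.TelemetryRequest{
		Data: &proto.TelemetryData{ClusterId: "c1", Version: "3.45", Os: "linux/amd64"},
	})

	// A non-protobuf content type must be rejected with 415.
	req := httptest.NewRequest(http.MethodPost, "/api/collect", bytes.NewReader(body))
	req.Header.Set("Content-Type", "application/json")
	rec := httptest.NewRecorder()
	h.CollectTelemetry(rec, req)
	if rec.Code != http.StatusUnsupportedMediaType {
		t.Fatalf("expected 415, got %d", rec.Code)
	}

	// The protobuf content type is accepted and stored.
	req = httptest.NewRequest(http.MethodPost, "/api/collect", bytes.NewReader(body))
	req.Header.Set("Content-Type", "application/x-protobuf")
	rec = httptest.NewRecorder()
	h.CollectTelemetry(rec, req)
	if rec.Code != http.StatusOK {
		t.Fatalf("expected 200, got %d", rec.Code)
	}
}
```

One caveat: `NewPrometheusStorage` registers metrics with the default Prometheus registry via `promauto`, so it can only be called once per test binary.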
@@ -0,0 +1,278 @@
package dashboard

import (
	"net/http"
)

type Handler struct{}

func NewHandler() *Handler {
	return &Handler{}
}

func (h *Handler) ServeIndex(w http.ResponseWriter, r *http.Request) {
	html := `<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>SeaweedFS Telemetry Dashboard</title>
    <script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
    <style>
        body {
            font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
            margin: 0;
            padding: 20px;
            background-color: #f5f5f5;
        }
        .container {
            max-width: 1200px;
            margin: 0 auto;
        }
        .header {
            background: white;
            padding: 20px;
            border-radius: 8px;
            margin-bottom: 20px;
            box-shadow: 0 2px 4px rgba(0,0,0,0.1);
        }
        .stats-grid {
            display: grid;
            grid-template-columns: repeat(auto-fit, minmax(250px, 1fr));
            gap: 20px;
            margin-bottom: 20px;
        }
        .stat-card {
            background: white;
            padding: 20px;
            border-radius: 8px;
            box-shadow: 0 2px 4px rgba(0,0,0,0.1);
        }
        .stat-value {
            font-size: 2em;
            font-weight: bold;
            color: #2196F3;
        }
        .stat-label {
            color: #666;
            margin-top: 5px;
        }
        .chart-container {
            background: white;
            padding: 20px;
            border-radius: 8px;
            margin-bottom: 20px;
            box-shadow: 0 2px 4px rgba(0,0,0,0.1);
        }
        .chart-title {
            font-size: 1.2em;
            font-weight: bold;
            margin-bottom: 15px;
        }
        .loading {
            text-align: center;
            padding: 40px;
            color: #666;
        }
        .error {
            background: #ffebee;
            color: #c62828;
            padding: 15px;
            border-radius: 4px;
            margin: 10px 0;
        }
    </style>
</head>
<body>
    <div class="container">
        <div class="header">
            <h1>SeaweedFS Telemetry Dashboard</h1>
            <p>Privacy-respecting usage analytics for SeaweedFS</p>
        </div>

        <div id="loading" class="loading">Loading telemetry data...</div>
        <div id="error" class="error" style="display: none;"></div>

        <div id="dashboard" style="display: none;">
            <div class="stats-grid">
                <div class="stat-card">
                    <div class="stat-value" id="totalInstances">-</div>
                    <div class="stat-label">Total Instances (30 days)</div>
                </div>
                <div class="stat-card">
                    <div class="stat-value" id="activeInstances">-</div>
                    <div class="stat-label">Active Instances (7 days)</div>
                </div>
                <div class="stat-card">
                    <div class="stat-value" id="totalVersions">-</div>
                    <div class="stat-label">Different Versions</div>
                </div>
                <div class="stat-card">
                    <div class="stat-value" id="totalOS">-</div>
                    <div class="stat-label">Operating Systems</div>
                </div>
            </div>

            <div class="chart-container">
                <div class="chart-title">Version Distribution</div>
                <canvas id="versionChart" width="400" height="200"></canvas>
            </div>

            <div class="chart-container">
                <div class="chart-title">Operating System Distribution</div>
                <canvas id="osChart" width="400" height="200"></canvas>
            </div>

            <div class="chart-container">
                <div class="chart-title">Deployment Types</div>
                <canvas id="deploymentChart" width="400" height="200"></canvas>
            </div>

            <div class="chart-container">
                <div class="chart-title">Volume Servers Over Time</div>
                <canvas id="serverChart" width="400" height="200"></canvas>
            </div>

            <div class="chart-container">
                <div class="chart-title">Total Disk Usage Over Time</div>
                <canvas id="diskChart" width="400" height="200"></canvas>
            </div>
        </div>
    </div>

    <script>
        let charts = {};

        async function loadDashboard() {
            try {
                // Load stats
                const statsResponse = await fetch('/api/stats');
                const stats = await statsResponse.json();

                // Load metrics
                const metricsResponse = await fetch('/api/metrics?days=30');
                const metrics = await metricsResponse.json();

                updateStats(stats);
                updateCharts(stats, metrics);

                document.getElementById('loading').style.display = 'none';
                document.getElementById('dashboard').style.display = 'block';
            } catch (error) {
                console.error('Error loading dashboard:', error);
                showError('Failed to load telemetry data: ' + error.message);
            }
        }

        function updateStats(stats) {
            document.getElementById('totalInstances').textContent = stats.total_instances || 0;
            document.getElementById('activeInstances').textContent = stats.active_instances || 0;
            document.getElementById('totalVersions').textContent = Object.keys(stats.versions || {}).length;
            document.getElementById('totalOS').textContent = Object.keys(stats.os_distribution || {}).length;
        }

        function updateCharts(stats, metrics) {
            // Version chart
            createPieChart('versionChart', 'Version Distribution', stats.versions || {});

            // OS chart
            createPieChart('osChart', 'Operating System Distribution', stats.os_distribution || {});

            // Deployment chart
            createPieChart('deploymentChart', 'Deployment Types', stats.deployments || {});

            // Server count over time
            if (metrics.dates && metrics.server_counts) {
                createLineChart('serverChart', 'Volume Servers', metrics.dates, metrics.server_counts, '#2196F3');
            }

            // Disk usage over time
            if (metrics.dates && metrics.disk_usage) {
                const diskUsageGB = metrics.disk_usage.map(bytes => Math.round(bytes / (1024 * 1024 * 1024)));
                createLineChart('diskChart', 'Disk Usage (GB)', metrics.dates, diskUsageGB, '#4CAF50');
            }
        }

        function createPieChart(canvasId, title, data) {
            const ctx = document.getElementById(canvasId).getContext('2d');

            if (charts[canvasId]) {
                charts[canvasId].destroy();
            }

            const labels = Object.keys(data);
            const values = Object.values(data);

            charts[canvasId] = new Chart(ctx, {
                type: 'pie',
                data: {
                    labels: labels,
                    datasets: [{
                        data: values,
                        backgroundColor: [
                            '#FF6384', '#36A2EB', '#FFCE56', '#4BC0C0',
                            '#9966FF', '#FF9F40', '#FF6384', '#C9CBCF'
                        ]
                    }]
                },
                options: {
                    responsive: true,
                    plugins: {
                        legend: {
                            position: 'bottom'
                        }
                    }
                }
            });
        }

        function createLineChart(canvasId, label, labels, data, color) {
            const ctx = document.getElementById(canvasId).getContext('2d');

            if (charts[canvasId]) {
                charts[canvasId].destroy();
            }

            charts[canvasId] = new Chart(ctx, {
                type: 'line',
                data: {
                    labels: labels,
                    datasets: [{
                        label: label,
                        data: data,
                        borderColor: color,
                        backgroundColor: color + '20',
                        fill: true,
                        tension: 0.1
                    }]
                },
                options: {
                    responsive: true,
                    scales: {
                        y: {
                            beginAtZero: true
                        }
                    }
                }
            });
        }

        function showError(message) {
            document.getElementById('loading').style.display = 'none';
            document.getElementById('error').style.display = 'block';
            document.getElementById('error').textContent = message;
        }

        // Load dashboard on page load
        loadDashboard();

        // Refresh every 5 minutes
        setInterval(loadDashboard, 5 * 60 * 1000);
    </script>
</body>
</html>`

	w.Header().Set("Content-Type", "text/html")
	w.WriteHeader(http.StatusOK)
	w.Write([]byte(html))
}
@@ -0,0 +1,31 @@
github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44=
github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk=
github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg=
github.com/golang/protobuf v1.5.3/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY=
github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38=
github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/matttproud/golang_protobuf_extensions v1.0.4 h1:mmDVorXM7PCGKw94cs5zkfA9PSy5pEvNWRP0ET0TIVo=
github.com/matttproud/golang_protobuf_extensions v1.0.4/go.mod h1:BSXmuO+STAnVfrANrmjBb36TMTDstsz7MSK+HVaYKv4=
github.com/prometheus/client_golang v1.17.0 h1:rl2sfwZMtSthVU752MqfjQozy7blglC+1SOtjMAMh+Q=
github.com/prometheus/client_golang v1.17.0/go.mod h1:VeL+gMmOAxkS2IqfCq0ZmHSL+LjWfWDUmp1mBz9JgUY=
github.com/prometheus/client_model v0.4.1-0.20230718164431-9a2bf3000d16 h1:v7DLqVdK4VrYkVD5diGdl4sxJurKJEMnODWRJlxV9oM=
github.com/prometheus/client_model v0.4.1-0.20230718164431-9a2bf3000d16/go.mod h1:oMQmHW1/JoDwqLtg57MGgP/Fb1CJEYF2imWWhWtMkYU=
github.com/prometheus/common v0.44.0 h1:+5BrQJwiBB9xsMygAB3TNvpQKOwlkc25LbISbrdOOfY=
github.com/prometheus/common v0.44.0/go.mod h1:ofAIvZbQ1e/nugmZGz4/qCb9Ap1VoSTIO7x0VV9VvuY=
github.com/prometheus/procfs v0.11.1 h1:xRC8Iq1yyca5ypa9n1EZnWZkt7dwcoRPQwX/5gwaUuI=
github.com/prometheus/procfs v0.11.1/go.mod h1:eesXgaPo1q7lBpVMoMy0ZOFTth9hBn4W/y0/p/ScXhY=
golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sys v0.11.0 h1:eG7RXZHdqOJ1i+0lgLgCpSXAp6M3LYlAo6osgSi0xOM=
golang.org/x/sys v0.11.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw=
google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc=
google.golang.org/protobuf v1.31.0 h1:g0LDEJHgrBl9N9r17Ru3sqWhkIx2NB67okBHPwC7hs8=
google.golang.org/protobuf v1.31.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I=
@@ -0,0 +1,111 @@
package main

import (
	"encoding/json"
	"flag"
	"fmt"
	"log"
	"net/http"
	"time"

	"github.com/prometheus/client_golang/prometheus/promhttp"
	"github.com/seaweedfs/seaweedfs/telemetry/server/api"
	"github.com/seaweedfs/seaweedfs/telemetry/server/dashboard"
	"github.com/seaweedfs/seaweedfs/telemetry/server/storage"
)

var (
	port            = flag.Int("port", 8080, "HTTP server port")
	enableCORS      = flag.Bool("cors", true, "Enable CORS for dashboard")
	logRequests     = flag.Bool("log", true, "Log incoming requests")
	enableDashboard = flag.Bool("dashboard", true, "Enable built-in dashboard (optional when using Grafana)")
	cleanupInterval = flag.Duration("cleanup", 24*time.Hour, "Cleanup interval for old instances")
	maxInstanceAge  = flag.Duration("max-age", 30*24*time.Hour, "Maximum age for instances before cleanup")
)

func main() {
	flag.Parse()

	// Create Prometheus storage instance
	store := storage.NewPrometheusStorage()

	// Start cleanup routine
	go func() {
		ticker := time.NewTicker(*cleanupInterval)
		defer ticker.Stop()
		for range ticker.C {
			store.CleanupOldInstances(*maxInstanceAge)
		}
	}()

	// Setup HTTP handlers
	mux := http.NewServeMux()

	// Prometheus metrics endpoint
	mux.Handle("/metrics", promhttp.Handler())

	// API endpoints
	apiHandler := api.NewHandler(store)
	mux.HandleFunc("/api/collect", corsMiddleware(logMiddleware(apiHandler.CollectTelemetry)))
	mux.HandleFunc("/api/stats", corsMiddleware(logMiddleware(apiHandler.GetStats)))
	mux.HandleFunc("/api/instances", corsMiddleware(logMiddleware(apiHandler.GetInstances)))
	mux.HandleFunc("/api/metrics", corsMiddleware(logMiddleware(apiHandler.GetMetrics)))

	// Dashboard (optional)
	if *enableDashboard {
		dashboardHandler := dashboard.NewHandler()
		mux.HandleFunc("/", corsMiddleware(dashboardHandler.ServeIndex))
		mux.HandleFunc("/dashboard", corsMiddleware(dashboardHandler.ServeIndex))
		mux.Handle("/static/", http.StripPrefix("/static/", http.FileServer(http.Dir("./static"))))
	}

	// Health check
	mux.HandleFunc("/health", func(w http.ResponseWriter, r *http.Request) {
		w.Header().Set("Content-Type", "application/json")
		json.NewEncoder(w).Encode(map[string]string{
			"status": "ok",
			"time":   time.Now().UTC().Format(time.RFC3339),
		})
	})

	addr := fmt.Sprintf(":%d", *port)
	log.Printf("Starting telemetry server on %s", addr)
	log.Printf("Prometheus metrics: http://localhost%s/metrics", addr)
	if *enableDashboard {
		log.Printf("Dashboard: http://localhost%s/dashboard", addr)
	}
	log.Printf("Cleanup interval: %v, Max instance age: %v", *cleanupInterval, *maxInstanceAge)

	if err := http.ListenAndServe(addr, mux); err != nil {
		log.Fatalf("Server failed: %v", err)
	}
}

func corsMiddleware(next http.HandlerFunc) http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		if *enableCORS {
			w.Header().Set("Access-Control-Allow-Origin", "*")
			w.Header().Set("Access-Control-Allow-Methods", "GET, POST, OPTIONS")
			w.Header().Set("Access-Control-Allow-Headers", "Content-Type, Authorization")
		}

		if r.Method == "OPTIONS" {
			w.WriteHeader(http.StatusOK)
			return
		}

		next(w, r)
	}
}

func logMiddleware(next http.HandlerFunc) http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		if *logRequests {
			start := time.Now()
			next(w, r)
			log.Printf("%s %s %s %v", r.Method, r.URL.Path, r.RemoteAddr, time.Since(start))
		} else {
			next(w, r)
		}
	}
}
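The wrapping order above matters: `corsMiddleware(logMiddleware(h))` sets CORS headers and answers OPTIONS preflights before the logging wrapper or the handler ever run, so preflights are never logged. A standalone sketch of the same composition pattern (names here are illustrative, not part of the server):

```go
package main

import (
	"fmt"
	"net/http"
)

// chain applies middlewares right-to-left, so the first argument is the
// outermost wrapper - the same shape as corsMiddleware(logMiddleware(h)).
func chain(h http.HandlerFunc, mws ...func(http.HandlerFunc) http.HandlerFunc) http.HandlerFunc {
	for i := len(mws) - 1; i >= 0; i-- {
		h = mws[i](h)
	}
	return h
}

func main() {
	hello := func(w http.ResponseWriter, r *http.Request) { fmt.Fprintln(w, "ok") }
	outer := func(next http.HandlerFunc) http.HandlerFunc {
		return func(w http.ResponseWriter, r *http.Request) {
			w.Header().Set("X-Outer", "1") // runs before next
			next(w, r)
		}
	}
	http.HandleFunc("/", chain(hello, outer))
	http.ListenAndServe(":8081", nil)
}
```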
@@ -0,0 +1,245 @@
package storage

import (
	"encoding/json"
	"sync"
	"time"

	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/promauto"
	"github.com/seaweedfs/seaweedfs/telemetry/proto"
)

type PrometheusStorage struct {
	// Prometheus metrics
	totalClusters     prometheus.Gauge
	activeClusters    prometheus.Gauge
	volumeServerCount *prometheus.GaugeVec
	totalDiskBytes    *prometheus.GaugeVec
	totalVolumeCount  *prometheus.GaugeVec
	filerCount        *prometheus.GaugeVec
	brokerCount       *prometheus.GaugeVec
	clusterInfo       *prometheus.GaugeVec
	telemetryReceived prometheus.Counter

	// In-memory storage for API endpoints (if needed)
	mu        sync.RWMutex
	instances map[string]*telemetryData
	stats     map[string]interface{}
}

// telemetryData is an internal struct that includes the received timestamp
type telemetryData struct {
	*proto.TelemetryData
	ReceivedAt time.Time `json:"received_at"`
}

func NewPrometheusStorage() *PrometheusStorage {
	return &PrometheusStorage{
		totalClusters: promauto.NewGauge(prometheus.GaugeOpts{
			Name: "seaweedfs_telemetry_total_clusters",
			Help: "Total number of unique SeaweedFS clusters (last 30 days)",
		}),
		activeClusters: promauto.NewGauge(prometheus.GaugeOpts{
			Name: "seaweedfs_telemetry_active_clusters",
			Help: "Number of active SeaweedFS clusters (last 7 days)",
		}),
		volumeServerCount: promauto.NewGaugeVec(prometheus.GaugeOpts{
			Name: "seaweedfs_telemetry_volume_servers",
			Help: "Number of volume servers per cluster",
		}, []string{"cluster_id", "version", "os", "deployment"}),
		totalDiskBytes: promauto.NewGaugeVec(prometheus.GaugeOpts{
			Name: "seaweedfs_telemetry_disk_bytes",
			Help: "Total disk usage in bytes per cluster",
		}, []string{"cluster_id", "version", "os", "deployment"}),
		totalVolumeCount: promauto.NewGaugeVec(prometheus.GaugeOpts{
			Name: "seaweedfs_telemetry_volume_count",
			Help: "Total number of volumes per cluster",
		}, []string{"cluster_id", "version", "os", "deployment"}),
		filerCount: promauto.NewGaugeVec(prometheus.GaugeOpts{
			Name: "seaweedfs_telemetry_filer_count",
			Help: "Number of filer servers per cluster",
		}, []string{"cluster_id", "version", "os", "deployment"}),
		brokerCount: promauto.NewGaugeVec(prometheus.GaugeOpts{
			Name: "seaweedfs_telemetry_broker_count",
			Help: "Number of broker servers per cluster",
		}, []string{"cluster_id", "version", "os", "deployment"}),
		clusterInfo: promauto.NewGaugeVec(prometheus.GaugeOpts{
			Name: "seaweedfs_telemetry_cluster_info",
			Help: "Cluster information (always 1, labels contain metadata)",
		}, []string{"cluster_id", "version", "os", "deployment", "features"}),
		telemetryReceived: promauto.NewCounter(prometheus.CounterOpts{
			Name: "seaweedfs_telemetry_reports_received_total",
			Help: "Total number of telemetry reports received",
		}),
		instances: make(map[string]*telemetryData),
		stats:     make(map[string]interface{}),
	}
}

func (s *PrometheusStorage) StoreTelemetry(data *proto.TelemetryData) error {
	s.mu.Lock()
	defer s.mu.Unlock()

	// Update Prometheus metrics
	labels := prometheus.Labels{
		"cluster_id": data.ClusterId,
		"version":    data.Version,
		"os":         data.Os,
		"deployment": data.Deployment,
	}

	s.volumeServerCount.With(labels).Set(float64(data.VolumeServerCount))
	s.totalDiskBytes.With(labels).Set(float64(data.TotalDiskBytes))
	s.totalVolumeCount.With(labels).Set(float64(data.TotalVolumeCount))
	s.filerCount.With(labels).Set(float64(data.FilerCount))
	s.brokerCount.With(labels).Set(float64(data.BrokerCount))

	// Features as JSON string for the label
	featuresJSON, _ := json.Marshal(data.Features)
	infoLabels := prometheus.Labels{
		"cluster_id": data.ClusterId,
		"version":    data.Version,
		"os":         data.Os,
		"deployment": data.Deployment,
		"features":   string(featuresJSON),
	}
	s.clusterInfo.With(infoLabels).Set(1)

	s.telemetryReceived.Inc()

	// Store in memory for API endpoints
	s.instances[data.ClusterId] = &telemetryData{
		TelemetryData: data,
		ReceivedAt:    time.Now().UTC(),
	}

	// Update aggregated stats
	s.updateStats()

	return nil
}

func (s *PrometheusStorage) GetStats() (map[string]interface{}, error) {
	s.mu.RLock()
	defer s.mu.RUnlock()

	// Return cached stats
	result := make(map[string]interface{})
	for k, v := range s.stats {
		result[k] = v
	}
	return result, nil
}

func (s *PrometheusStorage) GetInstances(limit int) ([]*telemetryData, error) {
	s.mu.RLock()
	defer s.mu.RUnlock()

	var instances []*telemetryData
	count := 0
	for _, instance := range s.instances {
		if count >= limit {
			break
		}
		instances = append(instances, instance)
		count++
	}

	return instances, nil
}

func (s *PrometheusStorage) GetMetrics(days int) (map[string]interface{}, error) {
	s.mu.RLock()
	defer s.mu.RUnlock()

	// Return current metrics from in-memory storage
	// Historical data should be queried from Prometheus directly
	cutoff := time.Now().AddDate(0, 0, -days)

	var volumeServers []map[string]interface{}
	var diskUsage []map[string]interface{}

	for _, instance := range s.instances {
		if instance.ReceivedAt.After(cutoff) {
			volumeServers = append(volumeServers, map[string]interface{}{
				"date":  instance.ReceivedAt.Format("2006-01-02"),
				"value": instance.TelemetryData.VolumeServerCount,
			})
			diskUsage = append(diskUsage, map[string]interface{}{
				"date":  instance.ReceivedAt.Format("2006-01-02"),
				"value": instance.TelemetryData.TotalDiskBytes,
			})
		}
	}

	return map[string]interface{}{
		"volume_servers": volumeServers,
		"disk_usage":     diskUsage,
	}, nil
}

func (s *PrometheusStorage) updateStats() {
	now := time.Now()
	last7Days := now.AddDate(0, 0, -7)
	last30Days := now.AddDate(0, 0, -30)

	totalInstances := 0
	activeInstances := 0
	versions := make(map[string]int)
	osDistribution := make(map[string]int)
	deployments := make(map[string]int)

	for _, instance := range s.instances {
		if instance.ReceivedAt.After(last30Days) {
			totalInstances++
		}
		if instance.ReceivedAt.After(last7Days) {
			activeInstances++
			versions[instance.TelemetryData.Version]++
			osDistribution[instance.TelemetryData.Os]++
			deployments[instance.TelemetryData.Deployment]++
		}
	}

	// Update Prometheus gauges
	s.totalClusters.Set(float64(totalInstances))
	s.activeClusters.Set(float64(activeInstances))

	// Update cached stats for API
	s.stats = map[string]interface{}{
		"total_instances":  totalInstances,
		"active_instances": activeInstances,
		"versions":         versions,
		"os_distribution":  osDistribution,
		"deployments":      deployments,
	}
}

// CleanupOldInstances removes instances older than the specified duration
func (s *PrometheusStorage) CleanupOldInstances(maxAge time.Duration) {
	s.mu.Lock()
	defer s.mu.Unlock()

	cutoff := time.Now().Add(-maxAge)
	for instanceID, instance := range s.instances {
		if instance.ReceivedAt.Before(cutoff) {
			delete(s.instances, instanceID)

			// Remove from Prometheus metrics
			labels := prometheus.Labels{
				"cluster_id": instance.TelemetryData.ClusterId,
				"version":    instance.TelemetryData.Version,
				"os":         instance.TelemetryData.Os,
				"deployment": instance.TelemetryData.Deployment,
			}
			s.volumeServerCount.Delete(labels)
			s.totalDiskBytes.Delete(labels)
			s.totalVolumeCount.Delete(labels)
			s.filerCount.Delete(labels)
			s.brokerCount.Delete(labels)
		}
	}

	s.updateStats()
}
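One subtlety worth verifying: every distinct `cluster_id` creates its own time series on these GaugeVecs, and `CleanupOldInstances` must delete with exactly the same label set it stored. A minimal in-package test sketch using the real `prometheus/testutil` helpers (the test name is hypothetical, and this relies on `NewPrometheusStorage` being called only once per process because of `promauto`):

```go
package storage

import (
	"testing"
	"time"

	"github.com/prometheus/client_golang/prometheus/testutil"
	"github.com/seaweedfs/seaweedfs/telemetry/proto"
)

func TestStoreAndCleanup(t *testing.T) {
	s := NewPrometheusStorage()

	if err := s.StoreTelemetry(&proto.TelemetryData{
		ClusterId: "c1", Version: "3.45", Os: "linux/amd64",
		Deployment: "standalone", VolumeServerCount: 2,
	}); err != nil {
		t.Fatal(err)
	}

	// One report received, one series on the volume-server gauge.
	if got := testutil.ToFloat64(s.telemetryReceived); got != 1 {
		t.Fatalf("reports received = %v, want 1", got)
	}
	if n := testutil.CollectAndCount(s.volumeServerCount); n != 1 {
		t.Fatalf("series count = %d, want 1", n)
	}

	// A zero max-age treats everything as expired and removes the series.
	s.CleanupOldInstances(0)
	if n := testutil.CollectAndCount(s.volumeServerCount); n != 0 {
		t.Fatalf("series count after cleanup = %d, want 0", n)
	}
}
```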
@@ -0,0 +1,315 @@
package main

import (
	"context"
	"fmt"
	"io"
	"log"
	"net/http"
	"os"
	"os/exec"
	"path/filepath"
	"strings"
	"syscall"
	"time"

	"github.com/seaweedfs/seaweedfs/telemetry/proto"
	"github.com/seaweedfs/seaweedfs/weed/telemetry"
	protobuf "google.golang.org/protobuf/proto"
)

const (
	serverPort = "18080" // Use different port to avoid conflicts
	serverURL  = "http://localhost:" + serverPort
)

func main() {
	fmt.Println("🧪 Starting SeaweedFS Telemetry Integration Test")

	// Start telemetry server
	fmt.Println("📡 Starting telemetry server...")
	serverCmd, err := startTelemetryServer()
	if err != nil {
		log.Fatalf("❌ Failed to start telemetry server: %v", err)
	}
	defer stopServer(serverCmd)

	// Wait for server to start
	if !waitForServer(serverURL+"/health", 15*time.Second) {
		log.Fatal("❌ Telemetry server failed to start")
	}
	fmt.Println("✅ Telemetry server started successfully")

	// Test protobuf marshaling first
	fmt.Println("🔧 Testing protobuf marshaling...")
	if err := testProtobufMarshaling(); err != nil {
		log.Fatalf("❌ Protobuf marshaling test failed: %v", err)
	}
	fmt.Println("✅ Protobuf marshaling test passed")

	// Test protobuf client
	fmt.Println("🔄 Testing protobuf telemetry client...")
	if err := testTelemetryClient(); err != nil {
		log.Fatalf("❌ Telemetry client test failed: %v", err)
	}
	fmt.Println("✅ Telemetry client test passed")

	// Test server metrics endpoint
	fmt.Println("📊 Testing Prometheus metrics endpoint...")
	if err := testMetricsEndpoint(); err != nil {
		log.Fatalf("❌ Metrics endpoint test failed: %v", err)
	}
	fmt.Println("✅ Metrics endpoint test passed")

	// Test stats API
	fmt.Println("📈 Testing stats API...")
	if err := testStatsAPI(); err != nil {
		log.Fatalf("❌ Stats API test failed: %v", err)
	}
	fmt.Println("✅ Stats API test passed")

	// Test instances API
	fmt.Println("📋 Testing instances API...")
	if err := testInstancesAPI(); err != nil {
		log.Fatalf("❌ Instances API test failed: %v", err)
	}
	fmt.Println("✅ Instances API test passed")

	fmt.Println("🎉 All telemetry integration tests passed!")
}

func startTelemetryServer() (*exec.Cmd, error) {
	// Get the directory where this test is running
	testDir, err := os.Getwd()
	if err != nil {
		return nil, fmt.Errorf("failed to get working directory: %v", err)
	}

	// Navigate to the server directory (from main seaweedfs directory)
	serverDir := filepath.Join(testDir, "telemetry", "server")

	cmd := exec.Command("go", "run", ".",
		"-port="+serverPort,
		"-dashboard=false",
		"-cleanup=1m",
		"-max-age=1h")

	cmd.Dir = serverDir

	// Create log files for server output
	logFile, err := os.Create("telemetry-server-test.log")
	if err != nil {
		return nil, fmt.Errorf("failed to create log file: %v", err)
	}

	cmd.Stdout = logFile
	cmd.Stderr = logFile

	if err := cmd.Start(); err != nil {
		return nil, fmt.Errorf("failed to start server: %v", err)
	}

	return cmd, nil
}

func stopServer(cmd *exec.Cmd) {
	if cmd != nil && cmd.Process != nil {
		cmd.Process.Signal(syscall.SIGTERM)
		cmd.Wait()

		// Clean up log file
		os.Remove("telemetry-server-test.log")
	}
}

func waitForServer(url string, timeout time.Duration) bool {
	ctx, cancel := context.WithTimeout(context.Background(), timeout)
	defer cancel()

	fmt.Printf("⏳ Waiting for server at %s...\n", url)

	for {
		select {
		case <-ctx.Done():
			return false
		default:
			resp, err := http.Get(url)
			if err == nil {
				resp.Body.Close()
				if resp.StatusCode == http.StatusOK {
					return true
				}
			}
			time.Sleep(500 * time.Millisecond)
		}
	}
}

func testProtobufMarshaling() error {
	// Test protobuf marshaling/unmarshaling
	testData := &proto.TelemetryData{
		ClusterId:         "test-cluster-12345",
		Version:           "test-3.45",
		Os:                "linux/amd64",
		Features:          []string{"filer", "s3api"},
		Deployment:        "test",
		VolumeServerCount: 2,
		TotalDiskBytes:    1000000,
		TotalVolumeCount:  10,
		FilerCount:        1,
		BrokerCount:       1,
		Timestamp:         time.Now().Unix(),
	}

	// Marshal
	data, err := protobuf.Marshal(testData)
	if err != nil {
		return fmt.Errorf("failed to marshal protobuf: %v", err)
	}

	fmt.Printf("   Protobuf size: %d bytes\n", len(data))

	// Unmarshal
	testData2 := &proto.TelemetryData{}
	if err := protobuf.Unmarshal(data, testData2); err != nil {
		return fmt.Errorf("failed to unmarshal protobuf: %v", err)
	}

	// Verify data
	if testData2.ClusterId != testData.ClusterId {
		return fmt.Errorf("protobuf data mismatch: expected %s, got %s",
			testData.ClusterId, testData2.ClusterId)
	}

	if testData2.VolumeServerCount != testData.VolumeServerCount {
		return fmt.Errorf("volume server count mismatch: expected %d, got %d",
			testData.VolumeServerCount, testData2.VolumeServerCount)
	}

	return nil
}

func testTelemetryClient() error {
	// Create telemetry client
	client := telemetry.NewClient(serverURL+"/api/collect", true)

	// Create test data using protobuf format
	testData := &proto.TelemetryData{
		Version:           "test-3.45",
		Os:                "linux/amd64",
		Features:          []string{"filer", "s3api", "mq"},
		Deployment:        "integration-test",
		VolumeServerCount: 3,
		TotalDiskBytes:    1073741824, // 1GB
		TotalVolumeCount:  50,
		FilerCount:        2,
		BrokerCount:       1,
		Timestamp:         time.Now().Unix(),
	}

	// Send telemetry data
	if err := client.SendTelemetry(testData); err != nil {
		return fmt.Errorf("failed to send telemetry: %v", err)
	}

	fmt.Printf("   Sent telemetry for cluster: %s\n", client.GetInstanceID())

	// Wait a bit for processing
	time.Sleep(2 * time.Second)

	return nil
}

func testMetricsEndpoint() error {
	resp, err := http.Get(serverURL + "/metrics")
	if err != nil {
		return fmt.Errorf("failed to get metrics: %v", err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return fmt.Errorf("metrics endpoint returned status %d", resp.StatusCode)
	}

	// Read response and check for expected metrics
	content, err := io.ReadAll(resp.Body)
	if err != nil {
		return fmt.Errorf("failed to read metrics response: %v", err)
	}

	contentStr := string(content)
	expectedMetrics := []string{
		"seaweedfs_telemetry_total_clusters",
		"seaweedfs_telemetry_active_clusters",
		"seaweedfs_telemetry_reports_received_total",
		"seaweedfs_telemetry_volume_servers",
		"seaweedfs_telemetry_disk_bytes",
		"seaweedfs_telemetry_volume_count",
		"seaweedfs_telemetry_filer_count",
		"seaweedfs_telemetry_broker_count",
	}

	for _, metric := range expectedMetrics {
		if !strings.Contains(contentStr, metric) {
			return fmt.Errorf("missing expected metric: %s", metric)
		}
	}

	// Check that we have at least one report received
	if !strings.Contains(contentStr, "seaweedfs_telemetry_reports_received_total 1") {
		fmt.Printf("   Warning: Expected at least 1 report received, metrics content:\n%s\n", contentStr)
	}

	fmt.Printf("   Found %d expected metrics\n", len(expectedMetrics))

	return nil
}

func testStatsAPI() error {
	resp, err := http.Get(serverURL + "/api/stats")
	if err != nil {
		return fmt.Errorf("failed to get stats: %v", err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return fmt.Errorf("stats API returned status %d", resp.StatusCode)
	}

	// Read and verify JSON response
	content, err := io.ReadAll(resp.Body)
	if err != nil {
		return fmt.Errorf("failed to read stats response: %v", err)
	}

	contentStr := string(content)
	if !strings.Contains(contentStr, "total_instances") {
		return fmt.Errorf("stats response missing total_instances field")
	}

	fmt.Printf("   Stats response: %s\n", contentStr)

	return nil
}

func testInstancesAPI() error {
	resp, err := http.Get(serverURL + "/api/instances?limit=10")
	if err != nil {
		return fmt.Errorf("failed to get instances: %v", err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return fmt.Errorf("instances API returned status %d", resp.StatusCode)
	}

	// Read response
	content, err := io.ReadAll(resp.Body)
	if err != nil {
		return fmt.Errorf("failed to read instances response: %v", err)
	}

	fmt.Printf("   Instances response length: %d bytes\n", len(content))

	return nil
}
@@ -0,0 +1,100 @@
package telemetry

import (
	"bytes"
	"fmt"
	"net/http"
	"time"

	"github.com/google/uuid"
	"github.com/seaweedfs/seaweedfs/telemetry/proto"
	"github.com/seaweedfs/seaweedfs/weed/glog"
	protobuf "google.golang.org/protobuf/proto"
)

type Client struct {
	url        string
	enabled    bool
	instanceID string
	httpClient *http.Client
}

// NewClient creates a new telemetry client
func NewClient(url string, enabled bool) *Client {
	return &Client{
		url:        url,
		enabled:    enabled,
		instanceID: uuid.New().String(), // Generate UUID in memory only
		httpClient: &http.Client{
			Timeout: 10 * time.Second,
		},
	}
}

// IsEnabled returns whether telemetry is enabled
func (c *Client) IsEnabled() bool {
	return c.enabled && c.url != ""
}

// SendTelemetry sends telemetry data synchronously using protobuf format
func (c *Client) SendTelemetry(data *proto.TelemetryData) error {
	if !c.IsEnabled() {
		return nil
	}

	// Set the cluster ID
	data.ClusterId = c.instanceID

	return c.sendProtobuf(data)
}

// SendTelemetryAsync sends telemetry data asynchronously
func (c *Client) SendTelemetryAsync(data *proto.TelemetryData) {
	if !c.IsEnabled() {
		return
	}

	go func() {
		if err := c.SendTelemetry(data); err != nil {
			glog.V(1).Infof("Failed to send telemetry: %v", err)
		}
	}()
}

// sendProtobuf sends data using protobuf format
func (c *Client) sendProtobuf(data *proto.TelemetryData) error {
	req := &proto.TelemetryRequest{
		Data: data,
	}

	body, err := protobuf.Marshal(req)
	if err != nil {
		return fmt.Errorf("failed to marshal protobuf: %v", err)
	}

	httpReq, err := http.NewRequest("POST", c.url, bytes.NewBuffer(body))
	if err != nil {
		return fmt.Errorf("failed to create request: %v", err)
	}

	httpReq.Header.Set("Content-Type", "application/x-protobuf")
	httpReq.Header.Set("User-Agent", fmt.Sprintf("SeaweedFS/%s", data.Version))

	resp, err := c.httpClient.Do(httpReq)
	if err != nil {
		return fmt.Errorf("failed to send request: %v", err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return fmt.Errorf("server returned status %d", resp.StatusCode)
	}

	glog.V(2).Infof("Telemetry sent successfully via protobuf")
	return nil
}

// GetInstanceID returns the current instance ID
func (c *Client) GetInstanceID() string {
	return c.instanceID
}
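Callers never need to guard the client: with `enabled=false` (or an empty URL) every send is a no-op. A hedged usage sketch (the collector URL below is a placeholder, not the default endpoint this commit configures):

```go
package main

import (
	"time"

	"github.com/seaweedfs/seaweedfs/telemetry/proto"
	"github.com/seaweedfs/seaweedfs/weed/telemetry"
)

func main() {
	// URL is illustrative; a disabled client makes every call a no-op.
	client := telemetry.NewClient("https://telemetry.example.com/api/collect", true)

	client.SendTelemetryAsync(&proto.TelemetryData{
		Version:   "3.45",
		Os:        "linux/amd64",
		Timestamp: time.Now().Unix(),
	})

	// SendTelemetryAsync returns immediately; give the goroutine
	// a moment to finish in this toy example.
	time.Sleep(time.Second)
}
```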
@@ -0,0 +1,218 @@
package telemetry

import (
	"time"

	"github.com/seaweedfs/seaweedfs/telemetry/proto"
	"github.com/seaweedfs/seaweedfs/weed/cluster"
	"github.com/seaweedfs/seaweedfs/weed/glog"
	"github.com/seaweedfs/seaweedfs/weed/topology"
)

type Collector struct {
	client       *Client
	topo         *topology.Topology
	cluster      *cluster.Cluster
	masterServer interface{} // Will be set to *weed_server.MasterServer to access client tracking
	features     []string
	deployment   string
	version      string
	os           string
}

// NewCollector creates a new telemetry collector
func NewCollector(client *Client, topo *topology.Topology, cluster *cluster.Cluster) *Collector {
	return &Collector{
		client:       client,
		topo:         topo,
		cluster:      cluster,
		masterServer: nil,
		features:     []string{},
		deployment:   "unknown",
		version:      "unknown",
		os:           "unknown",
	}
}

// SetFeatures sets the list of enabled features
func (c *Collector) SetFeatures(features []string) {
	c.features = features
}

// SetDeployment sets the deployment type (standalone, cluster, etc.)
func (c *Collector) SetDeployment(deployment string) {
	c.deployment = deployment
}

// SetVersion sets the SeaweedFS version
func (c *Collector) SetVersion(version string) {
	c.version = version
}

// SetOS sets the operating system information
func (c *Collector) SetOS(os string) {
	c.os = os
}

// SetMasterServer sets a reference to the master server for client tracking
func (c *Collector) SetMasterServer(masterServer interface{}) {
	c.masterServer = masterServer
}

// CollectAndSendAsync collects telemetry data and sends it asynchronously
func (c *Collector) CollectAndSendAsync() {
	if !c.client.IsEnabled() {
		return
	}

	go func() {
		data := c.collectData()
		c.client.SendTelemetryAsync(data)
	}()
}

// StartPeriodicCollection starts sending telemetry data periodically
func (c *Collector) StartPeriodicCollection(interval time.Duration) {
	if !c.client.IsEnabled() {
		glog.V(1).Infof("Telemetry is disabled, skipping periodic collection")
		return
	}

	glog.V(0).Infof("Starting telemetry collection every %v", interval)

	// Send initial telemetry after a short delay
	go func() {
		time.Sleep(30 * time.Second) // Wait for cluster to stabilize
		c.CollectAndSendAsync()
	}()

	// Start periodic collection
	ticker := time.NewTicker(interval)
	go func() {
		defer ticker.Stop()
		for range ticker.C {
			c.CollectAndSendAsync()
		}
	}()
}

// collectData gathers telemetry data from the topology
func (c *Collector) collectData() *proto.TelemetryData {
	data := &proto.TelemetryData{
		Version:    c.version,
		Os:         c.os,
		Features:   c.features,
		Deployment: c.deployment,
		Timestamp:  time.Now().Unix(),
	}

	if c.topo != nil {
		// Collect volume server count
		data.VolumeServerCount = int32(c.countVolumeServers())

		// Collect total disk usage and volume count
		diskBytes, volumeCount := c.collectVolumeStats()
		data.TotalDiskBytes = diskBytes
		data.TotalVolumeCount = int32(volumeCount)
	}

	if c.cluster != nil {
		// Collect filer and broker counts
		data.FilerCount = int32(c.countFilers())
		data.BrokerCount = int32(c.countBrokers())
	}

	return data
}

// countVolumeServers counts the number of active volume servers
func (c *Collector) countVolumeServers() int {
	count := 0
	for _, dcNode := range c.topo.Children() {
		dc := dcNode.(*topology.DataCenter)
		for _, rackNode := range dc.Children() {
			rack := rackNode.(*topology.Rack)
			for range rack.Children() {
				count++
			}
		}
	}
	return count
}

// collectVolumeStats collects total disk usage and volume count
func (c *Collector) collectVolumeStats() (uint64, int) {
	var totalDiskBytes uint64
	var totalVolumeCount int

	for _, dcNode := range c.topo.Children() {
		dc := dcNode.(*topology.DataCenter)
		for _, rackNode := range dc.Children() {
			rack := rackNode.(*topology.Rack)
			for _, dnNode := range rack.Children() {
				dn := dnNode.(*topology.DataNode)
				volumes := dn.GetVolumes()
				for _, volumeInfo := range volumes {
					totalVolumeCount++
					totalDiskBytes += volumeInfo.Size
				}
			}
		}
	}

	return totalDiskBytes, totalVolumeCount
}

// countFilers counts the number of active filer servers across all groups
func (c *Collector) countFilers() int {
	// Count all filer-type nodes in the cluster
	// This includes both pure filer servers and S3 servers (which register as filers)
	count := 0
	for _, groupName := range c.getAllFilerGroups() {
		nodes := c.cluster.ListClusterNode(cluster.FilerGroupName(groupName), cluster.FilerType)
		count += len(nodes)
	}
	return count
}

// countBrokers counts the number of active broker servers
func (c *Collector) countBrokers() int {
	// Count brokers across all broker groups
	count := 0
	for _, groupName := range c.getAllBrokerGroups() {
		nodes := c.cluster.ListClusterNode(cluster.FilerGroupName(groupName), cluster.BrokerType)
		count += len(nodes)
	}
	return count
}

// getAllFilerGroups returns all filer group names
func (c *Collector) getAllFilerGroups() []string {
	// For simplicity, we check the default group
	// In a more sophisticated implementation, we could enumerate all groups
	return []string{""}
}

// getAllBrokerGroups returns all broker group names
func (c *Collector) getAllBrokerGroups() []string {
	// For simplicity, we check the default group
	// In a more sophisticated implementation, we could enumerate all groups
	return []string{""}
}

// DetermineDeployment determines the deployment type based on configuration
func DetermineDeployment(isMasterEnabled, isVolumeEnabled bool, peerCount int) string {
	if isMasterEnabled && isVolumeEnabled {
		if peerCount > 1 {
			return "cluster"
		}
		return "standalone"
	}
	if isMasterEnabled {
		return "master-only"
	}
	if isVolumeEnabled {
		return "volume-only"
	}
	return "unknown"
}
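For context, a hedged sketch of how a master might wire the collector together (the actual wiring lives in `weed/server/master_server.go`, which is part of this change but not shown here; the URL and feature list are placeholders):

```go
package main

import (
	"runtime"
	"time"

	"github.com/seaweedfs/seaweedfs/weed/cluster"
	"github.com/seaweedfs/seaweedfs/weed/telemetry"
	"github.com/seaweedfs/seaweedfs/weed/topology"
)

// startTelemetry sketches the wiring; topo and c come from the running master.
func startTelemetry(topo *topology.Topology, c *cluster.Cluster, version string, peerCount int) {
	client := telemetry.NewClient("https://telemetry.example.com/api/collect", true) // URL illustrative
	collector := telemetry.NewCollector(client, topo, c)
	collector.SetVersion(version)
	collector.SetOS(runtime.GOOS + "/" + runtime.GOARCH)
	collector.SetFeatures([]string{"filer"}) // whatever is actually enabled
	collector.SetDeployment(telemetry.DetermineDeployment(true, true, peerCount))
	collector.StartPeriodicCollection(24 * time.Hour)
}
```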