Browse Source
add telemetry (#6926)
add telemetry (#6926)
* add telemetry * fix go mod * add default telemetry server url * Update README.md * replace with broker count instead of s3 count * Update telemetry.pb.go * github action to deploy
pull/6931/head
committed by
GitHub
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
23 changed files with 3657 additions and 0 deletions
-
157.github/workflows/deploy_telemetry.yml
-
271telemetry/DEPLOYMENT.md
-
351telemetry/README.md
-
55telemetry/docker-compose.yml
-
734telemetry/grafana-dashboard.json
-
12telemetry/grafana-provisioning/dashboards/dashboards.yml
-
9telemetry/grafana-provisioning/datasources/prometheus.yml
-
15telemetry/prometheus.yml
-
398telemetry/proto/telemetry.pb.go
-
52telemetry/proto/telemetry.proto
-
18telemetry/server/Dockerfile
-
97telemetry/server/Makefile
-
152telemetry/server/api/handlers.go
-
278telemetry/server/dashboard/dashboard.go
-
31telemetry/server/go.sum
-
111telemetry/server/main.go
-
245telemetry/server/storage/prometheus.go
-
315telemetry/test/integration.go
-
6weed/command/master.go
-
2weed/command/server.go
-
30weed/server/master_server.go
-
100weed/telemetry/client.go
-
218weed/telemetry/collector.go
@ -0,0 +1,157 @@ |
|||||
|
# This workflow will build and deploy the SeaweedFS telemetry server |
||||
|
# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-go |
||||
|
|
||||
|
name: Deploy Telemetry Server |
||||
|
|
||||
|
on: |
||||
|
push: |
||||
|
branches: [ "master" ] |
||||
|
paths: |
||||
|
- 'telemetry/**' |
||||
|
workflow_dispatch: |
||||
|
inputs: |
||||
|
setup: |
||||
|
description: 'Run first-time server setup' |
||||
|
required: true |
||||
|
type: boolean |
||||
|
default: false |
||||
|
deploy: |
||||
|
description: 'Deploy telemetry server to remote server' |
||||
|
required: true |
||||
|
type: boolean |
||||
|
default: false |
||||
|
|
||||
|
jobs: |
||||
|
deploy: |
||||
|
runs-on: ubuntu-latest |
||||
|
steps: |
||||
|
- uses: actions/checkout@v4 |
||||
|
|
||||
|
- name: Set up Go |
||||
|
uses: actions/setup-go@v4 |
||||
|
with: |
||||
|
go-version: '1.24' |
||||
|
|
||||
|
- name: Build Telemetry Server |
||||
|
run: | |
||||
|
go mod tidy |
||||
|
cd telemetry/server |
||||
|
GOOS=linux GOARCH=amd64 go build -o telemetry-server main.go |
||||
|
|
||||
|
- name: First-time Server Setup |
||||
|
if: github.event_name == 'workflow_dispatch' && inputs.setup |
||||
|
env: |
||||
|
SSH_PRIVATE_KEY: ${{ secrets.TELEMETRY_SSH_PRIVATE_KEY }} |
||||
|
REMOTE_HOST: ${{ secrets.TELEMETRY_HOST }} |
||||
|
REMOTE_USER: ${{ secrets.TELEMETRY_USER }} |
||||
|
run: | |
||||
|
mkdir -p ~/.ssh |
||||
|
echo "$SSH_PRIVATE_KEY" > ~/.ssh/deploy_key |
||||
|
chmod 600 ~/.ssh/deploy_key |
||||
|
echo "Host *" > ~/.ssh/config |
||||
|
echo " StrictHostKeyChecking no" >> ~/.ssh/config |
||||
|
|
||||
|
# Create all required directories with proper permissions |
||||
|
ssh -i ~/.ssh/deploy_key $REMOTE_USER@$REMOTE_HOST " |
||||
|
mkdir -p ~/seaweedfs-telemetry/bin ~/seaweedfs-telemetry/logs ~/seaweedfs-telemetry/data ~/seaweedfs-telemetry/tmp && \ |
||||
|
chmod 755 ~/seaweedfs-telemetry/logs && \ |
||||
|
chmod 755 ~/seaweedfs-telemetry/data && \ |
||||
|
touch ~/seaweedfs-telemetry/logs/telemetry.log ~/seaweedfs-telemetry/logs/telemetry.error.log && \ |
||||
|
chmod 644 ~/seaweedfs-telemetry/logs/*.log" |
||||
|
|
||||
|
# Create systemd service file |
||||
|
echo " |
||||
|
[Unit] |
||||
|
Description=SeaweedFS Telemetry Server |
||||
|
After=network.target |
||||
|
|
||||
|
[Service] |
||||
|
Type=simple |
||||
|
User=$REMOTE_USER |
||||
|
WorkingDirectory=/home/$REMOTE_USER/seaweedfs-telemetry |
||||
|
ExecStart=/home/$REMOTE_USER/seaweedfs-telemetry/bin/telemetry-server -port=8353 |
||||
|
Restart=always |
||||
|
RestartSec=5 |
||||
|
StandardOutput=append:/home/$REMOTE_USER/seaweedfs-telemetry/logs/telemetry.log |
||||
|
StandardError=append:/home/$REMOTE_USER/seaweedfs-telemetry/logs/telemetry.error.log |
||||
|
|
||||
|
[Install] |
||||
|
WantedBy=multi-user.target" > telemetry.service |
||||
|
|
||||
|
# Setup logrotate configuration |
||||
|
echo "# SeaweedFS Telemetry service log rotation |
||||
|
/home/$REMOTE_USER/seaweedfs-telemetry/logs/*.log { |
||||
|
daily |
||||
|
rotate 30 |
||||
|
compress |
||||
|
delaycompress |
||||
|
missingok |
||||
|
notifempty |
||||
|
create 644 $REMOTE_USER $REMOTE_USER |
||||
|
postrotate |
||||
|
systemctl restart telemetry.service |
||||
|
endscript |
||||
|
}" > telemetry_logrotate |
||||
|
|
||||
|
# Copy Grafana dashboard and Prometheus config |
||||
|
scp -i ~/.ssh/deploy_key telemetry/grafana-dashboard.json $REMOTE_USER@$REMOTE_HOST:~/seaweedfs-telemetry/ |
||||
|
scp -i ~/.ssh/deploy_key telemetry/prometheus.yml $REMOTE_USER@$REMOTE_HOST:~/seaweedfs-telemetry/ |
||||
|
|
||||
|
# Copy and install service and logrotate files |
||||
|
scp -i ~/.ssh/deploy_key telemetry.service telemetry_logrotate $REMOTE_USER@$REMOTE_HOST:~/seaweedfs-telemetry/ |
||||
|
ssh -i ~/.ssh/deploy_key $REMOTE_USER@$REMOTE_HOST " |
||||
|
sudo mv ~/seaweedfs-telemetry/telemetry.service /etc/systemd/system/ && \ |
||||
|
sudo mv ~/seaweedfs-telemetry/telemetry_logrotate /etc/logrotate.d/seaweedfs-telemetry && \ |
||||
|
sudo systemctl daemon-reload && \ |
||||
|
sudo systemctl enable telemetry.service" |
||||
|
|
||||
|
rm -f ~/.ssh/deploy_key |
||||
|
|
||||
|
- name: Deploy Telemetry Server to Remote Server |
||||
|
if: (github.event_name == 'push' && contains(github.ref, 'refs/heads/master')) || (github.event_name == 'workflow_dispatch' && inputs.deploy) |
||||
|
env: |
||||
|
SSH_PRIVATE_KEY: ${{ secrets.TELEMETRY_SSH_PRIVATE_KEY }} |
||||
|
REMOTE_HOST: ${{ secrets.TELEMETRY_HOST }} |
||||
|
REMOTE_USER: ${{ secrets.TELEMETRY_USER }} |
||||
|
run: | |
||||
|
mkdir -p ~/.ssh |
||||
|
echo "$SSH_PRIVATE_KEY" > ~/.ssh/deploy_key |
||||
|
chmod 600 ~/.ssh/deploy_key |
||||
|
echo "Host *" > ~/.ssh/config |
||||
|
echo " StrictHostKeyChecking no" >> ~/.ssh/config |
||||
|
|
||||
|
# Create temp directory and copy binary |
||||
|
ssh -i ~/.ssh/deploy_key $REMOTE_USER@$REMOTE_HOST "mkdir -p ~/seaweedfs-telemetry/tmp" |
||||
|
scp -i ~/.ssh/deploy_key telemetry/server/telemetry-server $REMOTE_USER@$REMOTE_HOST:~/seaweedfs-telemetry/tmp/ |
||||
|
|
||||
|
# Copy updated configuration files |
||||
|
scp -i ~/.ssh/deploy_key telemetry/grafana-dashboard.json $REMOTE_USER@$REMOTE_HOST:~/seaweedfs-telemetry/ |
||||
|
scp -i ~/.ssh/deploy_key telemetry/prometheus.yml $REMOTE_USER@$REMOTE_HOST:~/seaweedfs-telemetry/ |
||||
|
|
||||
|
# Stop service, move binary, and restart |
||||
|
ssh -i ~/.ssh/deploy_key $REMOTE_USER@$REMOTE_HOST " |
||||
|
sudo systemctl stop telemetry.service || true && \ |
||||
|
mkdir -p ~/seaweedfs-telemetry/bin && \ |
||||
|
mv ~/seaweedfs-telemetry/tmp/telemetry-server ~/seaweedfs-telemetry/bin/ && \ |
||||
|
chmod +x ~/seaweedfs-telemetry/bin/telemetry-server && \ |
||||
|
sudo systemctl start telemetry.service && \ |
||||
|
sudo systemctl status telemetry.service" |
||||
|
|
||||
|
# Verify deployment |
||||
|
ssh -i ~/.ssh/deploy_key $REMOTE_USER@$REMOTE_HOST " |
||||
|
echo 'Waiting for service to start...' |
||||
|
sleep 5 |
||||
|
curl -f http://localhost:8353/health || echo 'Health check failed'" |
||||
|
|
||||
|
rm -f ~/.ssh/deploy_key |
||||
|
|
||||
|
- name: Notify Deployment Status |
||||
|
if: always() |
||||
|
run: | |
||||
|
if [ "${{ job.status }}" == "success" ]; then |
||||
|
echo "✅ Telemetry server deployment successful" |
||||
|
echo "Dashboard: http://${{ secrets.TELEMETRY_HOST }}:8353" |
||||
|
echo "Metrics: http://${{ secrets.TELEMETRY_HOST }}:8353/metrics" |
||||
|
else |
||||
|
echo "❌ Telemetry server deployment failed" |
||||
|
fi |
@ -0,0 +1,271 @@ |
|||||
|
# SeaweedFS Telemetry Server Deployment |
||||
|
|
||||
|
This document describes how to deploy the SeaweedFS telemetry server to a remote server using GitHub Actions. |
||||
|
|
||||
|
## Prerequisites |
||||
|
|
||||
|
1. A remote Linux server with: |
||||
|
- SSH access |
||||
|
- systemd (for service management) |
||||
|
- Optional: Prometheus and Grafana (for monitoring) |
||||
|
|
||||
|
2. GitHub repository secrets configured (see [Setup GitHub Secrets](#setup-github-secrets) below): |
||||
|
- `TELEMETRY_SSH_PRIVATE_KEY`: SSH private key for accessing the remote server |
||||
|
- `TELEMETRY_HOST`: Remote server hostname or IP address |
||||
|
- `TELEMETRY_USER`: Username for SSH access |
||||
|
|
||||
|
## Setup GitHub Secrets |
||||
|
|
||||
|
Before using the deployment workflow, you need to configure the required secrets in your GitHub repository. |
||||
|
|
||||
|
### Step 1: Generate SSH Key Pair |
||||
|
|
||||
|
On your local machine, generate a new SSH key pair specifically for deployment: |
||||
|
|
||||
|
```bash |
||||
|
# Generate a new SSH key pair |
||||
|
ssh-keygen -t ed25519 -C "seaweedfs-telemetry-deploy" -f ~/.ssh/seaweedfs_telemetry_deploy |
||||
|
|
||||
|
# This creates two files: |
||||
|
# ~/.ssh/seaweedfs_telemetry_deploy (private key) |
||||
|
# ~/.ssh/seaweedfs_telemetry_deploy.pub (public key) |
||||
|
``` |
||||
|
|
||||
|
### Step 2: Configure Remote Server |
||||
|
|
||||
|
Copy the public key to your remote server: |
||||
|
|
||||
|
```bash |
||||
|
# Copy public key to remote server |
||||
|
ssh-copy-id -i ~/.ssh/seaweedfs_telemetry_deploy.pub user@your-server.com |
||||
|
|
||||
|
# Or manually append to authorized_keys |
||||
|
cat ~/.ssh/seaweedfs_telemetry_deploy.pub | ssh user@your-server.com "mkdir -p ~/.ssh && cat >> ~/.ssh/authorized_keys" |
||||
|
``` |
||||
|
|
||||
|
Test the SSH connection: |
||||
|
|
||||
|
```bash |
||||
|
# Test SSH connection with the new key |
||||
|
ssh -i ~/.ssh/seaweedfs_telemetry_deploy user@your-server.com "echo 'SSH connection successful'" |
||||
|
``` |
||||
|
|
||||
|
### Step 3: Add Secrets to GitHub Repository |
||||
|
|
||||
|
1. Go to your GitHub repository |
||||
|
2. Click on **Settings** tab |
||||
|
3. In the sidebar, click **Secrets and variables** → **Actions** |
||||
|
4. Click **New repository secret** for each of the following: |
||||
|
|
||||
|
#### TELEMETRY_SSH_PRIVATE_KEY |
||||
|
|
||||
|
```bash |
||||
|
# Display the private key content |
||||
|
cat ~/.ssh/seaweedfs_telemetry_deploy |
||||
|
``` |
||||
|
|
||||
|
- **Name**: `TELEMETRY_SSH_PRIVATE_KEY` |
||||
|
- **Value**: Copy the entire private key content, including the `-----BEGIN OPENSSH PRIVATE KEY-----` and `-----END OPENSSH PRIVATE KEY-----` lines |
||||
|
|
||||
|
#### TELEMETRY_HOST |
||||
|
|
||||
|
- **Name**: `TELEMETRY_HOST` |
||||
|
- **Value**: Your server's hostname or IP address (e.g., `telemetry.example.com` or `192.168.1.100`) |
||||
|
|
||||
|
#### TELEMETRY_USER |
||||
|
|
||||
|
- **Name**: `TELEMETRY_USER` |
||||
|
- **Value**: The username on the remote server (e.g., `ubuntu`, `deploy`, or your username) |
||||
|
|
||||
|
### Step 4: Verify Configuration |
||||
|
|
||||
|
Create a simple test workflow or manually trigger the deployment to verify the secrets are working correctly. |
||||
|
|
||||
|
### Security Best Practices |
||||
|
|
||||
|
1. **Dedicated SSH Key**: Use a separate SSH key only for deployment |
||||
|
2. **Limited Permissions**: Create a dedicated user on the remote server with minimal required permissions |
||||
|
3. **Key Rotation**: Regularly rotate SSH keys |
||||
|
4. **Server Access**: Restrict SSH access to specific IP ranges if possible |
||||
|
|
||||
|
### Example Server Setup |
||||
|
|
||||
|
If you're setting up a new server, here's a basic configuration: |
||||
|
|
||||
|
```bash |
||||
|
# On the remote server, create a dedicated user for deployment |
||||
|
sudo useradd -m -s /bin/bash seaweedfs-deploy |
||||
|
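sudo usermod -aG sudo seaweedfs-deploy # Required by the deployment workflow: it runs `sudo mv` and `sudo systemctl` over non-interactive SSH, so configure passwordless sudo for this user |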
||||
|
|
||||
|
# Switch to the deployment user |
||||
|
sudo su - seaweedfs-deploy |
||||
|
|
||||
|
# Create SSH directory |
||||
|
mkdir -p ~/.ssh |
||||
|
chmod 700 ~/.ssh |
||||
|
|
||||
|
# Add your public key (paste the content of seaweedfs_telemetry_deploy.pub) |
||||
|
nano ~/.ssh/authorized_keys |
||||
|
chmod 600 ~/.ssh/authorized_keys |
||||
|
``` |
||||
|
|
||||
|
### Troubleshooting |
||||
|
|
||||
|
#### SSH Connection Issues |
||||
|
|
||||
|
```bash |
||||
|
# Test SSH connection manually |
||||
|
ssh -i ~/.ssh/seaweedfs_telemetry_deploy -v user@your-server.com |
||||
|
|
||||
|
# Check SSH key permissions |
||||
|
ls -la ~/.ssh/seaweedfs_telemetry_deploy* |
||||
|
# Should show: -rw------- for private key, -rw-r--r-- for public key |
||||
|
``` |
||||
|
|
||||
|
#### GitHub Actions Fails |
||||
|
|
||||
|
1. **Check secrets**: Ensure all three secrets are properly set in GitHub |
||||
|
2. **Verify SSH key**: Make sure the entire private key (including headers/footers) is copied |
||||
|
3. **Test connectivity**: Manually SSH to the server from your local machine |
||||
|
4. **Check user permissions**: Ensure the remote user has necessary permissions |
||||
|
|
||||
|
## GitHub Actions Workflow |
||||
|
|
||||
|
The deployment workflow (`.github/workflows/deploy_telemetry.yml`) provides two main operations: |
||||
|
|
||||
|
### 1. First-time Setup |
||||
|
|
||||
|
Run this once to set up the remote server: |
||||
|
|
||||
|
1. Go to GitHub Actions in your repository |
||||
|
2. Select "Deploy Telemetry Server" workflow |
||||
|
3. Click "Run workflow" |
||||
|
4. Check "Run first-time server setup" |
||||
|
5. Click "Run workflow" |
||||
|
|
||||
|
This will: |
||||
|
- Create necessary directories on the remote server |
||||
|
- Set up systemd service configuration |
||||
|
- Configure log rotation |
||||
|
- Upload Grafana dashboard and Prometheus configuration |
||||
|
|
||||
|
|
||||
|
### 2. Deploy Updates |
||||
|
|
||||
|
Deployments happen automatically when: |
||||
|
- Code is pushed to the `master` branch with changes in the `telemetry/` directory |
||||
|
|
||||
|
Or manually trigger deployment: |
||||
|
1. Go to GitHub Actions in your repository |
||||
|
2. Select "Deploy Telemetry Server" workflow |
||||
|
3. Click "Run workflow" |
||||
|
4. Check "Deploy telemetry server to remote server" |
||||
|
5. Click "Run workflow" |
||||
|
|
||||
|
## Server Directory Structure |
||||
|
|
||||
|
After setup, the remote server will have: |
||||
|
|
||||
|
``` |
||||
|
~/seaweedfs-telemetry/ |
||||
|
├── bin/ |
||||
|
│ └── telemetry-server # Binary executable |
||||
|
├── logs/ |
||||
|
│ ├── telemetry.log # Application logs |
||||
|
│ └── telemetry.error.log # Error logs |
||||
|
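├── data/ # Data directory (if needed) |
||||
|
├── tmp/ # Staging area where the workflow uploads a new binary before moving it into bin/ |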
||||
|
├── grafana-dashboard.json # Grafana dashboard configuration |
||||
|
└── prometheus.yml # Prometheus configuration |
||||
|
``` |
||||
|
|
||||
|
## Service Management |
||||
|
|
||||
|
The telemetry server runs as a systemd service: |
||||
|
|
||||
|
```bash |
||||
|
# Check service status |
||||
|
sudo systemctl status telemetry.service |
||||
|
|
||||
|
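# View systemd journal entries for the unit (the server's stdout/stderr are redirected to ~/seaweedfs-telemetry/logs/*.log) |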
||||
|
sudo journalctl -u telemetry.service -f |
||||
|
|
||||
|
# Restart service |
||||
|
sudo systemctl restart telemetry.service |
||||
|
|
||||
|
# Stop/start service |
||||
|
sudo systemctl stop telemetry.service |
||||
|
sudo systemctl start telemetry.service |
||||
|
``` |
||||
|
|
||||
|
## Accessing the Service |
||||
|
|
||||
|
After deployment, the telemetry server will be available at: |
||||
|
|
||||
|
- **Dashboard**: `http://your-server:8353` |
||||
|
- **API**: `http://your-server:8353/api/*` |
||||
|
- **Metrics**: `http://your-server:8353/metrics` |
||||
|
- **Health Check**: `http://your-server:8353/health` |
||||
|
|
||||
|
## Optional: Prometheus and Grafana Integration |
||||
|
|
||||
|
### Prometheus Setup |
||||
|
|
||||
|
1. Install Prometheus on your server |
||||
|
2. Update `/etc/prometheus/prometheus.yml` to include: |
||||
|
```yaml |
||||
|
scrape_configs: |
||||
|
- job_name: 'seaweedfs-telemetry' |
||||
|
static_configs: |
||||
|
- targets: ['localhost:8353'] |
||||
|
metrics_path: '/metrics' |
||||
|
``` |
||||
|
|
||||
|
### Grafana Setup |
||||
|
|
||||
|
1. Install Grafana on your server |
||||
|
2. Import the dashboard from `~/seaweedfs-telemetry/grafana-dashboard.json` |
||||
|
3. Configure Prometheus as a data source pointing to your Prometheus instance |
||||
|
|
||||
|
## Troubleshooting |
||||
|
|
||||
|
### Deployment Fails |
||||
|
|
||||
|
1. Check GitHub Actions logs for detailed error messages |
||||
|
2. Verify SSH connectivity: `ssh user@host` |
||||
|
3. Ensure all required secrets are configured in GitHub |
||||
|
|
||||
|
### Service Won't Start |
||||
|
|
||||
|
1. Check service logs: `sudo journalctl -u telemetry.service` and `tail -n 100 ~/seaweedfs-telemetry/logs/telemetry.error.log` |
||||
|
2. Verify binary permissions: `ls -la ~/seaweedfs-telemetry/bin/` |
||||
|
3. Test binary manually: `~/seaweedfs-telemetry/bin/telemetry-server -help` |
||||
|
|
||||
|
### Port Conflicts |
||||
|
|
||||
|
If port 8353 is already in use: |
||||
|
|
||||
|
1. Edit the systemd service: `sudo systemctl edit telemetry.service` |
||||
|
2. Add override configuration: |
||||
|
```ini |
||||
|
[Service] |
||||
|
ExecStart= |
||||
|
ExecStart=/home/user/seaweedfs-telemetry/bin/telemetry-server -port=8354 |
||||
|
``` |
||||
|
3. Reload and restart: `sudo systemctl daemon-reload && sudo systemctl restart telemetry.service` |
||||
|
|
||||
|
## Security Considerations |
||||
|
|
||||
|
1. **Firewall**: Consider restricting access to telemetry ports |
||||
|
2. **SSH Keys**: Use dedicated SSH keys with minimal permissions |
||||
|
3. **User Permissions**: Run the service as a non-privileged user |
||||
|
4. **Network**: Consider running on internal networks only |
||||
|
|
||||
|
## Monitoring |
||||
|
|
||||
|
Monitor the deployment and service health: |
||||
|
|
||||
|
- **GitHub Actions**: Check workflow runs for deployment status |
||||
|
- **System Logs**: `sudo journalctl -u telemetry.service` |
||||
|
- **Application Logs**: `tail -f ~/seaweedfs-telemetry/logs/telemetry.log` |
||||
|
- **Health Endpoint**: `curl http://localhost:8353/health` |
||||
|
- **Metrics**: `curl http://localhost:8353/metrics` |
@ -0,0 +1,351 @@ |
|||||
|
# SeaweedFS Telemetry System |
||||
|
|
||||
|
A privacy-respecting telemetry system for SeaweedFS that collects cluster-level usage statistics and provides visualization through Prometheus and Grafana. |
||||
|
|
||||
|
## Features |
||||
|
|
||||
|
- **Privacy-First Design**: Uses in-memory cluster IDs (regenerated on restart), no personal data collection |
||||
|
- **Prometheus Integration**: Native Prometheus metrics for monitoring and alerting |
||||
|
- **Grafana Dashboards**: Pre-built dashboards for data visualization |
||||
|
- **Protocol Buffers**: Efficient binary data transmission for optimal performance |
||||
|
- **Opt-in Only**: Disabled by default, requires explicit configuration |
||||
|
- **Docker Compose**: Complete monitoring stack deployment |
||||
|
- **Automatic Cleanup**: Configurable data retention policies |
||||
|
|
||||
|
## Architecture |
||||
|
|
||||
|
``` |
||||
|
SeaweedFS Cluster → Telemetry Client → Telemetry Server → Prometheus → Grafana |
||||
|
(protobuf) (metrics) (queries) |
||||
|
``` |
||||
|
|
||||
|
## Data Transmission |
||||
|
|
||||
|
The telemetry system uses **Protocol Buffers exclusively** for efficient binary data transmission: |
||||
|
|
||||
|
- **Compact Format**: 30-50% smaller than JSON |
||||
|
- **Fast Serialization**: Better performance than text-based formats |
||||
|
- **Type Safety**: Strong typing with generated Go structs |
||||
|
- **Schema Evolution**: Built-in versioning support |
||||
|
|
||||
|
### Protobuf Schema |
||||
|
|
||||
|
```protobuf |
||||
|
message TelemetryData { |
||||
|
string cluster_id = 1; // In-memory generated UUID |
||||
|
string version = 2; // SeaweedFS version |
||||
|
string os = 3; // Operating system |
||||
|
repeated string features = 4; // Enabled features |
||||
|
string deployment = 5; // Deployment type |
||||
|
int32 volume_server_count = 6; // Number of volume servers |
||||
|
uint64 total_disk_bytes = 7; // Total disk usage |
||||
|
int32 total_volume_count = 8; // Total volume count |
||||
|
int64 timestamp = 9; // Collection timestamp |
||||
|
} |
||||
|
``` |
||||
|
|
||||
|
## Privacy Approach |
||||
|
|
||||
|
- **No Personal Data**: No hostnames, IP addresses, or user information |
||||
|
- **In-Memory IDs**: Cluster IDs are generated in-memory and change on restart |
||||
|
- **Aggregated Data**: Only cluster-level statistics, no individual file/user data |
||||
|
- **Opt-in Only**: Telemetry is disabled by default |
||||
|
- **Transparent**: Open source implementation, clear data collection policy |
||||
|
|
||||
|
## Collected Data |
||||
|
|
||||
|
| Field | Description | Example | |
||||
|
|-------|-------------|---------| |
||||
|
| `cluster_id` | In-memory UUID (changes on restart) | `a1b2c3d4-...` | |
||||
|
| `version` | SeaweedFS version | `3.45` | |
||||
|
| `os` | Operating system and architecture | `linux/amd64` | |
||||
|
| `features` | Enabled components | `["filer", "s3api"]` | |
||||
|
| `deployment` | Deployment type | `cluster` | |
||||
|
| `volume_server_count` | Number of volume servers | `5` | |
||||
|
| `total_disk_bytes` | Total disk usage across cluster | `1073741824` | |
||||
|
| `total_volume_count` | Total number of volumes | `120` | |
||||
|
| `timestamp` | When data was collected | `1640995200` | |
||||
|
|
||||
|
## Quick Start |
||||
|
|
||||
|
### 1. Deploy Telemetry Server |
||||
|
|
||||
|
```bash |
||||
|
# Clone and start the complete monitoring stack |
||||
|
git clone https://github.com/seaweedfs/seaweedfs.git |
||||
|
cd seaweedfs/telemetry |
||||
|
docker-compose up -d |
||||
|
|
||||
|
# Or run the server directly |
||||
|
cd server |
||||
|
go run . -port=8080 -dashboard=true |
||||
|
``` |
||||
|
|
||||
|
### 2. Configure SeaweedFS |
||||
|
|
||||
|
```bash |
||||
|
# Enable telemetry in SeaweedFS master (uses default telemetry.seaweedfs.com:3091) |
||||
|
weed master -telemetry=true |
||||
|
|
||||
|
# Or in server mode |
||||
|
weed server -telemetry=true |
||||
|
|
||||
|
# Or specify custom telemetry server |
||||
|
weed master -telemetry=true -telemetry.url=http://localhost:8080/api/collect |
||||
|
``` |
||||
|
|
||||
|
### 3. Access Dashboards |
||||
|
|
||||
|
- **Telemetry Server**: http://localhost:8080 |
||||
|
- **Prometheus**: http://localhost:9090 |
||||
|
- **Grafana**: http://localhost:3000 (admin/admin) |
||||
|
|
||||
|
## Configuration |
||||
|
|
||||
|
### SeaweedFS Master/Server |
||||
|
|
||||
|
```bash |
||||
|
# Enable telemetry |
||||
|
-telemetry=true |
||||
|
|
||||
|
# Set custom telemetry server URL (optional, defaults to telemetry.seaweedfs.com:3091) |
||||
|
-telemetry.url=http://your-telemetry-server:8080/api/collect |
||||
|
``` |
||||
|
|
||||
|
### Telemetry Server |
||||
|
|
||||
|
```bash |
||||
|
# Server configuration |
||||
|
-port=8080 # Server port |
||||
|
-dashboard=true # Enable built-in dashboard |
||||
|
-cleanup=24h # Cleanup interval |
||||
|
-max-age=720h # Maximum data retention (30 days) |
||||
|
|
||||
|
# Example |
||||
|
./telemetry-server -port=8080 -dashboard=true -cleanup=24h -max-age=720h |
||||
|
``` |
||||
|
|
||||
|
## Prometheus Metrics |
||||
|
|
||||
|
The telemetry server exposes these Prometheus metrics: |
||||
|
|
||||
|
### Cluster Metrics |
||||
|
- `seaweedfs_telemetry_total_clusters`: Total unique clusters (30 days) |
||||
|
- `seaweedfs_telemetry_active_clusters`: Active clusters (7 days) |
||||
|
|
||||
|
### Per-Cluster Metrics |
||||
|
- `seaweedfs_telemetry_volume_servers{cluster_id, version, os, deployment}`: Volume servers per cluster |
||||
|
- `seaweedfs_telemetry_disk_bytes{cluster_id, version, os, deployment}`: Disk usage per cluster |
||||
|
- `seaweedfs_telemetry_volume_count{cluster_id, version, os, deployment}`: Volume count per cluster |
||||
|
- `seaweedfs_telemetry_filer_count{cluster_id, version, os, deployment}`: Filer servers per cluster |
||||
|
- `seaweedfs_telemetry_broker_count{cluster_id, version, os, deployment}`: Broker servers per cluster |
||||
|
- `seaweedfs_telemetry_cluster_info{cluster_id, version, os, deployment, features}`: Cluster metadata |
||||
|
|
||||
|
### Server Metrics |
||||
|
- `seaweedfs_telemetry_reports_received_total`: Total telemetry reports received |
||||
|
|
||||
|
## API Endpoints |
||||
|
|
||||
|
### Data Collection |
||||
|
```bash |
||||
|
# Submit telemetry data (protobuf only) |
||||
|
POST /api/collect |
||||
|
Content-Type: application/x-protobuf |
||||
|
[TelemetryRequest protobuf data] |
||||
|
``` |
||||
|
|
||||
|
### Statistics (JSON for dashboard/debugging) |
||||
|
```bash |
||||
|
# Get aggregated statistics |
||||
|
GET /api/stats |
||||
|
|
||||
|
# Get recent cluster instances |
||||
|
GET /api/instances?limit=100 |
||||
|
|
||||
|
# Get metrics over time |
||||
|
GET /api/metrics?days=30 |
||||
|
``` |
||||
|
|
||||
|
### Monitoring |
||||
|
```bash |
||||
|
# Prometheus metrics |
||||
|
GET /metrics |
||||
|
``` |
||||
|
|
||||
|
## Docker Deployment |
||||
|
|
||||
|
### Complete Stack (Recommended) |
||||
|
|
||||
|
```yaml |
||||
|
# docker-compose.yml |
||||
|
version: '3.8' |
||||
|
services: |
||||
|
telemetry-server: |
||||
|
build: ./server |
||||
|
ports: |
||||
|
- "8080:8080" |
||||
|
command: ["-port=8080", "-dashboard=true", "-cleanup=24h"] |
||||
|
|
||||
|
prometheus: |
||||
|
image: prom/prometheus:latest |
||||
|
ports: |
||||
|
- "9090:9090" |
||||
|
volumes: |
||||
|
- ./prometheus.yml:/etc/prometheus/prometheus.yml |
||||
|
|
||||
|
grafana: |
||||
|
image: grafana/grafana:latest |
||||
|
ports: |
||||
|
- "3000:3000" |
||||
|
environment: |
||||
|
- GF_SECURITY_ADMIN_PASSWORD=admin |
||||
|
volumes: |
||||
|
- ./grafana-provisioning:/etc/grafana/provisioning |
||||
|
- ./grafana-dashboard.json:/var/lib/grafana/dashboards/seaweedfs.json |
||||
|
``` |
||||
|
|
||||
|
```bash |
||||
|
# Deploy the stack |
||||
|
docker-compose up -d |
||||
|
|
||||
|
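# Scale telemetry server if needed (first remove the fixed "8080:8080" host port mapping, otherwise the extra replicas cannot bind the same host port) |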
||||
|
docker-compose up -d --scale telemetry-server=3 |
||||
|
``` |
||||
|
|
||||
|
### Server Only |
||||
|
|
||||
|
```bash |
||||
|
# Build and run telemetry server |
||||
|
cd server |
||||
|
docker build -t seaweedfs-telemetry . |
||||
|
docker run -p 8080:8080 seaweedfs-telemetry -port=8080 -dashboard=true |
||||
|
``` |
||||
|
|
||||
|
## Development |
||||
|
|
||||
|
### Protocol Buffer Development |
||||
|
|
||||
|
```bash |
||||
|
# Generate protobuf code |
||||
|
cd telemetry |
||||
|
protoc --go_out=. --go_opt=paths=source_relative proto/telemetry.proto |
||||
|
|
||||
|
# The generated code is already included in the repository |
||||
|
``` |
||||
|
|
||||
|
### Build from Source |
||||
|
|
||||
|
```bash |
||||
|
# Build telemetry server |
||||
|
cd telemetry/server |
||||
|
go build -o telemetry-server . |
||||
|
|
||||
|
# Build SeaweedFS with telemetry support |
||||
|
cd ../.. |
||||
|
go build -o weed ./weed |
||||
|
``` |
||||
|
|
||||
|
### Testing |
||||
|
|
||||
|
```bash |
||||
|
# Test telemetry server |
||||
|
cd telemetry/server |
||||
|
go test ./... |
||||
|
|
||||
|
# Test protobuf communication (requires protobuf tools) |
||||
|
# See telemetry client code for examples |
||||
|
``` |
||||
|
|
||||
|
## Grafana Dashboard |
||||
|
|
||||
|
The included Grafana dashboard provides: |
||||
|
|
||||
|
- **Overview**: Total and active clusters, version distribution |
||||
|
- **Resource Usage**: Volume servers and disk usage over time |
||||
|
- **Deployments**: Deployment type and OS distribution |
||||
|
- **Growth Trends**: Historical growth patterns |
||||
|
|
||||
|
### Custom Queries |
||||
|
|
||||
|
```promql |
||||
|
# Total active clusters |
||||
|
seaweedfs_telemetry_active_clusters |
||||
|
|
||||
|
# Disk usage by version |
||||
|
sum by (version) (seaweedfs_telemetry_disk_bytes) |
||||
|
|
||||
|
# Volume servers by deployment type |
||||
|
sum by (deployment) (seaweedfs_telemetry_volume_servers) |
||||
|
|
||||
|
# Filer servers by version |
||||
|
sum by (version) (seaweedfs_telemetry_filer_count) |
||||
|
|
||||
|
# Broker servers across all clusters |
||||
|
sum(seaweedfs_telemetry_broker_count) |
||||
|
|
||||
|
# Growth rate (weekly) |
||||
|
delta(seaweedfs_telemetry_total_clusters[7d]) |
||||
|
``` |
||||
|
|
||||
|
## Security Considerations |
||||
|
|
||||
|
- **Network Security**: Use HTTPS in production environments |
||||
|
- **Access Control**: Implement authentication for Grafana and Prometheus |
||||
|
- **Data Retention**: Configure appropriate retention policies |
||||
|
- **Monitoring**: Monitor the telemetry infrastructure itself |
||||
|
|
||||
|
## Troubleshooting |
||||
|
|
||||
|
### Common Issues |
||||
|
|
||||
|
**SeaweedFS not sending data:** |
||||
|
```bash |
||||
|
# Check telemetry configuration |
||||
|
weed master -h | grep telemetry |
||||
|
|
||||
|
# Verify connectivity |
||||
|
curl -v http://your-telemetry-server:8080/api/collect |
||||
|
``` |
||||
|
|
||||
|
**Server not receiving data:** |
||||
|
```bash |
||||
|
# Check server logs |
||||
|
docker-compose logs telemetry-server |
||||
|
|
||||
|
# Verify metrics endpoint |
||||
|
curl http://localhost:8080/metrics |
||||
|
``` |
||||
|
|
||||
|
**Prometheus not scraping:** |
||||
|
```bash |
||||
|
# Check Prometheus targets |
||||
|
curl http://localhost:9090/api/v1/targets |
||||
|
|
||||
|
# Verify configuration |
||||
|
docker-compose logs prometheus |
||||
|
``` |
||||
|
|
||||
|
### Debugging |
||||
|
|
||||
|
```bash |
||||
|
# Enable verbose logging in SeaweedFS |
||||
|
weed master -v=2 -telemetry=true |
||||
|
|
||||
|
# Check telemetry server metrics |
||||
|
curl http://localhost:8080/metrics | grep seaweedfs_telemetry |
||||
|
|
||||
|
# Test data flow |
||||
|
curl http://localhost:8080/api/stats |
||||
|
``` |
||||
|
|
||||
|
## Contributing |
||||
|
|
||||
|
1. Fork the repository |
||||
|
2. Create a feature branch |
||||
|
3. Make your changes |
||||
|
4. Add tests if applicable |
||||
|
5. Submit a pull request |
||||
|
|
||||
|
## License |
||||
|
|
||||
|
This telemetry system is part of SeaweedFS and follows the same Apache 2.0 license. |
@ -0,0 +1,55 @@ |
|||||
|
version: '3.8' |
||||
|
|
||||
|
services: |
||||
|
telemetry-server: |
||||
|
build: ./server |
||||
|
ports: |
||||
|
- "8080:8080" |
||||
|
command: [ |
||||
|
"./telemetry-server", |
||||
|
"-port=8080", |
||||
|
"-dashboard=false", # Disable built-in dashboard, use Grafana |
||||
|
"-log=true", |
||||
|
"-cors=true" |
||||
|
] |
||||
|
networks: |
||||
|
- telemetry |
||||
|
|
||||
|
prometheus: |
||||
|
image: prom/prometheus:latest |
||||
|
ports: |
||||
|
- "9090:9090" |
||||
|
volumes: |
||||
|
- ./prometheus.yml:/etc/prometheus/prometheus.yml |
||||
|
- prometheus_data:/prometheus |
||||
|
command: |
||||
|
- '--config.file=/etc/prometheus/prometheus.yml' |
||||
|
- '--storage.tsdb.path=/prometheus' |
||||
|
- '--web.console.libraries=/etc/prometheus/console_libraries' |
||||
|
- '--web.console.templates=/etc/prometheus/consoles' |
||||
|
- '--storage.tsdb.retention.time=200h' |
||||
|
- '--web.enable-lifecycle' |
||||
|
networks: |
||||
|
- telemetry |
||||
|
|
||||
|
grafana: |
||||
|
image: grafana/grafana:latest |
||||
|
ports: |
||||
|
- "3000:3000" |
||||
|
environment: |
||||
|
- GF_SECURITY_ADMIN_PASSWORD=admin |
||||
|
- GF_USERS_ALLOW_SIGN_UP=false |
||||
|
volumes: |
||||
|
- grafana_data:/var/lib/grafana |
||||
|
- ./grafana-dashboard.json:/var/lib/grafana/dashboards/seaweedfs-telemetry.json |
||||
|
- ./grafana-provisioning:/etc/grafana/provisioning |
||||
|
networks: |
||||
|
- telemetry |
||||
|
|
||||
|
volumes: |
||||
|
prometheus_data: |
||||
|
grafana_data: |
||||
|
|
||||
|
networks: |
||||
|
telemetry: |
||||
|
driver: bridge |
@ -0,0 +1,734 @@ |
|||||
|
{ |
||||
|
"annotations": { |
||||
|
"list": [ |
||||
|
{ |
||||
|
"builtIn": 1, |
||||
|
"datasource": { |
||||
|
"type": "grafana", |
||||
|
"uid": "-- Grafana --" |
||||
|
}, |
||||
|
"enable": true, |
||||
|
"hide": true, |
||||
|
"iconColor": "rgba(0, 211, 255, 1)", |
||||
|
"name": "Annotations & Alerts", |
||||
|
"type": "dashboard" |
||||
|
} |
||||
|
] |
||||
|
}, |
||||
|
"editable": true, |
||||
|
"fiscalYearStartMonth": 0, |
||||
|
"graphTooltip": 0, |
||||
|
"id": null, |
||||
|
"links": [], |
||||
|
"liveNow": false, |
||||
|
"panels": [ |
||||
|
{ |
||||
|
"datasource": { |
||||
|
"type": "prometheus", |
||||
|
"uid": "${DS_PROMETHEUS}" |
||||
|
}, |
||||
|
"fieldConfig": { |
||||
|
"defaults": { |
||||
|
"color": { |
||||
|
"mode": "thresholds" |
||||
|
}, |
||||
|
"custom": { |
||||
|
"align": "auto", |
||||
|
"cellOptions": { |
||||
|
"type": "auto" |
||||
|
}, |
||||
|
"inspect": false |
||||
|
}, |
||||
|
"mappings": [], |
||||
|
"thresholds": { |
||||
|
"mode": "absolute", |
||||
|
"steps": [ |
||||
|
{ |
||||
|
"color": "green", |
||||
|
"value": null |
||||
|
}, |
||||
|
{ |
||||
|
"color": "red", |
||||
|
"value": 80 |
||||
|
} |
||||
|
] |
||||
|
} |
||||
|
}, |
||||
|
"overrides": [] |
||||
|
}, |
||||
|
"gridPos": { |
||||
|
"h": 8, |
||||
|
"w": 12, |
||||
|
"x": 0, |
||||
|
"y": 0 |
||||
|
}, |
||||
|
"id": 1, |
||||
|
"options": { |
||||
|
"showHeader": true |
||||
|
}, |
||||
|
"pluginVersion": "10.0.0", |
||||
|
"targets": [ |
||||
|
{ |
||||
|
"datasource": { |
||||
|
"type": "prometheus", |
||||
|
"uid": "${DS_PROMETHEUS}" |
||||
|
}, |
||||
|
"expr": "seaweedfs_telemetry_total_clusters", |
||||
|
"format": "time_series", |
||||
|
"refId": "A" |
||||
|
} |
||||
|
], |
||||
|
"title": "Total SeaweedFS Clusters", |
||||
|
"type": "stat" |
||||
|
}, |
||||
|
{ |
||||
|
"datasource": { |
||||
|
"type": "prometheus", |
||||
|
"uid": "${DS_PROMETHEUS}" |
||||
|
}, |
||||
|
"fieldConfig": { |
||||
|
"defaults": { |
||||
|
"color": { |
||||
|
"mode": "thresholds" |
||||
|
}, |
||||
|
"custom": { |
||||
|
"align": "auto", |
||||
|
"cellOptions": { |
||||
|
"type": "auto" |
||||
|
}, |
||||
|
"inspect": false |
||||
|
}, |
||||
|
"mappings": [], |
||||
|
"thresholds": { |
||||
|
"mode": "absolute", |
||||
|
"steps": [ |
||||
|
{ |
||||
|
"color": "green", |
||||
|
"value": null |
||||
|
}, |
||||
|
{ |
||||
|
"color": "red", |
||||
|
"value": 80 |
||||
|
} |
||||
|
] |
||||
|
} |
||||
|
}, |
||||
|
"overrides": [] |
||||
|
}, |
||||
|
"gridPos": { |
||||
|
"h": 8, |
||||
|
"w": 12, |
||||
|
"x": 12, |
||||
|
"y": 0 |
||||
|
}, |
||||
|
"id": 2, |
||||
|
"options": { |
||||
|
"showHeader": true |
||||
|
}, |
||||
|
"pluginVersion": "10.0.0", |
||||
|
"targets": [ |
||||
|
{ |
||||
|
"datasource": { |
||||
|
"type": "prometheus", |
||||
|
"uid": "${DS_PROMETHEUS}" |
||||
|
}, |
||||
|
"expr": "seaweedfs_telemetry_active_clusters", |
||||
|
"format": "time_series", |
||||
|
"refId": "A" |
||||
|
} |
||||
|
], |
||||
|
"title": "Active Clusters (7 days)", |
||||
|
"type": "stat" |
||||
|
}, |
||||
|
{ |
||||
|
"datasource": { |
||||
|
"type": "prometheus", |
||||
|
"uid": "${DS_PROMETHEUS}" |
||||
|
}, |
||||
|
"fieldConfig": { |
||||
|
"defaults": { |
||||
|
"color": { |
||||
|
"mode": "palette-classic" |
||||
|
}, |
||||
|
"custom": { |
||||
|
"hideFrom": { |
||||
|
"legend": false, |
||||
|
"tooltip": false, |
||||
|
"vis": false |
||||
|
} |
||||
|
}, |
||||
|
"mappings": [] |
||||
|
}, |
||||
|
"overrides": [] |
||||
|
}, |
||||
|
"gridPos": { |
||||
|
"h": 8, |
||||
|
"w": 12, |
||||
|
"x": 0, |
||||
|
"y": 8 |
||||
|
}, |
||||
|
"id": 3, |
||||
|
"options": { |
||||
|
"legend": { |
||||
|
"displayMode": "visible", |
||||
|
"placement": "bottom", |
||||
|
"showLegend": true |
||||
|
}, |
||||
|
"pieType": "pie", |
||||
|
"reduceOptions": { |
||||
|
"values": false, |
||||
|
"calcs": [ |
||||
|
"lastNotNull" |
||||
|
], |
||||
|
"fields": "" |
||||
|
}, |
||||
|
"tooltip": { |
||||
|
"mode": "single", |
||||
|
"sort": "none" |
||||
|
} |
||||
|
}, |
||||
|
"targets": [ |
||||
|
{ |
||||
|
"datasource": { |
||||
|
"type": "prometheus", |
||||
|
"uid": "${DS_PROMETHEUS}" |
||||
|
}, |
||||
|
"expr": "count by (version) (seaweedfs_telemetry_cluster_info)", |
||||
|
"format": "time_series", |
||||
|
"legendFormat": "{{version}}", |
||||
|
"refId": "A" |
||||
|
} |
||||
|
], |
||||
|
"title": "SeaweedFS Version Distribution", |
||||
|
"type": "piechart" |
||||
|
}, |
||||
|
{ |
||||
|
"datasource": { |
||||
|
"type": "prometheus", |
||||
|
"uid": "${DS_PROMETHEUS}" |
||||
|
}, |
||||
|
"fieldConfig": { |
||||
|
"defaults": { |
||||
|
"color": { |
||||
|
"mode": "palette-classic" |
||||
|
}, |
||||
|
"custom": { |
||||
|
"hideFrom": { |
||||
|
"legend": false, |
||||
|
"tooltip": false, |
||||
|
"vis": false |
||||
|
} |
||||
|
}, |
||||
|
"mappings": [] |
||||
|
}, |
||||
|
"overrides": [] |
||||
|
}, |
||||
|
"gridPos": { |
||||
|
"h": 8, |
||||
|
"w": 12, |
||||
|
"x": 12, |
||||
|
"y": 8 |
||||
|
}, |
||||
|
"id": 4, |
||||
|
"options": { |
||||
|
"legend": { |
||||
|
"displayMode": "visible", |
||||
|
"placement": "bottom", |
||||
|
"showLegend": true |
||||
|
}, |
||||
|
"pieType": "pie", |
||||
|
"reduceOptions": { |
||||
|
"values": false, |
||||
|
"calcs": [ |
||||
|
"lastNotNull" |
||||
|
], |
||||
|
"fields": "" |
||||
|
}, |
||||
|
"tooltip": { |
||||
|
"mode": "single", |
||||
|
"sort": "none" |
||||
|
} |
||||
|
}, |
||||
|
"targets": [ |
||||
|
{ |
||||
|
"datasource": { |
||||
|
"type": "prometheus", |
||||
|
"uid": "${DS_PROMETHEUS}" |
||||
|
}, |
||||
|
"expr": "count by (os) (seaweedfs_telemetry_cluster_info)", |
||||
|
"format": "time_series", |
||||
|
"legendFormat": "{{os}}", |
||||
|
"refId": "A" |
||||
|
} |
||||
|
], |
||||
|
"title": "Operating System Distribution", |
||||
|
"type": "piechart" |
||||
|
}, |
||||
|
{ |
||||
|
"datasource": { |
||||
|
"type": "prometheus", |
||||
|
"uid": "${DS_PROMETHEUS}" |
||||
|
}, |
||||
|
"fieldConfig": { |
||||
|
"defaults": { |
||||
|
"color": { |
||||
|
"mode": "palette-classic" |
||||
|
}, |
||||
|
"custom": { |
||||
|
"axisLabel": "", |
||||
|
"axisPlacement": "auto", |
||||
|
"barAlignment": 0, |
||||
|
"drawStyle": "line", |
||||
|
"fillOpacity": 0, |
||||
|
"gradientMode": "none", |
||||
|
"hideFrom": { |
||||
|
"legend": false, |
||||
|
"tooltip": false, |
||||
|
"vis": false |
||||
|
}, |
||||
|
"lineInterpolation": "linear", |
||||
|
"lineWidth": 1, |
||||
|
"pointSize": 5, |
||||
|
"scaleDistribution": { |
||||
|
"type": "linear" |
||||
|
}, |
||||
|
"showPoints": "auto", |
||||
|
"spanNulls": false, |
||||
|
"stacking": { |
||||
|
"group": "A", |
||||
|
"mode": "none" |
||||
|
}, |
||||
|
"thresholdsStyle": { |
||||
|
"mode": "off" |
||||
|
} |
||||
|
}, |
||||
|
"mappings": [], |
||||
|
"thresholds": { |
||||
|
"mode": "absolute", |
||||
|
"steps": [ |
||||
|
{ |
||||
|
"color": "green", |
||||
|
"value": null |
||||
|
}, |
||||
|
{ |
||||
|
"color": "red", |
||||
|
"value": 80 |
||||
|
} |
||||
|
] |
||||
|
} |
||||
|
}, |
||||
|
"overrides": [] |
||||
|
}, |
||||
|
"gridPos": { |
||||
|
"h": 8, |
||||
|
"w": 24, |
||||
|
"x": 0, |
||||
|
"y": 16 |
||||
|
}, |
||||
|
"id": 5, |
||||
|
"options": { |
||||
|
"legend": { |
||||
|
"calcs": [], |
||||
|
"displayMode": "list", |
||||
|
"placement": "bottom", |
||||
|
"showLegend": true |
||||
|
}, |
||||
|
"tooltip": { |
||||
|
"mode": "single", |
||||
|
"sort": "none" |
||||
|
} |
||||
|
}, |
||||
|
"targets": [ |
||||
|
{ |
||||
|
"datasource": { |
||||
|
"type": "prometheus", |
||||
|
"uid": "${DS_PROMETHEUS}" |
||||
|
}, |
||||
|
"expr": "sum(seaweedfs_telemetry_volume_servers)", |
||||
|
"format": "time_series", |
||||
|
"legendFormat": "Total Volume Servers", |
||||
|
"refId": "A" |
||||
|
} |
||||
|
], |
||||
|
"title": "Total Volume Servers Over Time", |
||||
|
"type": "timeseries" |
||||
|
}, |
||||
|
{ |
||||
|
"datasource": { |
||||
|
"type": "prometheus", |
||||
|
"uid": "${DS_PROMETHEUS}" |
||||
|
}, |
||||
|
"fieldConfig": { |
||||
|
"defaults": { |
||||
|
"color": { |
||||
|
"mode": "palette-classic" |
||||
|
}, |
||||
|
"custom": { |
||||
|
"axisLabel": "", |
||||
|
"axisPlacement": "auto", |
||||
|
"barAlignment": 0, |
||||
|
"drawStyle": "line", |
||||
|
"fillOpacity": 0, |
||||
|
"gradientMode": "none", |
||||
|
"hideFrom": { |
||||
|
"legend": false, |
||||
|
"tooltip": false, |
||||
|
"vis": false |
||||
|
}, |
||||
|
"lineInterpolation": "linear", |
||||
|
"lineWidth": 1, |
||||
|
"pointSize": 5, |
||||
|
"scaleDistribution": { |
||||
|
"type": "linear" |
||||
|
}, |
||||
|
"showPoints": "auto", |
||||
|
"spanNulls": false, |
||||
|
"stacking": { |
||||
|
"group": "A", |
||||
|
"mode": "none" |
||||
|
}, |
||||
|
"thresholdsStyle": { |
||||
|
"mode": "off" |
||||
|
} |
||||
|
}, |
||||
|
"mappings": [], |
||||
|
"thresholds": { |
||||
|
"mode": "absolute", |
||||
|
"steps": [ |
||||
|
{ |
||||
|
"color": "green", |
||||
|
"value": null |
||||
|
}, |
||||
|
{ |
||||
|
"color": "red", |
||||
|
"value": 80 |
||||
|
} |
||||
|
] |
||||
|
}, |
||||
|
"unit": "bytes" |
||||
|
}, |
||||
|
"overrides": [] |
||||
|
}, |
||||
|
"gridPos": { |
||||
|
"h": 8, |
||||
|
"w": 12, |
||||
|
"x": 0, |
||||
|
"y": 24 |
||||
|
}, |
||||
|
"id": 6, |
||||
|
"options": { |
||||
|
"legend": { |
||||
|
"calcs": [], |
||||
|
"displayMode": "list", |
||||
|
"placement": "bottom", |
||||
|
"showLegend": true |
||||
|
}, |
||||
|
"tooltip": { |
||||
|
"mode": "single", |
||||
|
"sort": "none" |
||||
|
} |
||||
|
}, |
||||
|
"targets": [ |
||||
|
{ |
||||
|
"datasource": { |
||||
|
"type": "prometheus", |
||||
|
"uid": "${DS_PROMETHEUS}" |
||||
|
}, |
||||
|
"expr": "sum(seaweedfs_telemetry_disk_bytes)", |
||||
|
"format": "time_series", |
||||
|
"legendFormat": "Total Disk Usage", |
||||
|
"refId": "A" |
||||
|
} |
||||
|
], |
||||
|
"title": "Total Disk Usage Over Time", |
||||
|
"type": "timeseries" |
||||
|
}, |
||||
|
{ |
||||
|
"datasource": { |
||||
|
"type": "prometheus", |
||||
|
"uid": "${DS_PROMETHEUS}" |
||||
|
}, |
||||
|
"fieldConfig": { |
||||
|
"defaults": { |
||||
|
"color": { |
||||
|
"mode": "palette-classic" |
||||
|
}, |
||||
|
"custom": { |
||||
|
"axisLabel": "", |
||||
|
"axisPlacement": "auto", |
||||
|
"barAlignment": 0, |
||||
|
"drawStyle": "line", |
||||
|
"fillOpacity": 0, |
||||
|
"gradientMode": "none", |
||||
|
"hideFrom": { |
||||
|
"legend": false, |
||||
|
"tooltip": false, |
||||
|
"vis": false |
||||
|
}, |
||||
|
"lineInterpolation": "linear", |
||||
|
"lineWidth": 1, |
||||
|
"pointSize": 5, |
||||
|
"scaleDistribution": { |
||||
|
"type": "linear" |
||||
|
}, |
||||
|
"showPoints": "auto", |
||||
|
"spanNulls": false, |
||||
|
"stacking": { |
||||
|
"group": "A", |
||||
|
"mode": "none" |
||||
|
}, |
||||
|
"thresholdsStyle": { |
||||
|
"mode": "off" |
||||
|
} |
||||
|
}, |
||||
|
"mappings": [], |
||||
|
"thresholds": { |
||||
|
"mode": "absolute", |
||||
|
"steps": [ |
||||
|
{ |
||||
|
"color": "green", |
||||
|
"value": null |
||||
|
}, |
||||
|
{ |
||||
|
"color": "red", |
||||
|
"value": 80 |
||||
|
} |
||||
|
] |
||||
|
} |
||||
|
}, |
||||
|
"overrides": [] |
||||
|
}, |
||||
|
"gridPos": { |
||||
|
"h": 8, |
||||
|
"w": 12, |
||||
|
"x": 12, |
||||
|
"y": 24 |
||||
|
}, |
||||
|
"id": 7, |
||||
|
"options": { |
||||
|
"legend": { |
||||
|
"calcs": [], |
||||
|
"displayMode": "list", |
||||
|
"placement": "bottom", |
||||
|
"showLegend": true |
||||
|
}, |
||||
|
"tooltip": { |
||||
|
"mode": "single", |
||||
|
"sort": "none" |
||||
|
} |
||||
|
}, |
||||
|
"targets": [ |
||||
|
{ |
||||
|
"datasource": { |
||||
|
"type": "prometheus", |
||||
|
"uid": "${DS_PROMETHEUS}" |
||||
|
}, |
||||
|
"expr": "sum(seaweedfs_telemetry_volume_count)", |
||||
|
"format": "time_series", |
||||
|
"legendFormat": "Total Volume Count", |
||||
|
"refId": "A" |
||||
|
} |
||||
|
], |
||||
|
"title": "Total Volume Count Over Time", |
||||
|
"type": "timeseries" |
||||
|
}, |
||||
|
{ |
||||
|
"datasource": { |
||||
|
"type": "prometheus", |
||||
|
"uid": "${DS_PROMETHEUS}" |
||||
|
}, |
||||
|
"fieldConfig": { |
||||
|
"defaults": { |
||||
|
"color": { |
||||
|
"mode": "palette-classic" |
||||
|
}, |
||||
|
"custom": { |
||||
|
"axisLabel": "", |
||||
|
"axisPlacement": "auto", |
||||
|
"barAlignment": 0, |
||||
|
"drawStyle": "line", |
||||
|
"fillOpacity": 0, |
||||
|
"gradientMode": "none", |
||||
|
"hideFrom": { |
||||
|
"legend": false, |
||||
|
"tooltip": false, |
||||
|
"vis": false |
||||
|
}, |
||||
|
"lineInterpolation": "linear", |
||||
|
"lineWidth": 1, |
||||
|
"pointSize": 5, |
||||
|
"scaleDistribution": { |
||||
|
"type": "linear" |
||||
|
}, |
||||
|
"showPoints": "auto", |
||||
|
"spanNulls": false, |
||||
|
"stacking": { |
||||
|
"group": "A", |
||||
|
"mode": "none" |
||||
|
}, |
||||
|
"thresholdsStyle": { |
||||
|
"mode": "off" |
||||
|
} |
||||
|
}, |
||||
|
"mappings": [], |
||||
|
"thresholds": { |
||||
|
"mode": "absolute", |
||||
|
"steps": [ |
||||
|
{ |
||||
|
"color": "green", |
||||
|
"value": null |
||||
|
}, |
||||
|
{ |
||||
|
"color": "red", |
||||
|
"value": 80 |
||||
|
} |
||||
|
] |
||||
|
} |
||||
|
}, |
||||
|
"overrides": [] |
||||
|
}, |
||||
|
"gridPos": { |
||||
|
"h": 8, |
||||
|
"w": 12, |
||||
|
"x": 0, |
||||
|
"y": 32 |
||||
|
}, |
||||
|
"id": 8, |
||||
|
"options": { |
||||
|
"legend": { |
||||
|
"calcs": [], |
||||
|
"displayMode": "list", |
||||
|
"placement": "bottom", |
||||
|
"showLegend": true |
||||
|
}, |
||||
|
"tooltip": { |
||||
|
"mode": "single", |
||||
|
"sort": "none" |
||||
|
} |
||||
|
}, |
||||
|
"targets": [ |
||||
|
{ |
||||
|
"datasource": { |
||||
|
"type": "prometheus", |
||||
|
"uid": "${DS_PROMETHEUS}" |
||||
|
}, |
||||
|
"expr": "sum(seaweedfs_telemetry_filer_count)", |
||||
|
"format": "time_series", |
||||
|
"legendFormat": "Total Filer Count", |
||||
|
"refId": "A" |
||||
|
} |
||||
|
], |
||||
|
"title": "Total Filer Servers Over Time", |
||||
|
"type": "timeseries" |
||||
|
}, |
||||
|
{ |
||||
|
"datasource": { |
||||
|
"type": "prometheus", |
||||
|
"uid": "${DS_PROMETHEUS}" |
||||
|
}, |
||||
|
"fieldConfig": { |
||||
|
"defaults": { |
||||
|
"color": { |
||||
|
"mode": "palette-classic" |
||||
|
}, |
||||
|
"custom": { |
||||
|
"axisLabel": "", |
||||
|
"axisPlacement": "auto", |
||||
|
"barAlignment": 0, |
||||
|
"drawStyle": "line", |
||||
|
"fillOpacity": 0, |
||||
|
"gradientMode": "none", |
||||
|
"hideFrom": { |
||||
|
"legend": false, |
||||
|
"tooltip": false, |
||||
|
"vis": false |
||||
|
}, |
||||
|
"lineInterpolation": "linear", |
||||
|
"lineWidth": 1, |
||||
|
"pointSize": 5, |
||||
|
"scaleDistribution": { |
||||
|
"type": "linear" |
||||
|
}, |
||||
|
"showPoints": "auto", |
||||
|
"spanNulls": false, |
||||
|
"stacking": { |
||||
|
"group": "A", |
||||
|
"mode": "none" |
||||
|
}, |
||||
|
"thresholdsStyle": { |
||||
|
"mode": "off" |
||||
|
} |
||||
|
}, |
||||
|
"mappings": [], |
||||
|
"thresholds": { |
||||
|
"mode": "absolute", |
||||
|
"steps": [ |
||||
|
{ |
||||
|
"color": "green", |
||||
|
"value": null |
||||
|
}, |
||||
|
{ |
||||
|
"color": "red", |
||||
|
"value": 80 |
||||
|
} |
||||
|
] |
||||
|
} |
||||
|
}, |
||||
|
"overrides": [] |
||||
|
}, |
||||
|
"gridPos": { |
||||
|
"h": 8, |
||||
|
"w": 12, |
||||
|
"x": 12, |
||||
|
"y": 32 |
||||
|
}, |
||||
|
"id": 9, |
||||
|
"options": { |
||||
|
"legend": { |
||||
|
"calcs": [], |
||||
|
"displayMode": "list", |
||||
|
"placement": "bottom", |
||||
|
"showLegend": true |
||||
|
}, |
||||
|
"tooltip": { |
||||
|
"mode": "single", |
||||
|
"sort": "none" |
||||
|
} |
||||
|
}, |
||||
|
"targets": [ |
||||
|
{ |
||||
|
"datasource": { |
||||
|
"type": "prometheus", |
||||
|
"uid": "${DS_PROMETHEUS}" |
||||
|
}, |
||||
|
"expr": "sum(seaweedfs_telemetry_broker_count)", |
||||
|
"format": "time_series", |
||||
|
"legendFormat": "Total Broker Count", |
||||
|
"refId": "A" |
||||
|
} |
||||
|
], |
||||
|
"title": "Total Broker Servers Over Time", |
||||
|
"type": "timeseries" |
||||
|
} |
||||
|
], |
||||
|
"refresh": "5m", |
||||
|
"schemaVersion": 38, |
||||
|
"style": "dark", |
||||
|
"tags": [ |
||||
|
"seaweedfs", |
||||
|
"telemetry" |
||||
|
], |
||||
|
"templating": { |
||||
|
"list": [] |
||||
|
}, |
||||
|
"time": { |
||||
|
"from": "now-24h", |
||||
|
"to": "now" |
||||
|
}, |
||||
|
"timepicker": {}, |
||||
|
"timezone": "", |
||||
|
"title": "SeaweedFS Telemetry Dashboard", |
||||
|
"uid": "seaweedfs-telemetry", |
||||
|
"version": 1, |
||||
|
"weekStart": "" |
||||
|
} |
@ -0,0 +1,12 @@ |
|||||
|
apiVersion: 1 |
||||
|
|
||||
|
providers: |
||||
|
- name: 'seaweedfs' |
||||
|
orgId: 1 |
||||
|
folder: '' |
||||
|
type: file |
||||
|
disableDeletion: false |
||||
|
updateIntervalSeconds: 10 |
||||
|
allowUiUpdates: true |
||||
|
options: |
||||
|
path: /var/lib/grafana/dashboards |
@ -0,0 +1,9 @@ |
|||||
|
apiVersion: 1 |
||||
|
|
||||
|
datasources: |
||||
|
- name: Prometheus |
||||
|
type: prometheus |
||||
|
access: proxy |
||||
|
url: http://prometheus:9090 |
||||
|
isDefault: true |
||||
|
editable: true |
@ -0,0 +1,15 @@ |
|||||
|
global: |
||||
|
scrape_interval: 15s |
||||
|
evaluation_interval: 15s |
||||
|
|
||||
|
rule_files: |
||||
|
# - "first_rules.yml" |
||||
|
# - "second_rules.yml" |
||||
|
|
||||
|
scrape_configs: |
||||
|
- job_name: 'seaweedfs-telemetry' |
||||
|
static_configs: |
||||
|
- targets: ['telemetry-server:8080'] |
||||
|
scrape_interval: 30s |
||||
|
metrics_path: '/metrics' |
||||
|
scrape_timeout: 10s |
@ -0,0 +1,398 @@ |
|||||
|
// Code generated by protoc-gen-go. DO NOT EDIT.
|
||||
|
// versions:
|
||||
|
// protoc-gen-go v1.34.2
|
||||
|
// protoc v5.29.3
|
||||
|
// source: proto/telemetry.proto
|
||||
|
|
||||
|
package proto |
||||
|
|
||||
|
import ( |
||||
|
protoreflect "google.golang.org/protobuf/reflect/protoreflect" |
||||
|
protoimpl "google.golang.org/protobuf/runtime/protoimpl" |
||||
|
reflect "reflect" |
||||
|
sync "sync" |
||||
|
) |
||||
|
|
||||
|
// Compile-time guards emitted by protoc-gen-go: each blank constant evaluates
// protoimpl.EnforceVersion with this file's generation version (20) checked
// against the runtime's MinVersion / MaxVersion.
const ( |
||||
|
// Verify that this generated code is sufficiently up-to-date.
|
||||
|
_ = protoimpl.EnforceVersion(20 - protoimpl.MinVersion) |
||||
|
// Verify that runtime/protoimpl is sufficiently up-to-date.
|
||||
|
_ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20) |
||||
|
) |
||||
|
|
||||
|
// TelemetryData represents cluster-level telemetry information.
// It is message index 0 of proto/telemetry.proto; the exported fields carry
// proto field numbers 1 through 11 as shown in their struct tags.
|
||||
|
type TelemetryData struct { |
||||
|
// state, sizeCache and unknownFields are protoimpl bookkeeping fields;
// they are not part of the message payload.
state protoimpl.MessageState |
||||
|
sizeCache protoimpl.SizeCache |
||||
|
unknownFields protoimpl.UnknownFields |
||||
|
|
||||
|
// Unique cluster identifier (generated in-memory)
|
||||
|
ClusterId string `protobuf:"bytes,1,opt,name=cluster_id,json=clusterId,proto3" json:"cluster_id,omitempty"` |
||||
|
// SeaweedFS version
|
||||
|
Version string `protobuf:"bytes,2,opt,name=version,proto3" json:"version,omitempty"` |
||||
|
// Operating system (e.g., "linux/amd64")
|
||||
|
Os string `protobuf:"bytes,3,opt,name=os,proto3" json:"os,omitempty"` |
||||
|
// Enabled features (e.g., ["filer", "s3api", "mq"])
|
||||
|
Features []string `protobuf:"bytes,4,rep,name=features,proto3" json:"features,omitempty"` |
||||
|
// Deployment type ("standalone", "cluster", "master-only", "volume-only")
|
||||
|
Deployment string `protobuf:"bytes,5,opt,name=deployment,proto3" json:"deployment,omitempty"` |
||||
|
// Number of volume servers in the cluster
|
||||
|
VolumeServerCount int32 `protobuf:"varint,6,opt,name=volume_server_count,json=volumeServerCount,proto3" json:"volume_server_count,omitempty"` |
||||
|
// Total disk usage across all volume servers (in bytes)
|
||||
|
TotalDiskBytes uint64 `protobuf:"varint,7,opt,name=total_disk_bytes,json=totalDiskBytes,proto3" json:"total_disk_bytes,omitempty"` |
||||
|
// Total number of volumes in the cluster
|
||||
|
TotalVolumeCount int32 `protobuf:"varint,8,opt,name=total_volume_count,json=totalVolumeCount,proto3" json:"total_volume_count,omitempty"` |
||||
|
// Number of filer servers in the cluster
|
||||
|
FilerCount int32 `protobuf:"varint,9,opt,name=filer_count,json=filerCount,proto3" json:"filer_count,omitempty"` |
||||
|
// Number of broker servers in the cluster
|
||||
|
BrokerCount int32 `protobuf:"varint,10,opt,name=broker_count,json=brokerCount,proto3" json:"broker_count,omitempty"` |
||||
|
// Unix timestamp when the data was collected
|
||||
|
Timestamp int64 `protobuf:"varint,11,opt,name=timestamp,proto3" json:"timestamp,omitempty"` |
||||
|
} |
||||
|
|
||||
|
func (x *TelemetryData) Reset() { |
||||
|
*x = TelemetryData{} |
||||
|
if protoimpl.UnsafeEnabled { |
||||
|
mi := &file_proto_telemetry_proto_msgTypes[0] |
||||
|
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) |
||||
|
ms.StoreMessageInfo(mi) |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
func (x *TelemetryData) String() string { |
||||
|
return protoimpl.X.MessageStringOf(x) |
||||
|
} |
||||
|
|
||||
|
// ProtoMessage is an empty marker method emitted by protoc-gen-go; it has no
// behavior of its own.
func (*TelemetryData) ProtoMessage() {} |
||||
|
|
||||
|
func (x *TelemetryData) ProtoReflect() protoreflect.Message { |
||||
|
mi := &file_proto_telemetry_proto_msgTypes[0] |
||||
|
if protoimpl.UnsafeEnabled && x != nil { |
||||
|
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) |
||||
|
if ms.LoadMessageInfo() == nil { |
||||
|
ms.StoreMessageInfo(mi) |
||||
|
} |
||||
|
return ms |
||||
|
} |
||||
|
return mi.MessageOf(x) |
||||
|
} |
||||
|
|
||||
|
// Deprecated: Use TelemetryData.ProtoReflect.Descriptor instead.
|
||||
|
func (*TelemetryData) Descriptor() ([]byte, []int) { |
||||
|
return file_proto_telemetry_proto_rawDescGZIP(), []int{0} |
||||
|
} |
||||
|
|
||||
|
func (x *TelemetryData) GetClusterId() string { |
||||
|
if x != nil { |
||||
|
return x.ClusterId |
||||
|
} |
||||
|
return "" |
||||
|
} |
||||
|
|
||||
|
func (x *TelemetryData) GetVersion() string { |
||||
|
if x != nil { |
||||
|
return x.Version |
||||
|
} |
||||
|
return "" |
||||
|
} |
||||
|
|
||||
|
func (x *TelemetryData) GetOs() string { |
||||
|
if x != nil { |
||||
|
return x.Os |
||||
|
} |
||||
|
return "" |
||||
|
} |
||||
|
|
||||
|
func (x *TelemetryData) GetFeatures() []string { |
||||
|
if x != nil { |
||||
|
return x.Features |
||||
|
} |
||||
|
return nil |
||||
|
} |
||||
|
|
||||
|
func (x *TelemetryData) GetDeployment() string { |
||||
|
if x != nil { |
||||
|
return x.Deployment |
||||
|
} |
||||
|
return "" |
||||
|
} |
||||
|
|
||||
|
func (x *TelemetryData) GetVolumeServerCount() int32 { |
||||
|
if x != nil { |
||||
|
return x.VolumeServerCount |
||||
|
} |
||||
|
return 0 |
||||
|
} |
||||
|
|
||||
|
func (x *TelemetryData) GetTotalDiskBytes() uint64 { |
||||
|
if x != nil { |
||||
|
return x.TotalDiskBytes |
||||
|
} |
||||
|
return 0 |
||||
|
} |
||||
|
|
||||
|
func (x *TelemetryData) GetTotalVolumeCount() int32 { |
||||
|
if x != nil { |
||||
|
return x.TotalVolumeCount |
||||
|
} |
||||
|
return 0 |
||||
|
} |
||||
|
|
||||
|
func (x *TelemetryData) GetFilerCount() int32 { |
||||
|
if x != nil { |
||||
|
return x.FilerCount |
||||
|
} |
||||
|
return 0 |
||||
|
} |
||||
|
|
||||
|
func (x *TelemetryData) GetBrokerCount() int32 { |
||||
|
if x != nil { |
||||
|
return x.BrokerCount |
||||
|
} |
||||
|
return 0 |
||||
|
} |
||||
|
|
||||
|
func (x *TelemetryData) GetTimestamp() int64 { |
||||
|
if x != nil { |
||||
|
return x.Timestamp |
||||
|
} |
||||
|
return 0 |
||||
|
} |
||||
|
|
||||
|
// TelemetryRequest is sent from SeaweedFS clusters to the telemetry server.
// It is message index 1 of proto/telemetry.proto.
|
||||
|
type TelemetryRequest struct { |
||||
|
// state, sizeCache and unknownFields are protoimpl bookkeeping fields;
// they are not part of the message payload.
state protoimpl.MessageState |
||||
|
sizeCache protoimpl.SizeCache |
||||
|
unknownFields protoimpl.UnknownFields |
||||
|
|
||||
|
// Data is proto field 1: the TelemetryData payload carried by the request.
Data *TelemetryData `protobuf:"bytes,1,opt,name=data,proto3" json:"data,omitempty"` |
||||
|
} |
||||
|
|
||||
|
func (x *TelemetryRequest) Reset() { |
||||
|
*x = TelemetryRequest{} |
||||
|
if protoimpl.UnsafeEnabled { |
||||
|
mi := &file_proto_telemetry_proto_msgTypes[1] |
||||
|
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) |
||||
|
ms.StoreMessageInfo(mi) |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
func (x *TelemetryRequest) String() string { |
||||
|
return protoimpl.X.MessageStringOf(x) |
||||
|
} |
||||
|
|
||||
|
// ProtoMessage is an empty marker method emitted by protoc-gen-go; it has no
// behavior of its own.
func (*TelemetryRequest) ProtoMessage() {} |
||||
|
|
||||
|
func (x *TelemetryRequest) ProtoReflect() protoreflect.Message { |
||||
|
mi := &file_proto_telemetry_proto_msgTypes[1] |
||||
|
if protoimpl.UnsafeEnabled && x != nil { |
||||
|
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) |
||||
|
if ms.LoadMessageInfo() == nil { |
||||
|
ms.StoreMessageInfo(mi) |
||||
|
} |
||||
|
return ms |
||||
|
} |
||||
|
return mi.MessageOf(x) |
||||
|
} |
||||
|
|
||||
|
// Deprecated: Use TelemetryRequest.ProtoReflect.Descriptor instead.
|
||||
|
func (*TelemetryRequest) Descriptor() ([]byte, []int) { |
||||
|
return file_proto_telemetry_proto_rawDescGZIP(), []int{1} |
||||
|
} |
||||
|
|
||||
|
func (x *TelemetryRequest) GetData() *TelemetryData { |
||||
|
if x != nil { |
||||
|
return x.Data |
||||
|
} |
||||
|
return nil |
||||
|
} |
||||
|
|
||||
|
// TelemetryResponse is returned by the telemetry server.
// It is message index 2 of proto/telemetry.proto.
|
||||
|
type TelemetryResponse struct { |
||||
|
// state, sizeCache and unknownFields are protoimpl bookkeeping fields;
// they are not part of the message payload.
state protoimpl.MessageState |
||||
|
sizeCache protoimpl.SizeCache |
||||
|
unknownFields protoimpl.UnknownFields |
||||
|
|
||||
|
// Success is proto field 1. NOTE(review): presumably true when the server
// accepted the report — confirm against the server handler.
Success bool `protobuf:"varint,1,opt,name=success,proto3" json:"success,omitempty"` |
||||
|
// Message is proto field 2, a free-form string accompanying Success.
Message string `protobuf:"bytes,2,opt,name=message,proto3" json:"message,omitempty"` |
||||
|
} |
||||
|
|
||||
|
func (x *TelemetryResponse) Reset() { |
||||
|
*x = TelemetryResponse{} |
||||
|
if protoimpl.UnsafeEnabled { |
||||
|
mi := &file_proto_telemetry_proto_msgTypes[2] |
||||
|
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) |
||||
|
ms.StoreMessageInfo(mi) |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
func (x *TelemetryResponse) String() string { |
||||
|
return protoimpl.X.MessageStringOf(x) |
||||
|
} |
||||
|
|
||||
|
// ProtoMessage is an empty marker method emitted by protoc-gen-go; it has no
// behavior of its own.
func (*TelemetryResponse) ProtoMessage() {} |
||||
|
|
||||
|
func (x *TelemetryResponse) ProtoReflect() protoreflect.Message { |
||||
|
mi := &file_proto_telemetry_proto_msgTypes[2] |
||||
|
if protoimpl.UnsafeEnabled && x != nil { |
||||
|
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) |
||||
|
if ms.LoadMessageInfo() == nil { |
||||
|
ms.StoreMessageInfo(mi) |
||||
|
} |
||||
|
return ms |
||||
|
} |
||||
|
return mi.MessageOf(x) |
||||
|
} |
||||
|
|
||||
|
// Deprecated: Use TelemetryResponse.ProtoReflect.Descriptor instead.
|
||||
|
func (*TelemetryResponse) Descriptor() ([]byte, []int) { |
||||
|
return file_proto_telemetry_proto_rawDescGZIP(), []int{2} |
||||
|
} |
||||
|
|
||||
|
func (x *TelemetryResponse) GetSuccess() bool { |
||||
|
if x != nil { |
||||
|
return x.Success |
||||
|
} |
||||
|
return false |
||||
|
} |
||||
|
|
||||
|
func (x *TelemetryResponse) GetMessage() string { |
||||
|
if x != nil { |
||||
|
return x.Message |
||||
|
} |
||||
|
return "" |
||||
|
} |
||||
|
|
||||
|
// File_proto_telemetry_proto is the file descriptor for proto/telemetry.proto.
// file_proto_telemetry_proto_init returns early once it is non-nil;
// presumably that function is also what assigns it — confirm in its body.
var File_proto_telemetry_proto protoreflect.FileDescriptor |
||||
|
|
||||
|
var file_proto_telemetry_proto_rawDesc = []byte{ |
||||
|
0x0a, 0x15, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2f, 0x74, 0x65, 0x6c, 0x65, 0x6d, 0x65, 0x74, 0x72, |
||||
|
0x79, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x12, 0x09, 0x74, 0x65, 0x6c, 0x65, 0x6d, 0x65, 0x74, |
||||
|
0x72, 0x79, 0x22, 0xfe, 0x02, 0x0a, 0x0d, 0x54, 0x65, 0x6c, 0x65, 0x6d, 0x65, 0x74, 0x72, 0x79, |
||||
|
0x44, 0x61, 0x74, 0x61, 0x12, 0x1d, 0x0a, 0x0a, 0x63, 0x6c, 0x75, 0x73, 0x74, 0x65, 0x72, 0x5f, |
||||
|
0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x09, 0x63, 0x6c, 0x75, 0x73, 0x74, 0x65, |
||||
|
0x72, 0x49, 0x64, 0x12, 0x18, 0x0a, 0x07, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x18, 0x02, |
||||
|
0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x12, 0x0e, 0x0a, |
||||
|
0x02, 0x6f, 0x73, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x02, 0x6f, 0x73, 0x12, 0x1a, 0x0a, |
||||
|
0x08, 0x66, 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, 0x73, 0x18, 0x04, 0x20, 0x03, 0x28, 0x09, 0x52, |
||||
|
0x08, 0x66, 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, 0x73, 0x12, 0x1e, 0x0a, 0x0a, 0x64, 0x65, 0x70, |
||||
|
0x6c, 0x6f, 0x79, 0x6d, 0x65, 0x6e, 0x74, 0x18, 0x05, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0a, 0x64, |
||||
|
0x65, 0x70, 0x6c, 0x6f, 0x79, 0x6d, 0x65, 0x6e, 0x74, 0x12, 0x2e, 0x0a, 0x13, 0x76, 0x6f, 0x6c, |
||||
|
0x75, 0x6d, 0x65, 0x5f, 0x73, 0x65, 0x72, 0x76, 0x65, 0x72, 0x5f, 0x63, 0x6f, 0x75, 0x6e, 0x74, |
||||
|
0x18, 0x06, 0x20, 0x01, 0x28, 0x05, 0x52, 0x11, 0x76, 0x6f, 0x6c, 0x75, 0x6d, 0x65, 0x53, 0x65, |
||||
|
0x72, 0x76, 0x65, 0x72, 0x43, 0x6f, 0x75, 0x6e, 0x74, 0x12, 0x28, 0x0a, 0x10, 0x74, 0x6f, 0x74, |
||||
|
0x61, 0x6c, 0x5f, 0x64, 0x69, 0x73, 0x6b, 0x5f, 0x62, 0x79, 0x74, 0x65, 0x73, 0x18, 0x07, 0x20, |
||||
|
0x01, 0x28, 0x04, 0x52, 0x0e, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x44, 0x69, 0x73, 0x6b, 0x42, 0x79, |
||||
|
0x74, 0x65, 0x73, 0x12, 0x2c, 0x0a, 0x12, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x5f, 0x76, 0x6f, 0x6c, |
||||
|
0x75, 0x6d, 0x65, 0x5f, 0x63, 0x6f, 0x75, 0x6e, 0x74, 0x18, 0x08, 0x20, 0x01, 0x28, 0x05, 0x52, |
||||
|
0x10, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x56, 0x6f, 0x6c, 0x75, 0x6d, 0x65, 0x43, 0x6f, 0x75, 0x6e, |
||||
|
0x74, 0x12, 0x1f, 0x0a, 0x0b, 0x66, 0x69, 0x6c, 0x65, 0x72, 0x5f, 0x63, 0x6f, 0x75, 0x6e, 0x74, |
||||
|
0x18, 0x09, 0x20, 0x01, 0x28, 0x05, 0x52, 0x0a, 0x66, 0x69, 0x6c, 0x65, 0x72, 0x43, 0x6f, 0x75, |
||||
|
0x6e, 0x74, 0x12, 0x21, 0x0a, 0x0c, 0x62, 0x72, 0x6f, 0x6b, 0x65, 0x72, 0x5f, 0x63, 0x6f, 0x75, |
||||
|
0x6e, 0x74, 0x18, 0x0a, 0x20, 0x01, 0x28, 0x05, 0x52, 0x0b, 0x62, 0x72, 0x6f, 0x6b, 0x65, 0x72, |
||||
|
0x43, 0x6f, 0x75, 0x6e, 0x74, 0x12, 0x1c, 0x0a, 0x09, 0x74, 0x69, 0x6d, 0x65, 0x73, 0x74, 0x61, |
||||
|
0x6d, 0x70, 0x18, 0x0b, 0x20, 0x01, 0x28, 0x03, 0x52, 0x09, 0x74, 0x69, 0x6d, 0x65, 0x73, 0x74, |
||||
|
0x61, 0x6d, 0x70, 0x22, 0x40, 0x0a, 0x10, 0x54, 0x65, 0x6c, 0x65, 0x6d, 0x65, 0x74, 0x72, 0x79, |
||||
|
0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x2c, 0x0a, 0x04, 0x64, 0x61, 0x74, 0x61, 0x18, |
||||
|
0x01, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x18, 0x2e, 0x74, 0x65, 0x6c, 0x65, 0x6d, 0x65, 0x74, 0x72, |
||||
|
0x79, 0x2e, 0x54, 0x65, 0x6c, 0x65, 0x6d, 0x65, 0x74, 0x72, 0x79, 0x44, 0x61, 0x74, 0x61, 0x52, |
||||
|
0x04, 0x64, 0x61, 0x74, 0x61, 0x22, 0x47, 0x0a, 0x11, 0x54, 0x65, 0x6c, 0x65, 0x6d, 0x65, 0x74, |
||||
|
0x72, 0x79, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x18, 0x0a, 0x07, 0x73, 0x75, |
||||
|
0x63, 0x63, 0x65, 0x73, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x08, 0x52, 0x07, 0x73, 0x75, 0x63, |
||||
|
0x63, 0x65, 0x73, 0x73, 0x12, 0x18, 0x0a, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x18, |
||||
|
0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x42, 0x30, |
||||
|
0x5a, 0x2e, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x73, 0x65, 0x61, |
||||
|
0x77, 0x65, 0x65, 0x64, 0x66, 0x73, 0x2f, 0x73, 0x65, 0x61, 0x77, 0x65, 0x65, 0x64, 0x66, 0x73, |
||||
|
0x2f, 0x74, 0x65, 0x6c, 0x65, 0x6d, 0x65, 0x74, 0x72, 0x79, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, |
||||
|
0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, |
||||
|
} |
||||
|
|
||||
|
// State for file_proto_telemetry_proto_rawDescGZIP: rawDescOnce makes the
// compression run a single time, and rawDescData starts as the raw descriptor
// bytes and is replaced by their gzip-compressed form on that first call.
var ( |
||||
|
file_proto_telemetry_proto_rawDescOnce sync.Once |
||||
|
file_proto_telemetry_proto_rawDescData = file_proto_telemetry_proto_rawDesc |
||||
|
) |
||||
|
|
||||
|
func file_proto_telemetry_proto_rawDescGZIP() []byte { |
||||
|
file_proto_telemetry_proto_rawDescOnce.Do(func() { |
||||
|
file_proto_telemetry_proto_rawDescData = protoimpl.X.CompressGZIP(file_proto_telemetry_proto_rawDescData) |
||||
|
}) |
||||
|
return file_proto_telemetry_proto_rawDescData |
||||
|
} |
||||
|
|
||||
|
// file_proto_telemetry_proto_msgTypes holds one MessageInfo per message in
// the file: index 0 is TelemetryData, 1 is TelemetryRequest and 2 is
// TelemetryResponse, matching the indices used by their methods.
var file_proto_telemetry_proto_msgTypes = make([]protoimpl.MessageInfo, 3) |
||||
|
// file_proto_telemetry_proto_goTypes lists the Go types of the file's
// messages as typed nil pointers, in message-index order.
var file_proto_telemetry_proto_goTypes = []any{ |
||||
|
(*TelemetryData)(nil), // 0: telemetry.TelemetryData
|
||||
|
(*TelemetryRequest)(nil), // 1: telemetry.TelemetryRequest
|
||||
|
(*TelemetryResponse)(nil), // 2: telemetry.TelemetryResponse
|
||||
|
} |
||||
|
var file_proto_telemetry_proto_depIdxs = []int32{ |
||||
|
0, // 0: telemetry.TelemetryRequest.data:type_name -> telemetry.TelemetryData
|
||||
|
1, // [1:1] is the sub-list for method output_type
|
||||
|
1, // [1:1] is the sub-list for method input_type
|
||||
|
1, // [1:1] is the sub-list for extension type_name
|
||||
|
1, // [1:1] is the sub-list for extension extendee
|
||||
|
0, // [0:1] is the sub-list for field type_name
|
||||
|
} |
||||
|
|
||||
|
func init() { file_proto_telemetry_proto_init() } |
||||
|
func file_proto_telemetry_proto_init() { |
||||
|
if File_proto_telemetry_proto != nil { |
||||
|
return |
||||
|
} |
||||
|
if !protoimpl.UnsafeEnabled { |
||||
|
file_proto_telemetry_proto_msgTypes[0].Exporter = func(v any, i int) any { |
||||
|
switch v := v.(*TelemetryData); i { |
||||
|
case 0: |
||||
|
return &v.state |
||||
|
case 1: |
||||
|
return &v.sizeCache |
||||
|
case 2: |
||||
|
return &v.unknownFields |
||||
|
default: |
||||
|
return nil |
||||
|
} |
||||
|
} |
||||
|
file_proto_telemetry_proto_msgTypes[1].Exporter = func(v any, i int) any { |
||||
|
switch v := v.(*TelemetryRequest); i { |
||||
|
case 0: |
||||
|
return &v.state |
||||
|
case 1: |
||||
|
return &v.sizeCache |
||||
|
case 2: |
||||
|
return &v.unknownFields |
||||
|
default: |
||||
|
return nil |
||||
|
} |
||||
|
} |
||||
|
file_proto_telemetry_proto_msgTypes[2].Exporter = func(v any, i int) any { |
||||
|
switch v := v.(*TelemetryResponse); i { |
||||
|
case 0: |
||||
|
return &v.state |
||||
|
case 1: |
||||
|
return &v.sizeCache |
||||
|
case 2: |
||||
|
return &v.unknownFields |
||||
|
default: |
||||
|
return nil |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
type x struct{} |
||||
|
out := protoimpl.TypeBuilder{ |
||||
|
File: protoimpl.DescBuilder{ |
||||
|
GoPackagePath: reflect.TypeOf(x{}).PkgPath(), |
||||
|
RawDescriptor: file_proto_telemetry_proto_rawDesc, |
||||
|
NumEnums: 0, |
||||
|
NumMessages: 3, |
||||
|
NumExtensions: 0, |
||||
|
NumServices: 0, |
||||
|
}, |
||||
|
GoTypes: file_proto_telemetry_proto_goTypes, |
||||
|
DependencyIndexes: file_proto_telemetry_proto_depIdxs, |
||||
|
MessageInfos: file_proto_telemetry_proto_msgTypes, |
||||
|
}.Build() |
||||
|
File_proto_telemetry_proto = out.File |
||||
|
file_proto_telemetry_proto_rawDesc = nil |
||||
|
file_proto_telemetry_proto_goTypes = nil |
||||
|
file_proto_telemetry_proto_depIdxs = nil |
||||
|
} |
@ -0,0 +1,52 @@ |
|||||
|
syntax = "proto3";

package telemetry;

option go_package = "github.com/seaweedfs/seaweedfs/telemetry/proto";

// TelemetryData is one cluster-level telemetry report.
message TelemetryData {
  // Unique cluster identifier (generated in-memory).
  string cluster_id = 1;

  // SeaweedFS version.
  string version = 2;

  // Operating system and architecture (e.g., "linux/amd64").
  string os = 3;

  // Enabled features (e.g., ["filer", "s3api", "mq"]).
  repeated string features = 4;

  // Deployment type ("standalone", "cluster", "master-only", "volume-only").
  string deployment = 5;

  // Number of volume servers in the cluster.
  int32 volume_server_count = 6;

  // Total disk usage across all volume servers, in bytes.
  uint64 total_disk_bytes = 7;

  // Total number of volumes in the cluster.
  int32 total_volume_count = 8;

  // Number of filer servers in the cluster.
  int32 filer_count = 9;

  // Number of broker servers in the cluster.
  int32 broker_count = 10;

  // Unix timestamp at which the data was collected.
  int64 timestamp = 11;
}

// TelemetryRequest is what a SeaweedFS cluster sends to the telemetry server.
message TelemetryRequest {
  TelemetryData data = 1;
}

// TelemetryResponse is the telemetry server's reply to a TelemetryRequest.
message TelemetryResponse {
  bool success = 1;
  string message = 2;
}
@ -0,0 +1,18 @@ |
|||||
|
# Build stage: compile a static telemetry-server binary.
FROM golang:1.21-alpine AS builder

WORKDIR /app
# Copy the module files first so the dependency download is cached
# independently of source changes.
COPY go.mod go.sum ./
RUN go mod download

COPY . .
RUN CGO_ENABLED=0 GOOS=linux go build -a -installsuffix cgo -ldflags '-extldflags "-static"' -o telemetry-server .

# Runtime stage: minimal image containing only the binary and CA certificates.
FROM alpine:latest
# The server listens on an unprivileged port and needs no root privileges,
# so run it as a dedicated system user.
RUN apk --no-cache add ca-certificates \
    && addgroup -S telemetry \
    && adduser -S -G telemetry telemetry
# /root is not accessible to the unprivileged user, so the binary lives in /app.
WORKDIR /app

COPY --from=builder /app/telemetry-server .

USER telemetry

EXPOSE 8080

CMD ["./telemetry-server"]
@ -0,0 +1,97 @@ |
|||||
|
# Every target here is a command, not a file, so all of them are phony.
.PHONY: build run run-prod clean test integration-test test-all deps proto \
	docker-build docker-run dev check-protoc setup smoke-test ci help

# Build the telemetry server
build:
	go build -o telemetry-server .

# Run the server in development mode
run:
	go run . -port=8080 -dashboard=true -cleanup=1h -max-age=24h

# Run the server in production mode
run-prod:
	./telemetry-server -port=8080 -dashboard=true -cleanup=24h -max-age=720h

# Clean build artifacts
clean:
	rm -f telemetry-server
	rm -f ../test/telemetry-server-test.log
	go clean

# Run unit tests
test:
	go test ./...

# Run integration tests
integration-test:
	@echo "🧪 Running telemetry integration tests..."
	cd ../../ && go run telemetry/test/integration.go

# Run all tests (unit + integration)
test-all: test integration-test

# Install dependencies
deps:
	go mod download
	go mod tidy

# Generate protobuf code (requires protoc)
proto: check-protoc
	cd .. && protoc --go_out=. --go_opt=paths=source_relative proto/telemetry.proto

# Build Docker image
docker-build:
	docker build -t seaweedfs-telemetry .

# Run with Docker.
# Arguments after the image name replace the image's CMD, so the binary has
# to be named explicitly before its flags.
docker-run:
	docker run -p 8080:8080 seaweedfs-telemetry ./telemetry-server -port=8080 -dashboard=true

# Development with auto-reload (requires air: go install github.com/cosmtrek/air@latest)
dev:
	air

# Check if protoc is available
check-protoc:
	@which protoc > /dev/null || (echo "protoc is required for proto generation. Install from https://grpc.io/docs/protoc-installation/" && exit 1)

# Full development setup
setup: check-protoc deps proto build

# Run a quick smoke test.
# The server is always stopped before the recipe exits, whether or not the
# health check succeeded; curl -f makes HTTP error statuses count as failure.
smoke-test: build
	@echo "🔥 Running smoke test..."
	@timeout 10s ./telemetry-server -port=18081 > /dev/null 2>&1 & \
	SERVER_PID=$$!; \
	sleep 2; \
	if curl -sf http://localhost:18081/health > /dev/null; then \
		echo "✅ Smoke test passed - server responds to health check"; \
		STATUS=0; \
	else \
		echo "❌ Smoke test failed - server not responding"; \
		STATUS=1; \
	fi; \
	kill $$SERVER_PID 2>/dev/null || true; \
	exit $$STATUS

# Continuous integration target
ci: deps proto build test integration-test
	@echo "🎉 All CI tests passed!"

# Help
help:
	@echo "Available targets:"
	@echo " build - Build the telemetry server binary"
	@echo " run - Run server in development mode"
	@echo " run-prod - Run server in production mode"
	@echo " clean - Clean build artifacts"
	@echo " test - Run unit tests"
	@echo " integration-test- Run integration tests"
	@echo " test-all - Run all tests (unit + integration)"
	@echo " deps - Install Go dependencies"
	@echo " proto - Generate protobuf code"
	@echo " docker-build - Build Docker image"
	@echo " docker-run - Run with Docker"
	@echo " dev - Run with auto-reload (requires air)"
	@echo " smoke-test - Quick server health check"
	@echo " setup - Full development setup"
	@echo " ci - Continuous integration (all tests)"
	@echo " help - Show this help"
@ -0,0 +1,152 @@ |
|||||
|
package api |
||||
|
|
||||
|
import ( |
||||
|
"encoding/json" |
||||
|
"io" |
||||
|
"net/http" |
||||
|
"strconv" |
||||
|
"time" |
||||
|
|
||||
|
"github.com/seaweedfs/seaweedfs/telemetry/proto" |
||||
|
"github.com/seaweedfs/seaweedfs/telemetry/server/storage" |
||||
|
protobuf "google.golang.org/protobuf/proto" |
||||
|
) |
||||
|
|
||||
|
type Handler struct { |
||||
|
storage *storage.PrometheusStorage |
||||
|
} |
||||
|
|
||||
|
func NewHandler(storage *storage.PrometheusStorage) *Handler { |
||||
|
return &Handler{storage: storage} |
||||
|
} |
||||
|
|
||||
|
func (h *Handler) CollectTelemetry(w http.ResponseWriter, r *http.Request) { |
||||
|
if r.Method != http.MethodPost { |
||||
|
http.Error(w, "Method not allowed", http.StatusMethodNotAllowed) |
||||
|
return |
||||
|
} |
||||
|
|
||||
|
contentType := r.Header.Get("Content-Type") |
||||
|
|
||||
|
// Only accept protobuf content type
|
||||
|
if contentType != "application/x-protobuf" && contentType != "application/protobuf" { |
||||
|
http.Error(w, "Content-Type must be application/x-protobuf", http.StatusUnsupportedMediaType) |
||||
|
return |
||||
|
} |
||||
|
|
||||
|
// Read protobuf request
|
||||
|
body, err := io.ReadAll(r.Body) |
||||
|
if err != nil { |
||||
|
http.Error(w, "Failed to read request body", http.StatusBadRequest) |
||||
|
return |
||||
|
} |
||||
|
|
||||
|
req := &proto.TelemetryRequest{} |
||||
|
if err := protobuf.Unmarshal(body, req); err != nil { |
||||
|
http.Error(w, "Invalid protobuf data", http.StatusBadRequest) |
||||
|
return |
||||
|
} |
||||
|
|
||||
|
data := req.Data |
||||
|
if data == nil { |
||||
|
http.Error(w, "Missing telemetry data", http.StatusBadRequest) |
||||
|
return |
||||
|
} |
||||
|
|
||||
|
// Validate required fields
|
||||
|
if data.ClusterId == "" || data.Version == "" || data.Os == "" { |
||||
|
http.Error(w, "Missing required fields", http.StatusBadRequest) |
||||
|
return |
||||
|
} |
||||
|
|
||||
|
// Set timestamp if not provided
|
||||
|
if data.Timestamp == 0 { |
||||
|
data.Timestamp = time.Now().Unix() |
||||
|
} |
||||
|
|
||||
|
// Store the telemetry data
|
||||
|
if err := h.storage.StoreTelemetry(data); err != nil { |
||||
|
http.Error(w, "Failed to store data", http.StatusInternalServerError) |
||||
|
return |
||||
|
} |
||||
|
|
||||
|
// Return protobuf response
|
||||
|
resp := &proto.TelemetryResponse{ |
||||
|
Success: true, |
||||
|
Message: "Telemetry data received", |
||||
|
} |
||||
|
|
||||
|
respData, err := protobuf.Marshal(resp) |
||||
|
if err != nil { |
||||
|
http.Error(w, "Failed to marshal response", http.StatusInternalServerError) |
||||
|
return |
||||
|
} |
||||
|
|
||||
|
w.Header().Set("Content-Type", "application/x-protobuf") |
||||
|
w.WriteHeader(http.StatusOK) |
||||
|
w.Write(respData) |
||||
|
} |
||||
|
|
||||
|
func (h *Handler) GetStats(w http.ResponseWriter, r *http.Request) { |
||||
|
if r.Method != http.MethodGet { |
||||
|
http.Error(w, "Method not allowed", http.StatusMethodNotAllowed) |
||||
|
return |
||||
|
} |
||||
|
|
||||
|
stats, err := h.storage.GetStats() |
||||
|
if err != nil { |
||||
|
http.Error(w, "Failed to get stats", http.StatusInternalServerError) |
||||
|
return |
||||
|
} |
||||
|
|
||||
|
w.Header().Set("Content-Type", "application/json") |
||||
|
json.NewEncoder(w).Encode(stats) |
||||
|
} |
||||
|
|
||||
|
func (h *Handler) GetInstances(w http.ResponseWriter, r *http.Request) { |
||||
|
if r.Method != http.MethodGet { |
||||
|
http.Error(w, "Method not allowed", http.StatusMethodNotAllowed) |
||||
|
return |
||||
|
} |
||||
|
|
||||
|
limitStr := r.URL.Query().Get("limit") |
||||
|
limit := 100 // default
|
||||
|
if limitStr != "" { |
||||
|
if l, err := strconv.Atoi(limitStr); err == nil && l > 0 && l <= 1000 { |
||||
|
limit = l |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
instances, err := h.storage.GetInstances(limit) |
||||
|
if err != nil { |
||||
|
http.Error(w, "Failed to get instances", http.StatusInternalServerError) |
||||
|
return |
||||
|
} |
||||
|
|
||||
|
w.Header().Set("Content-Type", "application/json") |
||||
|
json.NewEncoder(w).Encode(instances) |
||||
|
} |
||||
|
|
||||
|
func (h *Handler) GetMetrics(w http.ResponseWriter, r *http.Request) { |
||||
|
if r.Method != http.MethodGet { |
||||
|
http.Error(w, "Method not allowed", http.StatusMethodNotAllowed) |
||||
|
return |
||||
|
} |
||||
|
|
||||
|
daysStr := r.URL.Query().Get("days") |
||||
|
days := 30 // default
|
||||
|
if daysStr != "" { |
||||
|
if d, err := strconv.Atoi(daysStr); err == nil && d > 0 && d <= 365 { |
||||
|
days = d |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
metrics, err := h.storage.GetMetrics(days) |
||||
|
if err != nil { |
||||
|
http.Error(w, "Failed to get metrics", http.StatusInternalServerError) |
||||
|
return |
||||
|
} |
||||
|
|
||||
|
w.Header().Set("Content-Type", "application/json") |
||||
|
json.NewEncoder(w).Encode(metrics) |
||||
|
} |
@ -0,0 +1,278 @@ |
|||||
|
package dashboard |
||||
|
|
||||
|
import ( |
||||
|
"net/http" |
||||
|
) |
||||
|
|
||||
|
// Handler serves the built-in, single-page telemetry dashboard.
type Handler struct{}

// NewHandler returns a dashboard Handler.
func NewHandler() *Handler {
	return &Handler{}
}

// ServeIndex writes the dashboard page for the paths "/" and "/dashboard".
//
// The handler is meant to be registered on the ServeMux pattern "/", which
// also matches every path no other pattern claims; any other path is
// therefore answered with 404 instead of the dashboard HTML.
//
// The page loads Chart.js from a CDN and pulls its data from /api/stats and
// /api/metrics in the browser, refreshing every five minutes.
func (h *Handler) ServeIndex(w http.ResponseWriter, r *http.Request) {
	switch r.URL.Path {
	case "/", "/dashboard":
	default:
		http.NotFound(w, r)
		return
	}

	html := `<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>SeaweedFS Telemetry Dashboard</title>
    <script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
    <style>
        body {
            font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
            margin: 0;
            padding: 20px;
            background-color: #f5f5f5;
        }
        .container {
            max-width: 1200px;
            margin: 0 auto;
        }
        .header {
            background: white;
            padding: 20px;
            border-radius: 8px;
            margin-bottom: 20px;
            box-shadow: 0 2px 4px rgba(0,0,0,0.1);
        }
        .stats-grid {
            display: grid;
            grid-template-columns: repeat(auto-fit, minmax(250px, 1fr));
            gap: 20px;
            margin-bottom: 20px;
        }
        .stat-card {
            background: white;
            padding: 20px;
            border-radius: 8px;
            box-shadow: 0 2px 4px rgba(0,0,0,0.1);
        }
        .stat-value {
            font-size: 2em;
            font-weight: bold;
            color: #2196F3;
        }
        .stat-label {
            color: #666;
            margin-top: 5px;
        }
        .chart-container {
            background: white;
            padding: 20px;
            border-radius: 8px;
            margin-bottom: 20px;
            box-shadow: 0 2px 4px rgba(0,0,0,0.1);
        }
        .chart-title {
            font-size: 1.2em;
            font-weight: bold;
            margin-bottom: 15px;
        }
        .loading {
            text-align: center;
            padding: 40px;
            color: #666;
        }
        .error {
            background: #ffebee;
            color: #c62828;
            padding: 15px;
            border-radius: 4px;
            margin: 10px 0;
        }
    </style>
</head>
<body>
    <div class="container">
        <div class="header">
            <h1>SeaweedFS Telemetry Dashboard</h1>
            <p>Privacy-respecting usage analytics for SeaweedFS</p>
        </div>

        <div id="loading" class="loading">Loading telemetry data...</div>
        <div id="error" class="error" style="display: none;"></div>

        <div id="dashboard" style="display: none;">
            <div class="stats-grid">
                <div class="stat-card">
                    <div class="stat-value" id="totalInstances">-</div>
                    <div class="stat-label">Total Instances (30 days)</div>
                </div>
                <div class="stat-card">
                    <div class="stat-value" id="activeInstances">-</div>
                    <div class="stat-label">Active Instances (7 days)</div>
                </div>
                <div class="stat-card">
                    <div class="stat-value" id="totalVersions">-</div>
                    <div class="stat-label">Different Versions</div>
                </div>
                <div class="stat-card">
                    <div class="stat-value" id="totalOS">-</div>
                    <div class="stat-label">Operating Systems</div>
                </div>
            </div>

            <div class="chart-container">
                <div class="chart-title">Version Distribution</div>
                <canvas id="versionChart" width="400" height="200"></canvas>
            </div>

            <div class="chart-container">
                <div class="chart-title">Operating System Distribution</div>
                <canvas id="osChart" width="400" height="200"></canvas>
            </div>

            <div class="chart-container">
                <div class="chart-title">Deployment Types</div>
                <canvas id="deploymentChart" width="400" height="200"></canvas>
            </div>

            <div class="chart-container">
                <div class="chart-title">Volume Servers Over Time</div>
                <canvas id="serverChart" width="400" height="200"></canvas>
            </div>

            <div class="chart-container">
                <div class="chart-title">Total Disk Usage Over Time</div>
                <canvas id="diskChart" width="400" height="200"></canvas>
            </div>
        </div>
    </div>

    <script>
        let charts = {};

        // Fetch a JSON endpoint, turning non-2xx responses into a readable
        // error instead of a JSON parse failure on the error body.
        async function fetchJSON(url) {
            const response = await fetch(url);
            if (!response.ok) {
                throw new Error(url + ' returned HTTP ' + response.status);
            }
            return response.json();
        }

        async function loadDashboard() {
            try {
                // Load stats
                const stats = await fetchJSON('/api/stats');

                // Load metrics
                const metrics = await fetchJSON('/api/metrics?days=30');

                updateStats(stats);
                updateCharts(stats, metrics);

                document.getElementById('loading').style.display = 'none';
                // Clear any error left over from an earlier failed refresh.
                document.getElementById('error').style.display = 'none';
                document.getElementById('dashboard').style.display = 'block';
            } catch (error) {
                console.error('Error loading dashboard:', error);
                showError('Failed to load telemetry data: ' + error.message);
            }
        }

        function updateStats(stats) {
            document.getElementById('totalInstances').textContent = stats.total_instances || 0;
            document.getElementById('activeInstances').textContent = stats.active_instances || 0;
            document.getElementById('totalVersions').textContent = Object.keys(stats.versions || {}).length;
            document.getElementById('totalOS').textContent = Object.keys(stats.os_distribution || {}).length;
        }

        function updateCharts(stats, metrics) {
            // Version chart
            createPieChart('versionChart', 'Version Distribution', stats.versions || {});

            // OS chart
            createPieChart('osChart', 'Operating System Distribution', stats.os_distribution || {});

            // Deployment chart
            createPieChart('deploymentChart', 'Deployment Types', stats.deployments || {});

            // Server count over time
            if (metrics.dates && metrics.server_counts) {
                createLineChart('serverChart', 'Volume Servers', metrics.dates, metrics.server_counts, '#2196F3');
            }

            // Disk usage over time
            if (metrics.dates && metrics.disk_usage) {
                const diskUsageGB = metrics.disk_usage.map(bytes => Math.round(bytes / (1024 * 1024 * 1024)));
                createLineChart('diskChart', 'Disk Usage (GB)', metrics.dates, diskUsageGB, '#4CAF50');
            }
        }

        function createPieChart(canvasId, title, data) {
            const ctx = document.getElementById(canvasId).getContext('2d');

            if (charts[canvasId]) {
                charts[canvasId].destroy();
            }

            const labels = Object.keys(data);
            const values = Object.values(data);

            charts[canvasId] = new Chart(ctx, {
                type: 'pie',
                data: {
                    labels: labels,
                    datasets: [{
                        data: values,
                        backgroundColor: [
                            '#FF6384', '#36A2EB', '#FFCE56', '#4BC0C0',
                            '#9966FF', '#FF9F40', '#FF6384', '#C9CBCF'
                        ]
                    }]
                },
                options: {
                    responsive: true,
                    plugins: {
                        legend: {
                            position: 'bottom'
                        }
                    }
                }
            });
        }

        function createLineChart(canvasId, label, labels, data, color) {
            const ctx = document.getElementById(canvasId).getContext('2d');

            if (charts[canvasId]) {
                charts[canvasId].destroy();
            }

            charts[canvasId] = new Chart(ctx, {
                type: 'line',
                data: {
                    labels: labels,
                    datasets: [{
                        label: label,
                        data: data,
                        borderColor: color,
                        backgroundColor: color + '20',
                        fill: true,
                        tension: 0.1
                    }]
                },
                options: {
                    responsive: true,
                    scales: {
                        y: {
                            beginAtZero: true
                        }
                    }
                }
            });
        }

        function showError(message) {
            document.getElementById('loading').style.display = 'none';
            document.getElementById('error').style.display = 'block';
            document.getElementById('error').textContent = message;
        }

        // Load dashboard on page load
        loadDashboard();

        // Refresh every 5 minutes
        setInterval(loadDashboard, 5 * 60 * 1000);
    </script>
</body>
</html>`

	w.Header().Set("Content-Type", "text/html; charset=utf-8")
	w.WriteHeader(http.StatusOK)
	// The status line is already sent; a failed write means the client went
	// away and there is nothing left to report to it.
	_, _ = w.Write([]byte(html))
}
@ -0,0 +1,31 @@ |
|||||
|
github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= |
||||
|
github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= |
||||
|
github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44= |
||||
|
github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= |
||||
|
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= |
||||
|
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= |
||||
|
github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= |
||||
|
github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= |
||||
|
github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg= |
||||
|
github.com/golang/protobuf v1.5.3/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= |
||||
|
github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= |
||||
|
github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38= |
||||
|
github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= |
||||
|
github.com/matttproud/golang_protobuf_extensions v1.0.4 h1:mmDVorXM7PCGKw94cs5zkfA9PSy5pEvNWRP0ET0TIVo= |
||||
|
github.com/matttproud/golang_protobuf_extensions v1.0.4/go.mod h1:BSXmuO+STAnVfrANrmjBb36TMTDstsz7MSK+HVaYKv4= |
||||
|
github.com/prometheus/client_golang v1.17.0 h1:rl2sfwZMtSthVU752MqfjQozy7blglC+1SOtjMAMh+Q= |
||||
|
github.com/prometheus/client_golang v1.17.0/go.mod h1:VeL+gMmOAxkS2IqfCq0ZmHSL+LjWfWDUmp1mBz9JgUY= |
||||
|
github.com/prometheus/client_model v0.4.1-0.20230718164431-9a2bf3000d16 h1:v7DLqVdK4VrYkVD5diGdl4sxJurKJEMnODWRJlxV9oM= |
||||
|
github.com/prometheus/client_model v0.4.1-0.20230718164431-9a2bf3000d16/go.mod h1:oMQmHW1/JoDwqLtg57MGgP/Fb1CJEYF2imWWhWtMkYU= |
||||
|
github.com/prometheus/common v0.44.0 h1:+5BrQJwiBB9xsMygAB3TNvpQKOwlkc25LbISbrdOOfY= |
||||
|
github.com/prometheus/common v0.44.0/go.mod h1:ofAIvZbQ1e/nugmZGz4/qCb9Ap1VoSTIO7x0VV9VvuY= |
||||
|
github.com/prometheus/procfs v0.11.1 h1:xRC8Iq1yyca5ypa9n1EZnWZkt7dwcoRPQwX/5gwaUuI= |
||||
|
github.com/prometheus/procfs v0.11.1/go.mod h1:eesXgaPo1q7lBpVMoMy0ZOFTth9hBn4W/y0/p/ScXhY= |
||||
|
golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= |
||||
|
golang.org/x/sys v0.11.0 h1:eG7RXZHdqOJ1i+0lgLgCpSXAp6M3LYlAo6osgSi0xOM= |
||||
|
golang.org/x/sys v0.11.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= |
||||
|
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= |
||||
|
google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= |
||||
|
google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= |
||||
|
google.golang.org/protobuf v1.31.0 h1:g0LDEJHgrBl9N9r17Ru3sqWhkIx2NB67okBHPwC7hs8= |
||||
|
google.golang.org/protobuf v1.31.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= |
@ -0,0 +1,111 @@ |
|||||
|
package main |
||||
|
|
||||
|
import ( |
||||
|
"encoding/json" |
||||
|
"flag" |
||||
|
"fmt" |
||||
|
"log" |
||||
|
"net/http" |
||||
|
"time" |
||||
|
|
||||
|
"github.com/prometheus/client_golang/prometheus/promhttp" |
||||
|
"github.com/seaweedfs/seaweedfs/telemetry/server/api" |
||||
|
"github.com/seaweedfs/seaweedfs/telemetry/server/dashboard" |
||||
|
"github.com/seaweedfs/seaweedfs/telemetry/server/storage" |
||||
|
) |
||||
|
|
||||
|
// Command-line flags of the telemetry server.

// port is the TCP port the HTTP server listens on.
var port = flag.Int("port", 8080, "HTTP server port")

// enableCORS controls whether the CORS response headers are emitted.
var enableCORS = flag.Bool("cors", true, "Enable CORS for dashboard")

// logRequests controls whether requests passing through logMiddleware are logged.
var logRequests = flag.Bool("log", true, "Log incoming requests")

// enableDashboard controls whether the built-in dashboard routes are registered.
var enableDashboard = flag.Bool("dashboard", true, "Enable built-in dashboard (optional when using Grafana)")

// cleanupInterval is the period between two cleanup runs.
var cleanupInterval = flag.Duration("cleanup", 24*time.Hour, "Cleanup interval for old instances")

// maxInstanceAge is the age passed to each cleanup run.
var maxInstanceAge = flag.Duration("max-age", 30*24*time.Hour, "Maximum age for instances before cleanup")
||||
|
|
||||
|
func main() { |
||||
|
flag.Parse() |
||||
|
|
||||
|
// Create Prometheus storage instance
|
||||
|
store := storage.NewPrometheusStorage() |
||||
|
|
||||
|
// Start cleanup routine
|
||||
|
go func() { |
||||
|
ticker := time.NewTicker(*cleanupInterval) |
||||
|
defer ticker.Stop() |
||||
|
for range ticker.C { |
||||
|
store.CleanupOldInstances(*maxInstanceAge) |
||||
|
} |
||||
|
}() |
||||
|
|
||||
|
// Setup HTTP handlers
|
||||
|
mux := http.NewServeMux() |
||||
|
|
||||
|
// Prometheus metrics endpoint
|
||||
|
mux.Handle("/metrics", promhttp.Handler()) |
||||
|
|
||||
|
// API endpoints
|
||||
|
apiHandler := api.NewHandler(store) |
||||
|
mux.HandleFunc("/api/collect", corsMiddleware(logMiddleware(apiHandler.CollectTelemetry))) |
||||
|
mux.HandleFunc("/api/stats", corsMiddleware(logMiddleware(apiHandler.GetStats))) |
||||
|
mux.HandleFunc("/api/instances", corsMiddleware(logMiddleware(apiHandler.GetInstances))) |
||||
|
mux.HandleFunc("/api/metrics", corsMiddleware(logMiddleware(apiHandler.GetMetrics))) |
||||
|
|
||||
|
// Dashboard (optional)
|
||||
|
if *enableDashboard { |
||||
|
dashboardHandler := dashboard.NewHandler() |
||||
|
mux.HandleFunc("/", corsMiddleware(dashboardHandler.ServeIndex)) |
||||
|
mux.HandleFunc("/dashboard", corsMiddleware(dashboardHandler.ServeIndex)) |
||||
|
mux.Handle("/static/", http.StripPrefix("/static/", http.FileServer(http.Dir("./static")))) |
||||
|
} |
||||
|
|
||||
|
// Health check
|
||||
|
mux.HandleFunc("/health", func(w http.ResponseWriter, r *http.Request) { |
||||
|
w.Header().Set("Content-Type", "application/json") |
||||
|
json.NewEncoder(w).Encode(map[string]string{ |
||||
|
"status": "ok", |
||||
|
"time": time.Now().UTC().Format(time.RFC3339), |
||||
|
}) |
||||
|
}) |
||||
|
|
||||
|
addr := fmt.Sprintf(":%d", *port) |
||||
|
log.Printf("Starting telemetry server on %s", addr) |
||||
|
log.Printf("Prometheus metrics: http://localhost%s/metrics", addr) |
||||
|
if *enableDashboard { |
||||
|
log.Printf("Dashboard: http://localhost%s/dashboard", addr) |
||||
|
} |
||||
|
log.Printf("Cleanup interval: %v, Max instance age: %v", *cleanupInterval, *maxInstanceAge) |
||||
|
|
||||
|
if err := http.ListenAndServe(addr, mux); err != nil { |
||||
|
log.Fatalf("Server failed: %v", err) |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
func corsMiddleware(next http.HandlerFunc) http.HandlerFunc { |
||||
|
return func(w http.ResponseWriter, r *http.Request) { |
||||
|
if *enableCORS { |
||||
|
w.Header().Set("Access-Control-Allow-Origin", "*") |
||||
|
w.Header().Set("Access-Control-Allow-Methods", "GET, POST, OPTIONS") |
||||
|
w.Header().Set("Access-Control-Allow-Headers", "Content-Type, Authorization") |
||||
|
} |
||||
|
|
||||
|
if r.Method == "OPTIONS" { |
||||
|
w.WriteHeader(http.StatusOK) |
||||
|
return |
||||
|
} |
||||
|
|
||||
|
next(w, r) |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
func logMiddleware(next http.HandlerFunc) http.HandlerFunc { |
||||
|
return func(w http.ResponseWriter, r *http.Request) { |
||||
|
if *logRequests { |
||||
|
start := time.Now() |
||||
|
next(w, r) |
||||
|
log.Printf("%s %s %s %v", r.Method, r.URL.Path, r.RemoteAddr, time.Since(start)) |
||||
|
} else { |
||||
|
next(w, r) |
||||
|
} |
||||
|
} |
||||
|
} |
@ -0,0 +1,245 @@ |
|||||
|
package storage |
||||
|
|
||||
|
import ( |
||||
|
"encoding/json" |
||||
|
"sync" |
||||
|
"time" |
||||
|
|
||||
|
"github.com/prometheus/client_golang/prometheus" |
||||
|
"github.com/prometheus/client_golang/prometheus/promauto" |
||||
|
"github.com/seaweedfs/seaweedfs/telemetry/proto" |
||||
|
) |
||||
|
|
||||
|
type PrometheusStorage struct { |
||||
|
// Prometheus metrics
|
||||
|
totalClusters prometheus.Gauge |
||||
|
activeClusters prometheus.Gauge |
||||
|
volumeServerCount *prometheus.GaugeVec |
||||
|
totalDiskBytes *prometheus.GaugeVec |
||||
|
totalVolumeCount *prometheus.GaugeVec |
||||
|
filerCount *prometheus.GaugeVec |
||||
|
brokerCount *prometheus.GaugeVec |
||||
|
clusterInfo *prometheus.GaugeVec |
||||
|
telemetryReceived prometheus.Counter |
||||
|
|
||||
|
// In-memory storage for API endpoints (if needed)
|
||||
|
mu sync.RWMutex |
||||
|
instances map[string]*telemetryData |
||||
|
stats map[string]interface{} |
||||
|
} |
||||
|
|
||||
|
// telemetryData is an internal struct that includes the received timestamp
|
||||
|
type telemetryData struct { |
||||
|
*proto.TelemetryData |
||||
|
ReceivedAt time.Time `json:"received_at"` |
||||
|
} |
||||
|
|
||||
|
func NewPrometheusStorage() *PrometheusStorage { |
||||
|
return &PrometheusStorage{ |
||||
|
totalClusters: promauto.NewGauge(prometheus.GaugeOpts{ |
||||
|
Name: "seaweedfs_telemetry_total_clusters", |
||||
|
Help: "Total number of unique SeaweedFS clusters (last 30 days)", |
||||
|
}), |
||||
|
activeClusters: promauto.NewGauge(prometheus.GaugeOpts{ |
||||
|
Name: "seaweedfs_telemetry_active_clusters", |
||||
|
Help: "Number of active SeaweedFS clusters (last 7 days)", |
||||
|
}), |
||||
|
volumeServerCount: promauto.NewGaugeVec(prometheus.GaugeOpts{ |
||||
|
Name: "seaweedfs_telemetry_volume_servers", |
||||
|
Help: "Number of volume servers per cluster", |
||||
|
}, []string{"cluster_id", "version", "os", "deployment"}), |
||||
|
totalDiskBytes: promauto.NewGaugeVec(prometheus.GaugeOpts{ |
||||
|
Name: "seaweedfs_telemetry_disk_bytes", |
||||
|
Help: "Total disk usage in bytes per cluster", |
||||
|
}, []string{"cluster_id", "version", "os", "deployment"}), |
||||
|
totalVolumeCount: promauto.NewGaugeVec(prometheus.GaugeOpts{ |
||||
|
Name: "seaweedfs_telemetry_volume_count", |
||||
|
Help: "Total number of volumes per cluster", |
||||
|
}, []string{"cluster_id", "version", "os", "deployment"}), |
||||
|
filerCount: promauto.NewGaugeVec(prometheus.GaugeOpts{ |
||||
|
Name: "seaweedfs_telemetry_filer_count", |
||||
|
Help: "Number of filer servers per cluster", |
||||
|
}, []string{"cluster_id", "version", "os", "deployment"}), |
||||
|
brokerCount: promauto.NewGaugeVec(prometheus.GaugeOpts{ |
||||
|
Name: "seaweedfs_telemetry_broker_count", |
||||
|
Help: "Number of broker servers per cluster", |
||||
|
}, []string{"cluster_id", "version", "os", "deployment"}), |
||||
|
clusterInfo: promauto.NewGaugeVec(prometheus.GaugeOpts{ |
||||
|
Name: "seaweedfs_telemetry_cluster_info", |
||||
|
Help: "Cluster information (always 1, labels contain metadata)", |
||||
|
}, []string{"cluster_id", "version", "os", "deployment", "features"}), |
||||
|
telemetryReceived: promauto.NewCounter(prometheus.CounterOpts{ |
||||
|
Name: "seaweedfs_telemetry_reports_received_total", |
||||
|
Help: "Total number of telemetry reports received", |
||||
|
}), |
||||
|
instances: make(map[string]*telemetryData), |
||||
|
stats: make(map[string]interface{}), |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
func (s *PrometheusStorage) StoreTelemetry(data *proto.TelemetryData) error { |
||||
|
s.mu.Lock() |
||||
|
defer s.mu.Unlock() |
||||
|
|
||||
|
// Update Prometheus metrics
|
||||
|
labels := prometheus.Labels{ |
||||
|
"cluster_id": data.ClusterId, |
||||
|
"version": data.Version, |
||||
|
"os": data.Os, |
||||
|
"deployment": data.Deployment, |
||||
|
} |
||||
|
|
||||
|
s.volumeServerCount.With(labels).Set(float64(data.VolumeServerCount)) |
||||
|
s.totalDiskBytes.With(labels).Set(float64(data.TotalDiskBytes)) |
||||
|
s.totalVolumeCount.With(labels).Set(float64(data.TotalVolumeCount)) |
||||
|
s.filerCount.With(labels).Set(float64(data.FilerCount)) |
||||
|
s.brokerCount.With(labels).Set(float64(data.BrokerCount)) |
||||
|
|
||||
|
// Features as JSON string for the label
|
||||
|
featuresJSON, _ := json.Marshal(data.Features) |
||||
|
infoLabels := prometheus.Labels{ |
||||
|
"cluster_id": data.ClusterId, |
||||
|
"version": data.Version, |
||||
|
"os": data.Os, |
||||
|
"deployment": data.Deployment, |
||||
|
"features": string(featuresJSON), |
||||
|
} |
||||
|
s.clusterInfo.With(infoLabels).Set(1) |
||||
|
|
||||
|
s.telemetryReceived.Inc() |
||||
|
|
||||
|
// Store in memory for API endpoints
|
||||
|
s.instances[data.ClusterId] = &telemetryData{ |
||||
|
TelemetryData: data, |
||||
|
ReceivedAt: time.Now().UTC(), |
||||
|
} |
||||
|
|
||||
|
// Update aggregated stats
|
||||
|
s.updateStats() |
||||
|
|
||||
|
return nil |
||||
|
} |
||||
|
|
||||
|
func (s *PrometheusStorage) GetStats() (map[string]interface{}, error) { |
||||
|
s.mu.RLock() |
||||
|
defer s.mu.RUnlock() |
||||
|
|
||||
|
// Return cached stats
|
||||
|
result := make(map[string]interface{}) |
||||
|
for k, v := range s.stats { |
||||
|
result[k] = v |
||||
|
} |
||||
|
return result, nil |
||||
|
} |
||||
|
|
||||
|
func (s *PrometheusStorage) GetInstances(limit int) ([]*telemetryData, error) { |
||||
|
s.mu.RLock() |
||||
|
defer s.mu.RUnlock() |
||||
|
|
||||
|
var instances []*telemetryData |
||||
|
count := 0 |
||||
|
for _, instance := range s.instances { |
||||
|
if count >= limit { |
||||
|
break |
||||
|
} |
||||
|
instances = append(instances, instance) |
||||
|
count++ |
||||
|
} |
||||
|
|
||||
|
return instances, nil |
||||
|
} |
||||
|
|
||||
|
func (s *PrometheusStorage) GetMetrics(days int) (map[string]interface{}, error) { |
||||
|
s.mu.RLock() |
||||
|
defer s.mu.RUnlock() |
||||
|
|
||||
|
// Return current metrics from in-memory storage
|
||||
|
// Historical data should be queried from Prometheus directly
|
||||
|
cutoff := time.Now().AddDate(0, 0, -days) |
||||
|
|
||||
|
var volumeServers []map[string]interface{} |
||||
|
var diskUsage []map[string]interface{} |
||||
|
|
||||
|
for _, instance := range s.instances { |
||||
|
if instance.ReceivedAt.After(cutoff) { |
||||
|
volumeServers = append(volumeServers, map[string]interface{}{ |
||||
|
"date": instance.ReceivedAt.Format("2006-01-02"), |
||||
|
"value": instance.TelemetryData.VolumeServerCount, |
||||
|
}) |
||||
|
diskUsage = append(diskUsage, map[string]interface{}{ |
||||
|
"date": instance.ReceivedAt.Format("2006-01-02"), |
||||
|
"value": instance.TelemetryData.TotalDiskBytes, |
||||
|
}) |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
return map[string]interface{}{ |
||||
|
"volume_servers": volumeServers, |
||||
|
"disk_usage": diskUsage, |
||||
|
}, nil |
||||
|
} |
||||
|
|
||||
|
func (s *PrometheusStorage) updateStats() { |
||||
|
now := time.Now() |
||||
|
last7Days := now.AddDate(0, 0, -7) |
||||
|
last30Days := now.AddDate(0, 0, -30) |
||||
|
|
||||
|
totalInstances := 0 |
||||
|
activeInstances := 0 |
||||
|
versions := make(map[string]int) |
||||
|
osDistribution := make(map[string]int) |
||||
|
deployments := make(map[string]int) |
||||
|
|
||||
|
for _, instance := range s.instances { |
||||
|
if instance.ReceivedAt.After(last30Days) { |
||||
|
totalInstances++ |
||||
|
} |
||||
|
if instance.ReceivedAt.After(last7Days) { |
||||
|
activeInstances++ |
||||
|
versions[instance.TelemetryData.Version]++ |
||||
|
osDistribution[instance.TelemetryData.Os]++ |
||||
|
deployments[instance.TelemetryData.Deployment]++ |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
// Update Prometheus gauges
|
||||
|
s.totalClusters.Set(float64(totalInstances)) |
||||
|
s.activeClusters.Set(float64(activeInstances)) |
||||
|
|
||||
|
// Update cached stats for API
|
||||
|
s.stats = map[string]interface{}{ |
||||
|
"total_instances": totalInstances, |
||||
|
"active_instances": activeInstances, |
||||
|
"versions": versions, |
||||
|
"os_distribution": osDistribution, |
||||
|
"deployments": deployments, |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
// CleanupOldInstances removes instances older than the specified duration
|
||||
|
func (s *PrometheusStorage) CleanupOldInstances(maxAge time.Duration) { |
||||
|
s.mu.Lock() |
||||
|
defer s.mu.Unlock() |
||||
|
|
||||
|
cutoff := time.Now().Add(-maxAge) |
||||
|
for instanceID, instance := range s.instances { |
||||
|
if instance.ReceivedAt.Before(cutoff) { |
||||
|
delete(s.instances, instanceID) |
||||
|
|
||||
|
// Remove from Prometheus metrics
|
||||
|
labels := prometheus.Labels{ |
||||
|
"cluster_id": instance.TelemetryData.ClusterId, |
||||
|
"version": instance.TelemetryData.Version, |
||||
|
"os": instance.TelemetryData.Os, |
||||
|
"deployment": instance.TelemetryData.Deployment, |
||||
|
} |
||||
|
s.volumeServerCount.Delete(labels) |
||||
|
s.totalDiskBytes.Delete(labels) |
||||
|
s.totalVolumeCount.Delete(labels) |
||||
|
s.filerCount.Delete(labels) |
||||
|
s.brokerCount.Delete(labels) |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
s.updateStats() |
||||
|
} |
@ -0,0 +1,315 @@ |
|||||
|
package main |
||||
|
|
||||
|
import ( |
||||
|
"context" |
||||
|
"fmt" |
||||
|
"io" |
||||
|
"log" |
||||
|
"net/http" |
||||
|
"os" |
||||
|
"os/exec" |
||||
|
"path/filepath" |
||||
|
"strings" |
||||
|
"syscall" |
||||
|
"time" |
||||
|
|
||||
|
"github.com/seaweedfs/seaweedfs/telemetry/proto" |
||||
|
"github.com/seaweedfs/seaweedfs/weed/telemetry" |
||||
|
protobuf "google.golang.org/protobuf/proto" |
||||
|
) |
||||
|
|
||||
|
const (
	// serverPort is the port the test telemetry server listens on; a
	// different port is used to avoid conflicts.
	serverPort = "18080"

	// serverURL is the base URL of the test telemetry server.
	serverURL = "http://localhost:" + serverPort
)
||||
|
|
||||
|
func main() { |
||||
|
fmt.Println("🧪 Starting SeaweedFS Telemetry Integration Test") |
||||
|
|
||||
|
// Start telemetry server
|
||||
|
fmt.Println("📡 Starting telemetry server...") |
||||
|
serverCmd, err := startTelemetryServer() |
||||
|
if err != nil { |
||||
|
log.Fatalf("❌ Failed to start telemetry server: %v", err) |
||||
|
} |
||||
|
defer stopServer(serverCmd) |
||||
|
|
||||
|
// Wait for server to start
|
||||
|
if !waitForServer(serverURL+"/health", 15*time.Second) { |
||||
|
log.Fatal("❌ Telemetry server failed to start") |
||||
|
} |
||||
|
fmt.Println("✅ Telemetry server started successfully") |
||||
|
|
||||
|
// Test protobuf marshaling first
|
||||
|
fmt.Println("🔧 Testing protobuf marshaling...") |
||||
|
if err := testProtobufMarshaling(); err != nil { |
||||
|
log.Fatalf("❌ Protobuf marshaling test failed: %v", err) |
||||
|
} |
||||
|
fmt.Println("✅ Protobuf marshaling test passed") |
||||
|
|
||||
|
// Test protobuf client
|
||||
|
fmt.Println("🔄 Testing protobuf telemetry client...") |
||||
|
if err := testTelemetryClient(); err != nil { |
||||
|
log.Fatalf("❌ Telemetry client test failed: %v", err) |
||||
|
} |
||||
|
fmt.Println("✅ Telemetry client test passed") |
||||
|
|
||||
|
// Test server metrics endpoint
|
||||
|
fmt.Println("📊 Testing Prometheus metrics endpoint...") |
||||
|
if err := testMetricsEndpoint(); err != nil { |
||||
|
log.Fatalf("❌ Metrics endpoint test failed: %v", err) |
||||
|
} |
||||
|
fmt.Println("✅ Metrics endpoint test passed") |
||||
|
|
||||
|
// Test stats API
|
||||
|
fmt.Println("📈 Testing stats API...") |
||||
|
if err := testStatsAPI(); err != nil { |
||||
|
log.Fatalf("❌ Stats API test failed: %v", err) |
||||
|
} |
||||
|
fmt.Println("✅ Stats API test passed") |
||||
|
|
||||
|
// Test instances API
|
||||
|
fmt.Println("📋 Testing instances API...") |
||||
|
if err := testInstancesAPI(); err != nil { |
||||
|
log.Fatalf("❌ Instances API test failed: %v", err) |
||||
|
} |
||||
|
fmt.Println("✅ Instances API test passed") |
||||
|
|
||||
|
fmt.Println("🎉 All telemetry integration tests passed!") |
||||
|
} |
||||
|
|
||||
|
func startTelemetryServer() (*exec.Cmd, error) { |
||||
|
// Get the directory where this test is running
|
||||
|
testDir, err := os.Getwd() |
||||
|
if err != nil { |
||||
|
return nil, fmt.Errorf("failed to get working directory: %v", err) |
||||
|
} |
||||
|
|
||||
|
// Navigate to the server directory (from main seaweedfs directory)
|
||||
|
serverDir := filepath.Join(testDir, "telemetry", "server") |
||||
|
|
||||
|
cmd := exec.Command("go", "run", ".", |
||||
|
"-port="+serverPort, |
||||
|
"-dashboard=false", |
||||
|
"-cleanup=1m", |
||||
|
"-max-age=1h") |
||||
|
|
||||
|
cmd.Dir = serverDir |
||||
|
|
||||
|
// Create log files for server output
|
||||
|
logFile, err := os.Create("telemetry-server-test.log") |
||||
|
if err != nil { |
||||
|
return nil, fmt.Errorf("failed to create log file: %v", err) |
||||
|
} |
||||
|
|
||||
|
cmd.Stdout = logFile |
||||
|
cmd.Stderr = logFile |
||||
|
|
||||
|
if err := cmd.Start(); err != nil { |
||||
|
return nil, fmt.Errorf("failed to start server: %v", err) |
||||
|
} |
||||
|
|
||||
|
return cmd, nil |
||||
|
} |
||||
|
|
||||
|
// stopServer asks the server process to terminate, waits for it to exit, and
// removes the server log file. It does nothing when cmd is nil or was never
// started.
//
// If SIGTERM cannot be delivered (for example on a platform that does not
// support it), the process is killed instead so that Wait cannot block
// forever on a server that never got the signal.
func stopServer(cmd *exec.Cmd) {
	if cmd == nil || cmd.Process == nil {
		return
	}

	if err := cmd.Process.Signal(syscall.SIGTERM); err != nil {
		_ = cmd.Process.Kill()
	}
	// The exit status is irrelevant here: a signalled process reports an error.
	_ = cmd.Wait()

	// Clean up log file
	_ = os.Remove("telemetry-server-test.log")
}
||||
|
|
||||
|
// waitForServer polls url with GET requests roughly every 500ms until one
// returns 200 OK (result true) or timeout elapses (result false).
//
// Every request is bound to the timeout context, so a connection that hangs
// cannot keep the function blocked past the deadline, and the wait between
// attempts ends as soon as the deadline passes. A URL that cannot be turned
// into a request yields false immediately.
func waitForServer(url string, timeout time.Duration) bool {
	ctx, cancel := context.WithTimeout(context.Background(), timeout)
	defer cancel()

	fmt.Printf("⏳ Waiting for server at %s...\n", url)

	ticker := time.NewTicker(500 * time.Millisecond)
	defer ticker.Stop()

	for {
		req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
		if err != nil {
			// A malformed URL will not become valid by retrying.
			return false
		}

		if resp, err := http.DefaultClient.Do(req); err == nil {
			resp.Body.Close()
			if resp.StatusCode == http.StatusOK {
				return true
			}
		}

		select {
		case <-ctx.Done():
			return false
		case <-ticker.C:
		}
	}
}
||||
|
|
||||
|
func testProtobufMarshaling() error { |
||||
|
// Test protobuf marshaling/unmarshaling
|
||||
|
testData := &proto.TelemetryData{ |
||||
|
ClusterId: "test-cluster-12345", |
||||
|
Version: "test-3.45", |
||||
|
Os: "linux/amd64", |
||||
|
Features: []string{"filer", "s3api"}, |
||||
|
Deployment: "test", |
||||
|
VolumeServerCount: 2, |
||||
|
TotalDiskBytes: 1000000, |
||||
|
TotalVolumeCount: 10, |
||||
|
FilerCount: 1, |
||||
|
BrokerCount: 1, |
||||
|
Timestamp: time.Now().Unix(), |
||||
|
} |
||||
|
|
||||
|
// Marshal
|
||||
|
data, err := protobuf.Marshal(testData) |
||||
|
if err != nil { |
||||
|
return fmt.Errorf("failed to marshal protobuf: %v", err) |
||||
|
} |
||||
|
|
||||
|
fmt.Printf(" Protobuf size: %d bytes\n", len(data)) |
||||
|
|
||||
|
// Unmarshal
|
||||
|
testData2 := &proto.TelemetryData{} |
||||
|
if err := protobuf.Unmarshal(data, testData2); err != nil { |
||||
|
return fmt.Errorf("failed to unmarshal protobuf: %v", err) |
||||
|
} |
||||
|
|
||||
|
// Verify data
|
||||
|
if testData2.ClusterId != testData.ClusterId { |
||||
|
return fmt.Errorf("protobuf data mismatch: expected %s, got %s", |
||||
|
testData.ClusterId, testData2.ClusterId) |
||||
|
} |
||||
|
|
||||
|
if testData2.VolumeServerCount != testData.VolumeServerCount { |
||||
|
return fmt.Errorf("volume server count mismatch: expected %d, got %d", |
||||
|
testData.VolumeServerCount, testData2.VolumeServerCount) |
||||
|
} |
||||
|
|
||||
|
return nil |
||||
|
} |
||||
|
|
||||
|
func testTelemetryClient() error { |
||||
|
// Create telemetry client
|
||||
|
client := telemetry.NewClient(serverURL+"/api/collect", true) |
||||
|
|
||||
|
// Create test data using protobuf format
|
||||
|
testData := &proto.TelemetryData{ |
||||
|
Version: "test-3.45", |
||||
|
Os: "linux/amd64", |
||||
|
Features: []string{"filer", "s3api", "mq"}, |
||||
|
Deployment: "integration-test", |
||||
|
VolumeServerCount: 3, |
||||
|
TotalDiskBytes: 1073741824, // 1GB
|
||||
|
TotalVolumeCount: 50, |
||||
|
FilerCount: 2, |
||||
|
BrokerCount: 1, |
||||
|
Timestamp: time.Now().Unix(), |
||||
|
} |
||||
|
|
||||
|
// Send telemetry data
|
||||
|
if err := client.SendTelemetry(testData); err != nil { |
||||
|
return fmt.Errorf("failed to send telemetry: %v", err) |
||||
|
} |
||||
|
|
||||
|
fmt.Printf(" Sent telemetry for cluster: %s\n", client.GetInstanceID()) |
||||
|
|
||||
|
// Wait a bit for processing
|
||||
|
time.Sleep(2 * time.Second) |
||||
|
|
||||
|
return nil |
||||
|
} |
||||
|
|
||||
|
func testMetricsEndpoint() error { |
||||
|
resp, err := http.Get(serverURL + "/metrics") |
||||
|
if err != nil { |
||||
|
return fmt.Errorf("failed to get metrics: %v", err) |
||||
|
} |
||||
|
defer resp.Body.Close() |
||||
|
|
||||
|
if resp.StatusCode != http.StatusOK { |
||||
|
return fmt.Errorf("metrics endpoint returned status %d", resp.StatusCode) |
||||
|
} |
||||
|
|
||||
|
// Read response and check for expected metrics
|
||||
|
content, err := io.ReadAll(resp.Body) |
||||
|
if err != nil { |
||||
|
return fmt.Errorf("failed to read metrics response: %v", err) |
||||
|
} |
||||
|
|
||||
|
contentStr := string(content) |
||||
|
expectedMetrics := []string{ |
||||
|
"seaweedfs_telemetry_total_clusters", |
||||
|
"seaweedfs_telemetry_active_clusters", |
||||
|
"seaweedfs_telemetry_reports_received_total", |
||||
|
"seaweedfs_telemetry_volume_servers", |
||||
|
"seaweedfs_telemetry_disk_bytes", |
||||
|
"seaweedfs_telemetry_volume_count", |
||||
|
"seaweedfs_telemetry_filer_count", |
||||
|
"seaweedfs_telemetry_broker_count", |
||||
|
} |
||||
|
|
||||
|
for _, metric := range expectedMetrics { |
||||
|
if !strings.Contains(contentStr, metric) { |
||||
|
return fmt.Errorf("missing expected metric: %s", metric) |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
// Check that we have at least one report received
|
||||
|
if !strings.Contains(contentStr, "seaweedfs_telemetry_reports_received_total 1") { |
||||
|
fmt.Printf(" Warning: Expected at least 1 report received, metrics content:\n%s\n", contentStr) |
||||
|
} |
||||
|
|
||||
|
fmt.Printf(" Found %d expected metrics\n", len(expectedMetrics)) |
||||
|
|
||||
|
return nil |
||||
|
} |
||||
|
|
||||
|
func testStatsAPI() error { |
||||
|
resp, err := http.Get(serverURL + "/api/stats") |
||||
|
if err != nil { |
||||
|
return fmt.Errorf("failed to get stats: %v", err) |
||||
|
} |
||||
|
defer resp.Body.Close() |
||||
|
|
||||
|
if resp.StatusCode != http.StatusOK { |
||||
|
return fmt.Errorf("stats API returned status %d", resp.StatusCode) |
||||
|
} |
||||
|
|
||||
|
// Read and verify JSON response
|
||||
|
content, err := io.ReadAll(resp.Body) |
||||
|
if err != nil { |
||||
|
return fmt.Errorf("failed to read stats response: %v", err) |
||||
|
} |
||||
|
|
||||
|
contentStr := string(content) |
||||
|
if !strings.Contains(contentStr, "total_instances") { |
||||
|
return fmt.Errorf("stats response missing total_instances field") |
||||
|
} |
||||
|
|
||||
|
fmt.Printf(" Stats response: %s\n", contentStr) |
||||
|
|
||||
|
return nil |
||||
|
} |
||||
|
|
||||
|
func testInstancesAPI() error { |
||||
|
resp, err := http.Get(serverURL + "/api/instances?limit=10") |
||||
|
if err != nil { |
||||
|
return fmt.Errorf("failed to get instances: %v", err) |
||||
|
} |
||||
|
defer resp.Body.Close() |
||||
|
|
||||
|
if resp.StatusCode != http.StatusOK { |
||||
|
return fmt.Errorf("instances API returned status %d", resp.StatusCode) |
||||
|
} |
||||
|
|
||||
|
// Read response
|
||||
|
content, err := io.ReadAll(resp.Body) |
||||
|
if err != nil { |
||||
|
return fmt.Errorf("failed to read instances response: %v", err) |
||||
|
} |
||||
|
|
||||
|
fmt.Printf(" Instances response length: %d bytes\n", len(content)) |
||||
|
|
||||
|
return nil |
||||
|
} |
@ -0,0 +1,100 @@ |
|||||
|
package telemetry |
||||
|
|
||||
|
import ( |
||||
|
"bytes" |
||||
|
"fmt" |
||||
|
"net/http" |
||||
|
"time" |
||||
|
|
||||
|
"github.com/google/uuid" |
||||
|
"github.com/seaweedfs/seaweedfs/telemetry/proto" |
||||
|
"github.com/seaweedfs/seaweedfs/weed/glog" |
||||
|
protobuf "google.golang.org/protobuf/proto" |
||||
|
) |
||||
|
|
||||
|
// Client sends telemetry reports to a collection endpoint over HTTP.
type Client struct {
	// url is the endpoint reports are POSTed to; empty disables sending.
	url string
	// enabled is the on/off switch supplied at construction time.
	enabled bool
	// instanceID is the identifier stamped on every report as its cluster ID.
	instanceID string
	// httpClient performs the requests.
	httpClient *http.Client
}
||||
|
|
||||
|
// NewClient creates a new telemetry client
|
||||
|
func NewClient(url string, enabled bool) *Client { |
||||
|
return &Client{ |
||||
|
url: url, |
||||
|
enabled: enabled, |
||||
|
instanceID: uuid.New().String(), // Generate UUID in memory only
|
||||
|
httpClient: &http.Client{ |
||||
|
Timeout: 10 * time.Second, |
||||
|
}, |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
// IsEnabled returns whether telemetry is enabled
|
||||
|
func (c *Client) IsEnabled() bool { |
||||
|
return c.enabled && c.url != "" |
||||
|
} |
||||
|
|
||||
|
// SendTelemetry sends telemetry data synchronously using protobuf format
|
||||
|
func (c *Client) SendTelemetry(data *proto.TelemetryData) error { |
||||
|
if !c.IsEnabled() { |
||||
|
return nil |
||||
|
} |
||||
|
|
||||
|
// Set the cluster ID
|
||||
|
data.ClusterId = c.instanceID |
||||
|
|
||||
|
return c.sendProtobuf(data) |
||||
|
} |
||||
|
|
||||
|
// SendTelemetryAsync sends telemetry data asynchronously
|
||||
|
func (c *Client) SendTelemetryAsync(data *proto.TelemetryData) { |
||||
|
if !c.IsEnabled() { |
||||
|
return |
||||
|
} |
||||
|
|
||||
|
go func() { |
||||
|
if err := c.SendTelemetry(data); err != nil { |
||||
|
glog.V(1).Infof("Failed to send telemetry: %v", err) |
||||
|
} |
||||
|
}() |
||||
|
} |
||||
|
|
||||
|
// sendProtobuf sends data using protobuf format
|
||||
|
func (c *Client) sendProtobuf(data *proto.TelemetryData) error { |
||||
|
req := &proto.TelemetryRequest{ |
||||
|
Data: data, |
||||
|
} |
||||
|
|
||||
|
body, err := protobuf.Marshal(req) |
||||
|
if err != nil { |
||||
|
return fmt.Errorf("failed to marshal protobuf: %v", err) |
||||
|
} |
||||
|
|
||||
|
httpReq, err := http.NewRequest("POST", c.url, bytes.NewBuffer(body)) |
||||
|
if err != nil { |
||||
|
return fmt.Errorf("failed to create request: %v", err) |
||||
|
} |
||||
|
|
||||
|
httpReq.Header.Set("Content-Type", "application/x-protobuf") |
||||
|
httpReq.Header.Set("User-Agent", fmt.Sprintf("SeaweedFS/%s", data.Version)) |
||||
|
|
||||
|
resp, err := c.httpClient.Do(httpReq) |
||||
|
if err != nil { |
||||
|
return fmt.Errorf("failed to send request: %v", err) |
||||
|
} |
||||
|
defer resp.Body.Close() |
||||
|
|
||||
|
if resp.StatusCode != http.StatusOK { |
||||
|
return fmt.Errorf("server returned status %d", resp.StatusCode) |
||||
|
} |
||||
|
|
||||
|
glog.V(2).Infof("Telemetry sent successfully via protobuf") |
||||
|
return nil |
||||
|
} |
||||
|
|
||||
|
// GetInstanceID returns the current instance ID
|
||||
|
func (c *Client) GetInstanceID() string { |
||||
|
return c.instanceID |
||||
|
} |
@ -0,0 +1,218 @@ |
|||||
|
package telemetry |
||||
|
|
||||
|
import ( |
||||
|
"time" |
||||
|
|
||||
|
"github.com/seaweedfs/seaweedfs/telemetry/proto" |
||||
|
"github.com/seaweedfs/seaweedfs/weed/cluster" |
||||
|
"github.com/seaweedfs/seaweedfs/weed/glog" |
||||
|
"github.com/seaweedfs/seaweedfs/weed/topology" |
||||
|
) |
||||
|
|
||||
|
type Collector struct { |
||||
|
client *Client |
||||
|
topo *topology.Topology |
||||
|
cluster *cluster.Cluster |
||||
|
masterServer interface{} // Will be set to *weed_server.MasterServer to access client tracking
|
||||
|
features []string |
||||
|
deployment string |
||||
|
version string |
||||
|
os string |
||||
|
} |
||||
|
|
||||
|
// NewCollector creates a new telemetry collector
|
||||
|
func NewCollector(client *Client, topo *topology.Topology, cluster *cluster.Cluster) *Collector { |
||||
|
return &Collector{ |
||||
|
client: client, |
||||
|
topo: topo, |
||||
|
cluster: cluster, |
||||
|
masterServer: nil, |
||||
|
features: []string{}, |
||||
|
deployment: "unknown", |
||||
|
version: "unknown", |
||||
|
os: "unknown", |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
// SetFeatures sets the list of enabled features
|
||||
|
func (c *Collector) SetFeatures(features []string) { |
||||
|
c.features = features |
||||
|
} |
||||
|
|
||||
|
// SetDeployment sets the deployment type (standalone, cluster, etc.)
|
||||
|
func (c *Collector) SetDeployment(deployment string) { |
||||
|
c.deployment = deployment |
||||
|
} |
||||
|
|
||||
|
// SetVersion sets the SeaweedFS version
|
||||
|
func (c *Collector) SetVersion(version string) { |
||||
|
c.version = version |
||||
|
} |
||||
|
|
||||
|
// SetOS sets the operating system information
|
||||
|
func (c *Collector) SetOS(os string) { |
||||
|
c.os = os |
||||
|
} |
||||
|
|
||||
|
// SetMasterServer sets a reference to the master server for client tracking
|
||||
|
func (c *Collector) SetMasterServer(masterServer interface{}) { |
||||
|
c.masterServer = masterServer |
||||
|
} |
||||
|
|
||||
|
// CollectAndSendAsync collects telemetry data and sends it asynchronously
|
||||
|
func (c *Collector) CollectAndSendAsync() { |
||||
|
if !c.client.IsEnabled() { |
||||
|
return |
||||
|
} |
||||
|
|
||||
|
go func() { |
||||
|
data := c.collectData() |
||||
|
c.client.SendTelemetryAsync(data) |
||||
|
}() |
||||
|
} |
||||
|
|
||||
|
// StartPeriodicCollection starts sending telemetry data periodically
|
||||
|
func (c *Collector) StartPeriodicCollection(interval time.Duration) { |
||||
|
if !c.client.IsEnabled() { |
||||
|
glog.V(1).Infof("Telemetry is disabled, skipping periodic collection") |
||||
|
return |
||||
|
} |
||||
|
|
||||
|
glog.V(0).Infof("Starting telemetry collection every %v", interval) |
||||
|
|
||||
|
// Send initial telemetry after a short delay
|
||||
|
go func() { |
||||
|
time.Sleep(30 * time.Second) // Wait for cluster to stabilize
|
||||
|
c.CollectAndSendAsync() |
||||
|
}() |
||||
|
|
||||
|
// Start periodic collection
|
||||
|
ticker := time.NewTicker(interval) |
||||
|
go func() { |
||||
|
defer ticker.Stop() |
||||
|
for range ticker.C { |
||||
|
c.CollectAndSendAsync() |
||||
|
} |
||||
|
}() |
||||
|
} |
||||
|
|
||||
|
// collectData gathers telemetry data from the topology
|
||||
|
func (c *Collector) collectData() *proto.TelemetryData { |
||||
|
data := &proto.TelemetryData{ |
||||
|
Version: c.version, |
||||
|
Os: c.os, |
||||
|
Features: c.features, |
||||
|
Deployment: c.deployment, |
||||
|
Timestamp: time.Now().Unix(), |
||||
|
} |
||||
|
|
||||
|
if c.topo != nil { |
||||
|
// Collect volume server count
|
||||
|
data.VolumeServerCount = int32(c.countVolumeServers()) |
||||
|
|
||||
|
// Collect total disk usage and volume count
|
||||
|
diskBytes, volumeCount := c.collectVolumeStats() |
||||
|
data.TotalDiskBytes = diskBytes |
||||
|
data.TotalVolumeCount = int32(volumeCount) |
||||
|
} |
||||
|
|
||||
|
if c.cluster != nil { |
||||
|
// Collect filer and broker counts
|
||||
|
data.FilerCount = int32(c.countFilers()) |
||||
|
data.BrokerCount = int32(c.countBrokers()) |
||||
|
} |
||||
|
|
||||
|
return data |
||||
|
} |
||||
|
|
||||
|
// countVolumeServers counts the number of active volume servers
|
||||
|
func (c *Collector) countVolumeServers() int { |
||||
|
count := 0 |
||||
|
for _, dcNode := range c.topo.Children() { |
||||
|
dc := dcNode.(*topology.DataCenter) |
||||
|
for _, rackNode := range dc.Children() { |
||||
|
rack := rackNode.(*topology.Rack) |
||||
|
for range rack.Children() { |
||||
|
count++ |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
return count |
||||
|
} |
||||
|
|
||||
|
// collectVolumeStats collects total disk usage and volume count
|
||||
|
func (c *Collector) collectVolumeStats() (uint64, int) { |
||||
|
var totalDiskBytes uint64 |
||||
|
var totalVolumeCount int |
||||
|
|
||||
|
for _, dcNode := range c.topo.Children() { |
||||
|
dc := dcNode.(*topology.DataCenter) |
||||
|
for _, rackNode := range dc.Children() { |
||||
|
rack := rackNode.(*topology.Rack) |
||||
|
for _, dnNode := range rack.Children() { |
||||
|
dn := dnNode.(*topology.DataNode) |
||||
|
volumes := dn.GetVolumes() |
||||
|
for _, volumeInfo := range volumes { |
||||
|
totalVolumeCount++ |
||||
|
totalDiskBytes += volumeInfo.Size |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
return totalDiskBytes, totalVolumeCount |
||||
|
} |
||||
|
|
||||
|
// countFilers counts the number of active filer servers across all groups
|
||||
|
func (c *Collector) countFilers() int { |
||||
|
// Count all filer-type nodes in the cluster
|
||||
|
// This includes both pure filer servers and S3 servers (which register as filers)
|
||||
|
count := 0 |
||||
|
for _, groupName := range c.getAllFilerGroups() { |
||||
|
nodes := c.cluster.ListClusterNode(cluster.FilerGroupName(groupName), cluster.FilerType) |
||||
|
count += len(nodes) |
||||
|
} |
||||
|
return count |
||||
|
} |
||||
|
|
||||
|
// countBrokers counts the number of active broker servers
|
||||
|
func (c *Collector) countBrokers() int { |
||||
|
// Count brokers across all broker groups
|
||||
|
count := 0 |
||||
|
for _, groupName := range c.getAllBrokerGroups() { |
||||
|
nodes := c.cluster.ListClusterNode(cluster.FilerGroupName(groupName), cluster.BrokerType) |
||||
|
count += len(nodes) |
||||
|
} |
||||
|
return count |
||||
|
} |
||||
|
|
||||
|
// getAllFilerGroups returns all filer group names
|
||||
|
func (c *Collector) getAllFilerGroups() []string { |
||||
|
// For simplicity, we check the default group
|
||||
|
// In a more sophisticated implementation, we could enumerate all groups
|
||||
|
return []string{""} |
||||
|
} |
||||
|
|
||||
|
// getAllBrokerGroups returns all broker group names
|
||||
|
func (c *Collector) getAllBrokerGroups() []string { |
||||
|
// For simplicity, we check the default group
|
||||
|
// In a more sophisticated implementation, we could enumerate all groups
|
||||
|
return []string{""} |
||||
|
} |
||||
|
|
||||
|
// DetermineDeployment classifies a process by the roles it runs:
//
//   - master and volume with more than one peer: "cluster"
//   - master and volume otherwise:               "standalone"
//   - master only:                               "master-only"
//   - volume only:                               "volume-only"
//   - neither:                                   "unknown"
//
// peerCount only matters when both roles are enabled.
func DetermineDeployment(isMasterEnabled, isVolumeEnabled bool, peerCount int) string {
	switch {
	case isMasterEnabled && isVolumeEnabled && peerCount > 1:
		return "cluster"
	case isMasterEnabled && isVolumeEnabled:
		return "standalone"
	case isMasterEnabled:
		return "master-only"
	case isVolumeEnabled:
		return "volume-only"
	default:
		return "unknown"
	}
}
Write
Preview
Loading…
Cancel
Save
Reference in new issue