Browse Source

glog: add gzip compression for rotated log files (#8709)

* glog: add gzip compression for rotated log files

Add opt-in gzip compression that automatically compresses log files
after rotation, reducing disk usage in long-running deployments.

- Add --log_compress flag to enable compression at startup
- Add SetCompressRotated()/IsCompressRotated() for runtime toggle
- Compress rotated files in background goroutine (non-blocking)
- Use gzip.BestSpeed for minimal CPU overhead
- Fix .gz file cleanup: TrimSuffix approach correctly counts
  compressed files toward MaxFileCount limit
- Include 6 unit tests covering normal, empty, large, and edge cases

Enabled via --log_compress flag. Default behavior unchanged.

* glog: fix compressFile to check gz/dst close errors and use atomic rename

Write to a temp file (.gz.tmp) and rename atomically to prevent
exposing partial archives. Check gz.Close() and dst.Close() errors
to avoid deleting the original log when flush fails (e.g. ENOSPC).
Use defer for robust resource cleanup.

* glog: deduplicate .log/.log.gz pairs in rotation cleanup

During concurrent compression, both foo.log and foo.log.gz can exist
simultaneously. Count them as one entry against MaxFileCount to prevent
premature eviction of rotated logs.

* glog: use portable temp path in TestCompressFile_NonExistent

Replace hardcoded /nonexistent/path with t.TempDir() for portability.

---------

Co-authored-by: Copilot <copilot@github.com>
pull/4975/merge
JARDEL ALVES 3 days ago
committed by GitHub
parent
commit
5f2244d25d
No known key found for this signature in database GPG Key ID: B5690EEEBB952194
  1. 6
      weed/glog/glog.go
  2. 87
      weed/glog/glog_compress.go
  3. 151
      weed/glog/glog_compress_test.go
  4. 34
      weed/glog/glog_file.go

6
weed/glog/glog.go

@ -852,10 +852,16 @@ func (sb *syncBuffer) Write(p []byte) (n int, err error) {
}
// rotateFile closes the syncBuffer's file and starts a new one.
// If compression is enabled, the old file is gzipped in the background.
func (sb *syncBuffer) rotateFile(now time.Time) error {
if sb.file != nil {
oldName := sb.file.Name()
sb.Flush()
sb.file.Close()
// Compress the rotated file in background (non-blocking)
if IsCompressRotated() && oldName != "" {
go compressFile(oldName)
}
}
var err error
sb.file, _, err = create(severityName[sb.sev], now)

87
weed/glog/glog_compress.go

@ -0,0 +1,87 @@
package glog
import (
"compress/gzip"
"io"
"os"
"strings"
"sync/atomic"
)
// compressRotated holds the compression toggle as an atomic flag:
// 0 = disabled (the default), 1 = enabled. It is read on every rotation,
// potentially from multiple goroutines, hence the atomic access.
var compressRotated int32

// SetCompressRotated enables or disables gzip compression of rotated log
// files. When enabled, each rotated file is compressed in a background
// goroutine so the logging path itself is never blocked.
func SetCompressRotated(enabled bool) {
	var flag int32
	if enabled {
		flag = 1
	}
	atomic.StoreInt32(&compressRotated, flag)
}

// IsCompressRotated reports whether compression of rotated files is active.
func IsCompressRotated() bool {
	return atomic.LoadInt32(&compressRotated) != 0
}
// compressFile compresses the given file with gzip and removes the original.
// This runs in a background goroutine to avoid blocking the log write path.
// If any step fails the original file is kept intact.
func compressFile(path string) {
// Skip if already compressed
if strings.HasSuffix(path, ".gz") {
return
}
src, err := os.Open(path)
if err != nil {
return
}
defer src.Close()
dstPath := path + ".gz"
tmpPath := dstPath + ".tmp"
dst, err := os.Create(tmpPath)
if err != nil {
return
}
var success bool
defer func() {
dst.Close()
if !success {
os.Remove(tmpPath)
}
}()
gz, err := gzip.NewWriterLevel(dst, gzip.BestSpeed)
if err != nil {
return
}
if _, err = io.Copy(gz, src); err != nil {
gz.Close()
return
}
if err = gz.Close(); err != nil {
return
}
if err = dst.Close(); err != nil {
return
}
if err = os.Rename(tmpPath, dstPath); err != nil {
return
}
success = true
// Only remove original after successful compression
os.Remove(path)
}

151
weed/glog/glog_compress_test.go

@ -0,0 +1,151 @@
package glog
import (
"compress/gzip"
"io"
"os"
"path/filepath"
"strings"
"testing"
)
// TestCompressRotated_Toggle verifies the default state of the compression
// flag and both transitions via SetCompressRotated.
func TestCompressRotated_Toggle(t *testing.T) {
	check := func(want bool, msg string) {
		t.Helper()
		if IsCompressRotated() != want {
			t.Error(msg)
		}
	}
	check(false, "compression should be off by default")
	SetCompressRotated(true)
	check(true, "compression should be on after SetCompressRotated(true)")
	SetCompressRotated(false)
	check(false, "compression should be off after SetCompressRotated(false)")
}
// TestCompressFile covers the happy path: the original file is deleted,
// a .gz archive appears, and it decompresses back to the original bytes.
func TestCompressFile(t *testing.T) {
	const content = "line 1\nline 2\nline 3\n"
	path := filepath.Join(t.TempDir(), "test.log")
	if err := os.WriteFile(path, []byte(content), 0644); err != nil {
		t.Fatalf("failed to write test file: %v", err)
	}

	compressFile(path)

	// The source must be gone once the archive is in place.
	if _, err := os.Stat(path); !os.IsNotExist(err) {
		t.Error("original file should be deleted after compression")
	}
	gzPath := path + ".gz"
	if _, err := os.Stat(gzPath); err != nil {
		t.Fatalf(".gz file should exist: %v", err)
	}

	// Round-trip the archive and compare against the original bytes.
	f, err := os.Open(gzPath)
	if err != nil {
		t.Fatalf("failed to open .gz: %v", err)
	}
	defer f.Close()
	zr, err := gzip.NewReader(f)
	if err != nil {
		t.Fatalf("failed to create gzip reader: %v", err)
	}
	defer zr.Close()
	got, err := io.ReadAll(zr)
	if err != nil {
		t.Fatalf("failed to decompress: %v", err)
	}
	if string(got) != content {
		t.Errorf("decompressed content mismatch: got %q, want %q", string(got), content)
	}
}
// TestCompressFile_AlreadyGz verifies that files already carrying a .gz
// suffix are skipped: the file is untouched and no .gz.gz is produced.
func TestCompressFile_AlreadyGz(t *testing.T) {
	const payload = "fake gz"
	path := filepath.Join(t.TempDir(), "test.log.gz")
	if err := os.WriteFile(path, []byte(payload), 0644); err != nil {
		t.Fatal(err)
	}

	compressFile(path)

	data, err := os.ReadFile(path)
	if err != nil {
		t.Fatal(err)
	}
	if string(data) != payload {
		t.Error("should not modify .gz files")
	}
	if _, err := os.Stat(path + ".gz"); !os.IsNotExist(err) {
		t.Error(".gz.gz should not be created")
	}
}
// TestCompressFile_NonExistent ensures compressFile tolerates a path that
// does not exist: it must return quietly rather than panic.
func TestCompressFile_NonExistent(t *testing.T) {
	compressFile(filepath.Join(t.TempDir(), "does-not-exist.log"))
}
// TestCompressFile_EmptyFile checks that a zero-byte log still produces a
// valid .gz archive and that the empty original is removed.
func TestCompressFile_EmptyFile(t *testing.T) {
	path := filepath.Join(t.TempDir(), "empty.log")
	if err := os.WriteFile(path, nil, 0644); err != nil {
		t.Fatal(err)
	}

	compressFile(path)

	if _, err := os.Stat(path + ".gz"); err != nil {
		t.Errorf(".gz should exist for empty file: %v", err)
	}
	if _, err := os.Stat(path); !os.IsNotExist(err) {
		t.Error("original empty file should be deleted")
	}
}
// TestCompressFile_LargeContent feeds ~1MB of highly repetitive log lines
// through compressFile and asserts a sane compression ratio.
func TestCompressFile_LargeContent(t *testing.T) {
	path := filepath.Join(t.TempDir(), "large.log")
	content := strings.Repeat("I0319 12:00:00.000000 12345 server.go:42] repeated log line\n", 15000)
	if err := os.WriteFile(path, []byte(content), 0644); err != nil {
		t.Fatal(err)
	}

	compressFile(path)

	info, err := os.Stat(path + ".gz")
	if err != nil {
		t.Fatalf(".gz missing: %v", err)
	}
	// Repetitive text should shrink to well under a fifth of its size.
	if ratio := float64(info.Size()) / float64(len(content)) * 100; ratio > 20 {
		t.Errorf("compression ratio too high: %.1f%% (expected < 20%% for repetitive logs)", ratio)
	}
}

34
weed/glog/glog_file.go

@ -64,6 +64,10 @@ var logMaxFiles = flag.Int("log_max_files", 5, "Maximum number of log files to k
// The default is 168 hours (7 days). Set to 0 to disable time-based rotation.
var logRotateHours = flag.Int("log_rotate_hours", 168, "Rotate log files after this many hours (default: 168 = 7 days, 0 = disabled)")
// logCompress enables gzip compression of rotated log files.
// Compressed files get a .gz suffix. Compression runs in the background.
// The flag takes effect in createLogDirs (after flag parsing), which calls
// SetCompressRotated(true) when set; default behavior is unchanged.
var logCompress = flag.Bool("log_compress", false, "Gzip-compress rotated log files to save disk space")
func createLogDirs() {
// Apply flag values now that flags have been parsed.
if *logMaxSizeMB > 0 {
@ -73,6 +77,10 @@ func createLogDirs() {
MaxFileCount = *logMaxFiles
}
if *logCompress {
SetCompressRotated(true)
}
if *logDir != "" {
logDirs = append(logDirs, *logDir)
} else {
@ -160,21 +168,31 @@ func create(tag string, t time.Time) (f *os.File, filename string, err error) {
var lastErr error
for _, dir := range logDirs {
// remove old logs
// remove old logs (including .gz compressed rotated files)
// Deduplicate .log/.log.gz pairs so concurrent compression
// doesn't cause double-counting against MaxFileCount.
entries, _ := os.ReadDir(dir)
var previousLogs []string
previousLogs := make(map[string][]string) // bare name -> actual file names
for _, entry := range entries {
if strings.HasPrefix(entry.Name(), logPrefix) {
previousLogs = append(previousLogs, entry.Name())
name := entry.Name()
bare := strings.TrimSuffix(name, ".gz")
if strings.HasPrefix(bare, logPrefix) {
previousLogs[bare] = append(previousLogs[bare], name)
}
}
if len(previousLogs) >= MaxFileCount {
sort.Strings(previousLogs)
for i, entry := range previousLogs {
if i > len(previousLogs)-MaxFileCount {
var keys []string
for bare := range previousLogs {
keys = append(keys, bare)
}
sort.Strings(keys)
for i, bare := range keys {
if i > len(keys)-MaxFileCount {
break
}
os.Remove(filepath.Join(dir, entry))
for _, name := range previousLogs[bare] {
os.Remove(filepath.Join(dir, name))
}
}
}

Loading…
Cancel
Save