From 5f2244d25db7a0eafa496623d69bbc46e600f6be Mon Sep 17 00:00:00 2001 From: JARDEL ALVES Date: Fri, 20 Mar 2026 01:35:08 -0300 Subject: [PATCH] glog: add gzip compression for rotated log files (#8709) * glog: add gzip compression for rotated log files Add opt-in gzip compression that automatically compresses log files after rotation, reducing disk usage in long-running deployments. - Add --log_compress flag to enable compression at startup - Add SetCompressRotated()/IsCompressRotated() for runtime toggle - Compress rotated files in background goroutine (non-blocking) - Use gzip.BestSpeed for minimal CPU overhead - Fix .gz file cleanup: TrimSuffix approach correctly counts compressed files toward MaxFileCount limit - Include 6 unit tests covering normal, empty, large, and edge cases Enabled via --log_compress flag. Default behavior unchanged. * glog: fix compressFile to check gz/dst close errors and use atomic rename Write to a temp file (.gz.tmp) and rename atomically to prevent exposing partial archives. Check gz.Close() and dst.Close() errors to avoid deleting the original log when flush fails (e.g. ENOSPC). Use defer for robust resource cleanup. * glog: deduplicate .log/.log.gz pairs in rotation cleanup During concurrent compression, both foo.log and foo.log.gz can exist simultaneously. Count them as one entry against MaxFileCount to prevent premature eviction of rotated logs. * glog: use portable temp path in TestCompressFile_NonExistent Replace hardcoded /nonexistent/path with t.TempDir() for portability. 
--------- Co-authored-by: Copilot --- weed/glog/glog.go | 6 ++ weed/glog/glog_compress.go | 87 ++++++++++++++++++ weed/glog/glog_compress_test.go | 151 ++++++++++++++++++++++++++++++++ weed/glog/glog_file.go | 34 +++++-- 4 files changed, 270 insertions(+), 8 deletions(-) create mode 100644 weed/glog/glog_compress.go create mode 100644 weed/glog/glog_compress_test.go diff --git a/weed/glog/glog.go b/weed/glog/glog.go index 2206e42f3..de1e02f57 100644 --- a/weed/glog/glog.go +++ b/weed/glog/glog.go @@ -852,10 +852,16 @@ func (sb *syncBuffer) Write(p []byte) (n int, err error) { } // rotateFile closes the syncBuffer's file and starts a new one. +// If compression is enabled, the old file is gzipped in the background. func (sb *syncBuffer) rotateFile(now time.Time) error { if sb.file != nil { + oldName := sb.file.Name() sb.Flush() sb.file.Close() + // Compress the rotated file in background (non-blocking) + if IsCompressRotated() && oldName != "" { + go compressFile(oldName) + } } var err error sb.file, _, err = create(severityName[sb.sev], now) diff --git a/weed/glog/glog_compress.go b/weed/glog/glog_compress.go new file mode 100644 index 000000000..974699d92 --- /dev/null +++ b/weed/glog/glog_compress.go @@ -0,0 +1,87 @@ +package glog + +import ( + "compress/gzip" + "io" + "os" + "strings" + "sync/atomic" +) + +// compressRotated controls whether rotated log files are gzip-compressed. +// 0 = disabled (default), 1 = enabled. +var compressRotated int32 + +// SetCompressRotated enables or disables gzip compression of rotated log files. +// When enabled, after a log file is rotated the old file is compressed in a +// background goroutine (non-blocking to the logging path). +func SetCompressRotated(enabled bool) { + if enabled { + atomic.StoreInt32(&compressRotated, 1) + } else { + atomic.StoreInt32(&compressRotated, 0) + } +} + +// IsCompressRotated returns whether compression of rotated files is active. 
+func IsCompressRotated() bool { + return atomic.LoadInt32(&compressRotated) == 1 +} + +// compressFile compresses the given file with gzip and removes the original. +// This runs in a background goroutine to avoid blocking the log write path. +// If any step fails the original file is kept intact. +func compressFile(path string) { + // Skip if already compressed + if strings.HasSuffix(path, ".gz") { + return + } + + src, err := os.Open(path) + if err != nil { + return + } + defer src.Close() + + dstPath := path + ".gz" + tmpPath := dstPath + ".tmp" + dst, err := os.Create(tmpPath) + if err != nil { + return + } + + var success bool + defer func() { + dst.Close() + if !success { + os.Remove(tmpPath) + } + }() + + gz, err := gzip.NewWriterLevel(dst, gzip.BestSpeed) + if err != nil { + return + } + + if _, err = io.Copy(gz, src); err != nil { + gz.Close() + return + } + + if err = gz.Close(); err != nil { + return + } + + if err = dst.Close(); err != nil { + return + } + + if err = os.Rename(tmpPath, dstPath); err != nil { + return + } + + success = true + + // Only remove original after successful compression + os.Remove(path) +} diff --git a/weed/glog/glog_compress_test.go b/weed/glog/glog_compress_test.go new file mode 100644 index 000000000..4429eb848 --- /dev/null +++ b/weed/glog/glog_compress_test.go @@ -0,0 +1,151 @@ +package glog + +import ( + "compress/gzip" + "io" + "os" + "path/filepath" + "strings" + "testing" +) + +func TestCompressRotated_Toggle(t *testing.T) { + if IsCompressRotated() { + t.Error("compression should be off by default") + } + + SetCompressRotated(true) + if !IsCompressRotated() { + t.Error("compression should be on after SetCompressRotated(true)") + } + + SetCompressRotated(false) + if IsCompressRotated() { + t.Error("compression should be off after SetCompressRotated(false)") + } +} + +func TestCompressFile(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "test.log") + + content := "line 1\nline 2\nline 3\n" + if err 
:= os.WriteFile(path, []byte(content), 0644); err != nil { + t.Fatalf("failed to write test file: %v", err) + } + + compressFile(path) + + // Original should be deleted + if _, err := os.Stat(path); !os.IsNotExist(err) { + t.Error("original file should be deleted after compression") + } + + // .gz should exist + gzPath := path + ".gz" + if _, err := os.Stat(gzPath); err != nil { + t.Fatalf(".gz file should exist: %v", err) + } + + // Verify .gz content + f, err := os.Open(gzPath) + if err != nil { + t.Fatalf("failed to open .gz: %v", err) + } + defer f.Close() + + gz, err := gzip.NewReader(f) + if err != nil { + t.Fatalf("failed to create gzip reader: %v", err) + } + defer gz.Close() + + decompressed, err := io.ReadAll(gz) + if err != nil { + t.Fatalf("failed to decompress: %v", err) + } + + if string(decompressed) != content { + t.Errorf("decompressed content mismatch: got %q, want %q", string(decompressed), content) + } +} + +func TestCompressFile_AlreadyGz(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "test.log.gz") + + if err := os.WriteFile(path, []byte("fake gz"), 0644); err != nil { + t.Fatal(err) + } + + // Should skip .gz files + compressFile(path) + + // File should still exist unchanged + data, err := os.ReadFile(path) + if err != nil { + t.Fatal(err) + } + if string(data) != "fake gz" { + t.Error("should not modify .gz files") + } + + // No .gz.gz should exist + if _, err := os.Stat(path + ".gz"); !os.IsNotExist(err) { + t.Error(".gz.gz should not be created") + } +} + +func TestCompressFile_NonExistent(t *testing.T) { + // Should not panic on missing file + missing := filepath.Join(t.TempDir(), "does-not-exist.log") + compressFile(missing) +} + +func TestCompressFile_EmptyFile(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "empty.log") + + if err := os.WriteFile(path, []byte{}, 0644); err != nil { + t.Fatal(err) + } + + compressFile(path) + + // .gz should exist even for empty files + gzPath := path + ".gz" + if 
_, err := os.Stat(gzPath); err != nil { + t.Errorf(".gz should exist for empty file: %v", err) + } + + // Original should be removed + if _, err := os.Stat(path); !os.IsNotExist(err) { + t.Error("original empty file should be deleted") + } +} + +func TestCompressFile_LargeContent(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "large.log") + + // Write 1MB of repetitive content (compresses well) + content := strings.Repeat("I0319 12:00:00.000000 12345 server.go:42] repeated log line\n", 15000) + if err := os.WriteFile(path, []byte(content), 0644); err != nil { + t.Fatal(err) + } + + origSize := int64(len(content)) + compressFile(path) + + gzPath := path + ".gz" + info, err := os.Stat(gzPath) + if err != nil { + t.Fatalf(".gz missing: %v", err) + } + + // Compressed should be significantly smaller + ratio := float64(info.Size()) / float64(origSize) * 100 + if ratio > 20 { + t.Errorf("compression ratio too high: %.1f%% (expected < 20%% for repetitive logs)", ratio) + } +} diff --git a/weed/glog/glog_file.go b/weed/glog/glog_file.go index e0de6e958..5911e0f2f 100644 --- a/weed/glog/glog_file.go +++ b/weed/glog/glog_file.go @@ -64,6 +64,10 @@ var logMaxFiles = flag.Int("log_max_files", 5, "Maximum number of log files to k // The default is 168 hours (7 days). Set to 0 to disable time-based rotation. var logRotateHours = flag.Int("log_rotate_hours", 168, "Rotate log files after this many hours (default: 168 = 7 days, 0 = disabled)") +// logCompress enables gzip compression of rotated log files. +// Compressed files get a .gz suffix. Compression runs in the background. +var logCompress = flag.Bool("log_compress", false, "Gzip-compress rotated log files to save disk space") + func createLogDirs() { // Apply flag values now that flags have been parsed. 
if *logMaxSizeMB > 0 { @@ -73,6 +77,10 @@ func createLogDirs() { MaxFileCount = *logMaxFiles } + if *logCompress { + SetCompressRotated(true) + } + if *logDir != "" { logDirs = append(logDirs, *logDir) } else { @@ -160,21 +168,31 @@ func create(tag string, t time.Time) (f *os.File, filename string, err error) { var lastErr error for _, dir := range logDirs { - // remove old logs + // remove old logs (including .gz compressed rotated files) + // Deduplicate .log/.log.gz pairs so concurrent compression + // doesn't cause double-counting against MaxFileCount. entries, _ := os.ReadDir(dir) - var previousLogs []string + previousLogs := make(map[string][]string) // bare name -> actual file names for _, entry := range entries { - if strings.HasPrefix(entry.Name(), logPrefix) { - previousLogs = append(previousLogs, entry.Name()) + name := entry.Name() + bare := strings.TrimSuffix(name, ".gz") + if strings.HasPrefix(bare, logPrefix) { + previousLogs[bare] = append(previousLogs[bare], name) } } if len(previousLogs) >= MaxFileCount { - sort.Strings(previousLogs) - for i, entry := range previousLogs { - if i > len(previousLogs)-MaxFileCount { + var keys []string + for bare := range previousLogs { + keys = append(keys, bare) + } + sort.Strings(keys) + for i, bare := range keys { + if i > len(keys)-MaxFileCount { break } - os.Remove(filepath.Join(dir, entry)) + for _, name := range previousLogs[bare] { + os.Remove(filepath.Join(dir, name)) + } } }