merge current message queue code changes (#6201)
* listing files to convert to parquet
* write parquet files
* save logs into parquet files
* pass by value
* compact logs into parquet format
* can skip existing files
* refactor
* refactor
* fix compilation
* when no partition found
* refactor
* add untested parquet file read
* rename package
* refactor
* rename files
* remove unused
* add merged log read func
* parquet wants to know the file size
* rewind by time
* pass in stop ts
* add stop ts
* adjust log
* minor
* adjust log
* skip .parquet files when reading message logs
* skip non message files
* Update subscriber_record.go
* send messages
* skip message data with only ts
* skip non log files
* update parquet-go package
* ensure a valid record type
* add new field to a record type
* Update read_parquet_to_log.go
* fix parquet file name generation
* separating reading parquet and logs
* add key field
* add skipped logs
* use in memory cache
* refactor
* refactor
* refactor
* refactor, and change compact log
* refactor
* rename
* refactor
* fix format
* prefix v to version directory
Chris Lu, 2 months ago, committed by GitHub
33 changed files with 1106 additions and 264 deletions
  2  go.mod
  2  go.sum
  4  weed/command/benchmark.go
 18  weed/command/upload.go
  4  weed/filer/reader_at.go
  5  weed/filer/topics.go
 36  weed/filer_client/filer_client_accessor.go
  3  weed/mq/broker/broker_grpc_assign.go
  2  weed/mq/broker/broker_grpc_configure.go
  5  weed/mq/broker/broker_grpc_pub_follow.go
  9  weed/mq/broker/broker_grpc_sub_follow.go
  5  weed/mq/broker/broker_topic_conf_read_write.go
137  weed/mq/broker/broker_topic_partition_read_write.go
 13  weed/mq/client/cmd/weed_pub_record/publisher_record.go
 15  weed/mq/client/cmd/weed_sub_record/subscriber_record.go
 12  weed/mq/client/sub_client/on_each_partition.go
  1  weed/mq/client/sub_client/subscriber.go
454  weed/mq/logstore/log_to_parquet.go
 41  weed/mq/logstore/merged_read.go
144  weed/mq/logstore/read_log_from_disk.go
162  weed/mq/logstore/read_parquet_to_log.go
  4  weed/mq/pub_balancer/broker_stats.go
  7  weed/mq/schema/schema_builder.go
 10  weed/mq/sub_coordinator/partition_consumer_mapping.go
 14  weed/mq/sub_coordinator/partition_list.go
  2  weed/mq/topic/local_manager.go
  1  weed/mq/topic/local_partition.go
 18  weed/mq/topic/partition.go
 44  weed/mq/topic/topic.go
  2  weed/mq/topic/topic_partition.go
 80  weed/operation/submit.go
 94  weed/shell/command_mq_topic_compact.go
 20  weed/util/chunk_cache/chunk_cache_in_memory.go
weed/filer/topics.go
@@ -1,6 +1,7 @@
 package filer

 const (
-	TopicsDir    = "/topics"
-	SystemLogDir = TopicsDir + "/.system/log"
+	TopicsDir     = "/topics"
+	SystemLogDir  = TopicsDir + "/.system/log"
+	TopicConfFile = "topic.conf"
 )
weed/mq/logstore/log_to_parquet.go (new file)
@@ -0,0 +1,454 @@
package logstore

import (
	"encoding/binary"
	"fmt"
	"github.com/parquet-go/parquet-go"
	"github.com/parquet-go/parquet-go/compress/zstd"
	"github.com/seaweedfs/seaweedfs/weed/filer"
	"github.com/seaweedfs/seaweedfs/weed/mq/schema"
	"github.com/seaweedfs/seaweedfs/weed/mq/topic"
	"github.com/seaweedfs/seaweedfs/weed/operation"
	"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
	"github.com/seaweedfs/seaweedfs/weed/pb/schema_pb"
	"github.com/seaweedfs/seaweedfs/weed/util"
	util_http "github.com/seaweedfs/seaweedfs/weed/util/http"
	"github.com/seaweedfs/seaweedfs/weed/util/log_buffer"
	"google.golang.org/protobuf/proto"
	"io"
	"os"
	"strings"
	"time"
)

const (
	SW_COLUMN_NAME_TS  = "_ts_ns"
	SW_COLUMN_NAME_KEY = "_key"
)

func CompactTopicPartitions(filerClient filer_pb.FilerClient, t topic.Topic, timeAgo time.Duration, recordType *schema_pb.RecordType, preference *operation.StoragePreference) error {
	// list the topic partition versions
	topicVersions, err := collectTopicVersions(filerClient, t, timeAgo)
	if err != nil {
		return fmt.Errorf("list topic files: %v", err)
	}

	// compact the partitions
	for _, topicVersion := range topicVersions {
		partitions, err := collectTopicVersionsPartitions(filerClient, t, topicVersion)
		if err != nil {
			return fmt.Errorf("list partitions %s/%s/%s: %v", t.Namespace, t.Name, topicVersion, err)
		}
		for _, partition := range partitions {
			err := compactTopicPartition(filerClient, t, timeAgo, recordType, partition, preference)
			if err != nil {
				return fmt.Errorf("compact partition %s/%s/%s/%s: %v", t.Namespace, t.Name, topicVersion, partition, err)
			}
		}
	}
	return nil
}

func collectTopicVersions(filerClient filer_pb.FilerClient, t topic.Topic, timeAgo time.Duration) (partitionVersions []time.Time, err error) {
	err = filer_pb.ReadDirAllEntries(filerClient, util.FullPath(t.Dir()), "", func(entry *filer_pb.Entry, isLast bool) error {
		t, err := topic.ParseTopicVersion(entry.Name)
		if err != nil {
			// skip non-partition directories
			return nil
		}
		if t.Unix() < time.Now().Unix()-int64(timeAgo/time.Second) {
			partitionVersions = append(partitionVersions, t)
		}
		return nil
	})
	return
}

func collectTopicVersionsPartitions(filerClient filer_pb.FilerClient, t topic.Topic, topicVersion time.Time) (partitions []topic.Partition, err error) {
	version := topicVersion.Format(topic.PartitionGenerationFormat)
	err = filer_pb.ReadDirAllEntries(filerClient, util.FullPath(t.Dir()).Child(version), "", func(entry *filer_pb.Entry, isLast bool) error {
		if !entry.IsDirectory {
			return nil
		}
		start, stop := topic.ParsePartitionBoundary(entry.Name)
		if start != stop {
			partitions = append(partitions, topic.Partition{
				RangeStart: start,
				RangeStop:  stop,
				RingSize:   topic.PartitionCount,
				UnixTimeNs: topicVersion.UnixNano(),
			})
		}
		return nil
	})
	return
}

func compactTopicPartition(filerClient filer_pb.FilerClient, t topic.Topic, timeAgo time.Duration, recordType *schema_pb.RecordType, partition topic.Partition, preference *operation.StoragePreference) error {
	partitionDir := topic.PartitionDir(t, partition)

	// compact the partition directory
	return compactTopicPartitionDir(filerClient, t.Name, partitionDir, timeAgo, recordType, preference)
}

func compactTopicPartitionDir(filerClient filer_pb.FilerClient, topicName, partitionDir string, timeAgo time.Duration, recordType *schema_pb.RecordType, preference *operation.StoragePreference) error {
	// read all existing parquet files
	minTsNs, maxTsNs, err := readAllParquetFiles(filerClient, partitionDir)
	if err != nil {
		return err
	}

	// read all log files
	logFiles, err := readAllLogFiles(filerClient, partitionDir, timeAgo, minTsNs, maxTsNs)
	if err != nil {
		return err
	}
	if len(logFiles) == 0 {
		return nil
	}

	// divide log files into groups of 128MB
	logFileGroups := groupFilesBySize(logFiles, 128*1024*1024)

	// write to parquet file
	parquetLevels, err := schema.ToParquetLevels(recordType)
	if err != nil {
		return fmt.Errorf("ToParquetLevels failed %+v: %v", recordType, err)
	}

	// create a parquet schema
	parquetSchema, err := schema.ToParquetSchema(topicName, recordType)
	if err != nil {
		return fmt.Errorf("ToParquetSchema failed: %v", err)
	}

	// TODO parallelize the writing
	for _, logFileGroup := range logFileGroups {
		if err = writeLogFilesToParquet(filerClient, partitionDir, recordType, logFileGroup, parquetSchema, parquetLevels, preference); err != nil {
			return err
		}
	}

	return nil
}

func groupFilesBySize(logFiles []*filer_pb.Entry, maxGroupSize int64) (logFileGroups [][]*filer_pb.Entry) {
	var logFileGroup []*filer_pb.Entry
	var groupSize int64
	for _, logFile := range logFiles {
		if groupSize+int64(logFile.Attributes.FileSize) > maxGroupSize {
			logFileGroups = append(logFileGroups, logFileGroup)
			logFileGroup = nil
			groupSize = 0
		}
		logFileGroup = append(logFileGroup, logFile)
		groupSize += int64(logFile.Attributes.FileSize)
	}
	if len(logFileGroup) > 0 {
		logFileGroups = append(logFileGroups, logFileGroup)
	}
	return
}

func readAllLogFiles(filerClient filer_pb.FilerClient, partitionDir string, timeAgo time.Duration, minTsNs, maxTsNs int64) (logFiles []*filer_pb.Entry, err error) {
	err = filer_pb.ReadDirAllEntries(filerClient, util.FullPath(partitionDir), "", func(entry *filer_pb.Entry, isLast bool) error {
		if strings.HasSuffix(entry.Name, ".parquet") {
			return nil
		}
		if entry.Attributes.Crtime > time.Now().Unix()-int64(timeAgo/time.Second) {
			return nil
		}
		logTime, err := time.Parse(topic.TIME_FORMAT, entry.Name)
		if err != nil {
			// glog.Warningf("parse log time %s: %v", entry.Name, err)
			return nil
		}
		if maxTsNs > 0 && logTime.UnixNano() <= maxTsNs {
			return nil
		}
		logFiles = append(logFiles, entry)
		return nil
	})
	return
}

func readAllParquetFiles(filerClient filer_pb.FilerClient, partitionDir string) (minTsNs, maxTsNs int64, err error) {
	err = filer_pb.ReadDirAllEntries(filerClient, util.FullPath(partitionDir), "", func(entry *filer_pb.Entry, isLast bool) error {
		if !strings.HasSuffix(entry.Name, ".parquet") {
			return nil
		}
		if len(entry.Extended) == 0 {
			return nil
		}

		// read min ts
		minTsBytes := entry.Extended["min"]
		if len(minTsBytes) != 8 {
			return nil
		}
		minTs := int64(binary.BigEndian.Uint64(minTsBytes))
		if minTsNs == 0 || minTs < minTsNs {
			minTsNs = minTs
		}

		// read max ts
		maxTsBytes := entry.Extended["max"]
		if len(maxTsBytes) != 8 {
			return nil
		}
		maxTs := int64(binary.BigEndian.Uint64(maxTsBytes))
		if maxTsNs == 0 || maxTs > maxTsNs {
			maxTsNs = maxTs
		}
		return nil
	})
	return
}

func writeLogFilesToParquet(filerClient filer_pb.FilerClient, partitionDir string, recordType *schema_pb.RecordType, logFileGroups []*filer_pb.Entry, parquetSchema *parquet.Schema, parquetLevels *schema.ParquetLevels, preference *operation.StoragePreference) (err error) {

	tempFile, err := os.CreateTemp(".", "t*.parquet")
	if err != nil {
		return fmt.Errorf("create temp file: %v", err)
	}
	defer func() {
		tempFile.Close()
		os.Remove(tempFile.Name())
	}()

	writer := parquet.NewWriter(tempFile, parquetSchema, parquet.Compression(&zstd.Codec{Level: zstd.DefaultLevel}))
	rowBuilder := parquet.NewRowBuilder(parquetSchema)

	var startTsNs, stopTsNs int64

	for _, logFile := range logFileGroups {
		fmt.Printf("compact %s/%s ", partitionDir, logFile.Name)
		var rows []parquet.Row
		if err := iterateLogEntries(filerClient, logFile, func(entry *filer_pb.LogEntry) error {

			if startTsNs == 0 {
				startTsNs = entry.TsNs
			}
			stopTsNs = entry.TsNs

			if len(entry.Key) == 0 {
				return nil
			}

			// write to parquet file
			rowBuilder.Reset()

			record := &schema_pb.RecordValue{}
			if err := proto.Unmarshal(entry.Data, record); err != nil {
				return fmt.Errorf("unmarshal record value: %v", err)
			}

			record.Fields[SW_COLUMN_NAME_TS] = &schema_pb.Value{
				Kind: &schema_pb.Value_Int64Value{
					Int64Value: entry.TsNs,
				},
			}
			record.Fields[SW_COLUMN_NAME_KEY] = &schema_pb.Value{
				Kind: &schema_pb.Value_BytesValue{
					BytesValue: entry.Key,
				},
			}

			if err := schema.AddRecordValue(rowBuilder, recordType, parquetLevels, record); err != nil {
				return fmt.Errorf("add record value: %v", err)
			}

			rows = append(rows, rowBuilder.Row())

			return nil

		}); err != nil {
			return fmt.Errorf("iterate log entry %v/%v: %v", partitionDir, logFile.Name, err)
		}

		fmt.Printf("processed %d rows\n", len(rows))

		if _, err := writer.WriteRows(rows); err != nil {
			return fmt.Errorf("write rows: %v", err)
		}
	}

	if err := writer.Close(); err != nil {
		return fmt.Errorf("close writer: %v", err)
	}

	// write the parquet file to partitionDir
	parquetFileName := fmt.Sprintf("%s.parquet", time.Unix(0, startTsNs).UTC().Format("2006-01-02-15-04-05"))
	if err := saveParquetFileToPartitionDir(filerClient, tempFile, partitionDir, parquetFileName, preference, startTsNs, stopTsNs); err != nil {
		return fmt.Errorf("save parquet file %s: %v", parquetFileName, err)
	}

	return nil

}

func saveParquetFileToPartitionDir(filerClient filer_pb.FilerClient, sourceFile *os.File, partitionDir, parquetFileName string, preference *operation.StoragePreference, startTsNs, stopTsNs int64) error {
	uploader, err := operation.NewUploader()
	if err != nil {
		return fmt.Errorf("new uploader: %v", err)
	}

	// get file size
	fileInfo, err := sourceFile.Stat()
	if err != nil {
		return fmt.Errorf("stat source file: %v", err)
	}

	// upload file in chunks
	chunkSize := int64(4 * 1024 * 1024)
	chunkCount := (fileInfo.Size() + chunkSize - 1) / chunkSize
	entry := &filer_pb.Entry{
		Name: parquetFileName,
		Attributes: &filer_pb.FuseAttributes{
			Crtime:   time.Now().Unix(),
			Mtime:    time.Now().Unix(),
			FileMode: uint32(os.FileMode(0644)),
			FileSize: uint64(fileInfo.Size()),
			Mime:     "application/vnd.apache.parquet",
		},
	}
	entry.Extended = make(map[string][]byte)
	minTsBytes := make([]byte, 8)
	binary.BigEndian.PutUint64(minTsBytes, uint64(startTsNs))
	entry.Extended["min"] = minTsBytes
	maxTsBytes := make([]byte, 8)
	binary.BigEndian.PutUint64(maxTsBytes, uint64(stopTsNs))
	entry.Extended["max"] = maxTsBytes

	for i := int64(0); i < chunkCount; i++ {
		fileId, uploadResult, err, _ := uploader.UploadWithRetry(
			filerClient,
			&filer_pb.AssignVolumeRequest{
				Count:       1,
				Replication: preference.Replication,
				Collection:  preference.Collection,
				TtlSec:      0, // TODO set ttl
				DiskType:    preference.DiskType,
				Path:        partitionDir + "/" + parquetFileName,
			},
			&operation.UploadOption{
				Filename:          parquetFileName,
				Cipher:            false,
				IsInputCompressed: false,
				MimeType:          "application/vnd.apache.parquet",
				PairMap:           nil,
			},
			func(host, fileId string) string {
				return fmt.Sprintf("http://%s/%s", host, fileId)
			},
			io.NewSectionReader(sourceFile, i*chunkSize, chunkSize),
		)
		if err != nil {
			return fmt.Errorf("upload chunk %d: %v", i, err)
		}
		if uploadResult.Error != "" {
			return fmt.Errorf("upload result: %v", uploadResult.Error)
		}
		entry.Chunks = append(entry.Chunks, uploadResult.ToPbFileChunk(fileId, i*chunkSize, time.Now().UnixNano()))
	}

	// write the entry to partitionDir
	if err := filerClient.WithFilerClient(false, func(client filer_pb.SeaweedFilerClient) error {
		return filer_pb.CreateEntry(client, &filer_pb.CreateEntryRequest{
			Directory: partitionDir,
			Entry:     entry,
		})
	}); err != nil {
		return fmt.Errorf("create entry: %v", err)
	}
	fmt.Printf("saved to %s/%s\n", partitionDir, parquetFileName)

	return nil
}

func iterateLogEntries(filerClient filer_pb.FilerClient, logFile *filer_pb.Entry, eachLogEntryFn func(entry *filer_pb.LogEntry) error) error {
	lookupFn := filer.LookupFn(filerClient)
	_, err := eachFile(logFile, lookupFn, func(logEntry *filer_pb.LogEntry) (isDone bool, err error) {
		if err := eachLogEntryFn(logEntry); err != nil {
			return true, err
		}
		return false, nil
	})
	return err
}

func eachFile(entry *filer_pb.Entry, lookupFileIdFn func(fileId string) (targetUrls []string, err error), eachLogEntryFn log_buffer.EachLogEntryFuncType) (processedTsNs int64, err error) {
	if len(entry.Content) > 0 {
		// skip .offset files
		return
	}
	var urlStrings []string
	for _, chunk := range entry.Chunks {
		if chunk.Size == 0 {
			continue
		}
		if chunk.IsChunkManifest {
			fmt.Printf("this should not happen. unexpected chunk manifest in %s", entry.Name)
			return
		}
		urlStrings, err = lookupFileIdFn(chunk.FileId)
		if err != nil {
			err = fmt.Errorf("lookup %s: %v", chunk.FileId, err)
			return
		}
		if len(urlStrings) == 0 {
			err = fmt.Errorf("no url found for %s", chunk.FileId)
			return
		}

		// try one of the urlString until util.Get(urlString) succeeds
		var processed bool
		for _, urlString := range urlStrings {
			var data []byte
			if data, _, err = util_http.Get(urlString); err == nil {
				processed = true
				if processedTsNs, err = eachChunk(data, eachLogEntryFn); err != nil {
					return
				}
				break
			}
		}
		if !processed {
			err = fmt.Errorf("no data processed for %s %s", entry.Name, chunk.FileId)
			return
		}

	}
	return
}

func eachChunk(buf []byte, eachLogEntryFn log_buffer.EachLogEntryFuncType) (processedTsNs int64, err error) {
	for pos := 0; pos+4 < len(buf); {

		size := util.BytesToUint32(buf[pos : pos+4])
		if pos+4+int(size) > len(buf) {
			err = fmt.Errorf("reach each log chunk: read [%d,%d) from [0,%d)", pos, pos+int(size)+4, len(buf))
			return
		}
		entryData := buf[pos+4 : pos+4+int(size)]

		logEntry := &filer_pb.LogEntry{}
		if err = proto.Unmarshal(entryData, logEntry); err != nil {
			pos += 4 + int(size)
			err = fmt.Errorf("unexpected unmarshal mq_pb.Message: %v", err)
			return
		}

		if _, err = eachLogEntryFn(logEntry); err != nil {
			err = fmt.Errorf("process log entry %v: %v", logEntry, err)
			return
		}

		processedTsNs = logEntry.TsNs

		pos += 4 + int(size)

	}

	return
}
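Note: the log files walked by eachChunk above are a flat stream of size-prefixed records: a 4-byte big-endian length (read here via util.BytesToUint32) followed by a marshaled filer_pb.LogEntry. Below is a minimal self-contained sketch of that framing, using plain byte payloads in place of marshaled LogEntry messages; frame and iterate are hypothetical helper names, not part of this change.

package main

import (
	"encoding/binary"
	"fmt"
)

// frame appends one size-prefixed record: 4-byte big-endian length, then payload.
func frame(buf []byte, payload []byte) []byte {
	var size [4]byte
	binary.BigEndian.PutUint32(size[:], uint32(len(payload)))
	return append(append(buf, size[:]...), payload...)
}

// iterate walks the buffer the same way eachChunk does: read the length,
// bounds-check, hand the payload to the callback, advance past the record.
func iterate(buf []byte, eachFn func(payload []byte) error) error {
	for pos := 0; pos+4 < len(buf); {
		size := int(binary.BigEndian.Uint32(buf[pos : pos+4]))
		if pos+4+size > len(buf) {
			return fmt.Errorf("truncated record: need [%d,%d) of [0,%d)", pos, pos+4+size, len(buf))
		}
		if err := eachFn(buf[pos+4 : pos+4+size]); err != nil {
			return err
		}
		pos += 4 + size
	}
	return nil
}

func main() {
	var buf []byte
	buf = frame(buf, []byte("hello"))
	buf = frame(buf, []byte("world"))
	_ = iterate(buf, func(p []byte) error {
		fmt.Printf("payload: %s\n", p)
		return nil
	})
}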
weed/mq/logstore/merged_read.go (new file)
@@ -0,0 +1,41 @@
package logstore

import (
	"github.com/seaweedfs/seaweedfs/weed/mq/topic"
	"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
	"github.com/seaweedfs/seaweedfs/weed/util/log_buffer"
)

func GenMergedReadFunc(filerClient filer_pb.FilerClient, t topic.Topic, p topic.Partition) log_buffer.LogReadFromDiskFuncType {
	fromParquetFn := GenParquetReadFunc(filerClient, t, p)
	readLogDirectFn := GenLogOnDiskReadFunc(filerClient, t, p)
	return mergeReadFuncs(fromParquetFn, readLogDirectFn)
}

func mergeReadFuncs(fromParquetFn, readLogDirectFn log_buffer.LogReadFromDiskFuncType) log_buffer.LogReadFromDiskFuncType {
	var exhaustedParquet bool
	var lastProcessedPosition log_buffer.MessagePosition
	return func(startPosition log_buffer.MessagePosition, stopTsNs int64, eachLogEntryFn log_buffer.EachLogEntryFuncType) (lastReadPosition log_buffer.MessagePosition, isDone bool, err error) {
		if !exhaustedParquet {
			// glog.V(4).Infof("reading from parquet startPosition: %v\n", startPosition.UTC())
			lastReadPosition, isDone, err = fromParquetFn(startPosition, stopTsNs, eachLogEntryFn)
			// glog.V(4).Infof("read from parquet: %v %v %v %v\n", startPosition, lastReadPosition, isDone, err)
			if isDone {
				isDone = false
			}
			if err != nil {
				return
			}
			lastProcessedPosition = lastReadPosition
		}
		exhaustedParquet = true

		if startPosition.Before(lastProcessedPosition.Time) {
			startPosition = lastProcessedPosition
		}

		// glog.V(4).Infof("reading from direct log startPosition: %v\n", startPosition.UTC())
		lastReadPosition, isDone, err = readLogDirectFn(startPosition, stopTsNs, eachLogEntryFn)
		return
	}
}
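mergeReadFuncs works because the returned closure keeps state across calls: once the parquet side has been read, every later call skips straight to the direct log reader, resuming at lastProcessedPosition so records are not delivered twice. A stripped-down illustration of the same pattern, with hypothetical int-based readers standing in for the two LogReadFromDiskFuncType functions:

package main

import "fmt"

// reader returns the next value and whether the source is drained.
type reader func() (value int, done bool)

// mergeReaders drains primary first, then serves from secondary,
// mirroring how mergeReadFuncs exhausts parquet before direct logs.
func mergeReaders(primary, secondary reader) reader {
	exhaustedPrimary := false
	return func() (int, bool) {
		if !exhaustedPrimary {
			if v, done := primary(); !done {
				return v, false
			}
			exhaustedPrimary = true // remembered across calls
		}
		return secondary()
	}
}

func sliceReader(values []int) reader {
	i := 0
	return func() (int, bool) {
		if i >= len(values) {
			return 0, true
		}
		v := values[i]
		i++
		return v, false
	}
}

func main() {
	next := mergeReaders(sliceReader([]int{1, 2}), sliceReader([]int{3, 4}))
	for v, done := next(); !done; v, done = next() {
		fmt.Println(v) // prints 1 2 3 4
	}
}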
weed/mq/logstore/read_log_from_disk.go (new file)
@@ -0,0 +1,144 @@
package logstore

import (
	"fmt"
	"github.com/seaweedfs/seaweedfs/weed/filer"
	"github.com/seaweedfs/seaweedfs/weed/glog"
	"github.com/seaweedfs/seaweedfs/weed/mq/topic"
	"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
	"github.com/seaweedfs/seaweedfs/weed/util"
	util_http "github.com/seaweedfs/seaweedfs/weed/util/http"
	"github.com/seaweedfs/seaweedfs/weed/util/log_buffer"
	"google.golang.org/protobuf/proto"
	"math"
	"strings"
	"time"
)

func GenLogOnDiskReadFunc(filerClient filer_pb.FilerClient, t topic.Topic, p topic.Partition) log_buffer.LogReadFromDiskFuncType {
	partitionDir := topic.PartitionDir(t, p)

	lookupFileIdFn := filer.LookupFn(filerClient)

	eachChunkFn := func(buf []byte, eachLogEntryFn log_buffer.EachLogEntryFuncType, starTsNs, stopTsNs int64) (processedTsNs int64, err error) {
		for pos := 0; pos+4 < len(buf); {

			size := util.BytesToUint32(buf[pos : pos+4])
			if pos+4+int(size) > len(buf) {
				err = fmt.Errorf("GenLogOnDiskReadFunc: read [%d,%d) from [0,%d)", pos, pos+int(size)+4, len(buf))
				return
			}
			entryData := buf[pos+4 : pos+4+int(size)]

			logEntry := &filer_pb.LogEntry{}
			if err = proto.Unmarshal(entryData, logEntry); err != nil {
				pos += 4 + int(size)
				err = fmt.Errorf("unexpected unmarshal mq_pb.Message: %v", err)
				return
			}
			if logEntry.TsNs < starTsNs {
				pos += 4 + int(size)
				continue
			}
			if stopTsNs != 0 && logEntry.TsNs > stopTsNs {
				println("stopTsNs", stopTsNs, "logEntry.TsNs", logEntry.TsNs)
				return
			}

			// fmt.Printf("  read logEntry: %v, ts %v\n", string(logEntry.Key), time.Unix(0, logEntry.TsNs).UTC())
			if _, err = eachLogEntryFn(logEntry); err != nil {
				err = fmt.Errorf("process log entry %v: %v", logEntry, err)
				return
			}

			processedTsNs = logEntry.TsNs

			pos += 4 + int(size)

		}

		return
	}

	eachFileFn := func(entry *filer_pb.Entry, eachLogEntryFn log_buffer.EachLogEntryFuncType, starTsNs, stopTsNs int64) (processedTsNs int64, err error) {
		if len(entry.Content) > 0 {
			// skip .offset files
			return
		}
		var urlStrings []string
		for _, chunk := range entry.Chunks {
			if chunk.Size == 0 {
				continue
			}
			if chunk.IsChunkManifest {
				glog.Warningf("this should not happen. unexpected chunk manifest in %s/%s", partitionDir, entry.Name)
				return
			}
			urlStrings, err = lookupFileIdFn(chunk.FileId)
			if err != nil {
				err = fmt.Errorf("lookup %s: %v", chunk.FileId, err)
				return
			}
			if len(urlStrings) == 0 {
				err = fmt.Errorf("no url found for %s", chunk.FileId)
				return
			}

			// try one of the urlString until util.Get(urlString) succeeds
			var processed bool
			for _, urlString := range urlStrings {
				// TODO optimization opportunity: reuse the buffer
				var data []byte
				// fmt.Printf("reading %s/%s %s\n", partitionDir, entry.Name, urlString)
				if data, _, err = util_http.Get(urlString); err == nil {
					processed = true
					if processedTsNs, err = eachChunkFn(data, eachLogEntryFn, starTsNs, stopTsNs); err != nil {
						return
					}
					break
				}
			}
			if !processed {
				err = fmt.Errorf("no data processed for %s %s", entry.Name, chunk.FileId)
				return
			}

		}
		return
	}

	return func(startPosition log_buffer.MessagePosition, stopTsNs int64, eachLogEntryFn log_buffer.EachLogEntryFuncType) (lastReadPosition log_buffer.MessagePosition, isDone bool, err error) {
		startFileName := startPosition.UTC().Format(topic.TIME_FORMAT)
		startTsNs := startPosition.Time.UnixNano()
		stopTime := time.Unix(0, stopTsNs)
		var processedTsNs int64
		err = filerClient.WithFilerClient(false, func(client filer_pb.SeaweedFilerClient) error {
			return filer_pb.SeaweedList(client, partitionDir, "", func(entry *filer_pb.Entry, isLast bool) error {
				if entry.IsDirectory {
					return nil
				}
				if strings.HasSuffix(entry.Name, ".parquet") {
					return nil
				}
				// FIXME: this is a hack to skip the .offset files
				if strings.HasSuffix(entry.Name, ".offset") {
					return nil
				}
				if stopTsNs != 0 && entry.Name > stopTime.UTC().Format(topic.TIME_FORMAT) {
					isDone = true
					return nil
				}
				if entry.Name < startPosition.UTC().Format(topic.TIME_FORMAT) {
					return nil
				}
				if processedTsNs, err = eachFileFn(entry, eachLogEntryFn, startTsNs, stopTsNs); err != nil {
					return err
				}
				return nil

			}, startFileName, true, math.MaxInt32)
		})
		lastReadPosition = log_buffer.NewMessagePosition(processedTsNs, -2)
		return
	}
}
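This reader can seek by listing from startFileName and compare entry names as plain strings only because the log file names are fixed-width timestamps, so lexicographic order matches chronological order. A small sketch of that property, assuming a layout like the "2006-01-02-15-04-05" pattern used above for the parquet file names:

package main

import (
	"fmt"
	"sort"
	"time"
)

func main() {
	const layout = "2006-01-02-15-04-05" // fixed-width, most significant field first

	base := time.Date(2024, 11, 30, 23, 59, 0, 0, time.UTC)
	names := []string{
		base.Add(2 * time.Minute).Format(layout),
		base.Format(layout),
		base.Add(1 * time.Minute).Format(layout), // crosses into December
	}

	sort.Strings(names) // lexicographic sort
	fmt.Println(names)  // chronological: 23:59, then 00:00 and 00:01 on Dec 1

	// so "list entries >= startFileName" selects exactly the files
	// at or after startPosition's timestamp
	start := base.Add(1 * time.Minute).Format(layout)
	for _, n := range names {
		if n >= start {
			fmt.Println("read:", n)
		}
	}
}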
weed/mq/logstore/read_parquet_to_log.go (new file)
@@ -0,0 +1,162 @@
package logstore

import (
	"encoding/binary"
	"fmt"
	"github.com/parquet-go/parquet-go"
	"github.com/seaweedfs/seaweedfs/weed/filer"
	"github.com/seaweedfs/seaweedfs/weed/mq/schema"
	"github.com/seaweedfs/seaweedfs/weed/mq/topic"
	"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
	"github.com/seaweedfs/seaweedfs/weed/pb/mq_pb"
	"github.com/seaweedfs/seaweedfs/weed/util/chunk_cache"
	"github.com/seaweedfs/seaweedfs/weed/util/log_buffer"
	"google.golang.org/protobuf/proto"
	"io"
	"math"
	"strings"
)

var (
	chunkCache = chunk_cache.NewChunkCacheInMemory(256) // 256 entries, 8MB max per entry
)

func GenParquetReadFunc(filerClient filer_pb.FilerClient, t topic.Topic, p topic.Partition) log_buffer.LogReadFromDiskFuncType {
	partitionDir := topic.PartitionDir(t, p)

	lookupFileIdFn := filer.LookupFn(filerClient)

	// read topic conf from filer
	var topicConf *mq_pb.ConfigureTopicResponse
	var err error
	if err := filerClient.WithFilerClient(false, func(client filer_pb.SeaweedFilerClient) error {
		topicConf, err = t.ReadConfFile(client)
		return err
	}); err != nil {
		return nil
	}
	recordType := topicConf.GetRecordType()
	recordType = schema.NewRecordTypeBuilder(recordType).
		WithField(SW_COLUMN_NAME_TS, schema.TypeInt64).
		WithField(SW_COLUMN_NAME_KEY, schema.TypeBytes).
		RecordTypeEnd()

	parquetSchema, err := schema.ToParquetSchema(t.Name, recordType)
	if err != nil {
		return nil
	}
	parquetLevels, err := schema.ToParquetLevels(recordType)
	if err != nil {
		return nil
	}

	// eachFileFn reads a parquet file and calls eachLogEntryFn for each log entry
	eachFileFn := func(entry *filer_pb.Entry, eachLogEntryFn log_buffer.EachLogEntryFuncType, starTsNs, stopTsNs int64) (processedTsNs int64, err error) {
		// create readerAt for the parquet file
		fileSize := filer.FileSize(entry)
		visibleIntervals, _ := filer.NonOverlappingVisibleIntervals(lookupFileIdFn, entry.Chunks, 0, int64(fileSize))
		chunkViews := filer.ViewFromVisibleIntervals(visibleIntervals, 0, int64(fileSize))
		readerCache := filer.NewReaderCache(32, chunkCache, lookupFileIdFn)
		readerAt := filer.NewChunkReaderAtFromClient(readerCache, chunkViews, int64(fileSize))

		// create parquet reader
		parquetReader := parquet.NewReader(readerAt, parquetSchema)
		rows := make([]parquet.Row, 128)
		for {
			rowCount, readErr := parquetReader.ReadRows(rows)

			for i := 0; i < rowCount; i++ {
				row := rows[i]
				// convert parquet row to schema_pb.RecordValue
				recordValue, err := schema.ToRecordValue(recordType, parquetLevels, row)
				if err != nil {
					return processedTsNs, fmt.Errorf("ToRecordValue failed: %v", err)
				}
				processedTsNs = recordValue.Fields[SW_COLUMN_NAME_TS].GetInt64Value()
				if processedTsNs < starTsNs {
					continue
				}
				if stopTsNs != 0 && processedTsNs >= stopTsNs {
					return processedTsNs, nil
				}

				data, marshalErr := proto.Marshal(recordValue)
				if marshalErr != nil {
					return processedTsNs, fmt.Errorf("marshal record value: %v", marshalErr)
				}

				logEntry := &filer_pb.LogEntry{
					Key:  recordValue.Fields[SW_COLUMN_NAME_KEY].GetBytesValue(),
					TsNs: processedTsNs,
					Data: data,
				}

				// fmt.Printf(" parquet entry %s ts %v\n", string(logEntry.Key), time.Unix(0, logEntry.TsNs).UTC())

				if _, err = eachLogEntryFn(logEntry); err != nil {
					return processedTsNs, fmt.Errorf("process log entry %v: %v", logEntry, err)
				}
			}

			if readErr != nil {
				if readErr == io.EOF {
					return processedTsNs, nil
				}
				return processedTsNs, readErr
			}
		}
		return
	}

	return func(startPosition log_buffer.MessagePosition, stopTsNs int64, eachLogEntryFn log_buffer.EachLogEntryFuncType) (lastReadPosition log_buffer.MessagePosition, isDone bool, err error) {
		startFileName := startPosition.UTC().Format(topic.TIME_FORMAT)
		startTsNs := startPosition.Time.UnixNano()
		var processedTsNs int64

		err = filerClient.WithFilerClient(false, func(client filer_pb.SeaweedFilerClient) error {

			return filer_pb.SeaweedList(client, partitionDir, "", func(entry *filer_pb.Entry, isLast bool) error {
				if entry.IsDirectory {
					return nil
				}
				if !strings.HasSuffix(entry.Name, ".parquet") {
					return nil
				}
				if len(entry.Extended) == 0 {
					return nil
				}

				// read minTs from the parquet file
				minTsBytes := entry.Extended["min"]
				if len(minTsBytes) != 8 {
					return nil
				}
				minTsNs := int64(binary.BigEndian.Uint64(minTsBytes))

				// read max ts
				maxTsBytes := entry.Extended["max"]
				if len(maxTsBytes) != 8 {
					return nil
				}
				maxTsNs := int64(binary.BigEndian.Uint64(maxTsBytes))

				if stopTsNs != 0 && stopTsNs <= minTsNs {
					isDone = true
					return nil
				}

				if maxTsNs < startTsNs {
					return nil
				}

				if processedTsNs, err = eachFileFn(entry, eachLogEntryFn, startTsNs, stopTsNs); err != nil {
					return err
				}
				return nil

			}, startFileName, true, math.MaxInt32)
		})
		lastReadPosition = log_buffer.NewMessagePosition(processedTsNs, -2)
		return
	}
}
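The reader prunes whole parquet files without opening them by checking the "min"/"max" extended attributes, the 8-byte big-endian nanosecond timestamps written by saveParquetFileToPartitionDir. A minimal sketch of that round trip and the prune test, with hypothetical putTs/getTs helpers:

package main

import (
	"encoding/binary"
	"fmt"
)

// putTs encodes a nanosecond timestamp the way saveParquetFileToPartitionDir does.
func putTs(extended map[string][]byte, key string, tsNs int64) {
	b := make([]byte, 8)
	binary.BigEndian.PutUint64(b, uint64(tsNs))
	extended[key] = b
}

// getTs decodes it back; a missing or malformed value returns ok=false.
func getTs(extended map[string][]byte, key string) (tsNs int64, ok bool) {
	b := extended[key]
	if len(b) != 8 {
		return 0, false
	}
	return int64(binary.BigEndian.Uint64(b)), true
}

func main() {
	extended := map[string][]byte{}
	putTs(extended, "min", 1_000)
	putTs(extended, "max", 2_000)

	minTsNs, _ := getTs(extended, "min")
	maxTsNs, _ := getTs(extended, "max")

	startTsNs, stopTsNs := int64(2_500), int64(0)
	// same prune tests as the reader: skip the file when its whole range
	// is before the start, or when it begins at/after the stop timestamp
	skip := maxTsNs < startTsNs || (stopTsNs != 0 && stopTsNs <= minTsNs)
	fmt.Println("skip file:", skip) // true: file covers [1000,2000], start is 2500
}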
weed/shell/command_mq_topic_compact.go (new file)
@@ -0,0 +1,94 @@
package shell

import (
	"flag"
	"github.com/seaweedfs/seaweedfs/weed/filer_client"
	"github.com/seaweedfs/seaweedfs/weed/mq/logstore"
	"github.com/seaweedfs/seaweedfs/weed/mq/schema"
	"github.com/seaweedfs/seaweedfs/weed/mq/topic"
	"github.com/seaweedfs/seaweedfs/weed/operation"
	"github.com/seaweedfs/seaweedfs/weed/pb"
	"google.golang.org/grpc"
	"io"
	"time"
)

func init() {
	Commands = append(Commands, &commandMqTopicCompact{})
}

type commandMqTopicCompact struct {
}

func (c *commandMqTopicCompact) Name() string {
	return "mq.topic.compact"
}

func (c *commandMqTopicCompact) Help() string {
	return `compact the topic storage into parquet format

	Example:
		mq.topic.compact -namespace <namespace> -topic <topic_name> -timeAgo <time_ago>

`
}

func (c *commandMqTopicCompact) HasTag(tag CommandTag) bool {
	return ResourceHeavy == tag
}

func (c *commandMqTopicCompact) Do(args []string, commandEnv *CommandEnv, writer io.Writer) error {

	// parse parameters
	mqCommand := flag.NewFlagSet(c.Name(), flag.ContinueOnError)
	namespace := mqCommand.String("namespace", "", "namespace name")
	topicName := mqCommand.String("topic", "", "topic name")
	timeAgo := mqCommand.Duration("timeAgo", 2*time.Minute, "start time before now. \"300ms\", \"1.5h\" or \"2h45m\". Valid time units are \"ns\", \"us\" (or \"µs\"), \"ms\", \"s\", \"m\", \"h\"")
	replication := mqCommand.String("replication", "", "replication type")
	collection := mqCommand.String("collection", "", "optional collection name")
	dataCenter := mqCommand.String("dataCenter", "", "optional data center name")
	diskType := mqCommand.String("disk", "", "[hdd|ssd|<tag>] hard drive or solid state drive or any tag")
	maxMB := mqCommand.Int("maxMB", 4, "split files larger than the limit")

	if err := mqCommand.Parse(args); err != nil {
		return err
	}

	storagePreference := &operation.StoragePreference{
		Replication: *replication,
		Collection:  *collection,
		DataCenter:  *dataCenter,
		DiskType:    *diskType,
		MaxMB:       *maxMB,
	}

	// read topic configuration
	fca := &filer_client.FilerClientAccessor{
		GetFiler: func() pb.ServerAddress {
			return commandEnv.option.FilerAddress
		},
		GetGrpcDialOption: func() grpc.DialOption {
			return commandEnv.option.GrpcDialOption
		},
	}
	t := topic.NewTopic(*namespace, *topicName)
	topicConf, err := fca.ReadTopicConfFromFiler(t)
	if err != nil {
		return err
	}

	// get record type
	recordType := topicConf.GetRecordType()
	recordType = schema.NewRecordTypeBuilder(recordType).
		WithField(logstore.SW_COLUMN_NAME_TS, schema.TypeInt64).
		WithField(logstore.SW_COLUMN_NAME_KEY, schema.TypeBytes).
		RecordTypeEnd()

	// compact the topic partition versions
	if err = logstore.CompactTopicPartitions(commandEnv, t, *timeAgo, recordType, storagePreference); err != nil {
		return err
	}

	return nil

}
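A typical invocation from `weed shell` would look like the following (namespace, topic, and flag values here are illustrative):

	> mq.topic.compact -namespace my_ns -topic my_topic -timeAgo 2h -collection my_collection -disk hdd

Only files older than -timeAgo are considered for compaction (see collectTopicVersions and readAllLogFiles above), which keeps freshly written log files out of the compaction pass.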