You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 
 

173 lines
4.9 KiB

package iceberg
import (
"encoding/json"
"errors"
"github.com/apache/iceberg-go/table"
)
// statisticsUpdate is one pending change to a table's statistics list.
// parseCommitUpdates fills in exactly one of the two fields per value.
type statisticsUpdate struct {
	// set, when non-nil, is the statistics file to insert or replace,
	// keyed by its SnapshotID.
	set *table.StatisticsFile
	// remove, when non-nil, points at the snapshot ID whose statistics
	// entry is dropped.
	remove *int64
}
// ErrIncompleteSetStatistics is returned when a "set-statistics" update carries
// no nested statistics object and at least one of its required flat fields is
// missing or empty.
var ErrIncompleteSetStatistics = errors.New(
	"set-statistics requires snapshot-id, statistics-path, file-size-in-bytes, and file-footer-size-in-bytes",
)
// commitAction decodes only the "action" discriminator of a raw commit update,
// so the update can be routed before it is decoded in full.
type commitAction struct {
	// Action is the value of the update's "action" key.
	Action string `json:"action"`
}
// setStatisticsUpdate is the wire form of a "set-statistics" commit update.
//
// Two shapes are accepted: a nested "statistics" object, or the statistics
// fields spelled out flat on the update itself. asStatisticsFile resolves
// either shape into a table.StatisticsFile, preferring the nested object.
type setStatisticsUpdate struct {
	// Action is the update's "action" discriminator.
	Action string `json:"action"`

	// Flat form. The first four are required when Statistics is absent;
	// pointers let a missing key be told apart from an explicit zero.
	SnapshotID            *int64 `json:"snapshot-id,omitempty"`
	StatisticsPath        string `json:"statistics-path,omitempty"`
	FileSizeInBytes       *int64 `json:"file-size-in-bytes,omitempty"`
	FileFooterSizeInBytes *int64 `json:"file-footer-size-in-bytes,omitempty"`
	// KeyMetadata and BlobMetadata are optional in the flat form.
	KeyMetadata  *string              `json:"key-metadata,omitempty"`
	BlobMetadata []table.BlobMetadata `json:"blob-metadata,omitempty"`

	// Statistics is the nested form; when non-nil the flat fields are ignored.
	Statistics *table.StatisticsFile `json:"statistics,omitempty"`
}
// asStatisticsFile resolves the update into a statistics file.
//
// If the nested Statistics object is present it is returned directly (the same
// pointer, not a copy). Otherwise a new file is assembled from the flat fields,
// and ErrIncompleteSetStatistics is returned when snapshot-id, statistics-path,
// file-size-in-bytes or file-footer-size-in-bytes is missing or empty.
//
// In both cases a nil BlobMetadata is replaced by an empty slice — presumably
// so it encodes as a JSON array rather than null; confirm against
// table.StatisticsFile's encoding.
func (u *setStatisticsUpdate) asStatisticsFile() (*table.StatisticsFile, error) {
	// Nested form wins over the flat fields.
	if nested := u.Statistics; nested != nil {
		if nested.BlobMetadata == nil {
			nested.BlobMetadata = []table.BlobMetadata{}
		}
		return nested, nil
	}

	// Flat form: every required field must be supplied.
	switch {
	case u.SnapshotID == nil,
		u.StatisticsPath == "",
		u.FileSizeInBytes == nil,
		u.FileFooterSizeInBytes == nil:
		return nil, ErrIncompleteSetStatistics
	}

	blobs := u.BlobMetadata
	if blobs == nil {
		blobs = []table.BlobMetadata{}
	}

	return &table.StatisticsFile{
		SnapshotID:            *u.SnapshotID,
		StatisticsPath:        u.StatisticsPath,
		FileSizeInBytes:       *u.FileSizeInBytes,
		FileFooterSizeInBytes: *u.FileFooterSizeInBytes,
		KeyMetadata:           u.KeyMetadata,
		BlobMetadata:          blobs,
	}, nil
}
// removeStatisticsUpdate is the wire form of a "remove-statistics" commit
// update.
type removeStatisticsUpdate struct {
	// Action is the update's "action" discriminator.
	Action string `json:"action"`
	// SnapshotID identifies the snapshot whose statistics are removed.
	// It is a plain int64, so a payload without "snapshot-id" decodes as 0.
	SnapshotID int64 `json:"snapshot-id"`
}
// parseCommitUpdates splits raw commit updates into two groups.
//
// Updates whose "action" is "set-statistics" or "remove-statistics" are decoded
// here and returned, in input order, as statisticsUpdate values. Every other
// update is passed through untouched: the remaining raw messages are re-encoded
// as one JSON array and decoded into table.Updates.
//
// The returned table.Updates is nil when no pass-through updates remain. Any
// decoding failure, or an incomplete "set-statistics" update, aborts the whole
// parse and returns only the error.
func parseCommitUpdates(rawUpdates []json.RawMessage) (table.Updates, []statisticsUpdate, error) {
	var (
		passthrough = make([]json.RawMessage, 0, len(rawUpdates))
		statsOps    = make([]statisticsUpdate, 0)
	)

	for _, msg := range rawUpdates {
		// Peek at the discriminator only; the full decode depends on it.
		var head commitAction
		if err := json.Unmarshal(msg, &head); err != nil {
			return nil, nil, err
		}

		switch head.Action {
		case "set-statistics":
			var payload setStatisticsUpdate
			if err := json.Unmarshal(msg, &payload); err != nil {
				return nil, nil, err
			}
			file, err := payload.asStatisticsFile()
			if err != nil {
				return nil, nil, err
			}
			statsOps = append(statsOps, statisticsUpdate{set: file})

		case "remove-statistics":
			var payload removeStatisticsUpdate
			if err := json.Unmarshal(msg, &payload); err != nil {
				return nil, nil, err
			}
			// Give each update its own int64 to point at.
			id := new(int64)
			*id = payload.SnapshotID
			statsOps = append(statsOps, statisticsUpdate{remove: id})

		default:
			passthrough = append(passthrough, msg)
		}
	}

	// Leave the result nil unless something is left for table.Updates to decode.
	var updates table.Updates
	if len(passthrough) > 0 {
		encoded, err := json.Marshal(passthrough)
		if err != nil {
			return nil, nil, err
		}
		if err := json.Unmarshal(encoded, &updates); err != nil {
			return nil, nil, err
		}
	}

	return updates, statsOps, nil
}
// applyStatisticsUpdates applies set/remove statistics updates to a serialized
// table metadata document and returns the re-serialized document.
//
// With no updates, metadataBytes is returned unchanged. Otherwise the document
// is decoded as a JSON object whose top-level values are carried through as raw
// JSON; only the "statistics" key is rewritten.
//
// Statistics entries are keyed by snapshot ID. A set replaces the entry for its
// snapshot or adds a new one; a remove deletes the entry if present. Output
// order is the order in which each snapshot ID was first seen (existing entries
// first, then newly set ones), so a snapshot that is removed and set again in
// the same batch keeps its original position. If no entries remain, the
// "statistics" key is dropped from the document.
//
// An error is returned if the document or its "statistics" value cannot be
// decoded, if the document is JSON null rather than an object, or if encoding
// the result fails.
func applyStatisticsUpdates(metadataBytes []byte, updates []statisticsUpdate) ([]byte, error) {
	if len(updates) == 0 {
		return metadataBytes, nil
	}

	var metadata map[string]json.RawMessage
	if err := json.Unmarshal(metadataBytes, &metadata); err != nil {
		return nil, err
	}
	// JSON null decodes without error but leaves the map nil; writing the
	// "statistics" key below would then panic, so reject it here instead.
	if metadata == nil {
		return nil, errors.New("table metadata must be a JSON object, got null")
	}

	var statistics []table.StatisticsFile
	if rawStatistics, ok := metadata["statistics"]; ok && len(rawStatistics) > 0 {
		if err := json.Unmarshal(rawStatistics, &statistics); err != nil {
			return nil, err
		}
	}

	// statisticsBySnapshot holds the current entry per snapshot, while
	// orderedSnapshotIDs/inOrder remember first-seen order so the output is
	// deterministic despite map iteration being unordered.
	statisticsBySnapshot := make(map[int64]table.StatisticsFile, len(statistics))
	orderedSnapshotIDs := make([]int64, 0, len(statistics))
	inOrder := make(map[int64]bool, len(statistics))
	for _, stat := range statistics {
		// A later duplicate overwrites the earlier entry but keeps its slot.
		statisticsBySnapshot[stat.SnapshotID] = stat
		if !inOrder[stat.SnapshotID] {
			orderedSnapshotIDs = append(orderedSnapshotIDs, stat.SnapshotID)
			inOrder[stat.SnapshotID] = true
		}
	}

	for _, update := range updates {
		if update.set != nil {
			statisticsBySnapshot[update.set.SnapshotID] = *update.set
			if !inOrder[update.set.SnapshotID] {
				orderedSnapshotIDs = append(orderedSnapshotIDs, update.set.SnapshotID)
				inOrder[update.set.SnapshotID] = true
			}
			continue
		}
		if update.remove != nil {
			// Only the map entry goes; the ID stays in orderedSnapshotIDs and
			// is skipped below unless a later set restores it.
			delete(statisticsBySnapshot, *update.remove)
		}
	}

	statistics = make([]table.StatisticsFile, 0, len(statisticsBySnapshot))
	for _, snapshotID := range orderedSnapshotIDs {
		stat, ok := statisticsBySnapshot[snapshotID]
		if !ok {
			continue
		}
		statistics = append(statistics, stat)
	}

	if len(statistics) == 0 {
		delete(metadata, "statistics")
	} else {
		data, err := json.Marshal(statistics)
		if err != nil {
			return nil, err
		}
		metadata["statistics"] = data
	}

	return json.Marshal(metadata)
}