Browse Source

fmt

pull/7185/head
chrislu 1 month ago
parent
commit
8498240460
  1. 9
      weed/mq/logstore/log_to_parquet.go
  2. 22
      weed/query/engine/parquet_scanner.go

9
weed/mq/logstore/log_to_parquet.go

@ -4,6 +4,11 @@ import (
"context" "context"
"encoding/binary" "encoding/binary"
"fmt" "fmt"
"io"
"os"
"strings"
"time"
"github.com/parquet-go/parquet-go" "github.com/parquet-go/parquet-go"
"github.com/parquet-go/parquet-go/compress/zstd" "github.com/parquet-go/parquet-go/compress/zstd"
"github.com/seaweedfs/seaweedfs/weed/filer" "github.com/seaweedfs/seaweedfs/weed/filer"
@ -16,10 +21,6 @@ import (
util_http "github.com/seaweedfs/seaweedfs/weed/util/http" util_http "github.com/seaweedfs/seaweedfs/weed/util/http"
"github.com/seaweedfs/seaweedfs/weed/util/log_buffer" "github.com/seaweedfs/seaweedfs/weed/util/log_buffer"
"google.golang.org/protobuf/proto" "google.golang.org/protobuf/proto"
"io"
"os"
"strings"
"time"
) )
const ( const (

22
weed/query/engine/parquet_scanner.go

@ -29,10 +29,10 @@ const (
// 3. System columns (_ts_ns, _key) are added to user schema // 3. System columns (_ts_ns, _key) are added to user schema
// 4. Predicate pushdown is used for efficient scanning // 4. Predicate pushdown is used for efficient scanning
type ParquetScanner struct { type ParquetScanner struct {
filerClient filer_pb.FilerClient
chunkCache chunk_cache.ChunkCache
topic topic.Topic
recordSchema *schema_pb.RecordType
filerClient filer_pb.FilerClient
chunkCache chunk_cache.ChunkCache
topic topic.Topic
recordSchema *schema_pb.RecordType
parquetLevels *schema.ParquetLevels parquetLevels *schema.ParquetLevels
} }
@ -47,7 +47,7 @@ func NewParquetScanner(filerClient filer_pb.FilerClient, namespace, topicName st
// Create topic reference // Create topic reference
t := topic.Topic{ t := topic.Topic{
Namespace: namespace, Namespace: namespace,
Name: topicName,
Name: topicName,
} }
// Read topic configuration to get schema // Read topic configuration to get schema
@ -257,27 +257,27 @@ func (ps *ParquetScanner) generateSampleData(options ScanOptions) []ScanResult {
sampleData := []ScanResult{ sampleData := []ScanResult{
{ {
Values: map[string]*schema_pb.Value{ Values: map[string]*schema_pb.Value{
"user_id": {Kind: &schema_pb.Value_Int32Value{Int32Value: 1001}},
"user_id": {Kind: &schema_pb.Value_Int32Value{Int32Value: 1001}},
"event_type": {Kind: &schema_pb.Value_StringValue{StringValue: "login"}}, "event_type": {Kind: &schema_pb.Value_StringValue{StringValue: "login"}},
"data": {Kind: &schema_pb.Value_StringValue{StringValue: `{"ip": "192.168.1.1"}`}},
"data": {Kind: &schema_pb.Value_StringValue{StringValue: `{"ip": "192.168.1.1"}`}},
}, },
Timestamp: now - 3600000000000, // 1 hour ago Timestamp: now - 3600000000000, // 1 hour ago
Key: []byte("user-1001"), Key: []byte("user-1001"),
}, },
{ {
Values: map[string]*schema_pb.Value{ Values: map[string]*schema_pb.Value{
"user_id": {Kind: &schema_pb.Value_Int32Value{Int32Value: 1002}},
"user_id": {Kind: &schema_pb.Value_Int32Value{Int32Value: 1002}},
"event_type": {Kind: &schema_pb.Value_StringValue{StringValue: "page_view"}}, "event_type": {Kind: &schema_pb.Value_StringValue{StringValue: "page_view"}},
"data": {Kind: &schema_pb.Value_StringValue{StringValue: `{"page": "/dashboard"}`}},
"data": {Kind: &schema_pb.Value_StringValue{StringValue: `{"page": "/dashboard"}`}},
}, },
Timestamp: now - 1800000000000, // 30 minutes ago Timestamp: now - 1800000000000, // 30 minutes ago
Key: []byte("user-1002"), Key: []byte("user-1002"),
}, },
{ {
Values: map[string]*schema_pb.Value{ Values: map[string]*schema_pb.Value{
"user_id": {Kind: &schema_pb.Value_Int32Value{Int32Value: 1001}},
"user_id": {Kind: &schema_pb.Value_Int32Value{Int32Value: 1001}},
"event_type": {Kind: &schema_pb.Value_StringValue{StringValue: "logout"}}, "event_type": {Kind: &schema_pb.Value_StringValue{StringValue: "logout"}},
"data": {Kind: &schema_pb.Value_StringValue{StringValue: `{"session_duration": 3600}`}},
"data": {Kind: &schema_pb.Value_StringValue{StringValue: `{"session_duration": 3600}`}},
}, },
Timestamp: now - 900000000000, // 15 minutes ago Timestamp: now - 900000000000, // 15 minutes ago
Key: []byte("user-1001"), Key: []byte("user-1001"),

Loading…
Cancel
Save