You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
487 lines
16 KiB
487 lines
16 KiB
package iceberg
|
|
|
|
import (
|
|
"encoding/json"
|
|
"errors"
|
|
"fmt"
|
|
"net/http"
|
|
"os"
|
|
"path"
|
|
"strings"
|
|
|
|
"github.com/apache/iceberg-go"
|
|
"github.com/apache/iceberg-go/table"
|
|
"github.com/google/uuid"
|
|
"github.com/gorilla/mux"
|
|
"github.com/seaweedfs/seaweedfs/weed/glog"
|
|
"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
|
|
"github.com/seaweedfs/seaweedfs/weed/s3api/s3_constants"
|
|
"github.com/seaweedfs/seaweedfs/weed/s3api/s3tables"
|
|
)
|
|
|
|
var errTableNameRequired = errors.New("table name is required")
|
|
|
|
func validateCreateTableRequest(req CreateTableRequest) error {
|
|
if req.Name == "" {
|
|
return errTableNameRequired
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// handleListTables lists tables in a namespace.
|
|
func (s *Server) handleListTables(w http.ResponseWriter, r *http.Request) {
|
|
vars := mux.Vars(r)
|
|
namespace := parseNamespace(vars["namespace"])
|
|
if len(namespace) == 0 {
|
|
writeError(w, http.StatusBadRequest, "BadRequestException", "Namespace is required")
|
|
return
|
|
}
|
|
|
|
bucketName := getBucketFromPrefix(r)
|
|
bucketARN := buildTableBucketARN(bucketName)
|
|
|
|
// Extract identity from context
|
|
identityName := s3_constants.GetIdentityNameFromContext(r)
|
|
|
|
pageToken, pageSize, err := parsePagination(r)
|
|
if err != nil {
|
|
writeError(w, http.StatusBadRequest, "BadRequestException", err.Error())
|
|
return
|
|
}
|
|
|
|
listReq := &s3tables.ListTablesRequest{
|
|
TableBucketARN: bucketARN,
|
|
Namespace: namespace,
|
|
ContinuationToken: pageToken,
|
|
MaxTables: pageSize,
|
|
}
|
|
var listResp s3tables.ListTablesResponse
|
|
|
|
err = s.filerClient.WithFilerClient(false, func(client filer_pb.SeaweedFilerClient) error {
|
|
mgrClient := s3tables.NewManagerClient(client)
|
|
return s.tablesManager.Execute(r.Context(), mgrClient, "ListTables", listReq, &listResp, identityName)
|
|
})
|
|
|
|
if err != nil {
|
|
if strings.Contains(err.Error(), "not found") {
|
|
writeError(w, http.StatusNotFound, "NoSuchNamespaceException", fmt.Sprintf("Namespace does not exist: %v", namespace))
|
|
return
|
|
}
|
|
glog.V(1).Infof("Iceberg: ListTables error: %v", err)
|
|
writeError(w, http.StatusInternalServerError, "InternalServerError", err.Error())
|
|
return
|
|
}
|
|
|
|
// Convert to Iceberg format
|
|
identifiers := make([]TableIdentifier, 0, len(listResp.Tables))
|
|
for _, t := range listResp.Tables {
|
|
identifiers = append(identifiers, TableIdentifier{
|
|
Namespace: namespace,
|
|
Name: t.Name,
|
|
})
|
|
}
|
|
|
|
result := ListTablesResponse{
|
|
NextPageToken: listResp.ContinuationToken,
|
|
Identifiers: identifiers,
|
|
}
|
|
writeJSON(w, http.StatusOK, result)
|
|
}
|
|
|
|
// handleCreateTable creates a new table.
|
|
func (s *Server) handleCreateTable(w http.ResponseWriter, r *http.Request) {
|
|
vars := mux.Vars(r)
|
|
namespace := parseNamespace(vars["namespace"])
|
|
if len(namespace) == 0 {
|
|
writeError(w, http.StatusBadRequest, "BadRequestException", "Namespace is required")
|
|
return
|
|
}
|
|
|
|
var req CreateTableRequest
|
|
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
|
|
writeError(w, http.StatusBadRequest, "BadRequestException", "Invalid request body")
|
|
return
|
|
}
|
|
|
|
if err := validateCreateTableRequest(req); err != nil {
|
|
writeError(w, http.StatusBadRequest, "BadRequestException", err.Error())
|
|
return
|
|
}
|
|
if req.StageCreate && !isStageCreateEnabled() {
|
|
writeError(w, http.StatusNotImplemented, "NotImplementedException", "stage-create is disabled")
|
|
return
|
|
}
|
|
|
|
bucketName := getBucketFromPrefix(r)
|
|
bucketARN := buildTableBucketARN(bucketName)
|
|
|
|
// Extract identity from context
|
|
identityName := s3_constants.GetIdentityNameFromContext(r)
|
|
|
|
// Generate UUID for the new table
|
|
tableUUID := uuid.New()
|
|
tablePath := path.Join(encodeNamespace(namespace), req.Name)
|
|
location := strings.TrimSuffix(req.Location, "/")
|
|
if location == "" {
|
|
if req.Properties != nil {
|
|
if warehouse := strings.TrimSuffix(req.Properties["warehouse"], "/"); warehouse != "" {
|
|
location = fmt.Sprintf("%s/%s", warehouse, tablePath)
|
|
}
|
|
}
|
|
if location == "" {
|
|
if warehouse := strings.TrimSuffix(os.Getenv("ICEBERG_WAREHOUSE"), "/"); warehouse != "" {
|
|
location = fmt.Sprintf("%s/%s", warehouse, tablePath)
|
|
}
|
|
}
|
|
if location == "" {
|
|
location = fmt.Sprintf("s3://%s/%s", bucketName, tablePath)
|
|
}
|
|
} else {
|
|
parsedBucket, parsedPath, err := parseS3Location(location)
|
|
if err != nil {
|
|
writeError(w, http.StatusBadRequest, "BadRequestException", "Invalid table location: "+err.Error())
|
|
return
|
|
}
|
|
if parsedPath == "" {
|
|
location = fmt.Sprintf("s3://%s/%s", parsedBucket, tablePath)
|
|
}
|
|
}
|
|
|
|
// Build proper Iceberg table metadata using iceberg-go types
|
|
metadata := newTableMetadata(tableUUID, location, req.Schema, req.PartitionSpec, req.WriteOrder, req.Properties)
|
|
if metadata == nil {
|
|
writeError(w, http.StatusInternalServerError, "InternalServerError", "Failed to build table metadata")
|
|
return
|
|
}
|
|
|
|
// Serialize metadata to JSON
|
|
metadataBytes, err := json.Marshal(metadata)
|
|
if err != nil {
|
|
writeError(w, http.StatusInternalServerError, "InternalServerError", "Failed to serialize metadata: "+err.Error())
|
|
return
|
|
}
|
|
|
|
tableName := req.Name
|
|
metadataFileName := "v1.metadata.json" // Initial version is always 1
|
|
metadataLocation := fmt.Sprintf("%s/metadata/%s", location, metadataFileName)
|
|
metadataBucket, metadataPath, err := parseS3Location(location)
|
|
if err != nil {
|
|
writeError(w, http.StatusInternalServerError, "InternalServerError", "Invalid table location: "+err.Error())
|
|
return
|
|
}
|
|
|
|
// Stage-create persists metadata in the internal staged area and skips S3Tables registration.
|
|
if req.StageCreate {
|
|
stagedTablePath := stageCreateStagedTablePath(namespace, tableName, tableUUID)
|
|
if err := s.saveMetadataFile(r.Context(), metadataBucket, stagedTablePath, metadataFileName, metadataBytes); err != nil {
|
|
writeError(w, http.StatusInternalServerError, "InternalServerError", "Failed to save staged metadata file: "+err.Error())
|
|
return
|
|
}
|
|
stagedMetadataLocation := fmt.Sprintf("s3://%s/%s/metadata/%s", metadataBucket, stagedTablePath, metadataFileName)
|
|
if markerErr := s.writeStageCreateMarker(r.Context(), bucketName, namespace, tableName, tableUUID, location, stagedMetadataLocation); markerErr != nil {
|
|
glog.V(1).Infof("Iceberg: failed to persist stage-create marker for %s.%s: %v", encodeNamespace(namespace), tableName, markerErr)
|
|
}
|
|
result := LoadTableResult{
|
|
MetadataLocation: metadataLocation,
|
|
Metadata: metadata,
|
|
Config: make(iceberg.Properties),
|
|
}
|
|
writeJSON(w, http.StatusOK, result)
|
|
return
|
|
}
|
|
if err := s.saveMetadataFile(r.Context(), metadataBucket, metadataPath, metadataFileName, metadataBytes); err != nil {
|
|
writeError(w, http.StatusInternalServerError, "InternalServerError", "Failed to save metadata file: "+err.Error())
|
|
return
|
|
}
|
|
|
|
// Use S3 Tables manager to create table
|
|
createReq := &s3tables.CreateTableRequest{
|
|
TableBucketARN: bucketARN,
|
|
Namespace: namespace,
|
|
Name: tableName,
|
|
Format: "ICEBERG",
|
|
Metadata: &s3tables.TableMetadata{
|
|
Iceberg: &s3tables.IcebergMetadata{
|
|
TableUUID: tableUUID.String(),
|
|
},
|
|
FullMetadata: metadataBytes,
|
|
},
|
|
MetadataLocation: metadataLocation,
|
|
MetadataVersion: 1,
|
|
}
|
|
var createResp s3tables.CreateTableResponse
|
|
|
|
err = s.filerClient.WithFilerClient(false, func(client filer_pb.SeaweedFilerClient) error {
|
|
mgrClient := s3tables.NewManagerClient(client)
|
|
return s.tablesManager.Execute(r.Context(), mgrClient, "CreateTable", createReq, &createResp, identityName)
|
|
})
|
|
|
|
if err != nil {
|
|
if tableErr, ok := err.(*s3tables.S3TablesError); ok && tableErr.Type == s3tables.ErrCodeTableAlreadyExists {
|
|
getReq := &s3tables.GetTableRequest{
|
|
TableBucketARN: bucketARN,
|
|
Namespace: namespace,
|
|
Name: tableName,
|
|
}
|
|
var getResp s3tables.GetTableResponse
|
|
getErr := s.filerClient.WithFilerClient(false, func(client filer_pb.SeaweedFilerClient) error {
|
|
mgrClient := s3tables.NewManagerClient(client)
|
|
return s.tablesManager.Execute(r.Context(), mgrClient, "GetTable", getReq, &getResp, identityName)
|
|
})
|
|
if getErr != nil {
|
|
writeError(w, http.StatusConflict, "AlreadyExistsException", err.Error())
|
|
return
|
|
}
|
|
result := buildLoadTableResult(getResp, bucketName, namespace, tableName)
|
|
writeJSON(w, http.StatusOK, result)
|
|
return
|
|
}
|
|
if strings.Contains(err.Error(), "already exists") {
|
|
getReq := &s3tables.GetTableRequest{
|
|
TableBucketARN: bucketARN,
|
|
Namespace: namespace,
|
|
Name: tableName,
|
|
}
|
|
var getResp s3tables.GetTableResponse
|
|
getErr := s.filerClient.WithFilerClient(false, func(client filer_pb.SeaweedFilerClient) error {
|
|
mgrClient := s3tables.NewManagerClient(client)
|
|
return s.tablesManager.Execute(r.Context(), mgrClient, "GetTable", getReq, &getResp, identityName)
|
|
})
|
|
if getErr != nil {
|
|
writeError(w, http.StatusConflict, "AlreadyExistsException", err.Error())
|
|
return
|
|
}
|
|
result := buildLoadTableResult(getResp, bucketName, namespace, tableName)
|
|
writeJSON(w, http.StatusOK, result)
|
|
return
|
|
}
|
|
glog.V(1).Infof("Iceberg: CreateTable error: %v", err)
|
|
writeError(w, http.StatusInternalServerError, "InternalServerError", err.Error())
|
|
return
|
|
}
|
|
|
|
// Use returned location if available, otherwise fallback to local one
|
|
finalLocation := createResp.MetadataLocation
|
|
if finalLocation == "" {
|
|
finalLocation = metadataLocation
|
|
}
|
|
if markerErr := s.deleteStageCreateMarkers(r.Context(), bucketName, namespace, tableName); markerErr != nil {
|
|
glog.V(1).Infof("Iceberg: failed to cleanup stage-create markers for %s.%s after create: %v", encodeNamespace(namespace), tableName, markerErr)
|
|
}
|
|
|
|
result := LoadTableResult{
|
|
MetadataLocation: finalLocation,
|
|
Metadata: metadata,
|
|
Config: make(iceberg.Properties),
|
|
}
|
|
writeJSON(w, http.StatusOK, result)
|
|
}
|
|
|
|
// handleLoadTable loads table metadata.
|
|
func (s *Server) handleLoadTable(w http.ResponseWriter, r *http.Request) {
|
|
vars := mux.Vars(r)
|
|
namespace := parseNamespace(vars["namespace"])
|
|
tableName := vars["table"]
|
|
|
|
if len(namespace) == 0 || tableName == "" {
|
|
writeError(w, http.StatusBadRequest, "BadRequestException", "Namespace and table name are required")
|
|
return
|
|
}
|
|
|
|
bucketName := getBucketFromPrefix(r)
|
|
bucketARN := buildTableBucketARN(bucketName)
|
|
|
|
// Extract identity from context
|
|
identityName := s3_constants.GetIdentityNameFromContext(r)
|
|
|
|
getReq := &s3tables.GetTableRequest{
|
|
TableBucketARN: bucketARN,
|
|
Namespace: namespace,
|
|
Name: tableName,
|
|
}
|
|
var getResp s3tables.GetTableResponse
|
|
|
|
err := s.filerClient.WithFilerClient(false, func(client filer_pb.SeaweedFilerClient) error {
|
|
mgrClient := s3tables.NewManagerClient(client)
|
|
return s.tablesManager.Execute(r.Context(), mgrClient, "GetTable", getReq, &getResp, identityName)
|
|
})
|
|
|
|
if err != nil {
|
|
|
|
if strings.Contains(err.Error(), "not found") {
|
|
writeError(w, http.StatusNotFound, "NoSuchTableException", fmt.Sprintf("Table does not exist: %s", tableName))
|
|
return
|
|
}
|
|
glog.V(1).Infof("Iceberg: LoadTable error: %v", err)
|
|
writeError(w, http.StatusInternalServerError, "InternalServerError", err.Error())
|
|
return
|
|
}
|
|
|
|
result := buildLoadTableResult(getResp, bucketName, namespace, tableName)
|
|
writeJSON(w, http.StatusOK, result)
|
|
}
|
|
|
|
func buildLoadTableResult(getResp s3tables.GetTableResponse, bucketName string, namespace []string, tableName string) LoadTableResult {
|
|
location := tableLocationFromMetadataLocation(getResp.MetadataLocation)
|
|
if location == "" {
|
|
location = fmt.Sprintf("s3://%s/%s/%s", bucketName, encodeNamespace(namespace), tableName)
|
|
}
|
|
tableUUID := uuid.Nil
|
|
if getResp.Metadata != nil && getResp.Metadata.Iceberg != nil && getResp.Metadata.Iceberg.TableUUID != "" {
|
|
if parsed, err := uuid.Parse(getResp.Metadata.Iceberg.TableUUID); err == nil {
|
|
tableUUID = parsed
|
|
}
|
|
}
|
|
// Use Nil UUID if not found in storage (legacy table)
|
|
// Stability is guaranteed by not generating random UUIDs on read
|
|
|
|
var metadata table.Metadata
|
|
if getResp.Metadata != nil && len(getResp.Metadata.FullMetadata) > 0 {
|
|
var err error
|
|
metadata, err = table.ParseMetadataBytes(getResp.Metadata.FullMetadata)
|
|
if err != nil {
|
|
glog.Warningf("Iceberg: Failed to parse persisted metadata for %s: %v", tableName, err)
|
|
// Attempt to reconstruct from IcebergMetadata if available, otherwise synthetic
|
|
// TODO: Extract schema/spec from getResp.Metadata.Iceberg if FullMetadata fails but partial info exists?
|
|
// For now, fallback to empty metadata
|
|
metadata = newTableMetadata(tableUUID, location, nil, nil, nil, nil)
|
|
}
|
|
} else {
|
|
// No full metadata, create synthetic
|
|
// TODO: If we had stored schema in IcebergMetadata, we would pass it here
|
|
metadata = newTableMetadata(tableUUID, location, nil, nil, nil, nil)
|
|
}
|
|
|
|
return LoadTableResult{
|
|
MetadataLocation: getResp.MetadataLocation,
|
|
Metadata: metadata,
|
|
Config: make(iceberg.Properties),
|
|
}
|
|
}
|
|
|
|
// handleTableExists checks if a table exists.
|
|
func (s *Server) handleTableExists(w http.ResponseWriter, r *http.Request) {
|
|
vars := mux.Vars(r)
|
|
namespace := parseNamespace(vars["namespace"])
|
|
tableName := vars["table"]
|
|
|
|
if len(namespace) == 0 || tableName == "" {
|
|
w.WriteHeader(http.StatusBadRequest)
|
|
return
|
|
}
|
|
|
|
bucketName := getBucketFromPrefix(r)
|
|
bucketARN := buildTableBucketARN(bucketName)
|
|
|
|
// Extract identity from context
|
|
identityName := s3_constants.GetIdentityNameFromContext(r)
|
|
|
|
getReq := &s3tables.GetTableRequest{
|
|
TableBucketARN: bucketARN,
|
|
Namespace: namespace,
|
|
Name: tableName,
|
|
}
|
|
var getResp s3tables.GetTableResponse
|
|
|
|
err := s.filerClient.WithFilerClient(false, func(client filer_pb.SeaweedFilerClient) error {
|
|
mgrClient := s3tables.NewManagerClient(client)
|
|
return s.tablesManager.Execute(r.Context(), mgrClient, "GetTable", getReq, &getResp, identityName)
|
|
})
|
|
|
|
if err != nil {
|
|
w.WriteHeader(http.StatusNotFound)
|
|
return
|
|
}
|
|
w.WriteHeader(http.StatusNoContent)
|
|
}
|
|
|
|
// handleDropTable deletes a table.
|
|
func (s *Server) handleDropTable(w http.ResponseWriter, r *http.Request) {
|
|
vars := mux.Vars(r)
|
|
namespace := parseNamespace(vars["namespace"])
|
|
tableName := vars["table"]
|
|
|
|
if len(namespace) == 0 || tableName == "" {
|
|
writeError(w, http.StatusBadRequest, "BadRequestException", "Namespace and table name are required")
|
|
return
|
|
}
|
|
|
|
bucketName := getBucketFromPrefix(r)
|
|
bucketARN := buildTableBucketARN(bucketName)
|
|
|
|
// Extract identity from context
|
|
identityName := s3_constants.GetIdentityNameFromContext(r)
|
|
|
|
deleteReq := &s3tables.DeleteTableRequest{
|
|
TableBucketARN: bucketARN,
|
|
Namespace: namespace,
|
|
Name: tableName,
|
|
}
|
|
|
|
err := s.filerClient.WithFilerClient(false, func(client filer_pb.SeaweedFilerClient) error {
|
|
mgrClient := s3tables.NewManagerClient(client)
|
|
return s.tablesManager.Execute(r.Context(), mgrClient, "DeleteTable", deleteReq, nil, identityName)
|
|
})
|
|
|
|
if err != nil {
|
|
if strings.Contains(err.Error(), "not found") {
|
|
writeError(w, http.StatusNotFound, "NoSuchTableException", fmt.Sprintf("Table does not exist: %s", tableName))
|
|
return
|
|
}
|
|
glog.V(1).Infof("Iceberg: DropTable error: %v", err)
|
|
writeError(w, http.StatusInternalServerError, "InternalServerError", err.Error())
|
|
return
|
|
}
|
|
|
|
w.WriteHeader(http.StatusNoContent)
|
|
}
|
|
|
|
// newTableMetadata creates a new table.Metadata object with the given parameters.
|
|
// Uses iceberg-go's MetadataBuilder pattern for proper spec compliance.
|
|
func newTableMetadata(
|
|
tableUUID uuid.UUID,
|
|
location string,
|
|
schema *iceberg.Schema,
|
|
partitionSpec *iceberg.PartitionSpec,
|
|
sortOrder *table.SortOrder,
|
|
props iceberg.Properties,
|
|
) table.Metadata {
|
|
// Add schema - use provided or create empty schema
|
|
var s *iceberg.Schema
|
|
if schema != nil {
|
|
s = schema
|
|
} else {
|
|
s = iceberg.NewSchema(0)
|
|
}
|
|
|
|
// Add partition spec
|
|
var pSpec *iceberg.PartitionSpec
|
|
if partitionSpec != nil {
|
|
pSpec = partitionSpec
|
|
} else {
|
|
unpartitioned := iceberg.NewPartitionSpecID(0)
|
|
pSpec = &unpartitioned
|
|
}
|
|
|
|
// Add sort order
|
|
var so table.SortOrder
|
|
if sortOrder != nil {
|
|
so = *sortOrder
|
|
} else {
|
|
so = table.UnsortedSortOrder
|
|
}
|
|
|
|
// Create properties map if nil
|
|
if props == nil {
|
|
props = make(iceberg.Properties)
|
|
}
|
|
|
|
// Create metadata directly using the constructor which ensures spec compliance for V2
|
|
metadata, err := table.NewMetadataWithUUID(s, pSpec, so, location, props, tableUUID)
|
|
if err != nil {
|
|
glog.Errorf("Failed to create metadata: %v", err)
|
|
return nil
|
|
}
|
|
|
|
return metadata
|
|
}
|