package postgres3
/* * Copyright 2022 Splunk Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0
* * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */
import ( "context" "database/sql" "fmt" "os" "path" "strings" "sync" "time"
"github.com/lib/pq" _ "github.com/lib/pq" "github.com/seaweedfs/seaweedfs/weed/filer" "github.com/seaweedfs/seaweedfs/weed/filer/abstract_sql" "github.com/seaweedfs/seaweedfs/weed/glog" "github.com/seaweedfs/seaweedfs/weed/pb/filer_pb" "github.com/seaweedfs/seaweedfs/weed/util" )
const (
	// CONNECTION_URL_PATTERN is the fmt template for the base connection
	// string; its verbs are filled with host, port and sslmode, in that order.
	CONNECTION_URL_PATTERN = "host=%s port=%d sslmode=%s connect_timeout=30"

	// createTablePattern creates the entry table (named by %s) if it does not
	// exist yet: key is the primary key, prefixes is a bigint array, meta is bytea.
	createTablePattern = `CREATE TABLE IF NOT EXISTS "%s" ( key varchar(65535) PRIMARY KEY, name varchar(65535), prefixes bigint[], meta bytea )`
	// createTableIndexPattern adds a GIN index over the prefixes column of table %s.
	createTableIndexPattern = `CREATE INDEX on "%s" USING gin (prefixes);`
	// deleteTablePattern drops table %s.
	deleteTablePattern = `DROP TABLE "%s";`
	// insertEntryPattern upserts a row: on a key conflict only meta is replaced.
	insertEntryPattern = `INSERT INTO "%s" (key, name, prefixes, meta) VALUES ($1, $2, $3, $4) ON CONFLICT (key) DO UPDATE SET meta = EXCLUDED.meta;`
	// findEntryPattern fetches the meta column of the row whose key is $1.
	findEntryPattern = `SELECT meta FROM "%s" WHERE key = $1`
	// deleteEntryPattern removes the row whose key is $1.
	deleteEntryPattern = `DELETE FROM "%s" WHERE key = $1`
	// listEntryQueryPattern is the listing query template. It takes the table
	// name twice (two %s verbs) and unions two sub-selects: stored rows
	// (isdir = false) and distinct path segments derived with split_part
	// (isdir = true, NULL meta). __COMPARISON__ is a placeholder for the
	// operator applied to name in the first sub-select; the second sub-select
	// always compares dir with a strict ">".
	listEntryQueryPattern = `SELECT key, name, isdir, meta FROM ( SELECT key, name, false as isdir, meta FROM "%s" WHERE prefixes @> $1 AND cardinality(prefixes) < $5 AND name __COMPARISON__ $3 AND name LIKE $4 ORDER BY key ASC LIMIT $6 ) s1 UNION ( SELECT dir, dir, true isdir, NULL::bytea meta FROM ( SELECT DISTINCT split_part(key, '/', $2) AS dir FROM "%s" WHERE prefixes @> $1 AND cardinality(prefixes) > $5 - 1 ORDER BY dir ASC ) t1 WHERE t1.dir > $3 AND t1.dir LIKE $4 ORDER BY dir ASC ) ORDER BY name ASC LIMIT $6`
	// deleteFolderChildrenPattern removes rows whose prefixes contain the array
	// $1 and whose key matches the LIKE pattern $2.
	deleteFolderChildrenPattern = `DELETE FROM "%s" WHERE prefixes @> $1 and key like $2`
)
var (
	// listEntryExclusivePattern is listEntryQueryPattern with the
	// __COMPARISON__ placeholder replaced by ">"; it is assigned in init.
	listEntryExclusivePattern string
	// listEntryInclusivePattern is listEntryQueryPattern with the
	// __COMPARISON__ placeholder replaced by ">="; it is assigned in init.
	listEntryInclusivePattern string
)
// Compile-time assertion that *Postgres3Store satisfies filer.BucketAware.
var _ filer.BucketAware = (*Postgres3Store)(nil)
func init() { filer.Stores = append(filer.Stores, &Postgres3Store{})
listEntryExclusivePattern = strings.ReplaceAll(listEntryQueryPattern, "__COMPARISON__", ">") listEntryInclusivePattern = strings.ReplaceAll(listEntryQueryPattern, "__COMPARISON__", ">=") }
// Postgres3Store is a filer store that keeps entries in PostgreSQL tables
// through database/sql.
type Postgres3Store struct {
	// DB is the connection pool opened by initialize.
	DB *sql.DB
	// SupportBucketTable enables one table per bucket for paths under /buckets/.
	SupportBucketTable bool
	// dbs records the buckets whose table has already been created.
	dbs map[string]bool
	// dbsLock guards dbs.
	dbsLock sync.Mutex
}
// GetName reports the identifier of this store implementation.
func (store *Postgres3Store) GetName() string {
	const storeName = "postgres3"
	return storeName
}
// Initialize reads the connection settings stored under prefix in
// configuration and forwards them to initialize.
func (store *Postgres3Store) Initialize(configuration util.Configuration, prefix string) error {
	// Small accessors so every key is visibly looked up under the same prefix.
	str := func(key string) string { return configuration.GetString(prefix + key) }
	num := func(key string) int { return configuration.GetInt(prefix + key) }

	return store.initialize(
		str("createTable"),
		str("username"),
		str("password"),
		str("hostname"),
		num("port"),
		str("database"),
		str("schema"),
		str("sslmode"),
		num("connection_max_idle"),
		num("connection_max_open"),
		num("connection_max_lifetime_seconds"),
	)
}
func (store *Postgres3Store) initialize(createTable, user, password, hostname string, port int, database, schema, sslmode string, maxIdle, maxOpen, maxLifetimeSeconds int) (err error) { store.SupportBucketTable = true sqlUrl := fmt.Sprintf(CONNECTION_URL_PATTERN, hostname, port, sslmode) if user != "" { sqlUrl += " user=" + user } adaptedSqlUrl := sqlUrl if password != "" { sqlUrl += " password=" + password adaptedSqlUrl += " password=ADAPTED" } if database != "" { sqlUrl += " dbname=" + database adaptedSqlUrl += " dbname=" + database } if schema != "" { sqlUrl += " search_path=" + schema adaptedSqlUrl += " search_path=" + schema } var dbErr error store.DB, dbErr = sql.Open("postgres", sqlUrl) if dbErr != nil { store.DB.Close() store.DB = nil return fmt.Errorf("can not connect to %s error:%v", adaptedSqlUrl, err) }
store.DB.SetMaxIdleConns(maxIdle) store.DB.SetMaxOpenConns(maxOpen) store.DB.SetConnMaxLifetime(time.Duration(maxLifetimeSeconds) * time.Second)
if err = store.DB.Ping(); err != nil { return fmt.Errorf("connect to %s error:%v", sqlUrl, err) }
if err = store.CreateTable(context.Background(), abstract_sql.DEFAULT_TABLE); err != nil { return fmt.Errorf("init table %s: %v", abstract_sql.DEFAULT_TABLE, err) }
return nil }
// CanDropWholeBucket reports whether per-bucket tables are enabled, i.e. the
// current value of SupportBucketTable.
func (store *Postgres3Store) CanDropWholeBucket() bool {
	perBucketTables := store.SupportBucketTable
	return perBucketTables
}
func (store *Postgres3Store) OnBucketCreation(bucket string) { store.dbsLock.Lock() defer store.dbsLock.Unlock()
store.CreateTable(context.Background(), bucket)
if store.dbs == nil { return } store.dbs[bucket] = true }
func (store *Postgres3Store) OnBucketDeletion(bucket string) { store.dbsLock.Lock() defer store.dbsLock.Unlock()
store.deleteTable(context.Background(), bucket)
if store.dbs == nil { return } delete(store.dbs, bucket) }
func (store *Postgres3Store) getTxOrDB(ctx context.Context, fullpath util.FullPath, isForChildren bool) (txOrDB abstract_sql.TxOrDB, bucket string, shortPath util.FullPath, err error) {
shortPath = fullpath bucket = abstract_sql.DEFAULT_TABLE
if tx, ok := ctx.Value("tx").(*sql.Tx); ok { txOrDB = tx } else { txOrDB = store.DB }
if !store.SupportBucketTable { return }
if !strings.HasPrefix(string(fullpath), "/buckets/") { return }
// detect bucket
bucketAndObjectKey := string(fullpath)[len("/buckets/"):] t := strings.Index(bucketAndObjectKey, "/") if t < 0 && !isForChildren { return } bucket = bucketAndObjectKey shortPath = "/" if t > 0 { bucket = bucketAndObjectKey[:t] shortPath = util.FullPath(bucketAndObjectKey[t:]) }
if isValidBucket(bucket) { store.dbsLock.Lock() defer store.dbsLock.Unlock()
if store.dbs == nil { store.dbs = make(map[string]bool) }
if _, found := store.dbs[bucket]; !found { if err = store.CreateTable(ctx, bucket); err == nil { store.dbs[bucket] = true } }
return }
func (store *Postgres3Store) InsertEntry(ctx context.Context, entry *filer.Entry) error { db, bucket, shortPath, err := store.getTxOrDB(ctx, entry.FullPath, false) if err != nil { return fmt.Errorf("findDB %s : %v", entry.FullPath, err) }
if entry.IsDirectory() { if isValidBucket(bucket) && !strings.HasPrefix(string(shortPath), "/.uploads") { // Ignore directory creations, but not bucket creations or multipart uploads
return nil } }
meta, err := entry.EncodeAttributesAndChunks() if err != nil { return fmt.Errorf("encode %s: %s", entry.FullPath, err) }
if len(entry.Chunks) > 50 { meta = util.MaybeGzipData(meta) }
prefixes := calculatePrefixes(string(shortPath)) hashedPrefixes := hashPrefixArray(prefixes) _, err = db.ExecContext(ctx, fmt.Sprintf(insertEntryPattern, bucket), shortPath, path.Base(string(shortPath)), pq.Array(hashedPrefixes), meta) if err != nil { return fmt.Errorf("insert/upsert %s: %s", entry.FullPath, err) } return nil }
// UpdateEntry stores entry by delegating to InsertEntry, whose statement
// already replaces the metadata of an existing key.
func (store *Postgres3Store) UpdateEntry(ctx context.Context, entry *filer.Entry) error {
	err := store.InsertEntry(ctx, entry)
	return err
}
func (store *Postgres3Store) FindEntry(ctx context.Context, fullpath util.FullPath) (*filer.Entry, error) {
db, bucket, shortPath, err := store.getTxOrDB(ctx, fullpath, false) if err != nil { return nil, fmt.Errorf("findDB %s : %v", fullpath, err) }
row := db.QueryRowContext(ctx, fmt.Sprintf(findEntryPattern, bucket), shortPath)
var data []byte if err := row.Scan(&data); err != nil { if err == sql.ErrNoRows { return nil, filer_pb.ErrNotFound } return nil, fmt.Errorf("find %s: %v", fullpath, err) }
entry := &filer.Entry{ FullPath: fullpath, } if err := entry.DecodeAttributesAndChunks(util.MaybeDecompressData(data)); err != nil { return entry, fmt.Errorf("decode %s : %v", entry.FullPath, err) }
return entry, nil }
func (store *Postgres3Store) DeleteEntry(ctx context.Context, fullpath util.FullPath) error { db, bucket, shortPath, err := store.getTxOrDB(ctx, fullpath, false) if err != nil { return fmt.Errorf("findDB %s : %v", fullpath, err) }
res, err := db.ExecContext(ctx, fmt.Sprintf(deleteEntryPattern, bucket), shortPath) if err != nil { return fmt.Errorf("delete %s: %s", fullpath, err) }
_, err = res.RowsAffected() if err != nil { return fmt.Errorf("delete %s but no rows affected: %s", fullpath, err) }
return nil }
func (store *Postgres3Store) DeleteFolderChildren(ctx context.Context, fullpath util.FullPath) (err error) { db, bucket, shortPath, err := store.getTxOrDB(ctx, fullpath, true) if err != nil { return fmt.Errorf("findDB %s : %v", fullpath, err) }
if isValidBucket(bucket) && shortPath == "/" { if err = store.deleteTable(ctx, bucket); err == nil { store.dbsLock.Lock() delete(store.dbs, bucket) store.dbsLock.Unlock() return nil } else { return err } }
sqlText := fmt.Sprintf(deleteFolderChildrenPattern, bucket) prefixes := calculatePrefixes(string(shortPath)) hashedPrefixes := hashPrefixArray(prefixes) glog.V(4).Infof("delete %s SQL %s %d", string(shortPath), sqlText, hashedPrefixes) res, err := db.ExecContext(ctx, sqlText, pq.Array(hashedPrefixes), string(shortPath)+"/%") if err != nil { return fmt.Errorf("deleteFolderChildren %s: %s", fullpath, err) }
_, err = res.RowsAffected() if err != nil { return fmt.Errorf("deleteFolderChildren %s but no rows affected: %s", fullpath, err) } return nil }
// ListDirectoryEntries lists the children of dirPath without a name prefix
// filter by delegating to ListDirectoryPrefixedEntries with an empty prefix.
//
// eachEntryFunc is forwarded to the delegate; it used to be replaced by nil,
// which made the delegate call a nil function as soon as a row was returned.
func (store *Postgres3Store) ListDirectoryEntries(ctx context.Context, dirPath util.FullPath, startFileName string, includeStartFile bool, limit int64, eachEntryFunc filer.ListEachEntryFunc) (lastFileName string, err error) {
	return store.ListDirectoryPrefixedEntries(ctx, dirPath, startFileName, includeStartFile, limit, "", eachEntryFunc)
}
func (store *Postgres3Store) ListDirectoryPrefixedEntries(ctx context.Context, dirPath util.FullPath, startFileName string, includeStartFile bool, limit int64, prefix string, eachEntryFunc filer.ListEachEntryFunc) (lastFileName string, err error) { db, bucket, shortPath, err := store.getTxOrDB(ctx, dirPath, true) if err != nil { return lastFileName, fmt.Errorf("findDB %s : %v", dirPath, err) }
slashedShortPath := appendSlash(string(shortPath)) shortPathParts := len(strings.Split(slashedShortPath, "/"))
sqlText := fmt.Sprintf(listEntryExclusivePattern, bucket, bucket) if includeStartFile { sqlText = fmt.Sprintf(listEntryInclusivePattern, bucket, bucket) }
prefixes := calculatePrefixes(string(slashedShortPath)) hashedPrefixes := hashPrefixArray(prefixes)
rows, err := db.QueryContext(ctx, sqlText, pq.Array(hashedPrefixes), shortPathParts, startFileName, prefix+"%", shortPathParts-1, limit+1)
if err != nil { return lastFileName, fmt.Errorf("list %s : %v", dirPath, err) } defer rows.Close()
for rows.Next() { var key string var name string var isDir bool var data []byte if err = rows.Scan(&key, &name, &isDir, &data); err != nil { glog.V(0).Infof("scan %s : %v", dirPath, err) return lastFileName, fmt.Errorf("scan %s: %v", dirPath, err) }
if !isDir { lastFileName = name
entry := &filer.Entry{ FullPath: util.NewFullPath(string(dirPath), name), }
if err = entry.DecodeAttributesAndChunks(util.MaybeDecompressData(data)); err != nil { glog.V(0).Infof("scan decode %s : %v", entry.FullPath, err) return lastFileName, fmt.Errorf("scan decode %s : %v", entry.FullPath, err) }
if !eachEntryFunc(entry) { break } } else { lastFileName = key dirName := key entry := &filer.Entry{ FullPath: util.NewFullPath(string(dirPath), dirName), }
entry.Attr.Mode |= os.ModeDir | 0775 if !eachEntryFunc(entry) { break } } }
return lastFileName, nil }
func (store *Postgres3Store) BeginTransaction(ctx context.Context) (context.Context, error) { tx, err := store.DB.BeginTx(ctx, &sql.TxOptions{ Isolation: sql.LevelReadCommitted, ReadOnly: false, }) if err != nil { return ctx, err }
return context.WithValue(ctx, "tx", tx), nil }
func (store *Postgres3Store) CommitTransaction(ctx context.Context) error { if tx, ok := ctx.Value("tx").(*sql.Tx); ok { return tx.Commit() } return nil }
func (store *Postgres3Store) RollbackTransaction(ctx context.Context) error { if tx, ok := ctx.Value("tx").(*sql.Tx); ok { return tx.Rollback() } return nil }
// Shutdown closes the connection pool.
//
// It is a no-op when the pool was never opened (DB is nil), so shutting down
// a store that was never initialised does not panic. The method has no error
// return, so a close failure is logged.
func (store *Postgres3Store) Shutdown() {
	if store.DB == nil {
		return
	}
	if err := store.DB.Close(); err != nil {
		glog.Errorf("close %s store: %v", store.GetName(), err)
	}
}
func (store *Postgres3Store) CreateTable(ctx context.Context, bucket string) error { _, err := store.DB.ExecContext(ctx, fmt.Sprintf(createTablePattern, bucket)) if err != nil { return fmt.Errorf("create bucket table: %v", err) }
_, err = store.DB.ExecContext(ctx, fmt.Sprintf(createTableIndexPattern, bucket)) if err != nil { return fmt.Errorf("create bucket index: %v", err) } return err }
func (store *Postgres3Store) deleteTable(ctx context.Context, bucket string) error { if !store.SupportBucketTable { return nil } _, err := store.DB.ExecContext(ctx, fmt.Sprintf(deleteTablePattern, bucket)) return err }
// isValidBucket reports whether bucket names a dedicated bucket table: it
// must be non-empty and must differ from abstract_sql.DEFAULT_TABLE.
func isValidBucket(bucket string) bool {
	if bucket == "" {
		return false
	}
	return bucket != abstract_sql.DEFAULT_TABLE
}
// calculatePrefixes returns every leading portion of fullPath that ends in a
// "/", in increasing length. The root prefix "/" is left out to save space in
// the returned array. For example "/a/b/c" yields "/a/" and "/a/b/". The
// result is nil when there is no such prefix.
func calculatePrefixes(fullPath string) []string {
	var prefixes []string
	for end := 0; ; {
		next := strings.IndexByte(fullPath[end:], '/')
		if next < 0 {
			break
		}
		// end now points just past the slash that was found.
		end += next + 1
		if end == 1 {
			// The path starts with "/": that is the root prefix, which is skipped.
			continue
		}
		prefixes = append(prefixes, fullPath[:end])
	}
	return prefixes
}
// hashPrefixArray converts input prefix array into int64 hashes
func hashPrefixArray(a []string) []int64 { hashed := make([]int64, len(a)) for i := range a { hashed[i] = util.HashStringToLong(a[i]) } return hashed }
// appendSlash returns s terminated by exactly the slash it already has, or
// with one "/" added when it does not end in one.
func appendSlash(s string) string {
	if strings.HasSuffix(s, "/") {
		return s
	}
	return s + "/"
}