// Package postgres3 implements a SeaweedFS filer store on top of PostgreSQL.
//
// Every entry is stored as one row keyed by its path. Alongside the key, the
// row carries a bigint[] column holding the hashes of all ancestor directory
// prefixes of that path; a GIN index over this column is what the list and
// recursive-delete queries filter on.
package postgres3

/*
 * Copyright 2022 Splunk Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import (
	"context"
	"database/sql"
	"fmt"
	"os"
	"path"
	"strings"
	"sync"
	"time"

	"github.com/lib/pq"
	_ "github.com/lib/pq"
	"github.com/seaweedfs/seaweedfs/weed/filer"
	"github.com/seaweedfs/seaweedfs/weed/filer/abstract_sql"
	"github.com/seaweedfs/seaweedfs/weed/glog"
	"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
	"github.com/seaweedfs/seaweedfs/weed/util"
)

const (
	// CONNECTION_URL_PATTERN is the base lib/pq key=value connection string;
	// user, password, dbname and search_path are appended when configured.
	CONNECTION_URL_PATTERN = "host=%s port=%d sslmode=%s connect_timeout=30"

	// maxIdentifierLength is the longest identifier PostgreSQL keeps without
	// truncating it (NAMEDATALEN - 1 in a default build).
	maxIdentifierLength = 63

	createTablePattern = `CREATE TABLE IF NOT EXISTS "%s" (
		key varchar(65535) PRIMARY KEY,
		name varchar(65535),
		prefixes bigint[],
		meta bytea
	)`

	// The index is created with an explicit name and IF NOT EXISTS so that
	// calling CreateTable repeatedly (it runs on every start-up) does not pile
	// up duplicate GIN indexes on the same column.
	createTableIndexPattern = `CREATE INDEX IF NOT EXISTS "%s" ON "%s" USING gin (prefixes);`

	// IF EXISTS keeps the drop idempotent: both DeleteFolderChildren and
	// OnBucketDeletion may try to drop the same bucket table.
	deleteTablePattern = `DROP TABLE IF EXISTS "%s";`

	insertEntryPattern = `INSERT INTO "%s" (key, name, prefixes, meta) VALUES ($1, $2, $3, $4)
		ON CONFLICT (key)
		DO UPDATE SET meta = EXCLUDED.meta;`

	findEntryPattern   = `SELECT meta FROM "%s" WHERE key = $1`
	deleteEntryPattern = `DELETE FROM "%s" WHERE key = $1`

	// listEntryQueryPattern lists one directory level. Bind parameters, as
	// supplied by ListDirectoryPrefixedEntries:
	//   $1 hashed prefixes of the directory (with trailing slash)
	//   $2 number of "/"-separated parts of the slashed directory path, used
	//      as the split_part index that yields the child directory name
	//   $3 start file name
	//   $4 LIKE pattern built from the requested name prefix
	//   $5 $2 - 1, used to tell direct children from deeper descendants
	//   $6 limit + 1
	// __COMPARISON__ is replaced by ">" or ">=" in init.
	//
	// NOTE(review): the directory half of the UNION always uses a strict ">"
	// against $3, even for inclusive listings — confirm this is intended.
	listEntryQueryPattern = `SELECT key, name, isdir, meta FROM
		(
			SELECT key, name, false as isdir, meta FROM "%s"
			WHERE prefixes @> $1 AND cardinality(prefixes) < $5
				AND name __COMPARISON__ $3 AND name LIKE $4
			ORDER BY key ASC LIMIT $6
		) s1
		UNION
		(
			SELECT dir, dir, true isdir, NULL::bytea meta FROM
			(
				SELECT DISTINCT split_part(key, '/', $2) AS dir FROM "%s"
				WHERE prefixes @> $1 AND cardinality(prefixes) > $5 - 1
				ORDER BY dir ASC
			) t1
			WHERE t1.dir > $3 AND t1.dir LIKE $4
			ORDER BY dir ASC
		)
		ORDER BY name ASC LIMIT $6`

	deleteFolderChildrenPattern = `DELETE FROM "%s" WHERE prefixes @> $1 and key like $2`
)

var (
	// listEntryExclusivePattern is listEntryQueryPattern with "name > $3".
	listEntryExclusivePattern string
	// listEntryInclusivePattern is listEntryQueryPattern with "name >= $3".
	listEntryInclusivePattern string
)

var _ filer.BucketAware = (*Postgres3Store)(nil)

// init registers the store with the filer and materialises the two variants
// of the list query.
func init() {
	filer.Stores = append(filer.Stores, &Postgres3Store{})

	listEntryExclusivePattern = strings.ReplaceAll(listEntryQueryPattern, "__COMPARISON__", ">")
	listEntryInclusivePattern = strings.ReplaceAll(listEntryQueryPattern, "__COMPARISON__", ">=")
}

// Postgres3Store is a filer store that keeps each bucket in its own table and
// everything else in abstract_sql.DEFAULT_TABLE.
type Postgres3Store struct {
	DB                 *sql.DB
	SupportBucketTable bool

	// dbs caches the bucket tables this process has already created.
	// It is guarded by dbsLock.
	dbs     map[string]bool
	dbsLock sync.Mutex
}

// GetName returns the store name used to select it in the filer configuration.
func (store *Postgres3Store) GetName() string {
	return "postgres3"
}

// Initialize reads the connection settings found under prefix and connects to
// the database.
func (store *Postgres3Store) Initialize(configuration util.Configuration, prefix string) error {
	return store.initialize(
		configuration.GetString(prefix+"createTable"),
		configuration.GetString(prefix+"username"),
		configuration.GetString(prefix+"password"),
		configuration.GetString(prefix+"hostname"),
		configuration.GetInt(prefix+"port"),
		configuration.GetString(prefix+"database"),
		configuration.GetString(prefix+"schema"),
		configuration.GetString(prefix+"sslmode"),
		configuration.GetInt(prefix+"connection_max_idle"),
		configuration.GetInt(prefix+"connection_max_open"),
		configuration.GetInt(prefix+"connection_max_lifetime_seconds"),
	)
}

// initialize opens the connection pool, verifies it with a ping and makes sure
// the default table exists. The createTable argument is accepted but not used
// by this store.
//
// Error messages only ever contain the redacted connection string, never the
// configured password.
func (store *Postgres3Store) initialize(createTable, user, password, hostname string, port int, database, schema, sslmode string, maxIdle, maxOpen, maxLifetimeSeconds int) (err error) {
	store.SupportBucketTable = true

	sqlUrl := fmt.Sprintf(CONNECTION_URL_PATTERN, hostname, port, sslmode)
	if user != "" {
		sqlUrl += " user=" + user
	}
	// adaptedSqlUrl mirrors sqlUrl with the password masked; it is the only
	// form that may appear in logs and errors.
	adaptedSqlUrl := sqlUrl
	if password != "" {
		sqlUrl += " password=" + password
		adaptedSqlUrl += " password=ADAPTED"
	}
	if database != "" {
		sqlUrl += " dbname=" + database
		adaptedSqlUrl += " dbname=" + database
	}
	if schema != "" {
		sqlUrl += " search_path=" + schema
		adaptedSqlUrl += " search_path=" + schema
	}

	db, dbErr := sql.Open("postgres", sqlUrl)
	if dbErr != nil {
		// sql.Open returns a nil *sql.DB on failure, so there is nothing to close.
		store.DB = nil
		return fmt.Errorf("can not connect to %s error:%w", adaptedSqlUrl, dbErr)
	}
	store.DB = db

	store.DB.SetMaxIdleConns(maxIdle)
	store.DB.SetMaxOpenConns(maxOpen)
	store.DB.SetConnMaxLifetime(time.Duration(maxLifetimeSeconds) * time.Second)

	if err = store.DB.Ping(); err != nil {
		return fmt.Errorf("connect to %s error:%w", adaptedSqlUrl, err)
	}

	if err = store.CreateTable(context.Background(), abstract_sql.DEFAULT_TABLE); err != nil {
		return fmt.Errorf("init table %s: %w", abstract_sql.DEFAULT_TABLE, err)
	}

	return nil
}

// CanDropWholeBucket reports whether a bucket can be removed by dropping its
// table.
func (store *Postgres3Store) CanDropWholeBucket() bool {
	return store.SupportBucketTable
}

// OnBucketCreation creates the table backing bucket. The bucket is recorded as
// created only when the table creation succeeded; a failure is logged because
// the callback has no way to return it.
func (store *Postgres3Store) OnBucketCreation(bucket string) {
	store.dbsLock.Lock()
	defer store.dbsLock.Unlock()

	if err := store.CreateTable(context.Background(), bucket); err != nil {
		glog.Errorf("create table for bucket %s: %v", bucket, err)
		return
	}

	if store.dbs == nil {
		store.dbs = make(map[string]bool)
	}
	store.dbs[bucket] = true
}

// OnBucketDeletion drops the table backing bucket and forgets it in the local
// cache. A failed drop is logged because the callback has no way to return it.
func (store *Postgres3Store) OnBucketDeletion(bucket string) {
	store.dbsLock.Lock()
	defer store.dbsLock.Unlock()

	if err := store.deleteTable(context.Background(), bucket); err != nil {
		glog.Errorf("delete table for bucket %s: %v", bucket, err)
	}

	// delete is a no-op on a nil map, so no nil check is needed.
	delete(store.dbs, bucket)
}

// getTxOrDB resolves where fullpath lives.
//
// It returns the transaction stored in ctx under "tx" if there is one,
// otherwise the connection pool; the table (bucket) to use; and the path
// inside that table. Paths outside "/buckets/" map to the default table with
// the path unchanged. For "/buckets/<name>" itself the bucket table is only
// selected when isForChildren is true. The bucket table is created on first
// use, and a creation error is returned in err.
func (store *Postgres3Store) getTxOrDB(ctx context.Context, fullpath util.FullPath, isForChildren bool) (txOrDB abstract_sql.TxOrDB, bucket string, shortPath util.FullPath, err error) {
	shortPath = fullpath
	bucket = abstract_sql.DEFAULT_TABLE

	if tx, ok := ctx.Value("tx").(*sql.Tx); ok {
		txOrDB = tx
	} else {
		txOrDB = store.DB
	}

	if !store.SupportBucketTable {
		return
	}

	if !strings.HasPrefix(string(fullpath), "/buckets/") {
		return
	}

	// detect bucket
	bucketAndObjectKey := string(fullpath)[len("/buckets/"):]
	t := strings.Index(bucketAndObjectKey, "/")
	if t < 0 && !isForChildren {
		return
	}
	bucket = bucketAndObjectKey
	shortPath = "/"
	if t > 0 {
		bucket = bucketAndObjectKey[:t]
		shortPath = util.FullPath(bucketAndObjectKey[t:])
	}

	if isValidBucket(bucket) {
		store.dbsLock.Lock()
		defer store.dbsLock.Unlock()

		if store.dbs == nil {
			store.dbs = make(map[string]bool)
		}

		if _, found := store.dbs[bucket]; !found {
			if err = store.CreateTable(ctx, bucket); err == nil {
				store.dbs[bucket] = true
			}
		}
	}

	return
}

// InsertEntry stores entry, replacing the metadata of an existing row with the
// same key. Directory entries inside a bucket are not stored, except for paths
// under "/.uploads".
func (store *Postgres3Store) InsertEntry(ctx context.Context, entry *filer.Entry) error {
	db, bucket, shortPath, err := store.getTxOrDB(ctx, entry.FullPath, false)
	if err != nil {
		return fmt.Errorf("findDB %s : %w", entry.FullPath, err)
	}

	if entry.IsDirectory() {
		if isValidBucket(bucket) && !strings.HasPrefix(string(shortPath), "/.uploads") {
			// Ignore directory creations, but not bucket creations or multipart uploads
			return nil
		}
	}

	meta, err := entry.EncodeAttributesAndChunks()
	if err != nil {
		return fmt.Errorf("encode %s: %w", entry.FullPath, err)
	}

	// Entries with many chunks are passed through the gzip helper before
	// being stored.
	if len(entry.Chunks) > 50 {
		meta = util.MaybeGzipData(meta)
	}

	prefixes := calculatePrefixes(string(shortPath))
	hashedPrefixes := hashPrefixArray(prefixes)
	_, err = db.ExecContext(ctx, fmt.Sprintf(insertEntryPattern, bucket), shortPath, path.Base(string(shortPath)), pq.Array(hashedPrefixes), meta)
	if err != nil {
		return fmt.Errorf("insert/upsert %s: %w", entry.FullPath, err)
	}
	return nil
}

// UpdateEntry is an upsert and therefore identical to InsertEntry.
func (store *Postgres3Store) UpdateEntry(ctx context.Context, entry *filer.Entry) error {
	return store.InsertEntry(ctx, entry)
}

// FindEntry loads the entry stored at fullpath. It returns
// filer_pb.ErrNotFound when no row exists.
func (store *Postgres3Store) FindEntry(ctx context.Context, fullpath util.FullPath) (*filer.Entry, error) {
	db, bucket, shortPath, err := store.getTxOrDB(ctx, fullpath, false)
	if err != nil {
		return nil, fmt.Errorf("findDB %s : %w", fullpath, err)
	}

	row := db.QueryRowContext(ctx, fmt.Sprintf(findEntryPattern, bucket), shortPath)

	var data []byte
	if err := row.Scan(&data); err != nil {
		if err == sql.ErrNoRows {
			return nil, filer_pb.ErrNotFound
		}
		return nil, fmt.Errorf("find %s: %w", fullpath, err)
	}

	entry := &filer.Entry{
		FullPath: fullpath,
	}
	if err := entry.DecodeAttributesAndChunks(util.MaybeDecompressData(data)); err != nil {
		return entry, fmt.Errorf("decode %s : %w", entry.FullPath, err)
	}

	return entry, nil
}

// DeleteEntry removes the row stored at fullpath. Deleting a path that has no
// row is not an error.
func (store *Postgres3Store) DeleteEntry(ctx context.Context, fullpath util.FullPath) error {
	db, bucket, shortPath, err := store.getTxOrDB(ctx, fullpath, false)
	if err != nil {
		return fmt.Errorf("findDB %s : %w", fullpath, err)
	}

	res, err := db.ExecContext(ctx, fmt.Sprintf(deleteEntryPattern, bucket), shortPath)
	if err != nil {
		return fmt.Errorf("delete %s: %w", fullpath, err)
	}

	if _, err = res.RowsAffected(); err != nil {
		return fmt.Errorf("delete %s but no rows affected: %w", fullpath, err)
	}

	return nil
}

// DeleteFolderChildren removes every row below fullpath. When fullpath is the
// root of a bucket the whole bucket table is dropped instead.
func (store *Postgres3Store) DeleteFolderChildren(ctx context.Context, fullpath util.FullPath) (err error) {
	db, bucket, shortPath, err := store.getTxOrDB(ctx, fullpath, true)
	if err != nil {
		return fmt.Errorf("findDB %s : %w", fullpath, err)
	}

	if isValidBucket(bucket) && shortPath == "/" {
		if err = store.deleteTable(ctx, bucket); err != nil {
			return err
		}
		store.dbsLock.Lock()
		delete(store.dbs, bucket)
		store.dbsLock.Unlock()
		return nil
	}

	sqlText := fmt.Sprintf(deleteFolderChildrenPattern, bucket)

	// Use the folder path with a trailing slash so that the folder's own
	// prefix is part of the filter. Without it only the ancestors are
	// required and the unescaped LIKE below could match a sibling folder
	// (e.g. "/a/b_c" matching "/a/bXc/..."). It also yields "/%" rather than
	// "//%" for the root folder.
	slashedShortPath := appendSlash(string(shortPath))
	prefixes := calculatePrefixes(slashedShortPath)
	hashedPrefixes := hashPrefixArray(prefixes)

	glog.V(4).Infof("delete %s SQL %s %d", string(shortPath), sqlText, hashedPrefixes)
	res, err := db.ExecContext(ctx, sqlText, pq.Array(hashedPrefixes), slashedShortPath+"%")
	if err != nil {
		return fmt.Errorf("deleteFolderChildren %s: %w", fullpath, err)
	}

	if _, err = res.RowsAffected(); err != nil {
		return fmt.Errorf("deleteFolderChildren %s but no rows affected: %w", fullpath, err)
	}

	return nil
}

// ListDirectoryEntries lists the children of dirPath without a name prefix
// filter. See ListDirectoryPrefixedEntries.
func (store *Postgres3Store) ListDirectoryEntries(ctx context.Context, dirPath util.FullPath, startFileName string, includeStartFile bool, limit int64, eachEntryFunc filer.ListEachEntryFunc) (lastFileName string, err error) {
	// The callback must be forwarded: passing nil would make the first
	// returned row trigger a nil function call.
	return store.ListDirectoryPrefixedEntries(ctx, dirPath, startFileName, includeStartFile, limit, "", eachEntryFunc)
}

// ListDirectoryPrefixedEntries lists the direct children of dirPath whose name
// starts with prefix, beginning at startFileName (included only when
// includeStartFile is set), and calls eachEntryFunc for each of them until it
// returns false. Sub-directories are derived from the stored keys and reported
// as entries with only the directory mode set.
//
// It returns the name of the last entry handed to eachEntryFunc.
func (store *Postgres3Store) ListDirectoryPrefixedEntries(ctx context.Context, dirPath util.FullPath, startFileName string, includeStartFile bool, limit int64, prefix string, eachEntryFunc filer.ListEachEntryFunc) (lastFileName string, err error) {
	db, bucket, shortPath, err := store.getTxOrDB(ctx, dirPath, true)
	if err != nil {
		return lastFileName, fmt.Errorf("findDB %s : %w", dirPath, err)
	}

	slashedShortPath := appendSlash(string(shortPath))
	shortPathParts := len(strings.Split(slashedShortPath, "/"))

	sqlText := fmt.Sprintf(listEntryExclusivePattern, bucket, bucket)
	if includeStartFile {
		sqlText = fmt.Sprintf(listEntryInclusivePattern, bucket, bucket)
	}

	prefixes := calculatePrefixes(slashedShortPath)
	hashedPrefixes := hashPrefixArray(prefixes)

	rows, err := db.QueryContext(ctx, sqlText,
		pq.Array(hashedPrefixes),
		shortPathParts,
		startFileName,
		prefix+"%",
		shortPathParts-1,
		limit+1)
	if err != nil {
		return lastFileName, fmt.Errorf("list %s : %w", dirPath, err)
	}
	defer rows.Close()

	for rows.Next() {
		var key string
		var name string
		var isDir bool
		var data []byte
		if err = rows.Scan(&key, &name, &isDir, &data); err != nil {
			glog.V(0).Infof("scan %s : %v", dirPath, err)
			return lastFileName, fmt.Errorf("scan %s: %w", dirPath, err)
		}

		if !isDir {
			lastFileName = name

			entry := &filer.Entry{
				FullPath: util.NewFullPath(string(dirPath), name),
			}
			if err = entry.DecodeAttributesAndChunks(util.MaybeDecompressData(data)); err != nil {
				glog.V(0).Infof("scan decode %s : %v", entry.FullPath, err)
				return lastFileName, fmt.Errorf("scan decode %s : %w", entry.FullPath, err)
			}

			if !eachEntryFunc(entry) {
				break
			}
		} else {
			// For directory rows the query returns the directory name in
			// both the key and the name column.
			lastFileName = key
			dirName := key

			entry := &filer.Entry{
				FullPath: util.NewFullPath(string(dirPath), dirName),
			}
			entry.Attr.Mode |= os.ModeDir | 0775

			if !eachEntryFunc(entry) {
				break
			}
		}
	}

	// rows.Next returning false may mean an iteration error rather than the
	// end of the result set; do not report a truncated listing as success.
	if err = rows.Err(); err != nil {
		return lastFileName, fmt.Errorf("list %s : %w", dirPath, err)
	}

	return lastFileName, nil
}

// BeginTransaction starts a read-committed transaction and returns a context
// carrying it under the "tx" key, where getTxOrDB picks it up.
func (store *Postgres3Store) BeginTransaction(ctx context.Context) (context.Context, error) {
	tx, err := store.DB.BeginTx(ctx, &sql.TxOptions{
		Isolation: sql.LevelReadCommitted,
		ReadOnly:  false,
	})
	if err != nil {
		return ctx, err
	}

	return context.WithValue(ctx, "tx", tx), nil
}

// CommitTransaction commits the transaction carried by ctx, if any.
func (store *Postgres3Store) CommitTransaction(ctx context.Context) error {
	if tx, ok := ctx.Value("tx").(*sql.Tx); ok {
		return tx.Commit()
	}
	return nil
}

// RollbackTransaction rolls back the transaction carried by ctx, if any.
func (store *Postgres3Store) RollbackTransaction(ctx context.Context) error {
	if tx, ok := ctx.Value("tx").(*sql.Tx); ok {
		return tx.Rollback()
	}
	return nil
}

// Shutdown closes the connection pool. It is safe to call on a store whose
// initialisation failed before a pool was opened.
func (store *Postgres3Store) Shutdown() {
	if store.DB == nil {
		return
	}
	store.DB.Close()
}

// CreateTable creates the table for bucket and the GIN index over its prefixes
// column. Both statements are idempotent, so it may be called for a table that
// already exists.
func (store *Postgres3Store) CreateTable(ctx context.Context, bucket string) error {
	if _, err := store.DB.ExecContext(ctx, fmt.Sprintf(createTablePattern, bucket)); err != nil {
		return fmt.Errorf("create bucket table: %w", err)
	}

	if _, err := store.DB.ExecContext(ctx, fmt.Sprintf(createTableIndexPattern, prefixIndexName(bucket), bucket)); err != nil {
		return fmt.Errorf("create bucket index: %w", err)
	}

	return nil
}

// deleteTable drops the table for bucket. It does nothing when bucket tables
// are not enabled.
func (store *Postgres3Store) deleteTable(ctx context.Context, bucket string) error {
	if !store.SupportBucketTable {
		return nil
	}

	_, err := store.DB.ExecContext(ctx, fmt.Sprintf(deleteTablePattern, bucket))
	return err
}

// prefixIndexName returns the name of the GIN index over the prefixes column
// of the bucket table.
//
// The regular form "<table>_prefixes_idx" is the name PostgreSQL itself picks
// for an unnamed index on that column, so indexes created before the name was
// made explicit are recognised by IF NOT EXISTS. When that form would exceed
// the identifier limit (and be silently truncated, possibly onto the table's
// own name) a fixed-length name derived from the bucket hash is used instead.
func prefixIndexName(bucket string) string {
	name := bucket + "_prefixes_idx"
	if len(name) <= maxIdentifierLength {
		return name
	}
	return fmt.Sprintf("prefixes_idx_%x", uint64(util.HashStringToLong(bucket)))
}

// isValidBucket reports whether bucket names a dedicated bucket table, i.e. it
// is neither empty nor the default table.
func isValidBucket(bucket string) bool {
	return bucket != abstract_sql.DEFAULT_TABLE && bucket != ""
}

// calculatePrefixes returns the prefixes for a given path. The root prefix "/" is ignored to
// save space in the returned array.
//
// For example "/a/b/c" yields ["/a/", "/a/b/"], and "/a/b/" yields the same
// two prefixes.
func calculatePrefixes(fullPath string) []string {
	res := strings.Split(fullPath, "/")
	maxPrefixes := len(res)
	var retval []string
	for i := 1; i < maxPrefixes; i++ {
		calculatedPrefix := strings.Join(res[0:i], "/") + "/"
		if calculatedPrefix == "/" {
			continue
		}
		retval = append(retval, calculatedPrefix)
	}
	return retval
}

// hashPrefixArray converts input prefix array into int64 hashes.
func hashPrefixArray(a []string) []int64 {
	hashed := make([]int64, len(a))
	for i := range a {
		hashed[i] = util.HashStringToLong(a[i])
	}
	return hashed
}

// appendSlash returns s with exactly one trailing "/" guaranteed.
func appendSlash(s string) string {
	if !strings.HasSuffix(s, "/") {
		return s + "/"
	}
	return s
}