Browse Source

fix(tikv): replace DeleteRange with transaction-based batch deletes (#7557)

* fix(tikv): replace DeleteRange with transaction-based batch deletes

Fixes #7187

Problem:
TiKV's DeleteRange API is a RawKV operation that bypasses transaction
isolation. When SeaweedFS filer uses TiKV with txn client and another
service uses RawKV client on the same cluster, DeleteFolderChildren
can accidentally delete KV pairs from the RawKV client because
DeleteRange operates at the raw key level without respecting
transaction boundaries.

Reproduction:
1. SeaweedFS filer using TiKV txn client for metadata
2. Another service using rawkv client on same TiKV cluster
3. Filer performs batch file deletion via DeleteFolderChildren
4. Result: ~50% of rawkv client's KV pairs get deleted

Solution:
Replace client.DeleteRange() (RawKV API) with transactional batch
deletes using txn.Delete() within transactions. This ensures:
- Transaction isolation - operations respect TiKV's MVCC boundaries
- Keyspace separation - txn client and RawKV client stay isolated
- Proper key handling - keys are copied to avoid iterator reuse issues
- Batch processing - deletes batched (10K default) to manage memory

Changes:
1. Core data structure:
   - Removed deleteRangeConcurrency field
   - Added batchCommitSize field (configurable, default 10000)

2. DeleteFolderChildren rewrite:
   - Replaced DeleteRange with iterative batch deletes
   - Added proper transaction lifecycle management
   - Implemented key copying to avoid iterator buffer reuse
   - Added batching to prevent memory exhaustion

3. New deleteBatch helper:
   - Handles transaction creation and lifecycle
   - Batches deletes within single transaction
   - Properly commits/rolls back based on context

4. Context propagation:
   - Updated RunInTxn to accept context parameter
   - All RunInTxn call sites now pass context
   - Enables proper timeout/cancellation handling

5. Configuration:
   - Removed deleterange_concurrency setting
   - Added batchdelete_count setting (default 10000)

All critical review comments from PR #7188 have been addressed:
- Proper key copying with append([]byte(nil), key...)
- Conditional transaction rollback based on inContext flag
- Context propagation for commits
- Proper transaction lifecycle management
- Configurable batch size

Co-authored-by: giftz <giftz@users.noreply.github.com>

* fix: remove extra closing brace causing syntax error in tikv_store.go

---------

Co-authored-by: giftz <giftz@users.noreply.github.com>
pull/7558/head
Chris Lu 3 days ago
committed by GitHub
parent
commit
5287d9f3e3
No known key found for this signature in database GPG Key ID: B5690EEEBB952194
  1. 5
      weed/command/scaffold/filer.toml
  2. 92
      weed/filer/tikv/tikv_store.go
  3. 6
      weed/filer/tikv/tikv_store_kv.go

5
weed/command/scaffold/filer.toml

@ -381,10 +381,11 @@ enabled = false
# If you have many pd address, use ',' split then: # If you have many pd address, use ',' split then:
# pdaddrs = "pdhost1:2379, pdhost2:2379, pdhost3:2379" # pdaddrs = "pdhost1:2379, pdhost2:2379, pdhost3:2379"
pdaddrs = "localhost:2379" pdaddrs = "localhost:2379"
# Concurrency for TiKV delete range
deleterange_concurrency = 1
# Enable 1PC # Enable 1PC
enable_1pc = false enable_1pc = false
# batch delete count, default 10000 in code
#batchdelete_count = 20000
# Set the CA certificate path # Set the CA certificate path
ca_path="" ca_path=""
# Set the certificate path # Set the certificate path

92
weed/filer/tikv/tikv_store.go

@ -20,6 +20,8 @@ import (
"github.com/tikv/client-go/v2/txnkv" "github.com/tikv/client-go/v2/txnkv"
) )
const defaultBatchCommitSize = 10000
var ( var (
_ filer.FilerStore = ((*TikvStore)(nil)) _ filer.FilerStore = ((*TikvStore)(nil))
) )
@ -30,8 +32,8 @@ func init() {
type TikvStore struct { type TikvStore struct {
client *txnkv.Client client *txnkv.Client
deleteRangeConcurrency int
onePC bool onePC bool
batchCommitSize int
} }
// Basic APIs // Basic APIs
@ -46,12 +48,13 @@ func (store *TikvStore) Initialize(config util.Configuration, prefix string) err
verify_cn := strings.Split(config.GetString(prefix+"verify_cn"), ",") verify_cn := strings.Split(config.GetString(prefix+"verify_cn"), ",")
pdAddrs := strings.Split(config.GetString(prefix+"pdaddrs"), ",") pdAddrs := strings.Split(config.GetString(prefix+"pdaddrs"), ",")
drc := config.GetInt(prefix + "deleterange_concurrency")
if drc <= 0 {
drc = 1
bdc := config.GetInt(prefix + "batchdelete_count")
if bdc <= 0 {
bdc = defaultBatchCommitSize
} }
store.onePC = config.GetBool(prefix + "enable_1pc") store.onePC = config.GetBool(prefix + "enable_1pc")
store.deleteRangeConcurrency = drc
store.batchCommitSize = bdc
return store.initialize(ca, cert, key, verify_cn, pdAddrs) return store.initialize(ca, cert, key, verify_cn, pdAddrs)
} }
@ -86,7 +89,7 @@ func (store *TikvStore) InsertEntry(ctx context.Context, entry *filer.Entry) err
if err != nil { if err != nil {
return err return err
} }
err = txn.RunInTxn(func(txn *txnkv.KVTxn) error {
err = txn.RunInTxn(ctx, func(txn *txnkv.KVTxn) error {
return txn.Set(key, value) return txn.Set(key, value)
}) })
if err != nil { if err != nil {
@ -108,7 +111,7 @@ func (store *TikvStore) FindEntry(ctx context.Context, path util.FullPath) (*fil
return nil, err return nil, err
} }
var value []byte = nil var value []byte = nil
err = txn.RunInTxn(func(txn *txnkv.KVTxn) error {
err = txn.RunInTxn(ctx, func(txn *txnkv.KVTxn) error {
val, err := txn.Get(context.TODO(), key) val, err := txn.Get(context.TODO(), key)
if err == nil { if err == nil {
value = val value = val
@ -143,7 +146,7 @@ func (store *TikvStore) DeleteEntry(ctx context.Context, path util.FullPath) err
return err return err
} }
err = txn.RunInTxn(func(txn *txnkv.KVTxn) error {
err = txn.RunInTxn(ctx, func(txn *txnkv.KVTxn) error {
return txn.Delete(key) return txn.Delete(key)
}) })
if err != nil { if err != nil {
@ -158,54 +161,76 @@ func (store *TikvStore) DeleteEntry(ctx context.Context, path util.FullPath) err
func (store *TikvStore) DeleteFolderChildren(ctx context.Context, path util.FullPath) error { func (store *TikvStore) DeleteFolderChildren(ctx context.Context, path util.FullPath) error {
directoryPrefix := genDirectoryKeyPrefix(path, "") directoryPrefix := genDirectoryKeyPrefix(path, "")
txn, err := store.getTxn(ctx)
iterTxn, err := store.getTxn(ctx)
if err != nil { if err != nil {
return err return err
} }
var (
startKey []byte = nil
endKey []byte = nil
)
err = txn.RunInTxn(func(txn *txnkv.KVTxn) error {
iter, err := txn.Iter(directoryPrefix, nil)
if !iterTxn.inContext {
defer func() {
_ = iterTxn.Rollback()
}()
}
iter, err := iterTxn.Iter(directoryPrefix, nil)
if err != nil { if err != nil {
return err return err
} }
defer iter.Close() defer iter.Close()
var keys [][]byte
for iter.Valid() { for iter.Valid() {
key := iter.Key() key := iter.Key()
endKey = key
if !bytes.HasPrefix(key, directoryPrefix) { if !bytes.HasPrefix(key, directoryPrefix) {
break break
} }
if startKey == nil {
startKey = key
keys = append(keys, append([]byte(nil), key...))
if len(keys) >= store.batchCommitSize {
if err := store.deleteBatch(ctx, keys); err != nil {
return fmt.Errorf("delete batch in %s, error: %v", path, err)
}
keys = keys[:0]
} }
err = iter.Next()
if err != nil {
if err := iter.Next(); err != nil {
return err return err
} }
} }
// Only one Key matched just delete it.
if startKey != nil && bytes.Equal(startKey, endKey) {
return txn.Delete(startKey)
if len(keys) > 0 {
if err := store.deleteBatch(ctx, keys); err != nil {
return fmt.Errorf("delete batch in %s, error: %v", path, err)
}
} }
return nil return nil
})
if err != nil {
return fmt.Errorf("delete %s : %v", path, err)
} }
if startKey != nil && endKey != nil && !bytes.Equal(startKey, endKey) {
// has startKey and endKey and they are not equals, so use delete range
_, err = store.client.DeleteRange(context.Background(), startKey, endKey, store.deleteRangeConcurrency)
func (store *TikvStore) deleteBatch(ctx context.Context, keys [][]byte) error {
deleteTxn, err := store.getTxn(ctx)
if err != nil { if err != nil {
return fmt.Errorf("delete %s : %v", path, err)
return err
} }
if !deleteTxn.inContext {
defer func() { _ = deleteTxn.Rollback() }()
} }
for _, key := range keys {
if err := deleteTxn.Delete(key); err != nil {
return err return err
} }
}
if !deleteTxn.inContext {
return deleteTxn.Commit(ctx)
}
return nil
}
func (store *TikvStore) ListDirectoryEntries(ctx context.Context, dirPath util.FullPath, startFileName string, includeStartFile bool, limit int64, eachEntryFunc filer.ListEachEntryFunc) (string, error) { func (store *TikvStore) ListDirectoryEntries(ctx context.Context, dirPath util.FullPath, startFileName string, includeStartFile bool, limit int64, eachEntryFunc filer.ListEachEntryFunc) (string, error) {
return store.ListDirectoryPrefixedEntries(ctx, dirPath, startFileName, includeStartFile, limit, "", eachEntryFunc) return store.ListDirectoryPrefixedEntries(ctx, dirPath, startFileName, includeStartFile, limit, "", eachEntryFunc)
@ -224,7 +249,7 @@ func (store *TikvStore) ListDirectoryPrefixedEntries(ctx context.Context, dirPat
return lastFileName, err return lastFileName, err
} }
var callbackErr error var callbackErr error
err = txn.RunInTxn(func(txn *txnkv.KVTxn) error {
err = txn.RunInTxn(ctx, func(txn *txnkv.KVTxn) error {
iter, err := txn.Iter(lastFileStart, nil) iter, err := txn.Iter(lastFileStart, nil)
if err != nil { if err != nil {
return err return err
@ -353,15 +378,14 @@ type TxnWrapper struct {
inContext bool inContext bool
} }
func (w *TxnWrapper) RunInTxn(f func(txn *txnkv.KVTxn) error) error {
func (w *TxnWrapper) RunInTxn(ctx context.Context, f func(txn *txnkv.KVTxn) error) error {
err := f(w.KVTxn) err := f(w.KVTxn)
if !w.inContext { if !w.inContext {
if err != nil { if err != nil {
w.KVTxn.Rollback() w.KVTxn.Rollback()
return err return err
} }
w.KVTxn.Commit(context.Background())
return nil
return w.KVTxn.Commit(ctx)
} }
return err return err
} }

6
weed/filer/tikv/tikv_store_kv.go

@ -15,7 +15,7 @@ func (store *TikvStore) KvPut(ctx context.Context, key []byte, value []byte) err
if err != nil { if err != nil {
return err return err
} }
return tw.RunInTxn(func(txn *txnkv.KVTxn) error {
return tw.RunInTxn(ctx, func(txn *txnkv.KVTxn) error {
return txn.Set(key, value) return txn.Set(key, value)
}) })
} }
@ -26,7 +26,7 @@ func (store *TikvStore) KvGet(ctx context.Context, key []byte) ([]byte, error) {
return nil, err return nil, err
} }
var data []byte = nil var data []byte = nil
err = tw.RunInTxn(func(txn *txnkv.KVTxn) error {
err = tw.RunInTxn(ctx, func(txn *txnkv.KVTxn) error {
val, err := txn.Get(context.TODO(), key) val, err := txn.Get(context.TODO(), key)
if err == nil { if err == nil {
data = val data = val
@ -44,7 +44,7 @@ func (store *TikvStore) KvDelete(ctx context.Context, key []byte) error {
if err != nil { if err != nil {
return err return err
} }
return tw.RunInTxn(func(txn *txnkv.KVTxn) error {
return tw.RunInTxn(ctx, func(txn *txnkv.KVTxn) error {
return txn.Delete(key) return txn.Delete(key)
}) })
} }
Loading…
Cancel
Save