From 48db56ddade2ef076fae4498ca9357a280270bc5 Mon Sep 17 00:00:00 2001 From: gfx <86091021+gfxlabs@users.noreply.github.com> Date: Fri, 9 Sep 2022 11:43:42 -0500 Subject: [PATCH] arangodb s3 bucket name compatibility (#3588) * Update arangodb_store.go * update readme, properly escape queries, add name patching * use underscore * use underscore * better comment * fix readme Co-authored-by: a --- weed/filer/arangodb/arangodb_store.go | 10 ++++---- weed/filer/arangodb/helpers.go | 22 ++++++++++++++++-- weed/filer/arangodb/readme.md | 33 +++++++++++++++++++++++++++ 3 files changed, 58 insertions(+), 7 deletions(-) diff --git a/weed/filer/arangodb/arangodb_store.go b/weed/filer/arangodb/arangodb_store.go index 25ef60bf0..ab5f8db4f 100644 --- a/weed/filer/arangodb/arangodb_store.go +++ b/weed/filer/arangodb/arangodb_store.go @@ -66,8 +66,8 @@ func (store *ArangodbStore) Initialize(configuration util.Configuration, prefix } func (store *ArangodbStore) connection(uris []string, user string, pass string, insecure bool) (err error) { - ctx, _ := context.WithTimeout(context.Background(), 10*time.Second) - + ctx, cn := context.WithTimeout(context.Background(), 10*time.Second) + defer cn() store.connect, err = http.NewConnection(http.ConnectionConfig{ Endpoints: uris, TLSConfig: &tls.Config{ @@ -274,10 +274,10 @@ func (store *ArangodbStore) DeleteFolderChildren(ctx context.Context, fullpath u for d in %s filter starts_with(d.directory, "%s/") || d.directory == "%s" remove d._key in %s`, - targetCollection.Name(), + "`"+targetCollection.Name()+"`", strings.Join(strings.Split(string(fullpath), "/"), ","), string(fullpath), - targetCollection.Name(), + "`"+targetCollection.Name()+"`", ) cur, err := store.database.Query(ctx, query, nil) if err != nil { @@ -296,7 +296,7 @@ func (store *ArangodbStore) ListDirectoryPrefixedEntries(ctx context.Context, di if err != nil { return lastFileName, err } - query := "for d in " + targetCollection.Name() + query := "for d in " + "`" + 
targetCollection.Name() + "`" if includeStartFile { query = query + " filter d.name >= \"" + startFileName + "\" " } else { diff --git a/weed/filer/arangodb/helpers.go b/weed/filer/arangodb/helpers.go index 3f36acb0a..776e6d1b8 100644 --- a/weed/filer/arangodb/helpers.go +++ b/weed/filer/arangodb/helpers.go @@ -12,7 +12,7 @@ import ( "github.com/seaweedfs/seaweedfs/weed/util" ) -//convert a string into arango-key safe hex bytes hash +// convert a string into arango-key safe hex bytes hash func hashString(dir string) string { h := md5.New() io.WriteString(h, dir) @@ -98,8 +98,26 @@ func (store *ArangodbStore) ensureBucket(ctx context.Context, bucket string) (bc return store.buckets[bucket], nil } +// transform to an arango compliant name +func bucketToCollectionName(s string) string { + if len(s) == 0 { + return "" + } + // replace all "." with _ + s = strings.ReplaceAll(s, ".", "_") + + // if starts with number or '.' then add a special prefix + if (s[0] >= '0' && s[0] <= '9') || (s[0] == '.' || s[0] == '_' || s[0] == '-') { + s = "xN--" + s + } + return s +} + // creates collection if not exist, ensures indices if not exist -func (store *ArangodbStore) ensureCollection(ctx context.Context, name string) (c driver.Collection, err error) { +func (store *ArangodbStore) ensureCollection(ctx context.Context, bucket_name string) (c driver.Collection, err error) { + // convert the bucket to collection name + name := bucketToCollectionName(bucket_name) + ok, err := store.database.CollectionExists(ctx, name) if err != nil { return diff --git a/weed/filer/arangodb/readme.md b/weed/filer/arangodb/readme.md index e189811fb..57a594592 100644 --- a/weed/filer/arangodb/readme.md +++ b/weed/filer/arangodb/readme.md @@ -22,6 +22,39 @@ i test using this dev database: `docker run -p 8529:8529 -e ARANGO_ROOT_PASSWORD=test arangodb/arangodb:3.9.0` + +## database structure + + +arangodb has a few restrictions which require the use of a few tricks in order to losslessly store the data. 
+ +### filer store + +arangodb does not support []byte, and will store such data as a uint64 array. this would be a waste of space. to counteract this, we store the data as a length-prefixed uint64 byteset. + +### filer kv + +same as above + +### filer buckets + +s3 buckets are implemented as arangodb collections. this allows us to do very fast bucket deletion by simply deleting the collection. + + +arangodb collection names are limited to the character set `a-zA-Z0-9_-`, with a 256 character max. however, the first character must be a letter. + + +s3 bucket names are limited to the character set `a-zA-Z0-9.-`, with a 63 character max. + +the rules for mapping a bucket name to a collection name are then the following: + +1. if the bucket name is a valid arangodb collection name, then nothing is done. +2. if the bucket name contains a ".", every "." is replaced with "_". +3. if the bucket name now begins with a number, "_", or "-", the prefix "xN--" is prepended to the collection name. + +this allows such bucket names to be stored as valid collection names. + + ## features i don't personally need but are missing [ ] provide tls cert to arango [ ] authentication that is not basic auth