From b6d70a59023af60cbf2d16c19ec51be29a200043 Mon Sep 17 00:00:00 2001 From: Antonio SJ Musumeci Date: Wed, 11 Mar 2026 19:15:11 -0500 Subject: [PATCH] Rework string utils, more test cases --- src/branches.cpp | 13 ++- src/config.cpp | 5 +- src/config_set.cpp | 3 +- src/fs_xattr.cpp | 8 +- src/mergerfs_api.cpp | 2 +- src/str.cpp | 240 ++++++++++++++++++++----------------------- src/str.hpp | 111 +++++++++++--------- tests/tests.cpp | 207 ++++++++++++++++++++++++++++++++++--- 8 files changed, 377 insertions(+), 212 deletions(-) diff --git a/src/branches.cpp b/src/branches.cpp index a468da0b..716578e9 100644 --- a/src/branches.cpp +++ b/src/branches.cpp @@ -135,7 +135,7 @@ namespace l std::string options; std::vector v; - str::rsplit1(str_,'=',&v); + v = str::rsplit1(str_,'='); switch(v.size()) { case 1: @@ -145,8 +145,7 @@ namespace l case 2: *glob_ = v[0]; options = v[1]; - v.clear(); - str::split(options,',',&v); + v = str::split(options,','); switch(v.size()) { case 2: @@ -229,7 +228,7 @@ namespace l StrVec paths; Branches::Impl tmp_branches(branches_->minfreespace()); - str::split(str_,':',&paths); + paths = str::split(str_,':'); for(auto &path : paths) { rv = l::parse(path,&tmp_branches); @@ -251,7 +250,7 @@ namespace l std::vector paths; Branches::Impl tmp_branches(branches_->minfreespace()); - str::split(str_,':',&paths); + paths = str::split(str_,':'); for(auto &path : paths) { rv = l::parse(path,&tmp_branches); @@ -275,7 +274,7 @@ namespace l StrVec paths; Branches::Impl tmp_branches(branches_->minfreespace()); - str::split(str_,':',&paths); + paths = str::split(str_,':'); for(auto &path : paths) { rv = l::parse(path,&tmp_branches); @@ -315,7 +314,7 @@ namespace l { StrVec patterns; - str::split(str_,':',&patterns); + patterns = str::split(str_,':'); for(auto i = branches_->begin(); i != branches_->end();) { int match = FNM_NOMATCH; diff --git a/src/config.cpp b/src/config.cpp index 2304ae3f..6569ca4b 100644 --- a/src/config.cpp +++ b/src/config.cpp @@ -430,10 +430,7 @@ Config::set(const std::string &key_, int Config::set(const std::string &kv_) { - std::string key; - std::string val; - - str::splitkv(kv_,'=',&key,&val); + auto [key, val] = str::splitkv(kv_,'='); key = str::trim(key); val = str::trim(val); diff --git a/src/config_set.cpp b/src/config_set.cpp index eee62454..82f40c72 100644 --- a/src/config_set.cpp +++ b/src/config_set.cpp @@ -37,7 +37,8 @@ ConfigSet::from_string(const std::string_view str_) { this->clear(); - str::split(str_,'|',this); + auto tmp = str::split_to_set(str_,'|'); + this->insert(tmp.begin(), tmp.end()); return 0; } diff --git a/src/fs_xattr.cpp b/src/fs_xattr.cpp index e1c4e188..7615245d 100644 --- a/src/fs_xattr.cpp +++ b/src/fs_xattr.cpp @@ -90,8 +90,8 @@ fs::xattr::list(const int fd_, rv = fs::xattr::list(fd_,&attrs); if(rv > 0) { - string tmp(attrs.begin(),attrs.end()); - str::split(tmp,'\0',attrvector_); + string tmp(attrs.begin(),attrs.end()); + *attrvector_ = str::split(tmp,'\0'); } return rv; @@ -107,8 +107,8 @@ fs::xattr::list(const string &path_, rv = fs::xattr::list(path_,&attrs); if(rv > 0) { - string tmp(attrs.begin(),attrs.end()); - str::split(tmp,'\0',attrvector_); + string tmp(attrs.begin(),attrs.end()); + *attrvector_ = str::split(tmp,'\0'); } return rv; diff --git a/src/mergerfs_api.cpp b/src/mergerfs_api.cpp index 5c335dc1..ef9a8212 100644 --- a/src/mergerfs_api.cpp +++ b/src/mergerfs_api.cpp @@ -85,7 +85,7 @@ mergerfs::api::allpaths(const std::string &input_path_, if(rv < 0) return rv; - str::split_on_null(val,&output_paths_); + output_paths_ = str::split_on_null(val); return 0; } diff --git a/src/str.cpp b/src/str.cpp index b82b0ef0..5889d92d 100644 --- a/src/str.cpp +++ b/src/str.cpp @@ -26,141 +26,127 @@ #include #include #include +#include #include #include -using std::istringstream; using std::set; using std::string; using std::string_view; using std::vector; -void -str::split(const string_view str_, - const char delimiter_, - vector *result_) +std::vector +str::split(const string_view str_, + const char delimiter_) { - size_t pos; - size_t start; - size_t length; + std::vector result; - assert(result_ != nullptr); if(str_.empty()) - return; + return result; - start = 0; - pos = str_.find(delimiter_,start); + size_t start = 0; + size_t pos = str_.find(delimiter_, start); while(pos != std::string_view::npos) { - length = (pos - start); - result_->push_back(std::string{str_.substr(start,length)}); + size_t length = (pos - start); + result.emplace_back(str_.substr(start, length)); start = pos + 1; - pos = str_.find(delimiter_,start); + pos = str_.find(delimiter_, start); } - result_->push_back(std::string{str_.substr(start)}); + result.emplace_back(str_.substr(start)); + + return result; } -void -str::split(const string_view str_, - const char delimiter_, - set *result_) +std::set +str::split_to_set(const string_view str_, + const char delimiter_) { - size_t pos; - size_t start; - size_t length; + std::set result; - assert(result_ != nullptr); if(str_.empty()) - return; + return result; - start = 0; - pos = str_.find(delimiter_,start); + size_t start = 0; + size_t pos = str_.find(delimiter_, start); while(pos != std::string_view::npos) { - length = (pos - start); - result_->insert(std::string{str_.substr(start,length)}); + size_t length = (pos - start); + result.emplace(str_.substr(start, length)); start = pos + 1; - pos = str_.find(delimiter_,start); + pos = str_.find(delimiter_, start); } - result_->insert(std::string{str_.substr(start)}); + result.emplace(str_.substr(start)); + + return result; } -void -str::split_on_null(const std::string_view str_, - std::vector *result_) +std::vector +str::split_on_null(const std::string_view str_) { - return str::split(str_,'\0',result_); + return str::split(str_, '\0'); } -void -str::lsplit1(const string_view &str_, - const char delimiter_, - vector *result_) +std::vector +str::lsplit1(const string_view str_, + const char delimiter_) { - std::size_t off; + std::vector result; - assert(result_ != nullptr); if(str_.empty()) - return; + return result; - off = str_.find(delimiter_); + std::size_t off = str_.find(delimiter_); if(off == std::string_view::npos) { - result_->push_back(std::string{str_}); + result.emplace_back(str_); } else { - result_->push_back(std::string{str_.substr(0,off)}); - result_->push_back(std::string{str_.substr(off+1)}); + result.emplace_back(str_.substr(0, off)); + result.emplace_back(str_.substr(off + 1)); } + + return result; } -void -str::rsplit1(const string_view &str_, - const char delimiter_, - vector *result_) +std::vector +str::rsplit1(const string_view str_, + const char delimiter_) { - std::size_t off; + std::vector result; - assert(result_ != nullptr); if(str_.empty()) - return; + return result; - off = str_.rfind(delimiter_); - if(off == std::string::npos) + std::size_t off = str_.rfind(delimiter_); + if(off == std::string_view::npos) { - result_->push_back(std::string{str_}); + result.emplace_back(str_); } else { - result_->push_back(std::string{str_.substr(0,off)}); - result_->push_back(std::string{str_.substr(off+1)}); + result.emplace_back(str_.substr(0, off)); + result.emplace_back(str_.substr(off + 1)); } + + return result; } -void -str::splitkv(const std::string_view str_, - const char delimiter_, - std::string *key_, - std::string *val_) +std::pair +str::splitkv(const std::string_view str_, + const char delimiter_) { - size_t pos; - - pos = str_.find(delimiter_); + std::size_t pos = str_.find(delimiter_); if(pos != std::string_view::npos) - { - *key_ = str_.substr(0, pos); - *val_ = str_.substr(pos + 1); - } - else - { - *key_ = str_; - *val_ = ""; - } + return {std::string{str_.substr(0, pos)}, + std::string{str_.substr(pos + 1)}}; + + return {std::string{str_}, std::string{}}; } string @@ -182,7 +168,7 @@ string str::join(const vector &vec_, const char sep_) { - return str::join(vec_,0,sep_); + return str::join(vec_, 0, sep_); } string @@ -191,9 +177,14 @@ str::join(const set &set_, { string rv; + if(set_.empty()) + return {}; + for(auto const &s : set_) rv += s + sep_; - rv.pop_back(); + + if(!rv.empty()) + rv.pop_back(); return rv; } @@ -222,11 +213,9 @@ str::longest_common_prefix_index(const vector &vec_) string str::longest_common_prefix(const vector &vec_) { - size_t idx; - - idx = longest_common_prefix_index(vec_); + size_t idx = longest_common_prefix_index(vec_); if(idx != string::npos) - return vec_[0].substr(0,idx); + return vec_[0].substr(0, idx); return string(); } @@ -235,32 +224,27 @@ string str::remove_common_prefix_and_join(const vector &vec_, const char sep_) { - size_t idx; - - idx = str::longest_common_prefix_index(vec_); + size_t idx = str::longest_common_prefix_index(vec_); if(idx == string::npos) idx = 0; - return str::join(vec_,idx,sep_); + return str::join(vec_, idx, sep_); } void str::erase_fnmatches(const vector &patterns_, vector &strs_) { - vector::iterator si; - vector::const_iterator pi; - - si = strs_.begin(); + auto si = strs_.begin(); while(si != strs_.end()) { int match = FNM_NOMATCH; - for(pi = patterns_.begin(); + for(auto pi = patterns_.begin(); pi != patterns_.end() && match != 0; ++pi) { - match = fnmatch(pi->c_str(),si->c_str(),0); + match = fnmatch(pi->c_str(), si->c_str(), 0); } if(match == 0) @@ -271,58 +255,57 @@ str::erase_fnmatches(const vector &patterns_, } bool -str::isprefix(const string &s0_, - const string &s1_) -{ - return ((s0_.size() >= s1_.size()) && - (s0_.compare(0,s1_.size(),s1_) == 0)); -} - -bool -str::startswith(const string &str_, - const string &prefix_) +str::startswith(const string_view str_, + const string_view prefix_) noexcept { - return ((str_.size() >= prefix_.size()) && - (str_.compare(0,prefix_.size(),prefix_) == 0)); + if(prefix_.size() > str_.size()) + return false; + return str_.compare(0, prefix_.size(), prefix_) == 0; } bool -str::endswith(const string &str_, - const string &suffix_) +str::endswith(const string_view str_, + const string_view suffix_) noexcept { if(suffix_.size() > str_.size()) return false; - - return std::equal(suffix_.rbegin(), - suffix_.rend(), - str_.rbegin()); + return str_.compare(str_.size() - suffix_.size(), + suffix_.size(), + suffix_) == 0; } std::string -str::trim(const std::string &str_) +str::trim(const std::string_view str_) { - std::string rv; + if(str_.empty()) + return std::string{}; - rv = str_; + auto start = str_.begin(); + auto end = str_.end(); - while(!rv.empty() && std::isspace(static_cast(rv[0]))) - rv.erase(0,1); - while(!rv.empty() && std::isspace(static_cast(rv[rv.size()-1]))) - rv.erase(rv.size()-1,1); + while(start != end && std::isspace(static_cast(*start))) + ++start; - return rv; + if(start == end) + return std::string{}; + + --end; + while(end != start && std::isspace(static_cast(*end))) + --end; + + return std::string{start, end + 1}; } bool str::eq(const char *s0_, - const char *s1_) + const char *s1_) noexcept { - return (strcmp(s0_,s1_) == 0); + return (std::strcmp(s0_, s1_) == 0); } bool str::startswith(const char *s_, - const char *p_) + const char *p_) noexcept { while(*p_) { @@ -337,16 +320,15 @@ str::startswith(const char *s_, } std::string -str::replace(const std::string &s_, - const char src_, - const char dst_) +str::replace(const std::string_view str_, + const char src_, + const char dst_) { - std::string s(s_); + std::string result; + result.reserve(str_.size()); - std::replace(s.begin(), - s.end(), - src_, - dst_); + for(char c : str_) + result.push_back(c == src_ ? dst_ : c); - return s; + return result; } diff --git a/src/str.hpp b/src/str.hpp index 9000ba16..1050926e 100644 --- a/src/str.hpp +++ b/src/str.hpp @@ -20,92 +20,101 @@ #include #include +#include +#include #include namespace str { - void - split(const std::string_view str, - const char delimiter, - std::vector *result); - - void - split(const std::string_view str, - const char delimiter, - std::set *result); - - void - split_on_null(const std::string_view str, - std::vector *result); - - void - lsplit1(const std::string_view &str, - const char delimiter, - std::vector *result); - - void - rsplit1(const std::string_view &str, - const char delimiter, - std::vector *result); - - void - splitkv(const std::string_view str, - const char delimiter, - std::string *key, - std::string *value); - + [[nodiscard]] + std::vector + split(const std::string_view str, + const char delimiter); + + [[nodiscard]] + std::set + split_to_set(const std::string_view str, + const char delimiter); + + [[nodiscard]] + std::vector + split_on_null(const std::string_view str); + + [[nodiscard]] + std::vector + lsplit1(const std::string_view str, + const char delimiter); + + [[nodiscard]] + std::vector + rsplit1(const std::string_view str, + const char delimiter); + + [[nodiscard]] + std::pair + splitkv(const std::string_view str, + const char delimiter); + + [[nodiscard]] std::string join(const std::vector &vec, - const size_t substridx, - const char sep); + const size_t substridx, + const char sep); + [[nodiscard]] std::string join(const std::vector &vec, - const char sep); + const char sep); + [[nodiscard]] std::string join(const std::set &s, - const char sep); + const char sep); + [[nodiscard]] size_t longest_common_prefix_index(const std::vector &vec); + [[nodiscard]] std::string longest_common_prefix(const std::vector &vec); + [[nodiscard]] std::string remove_common_prefix_and_join(const std::vector &vec, - const char sep); + const char sep); void - erase_fnmatches(const std::vector &pattern, - std::vector &strs); - - bool - isprefix(const std::string &s0, - const std::string &s1); + erase_fnmatches(const std::vector &patterns, + std::vector &strs); + [[nodiscard]] bool - startswith(const std::string &str_, - const std::string &prefix_); + startswith(const std::string_view str, + const std::string_view prefix) noexcept; + [[nodiscard]] bool startswith(const char *str, - const char *prefix); + const char *prefix) noexcept; + [[nodiscard]] bool - endswith(const std::string &str_, - const std::string &suffix_); + endswith(const std::string_view str, + const std::string_view suffix) noexcept; + [[nodiscard]] std::string - trim(const std::string &str); + trim(const std::string_view str); + [[nodiscard]] bool eq(const char *s0, - const char *s1); + const char *s1) noexcept; + [[nodiscard]] std::string - replace(const std::string &s, - const char src, - const char dst); + replace(const std::string_view str, + const char src, + const char dst); } diff --git a/tests/tests.cpp b/tests/tests.cpp index 6a128e56..3ef94908 100644 --- a/tests/tests.cpp +++ b/tests/tests.cpp @@ -28,7 +28,7 @@ test_config_bool() void test_config_uint64() { - ConfigUINT64 v; + ConfigU64 v; TEST_CHECK(v.from_string("0") == 0); TEST_CHECK(v == (uint64_t)0); @@ -79,45 +79,222 @@ void test_str_stuff() { std::vector v; + std::set s; + std::pair kv; - v.clear(); - str::split("",':',&v); + // split() - vector + v = str::split("",':'); TEST_CHECK(v.size() == 0); - v.clear(); - str::split("a:b:c",':',&v); + v = str::split("a:b:c",':'); TEST_CHECK(v.size() == 3); TEST_CHECK(v[0] == "a"); TEST_CHECK(v[1] == "b"); TEST_CHECK(v[2] == "c"); - v.clear(); - str::split("a::b:c",':',&v); + v = str::split("a::b:c",':'); TEST_CHECK(v.size() == 4); TEST_CHECK(v[0] == "a"); TEST_CHECK(v[1] == ""); TEST_CHECK(v[2] == "b"); TEST_CHECK(v[3] == "c"); - v.clear(); - str::lsplit1("foo=bar=baz",'=',&v); + v = str::split("single",':'); + TEST_CHECK(v.size() == 1); + TEST_CHECK(v[0] == "single"); + + // split_to_set() + s = str::split_to_set("",':'); + TEST_CHECK(s.size() == 0); + + s = str::split_to_set("a:b:c",':'); + TEST_CHECK(s.size() == 3); + TEST_CHECK(s.count("a") == 1); + TEST_CHECK(s.count("b") == 1); + TEST_CHECK(s.count("c") == 1); + + s = str::split_to_set("a:a:b",':'); + TEST_CHECK(s.size() == 2); + TEST_CHECK(s.count("a") == 1); + TEST_CHECK(s.count("b") == 1); + + // split_on_null() + std::string nullstr = "hello"; + nullstr += '\0'; + nullstr += "world"; + nullstr += '\0'; + nullstr += "test"; + v = str::split_on_null(std::string_view(nullstr.data(), nullstr.size())); + TEST_CHECK(v.size() == 3); + TEST_CHECK(v[0] == "hello"); + TEST_CHECK(v[1] == "world"); + TEST_CHECK(v[2] == "test"); + + v = str::split_on_null(""); + TEST_CHECK(v.size() == 0); + + // lsplit1() + v = str::lsplit1("foo=bar=baz",'='); TEST_CHECK(v.size() == 2); TEST_CHECK(v[0] == "foo"); TEST_CHECK(v[1] == "bar=baz"); - v.clear(); - str::lsplit1("",'=',&v); + v = str::lsplit1("",'='); TEST_CHECK(v.size() == 0); - v.clear(); - str::rsplit1("foo=bar=baz",'=',&v); + v = str::lsplit1("no_delimiter",'='); + TEST_CHECK(v.size() == 1); + TEST_CHECK(v[0] == "no_delimiter"); + + // rsplit1() + v = str::rsplit1("foo=bar=baz",'='); TEST_CHECK(v.size() == 2); TEST_CHECK(v[0] == "foo=bar"); TEST_CHECK(v[1] == "baz"); - v.clear(); - str::rsplit1("",'=',&v); + v = str::rsplit1("",'='); TEST_CHECK(v.size() == 0); + + v = str::rsplit1("no_delimiter",'='); + TEST_CHECK(v.size() == 1); + TEST_CHECK(v[0] == "no_delimiter"); + + // splitkv() + kv = str::splitkv("key=value", '='); + TEST_CHECK(kv.first == "key"); + TEST_CHECK(kv.second == "value"); + + kv = str::splitkv("key=", '='); + TEST_CHECK(kv.first == "key"); + TEST_CHECK(kv.second == ""); + + kv = str::splitkv("key", '='); + TEST_CHECK(kv.first == "key"); + TEST_CHECK(kv.second == ""); + + kv = str::splitkv("", '='); + TEST_CHECK(kv.first == ""); + TEST_CHECK(kv.second == ""); + + // join() - vector with substr + v = {"abc", "def", "ghi"}; + std::string joined = str::join(v, 1, ':'); + TEST_CHECK(joined == "bc:ef:hi"); + + // join() - vector + joined = str::join(v, ':'); + TEST_CHECK(joined == "abc:def:ghi"); + + v = {"single"}; + joined = str::join(v, ':'); + TEST_CHECK(joined == "single"); + + v = {}; + joined = str::join(v, ':'); + TEST_CHECK(joined == ""); + + // join() - set + s = {"a", "b", "c"}; + joined = str::join(s, ':'); + TEST_CHECK(joined == "a:b:c"); + + s = {}; + joined = str::join(s, ':'); + TEST_CHECK(joined == ""); + + // longest_common_prefix_index() + v = {"/foo/bar", "/foo/baz", "/foo/qux"}; + TEST_CHECK(str::longest_common_prefix_index(v) == 5); + + v = {"/foo/bar"}; + TEST_CHECK(str::longest_common_prefix_index(v) == std::string::npos); + + v = {}; + TEST_CHECK(str::longest_common_prefix_index(v) == std::string::npos); + + v = {"abc", "xyz"}; + TEST_CHECK(str::longest_common_prefix_index(v) == 0); + + // longest_common_prefix() + v = {"/foo/bar", "/foo/baz"}; + TEST_CHECK(str::longest_common_prefix(v) == "/foo/ba"); + + v = {"/foo/bar"}; // Single element - returns empty string (implementation detail) + TEST_CHECK(str::longest_common_prefix(v) == ""); + + v = {}; + TEST_CHECK(str::longest_common_prefix(v) == ""); + + // remove_common_prefix_and_join() + v = {"/foo/bar", "/foo/baz", "/foo/qux"}; + joined = str::remove_common_prefix_and_join(v, ':'); + TEST_CHECK(joined == "bar:baz:qux"); + + v = {}; + joined = str::remove_common_prefix_and_join(v, ':'); + TEST_CHECK(joined == ""); + + // startswith() - string_view + TEST_CHECK(str::startswith("hello world", "hello") == true); + TEST_CHECK(str::startswith("hello world", "world") == false); + TEST_CHECK(str::startswith("hello", "hello world") == false); + TEST_CHECK(str::startswith("", "") == true); + TEST_CHECK(str::startswith("hello", "") == true); + + // startswith() - char* + TEST_CHECK(str::startswith("hello world", "hello") == true); + TEST_CHECK(str::startswith("hello world", "world") == false); + + // endswith() + TEST_CHECK(str::endswith("hello world", "world") == true); + TEST_CHECK(str::endswith("hello world", "hello") == false); + TEST_CHECK(str::endswith("hello", "hello world") == false); + TEST_CHECK(str::endswith("", "") == true); + TEST_CHECK(str::endswith("hello", "") == true); + + // trim() + TEST_CHECK(str::trim(" hello ") == "hello"); + TEST_CHECK(str::trim("hello") == "hello"); + TEST_CHECK(str::trim(" ") == ""); + TEST_CHECK(str::trim("") == ""); + TEST_CHECK(str::trim("\t\nhello\r\n") == "hello"); + TEST_CHECK(str::trim(" hello world ") == "hello world"); + + // eq() + TEST_CHECK(str::eq("hello", "hello") == true); + TEST_CHECK(str::eq("hello", "world") == false); + TEST_CHECK(str::eq("", "") == true); + + // replace() + TEST_CHECK(str::replace("hello world", ' ', '_') == "hello_world"); + TEST_CHECK(str::replace("abc", 'x', 'y') == "abc"); + TEST_CHECK(str::replace("", 'x', 'y') == ""); + TEST_CHECK(str::replace("aaa", 'a', 'b') == "bbb"); + + // erase_fnmatches() + v = {"foo.txt", "bar.cpp", "baz.h", "test.txt"}; + std::vector patterns = {"*.txt"}; + str::erase_fnmatches(patterns, v); + TEST_CHECK(v.size() == 2); + // Order may vary, just check elements exist + bool has_bar_cpp = false; + bool has_baz_h = false; + for(const auto& s : v) { + if(s == "bar.cpp") has_bar_cpp = true; + if(s == "baz.h") has_baz_h = true; + } + TEST_CHECK(has_bar_cpp); + TEST_CHECK(has_baz_h); + + v = {"foo.txt", "bar.cpp"}; + patterns = {"*.txt", "*.cpp"}; + str::erase_fnmatches(patterns, v); + TEST_CHECK(v.size() == 0); + + v = {"foo.txt", "bar.cpp"}; + patterns = {}; + str::erase_fnmatches(patterns, v); + TEST_CHECK(v.size() == 2); } void