Skip to content

Commit

Permalink
Avoid unnecessary copying when upper-casing or lower-casing ASCII string_view
Browse files Browse the repository at this point in the history

PiperOrigin-RevId: 655151660
Change-Id: I1aeb8eaeb3892eebcd31f28c646677dc82a267af
  • Loading branch information
Abseil Team authored and copybara-github committed Jul 23, 2024
1 parent 58df17f commit 5ea745c
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 16 deletions.
37 changes: 25 additions & 12 deletions absl/strings/ascii.cc
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,7 @@ constexpr bool AsciiInAZRange(unsigned char c) {
// Force-inline so the compiler won't merge the short and long implementations.
template <bool ToUpper>
ABSL_ATTRIBUTE_ALWAYS_INLINE inline constexpr void AsciiStrCaseFoldImpl(
absl::Nonnull<char*> p, size_t size) {
absl::Nonnull<char*> dst, absl::Nonnull<const char*> src, size_t size) {
// The upper- and lowercase versions of ASCII characters differ by only 1 bit.
// When we need to flip the case, we can xor with this bit to achieve the
// desired result. Note that the choice of 'a' and 'A' here is arbitrary. We
Expand All @@ -189,9 +189,9 @@ ABSL_ATTRIBUTE_ALWAYS_INLINE inline constexpr void AsciiStrCaseFoldImpl(
constexpr unsigned char kAsciiCaseBitFlip = 'a' ^ 'A';

for (size_t i = 0; i < size; ++i) {
unsigned char v = static_cast<unsigned char>(p[i]);
unsigned char v = static_cast<unsigned char>(src[i]);
v ^= AsciiInAZRange<ToUpper>(v) ? kAsciiCaseBitFlip : 0;
p[i] = static_cast<char>(v);
dst[i] = static_cast<char>(v);
}
}

Expand All @@ -201,17 +201,28 @@ constexpr size_t kCaseFoldThreshold = 16;
// No-inline so the compiler won't merge the short and long implementations.
template <bool ToUpper>
ABSL_ATTRIBUTE_NOINLINE constexpr void AsciiStrCaseFoldLong(
absl::Nonnull<char*> p, size_t size) {
absl::Nonnull<char*> dst, absl::Nonnull<const char*> src, size_t size) {
ABSL_ASSUME(size >= kCaseFoldThreshold);
AsciiStrCaseFoldImpl<ToUpper>(p, size);
AsciiStrCaseFoldImpl<ToUpper>(dst, src, size);
}

// Splitting to short and long strings to allow vectorization decisions
// to be made separately in the long and short cases.
template <bool ToUpper>
constexpr void AsciiStrCaseFold(absl::Nonnull<char*> p, size_t size) {
size < kCaseFoldThreshold ? AsciiStrCaseFoldImpl<ToUpper>(p, size)
: AsciiStrCaseFoldLong<ToUpper>(p, size);
constexpr void AsciiStrCaseFold(absl::Nonnull<char*> dst,
absl::Nonnull<const char*> src, size_t size) {
size < kCaseFoldThreshold ? AsciiStrCaseFoldImpl<ToUpper>(dst, src, size)
: AsciiStrCaseFoldLong<ToUpper>(dst, src, size);
}

// Lower-cases while copying: reads `n` chars from `src` and stores the folded
// result at `dst`. Forwards to AsciiStrCaseFold with ToUpper = false. The
// in-place callers in this file pass the same pointer for `dst` and `src`.
void AsciiStrToLower(absl::Nonnull<char*> dst, absl::Nonnull<const char*> src,
                     size_t n) {
  AsciiStrCaseFold</*ToUpper=*/false>(dst, src, n);
}

// Upper-cases while copying: reads `n` chars from `src` and stores the folded
// result at `dst`. Forwards to AsciiStrCaseFold with ToUpper = true. The
// in-place callers in this file pass the same pointer for `dst` and `src`.
void AsciiStrToUpper(absl::Nonnull<char*> dst, absl::Nonnull<const char*> src,
                     size_t n) {
  AsciiStrCaseFold</*ToUpper=*/true>(dst, src, n);
}

static constexpr size_t ValidateAsciiCasefold() {
Expand All @@ -222,8 +233,8 @@ static constexpr size_t ValidateAsciiCasefold() {
for (unsigned int i = 0; i < num_chars; ++i) {
uppered[i] = lowered[i] = static_cast<char>(i);
}
AsciiStrCaseFold<false>(&lowered[0], num_chars);
AsciiStrCaseFold<true>(&uppered[0], num_chars);
AsciiStrCaseFold<false>(&lowered[0], &lowered[0], num_chars);
AsciiStrCaseFold<true>(&uppered[0], &uppered[0], num_chars);
for (size_t i = 0; i < num_chars; ++i) {
const char ch = static_cast<char>(i),
ch_upper = ('a' <= ch && ch <= 'z' ? 'A' + (ch - 'a') : ch),
Expand All @@ -241,11 +252,13 @@ static_assert(ValidateAsciiCasefold() == 0, "error in case conversion");
} // namespace ascii_internal

void AsciiStrToLower(absl::Nonnull<std::string*> s) {
return ascii_internal::AsciiStrCaseFold<false>(&(*s)[0], s->size());
char* p = &(*s)[0];
return ascii_internal::AsciiStrCaseFold<false>(p, p, s->size());
}

void AsciiStrToUpper(absl::Nonnull<std::string*> s) {
return ascii_internal::AsciiStrCaseFold<true>(&(*s)[0], s->size());
char* p = &(*s)[0];
return ascii_internal::AsciiStrCaseFold<true>(p, p, s->size());
}

void RemoveExtraAsciiWhitespace(absl::Nonnull<std::string*> str) {
Expand Down
17 changes: 13 additions & 4 deletions absl/strings/ascii.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@
#include "absl/base/attributes.h"
#include "absl/base/config.h"
#include "absl/base/nullability.h"
#include "absl/strings/internal/resize_uninitialized.h"
#include "absl/strings/string_view.h"

namespace absl {
Expand All @@ -74,6 +75,12 @@ ABSL_DLL extern const char kToUpper[256];
// Declaration for the array of characters to lower-case characters.
ABSL_DLL extern const char kToLower[256];

// Stores at `dst` the lower-cased form of the `n` chars read from `src`.
// Declared in this header so the inline string_view overload of
// absl::AsciiStrToLower can write straight into its result buffer; the
// definition is in ascii.cc.
void AsciiStrToLower(absl::Nonnull<char*> dst, absl::Nonnull<const char*> src,
size_t n);

// Stores at `dst` the upper-cased form of the `n` chars read from `src`.
// Declared in this header so the inline string_view overload of
// absl::AsciiStrToUpper can write straight into its result buffer; the
// definition is in ascii.cc.
void AsciiStrToUpper(absl::Nonnull<char*> dst, absl::Nonnull<const char*> src,
size_t n);

} // namespace ascii_internal

// ascii_isalpha()
Expand Down Expand Up @@ -171,8 +178,9 @@ void AsciiStrToLower(absl::Nonnull<std::string*> s);

// Creates a lowercase string from a given absl::string_view.
ABSL_MUST_USE_RESULT inline std::string AsciiStrToLower(absl::string_view s) {
std::string result(s);
absl::AsciiStrToLower(&result);
std::string result;
strings_internal::STLStringResizeUninitialized(&result, s.size());
ascii_internal::AsciiStrToLower(&result[0], s.data(), s.size());
return result;
}

Expand All @@ -189,8 +197,9 @@ void AsciiStrToUpper(absl::Nonnull<std::string*> s);

// Creates an uppercase string from a given absl::string_view.
ABSL_MUST_USE_RESULT inline std::string AsciiStrToUpper(absl::string_view s) {
std::string result(s);
absl::AsciiStrToUpper(&result);
std::string result;
strings_internal::STLStringResizeUninitialized(&result, s.size());
ascii_internal::AsciiStrToUpper(&result[0], s.data(), s.size());
return result;
}

Expand Down

0 comments on commit 5ea745c

Please sign in to comment.