Skip to content

Commit

Permalink
Add memory sanitizer to absl::Cord
Browse files Browse the repository at this point in the history
PiperOrigin-RevId: 504555535
Change-Id: Id40484e9f52c87e9d67def2735ee60481ca50526
  • Loading branch information
martijnvels authored and copybara-github committed Jan 25, 2023
1 parent b0a2b10 commit f1d1657
Show file tree
Hide file tree
Showing 4 changed files with 229 additions and 30 deletions.
1 change: 1 addition & 0 deletions absl/strings/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -322,6 +322,7 @@ cc_library(
"//absl/base:raw_logging_internal",
"//absl/base:throw_delegate",
"//absl/container:compressed_tuple",
"//absl/container:container_memory",
"//absl/container:inlined_vector",
"//absl/container:layout",
"//absl/crc:crc_cord_state",
Expand Down
1 change: 1 addition & 0 deletions absl/strings/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -604,6 +604,7 @@ absl_cc_library(
absl::base_internal
absl::compressed_tuple
absl::config
absl::container_memory
absl::core_headers
absl::crc_cord_state
absl::endian
Expand Down
66 changes: 66 additions & 0 deletions absl/strings/cord_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3057,3 +3057,69 @@ TEST_P(CordTest, ChecksummedEmptyCord) {
EXPECT_EQ(absl::HashOf(c3), absl::HashOf(absl::Cord()));
EXPECT_EQ(absl::HashOf(c3), absl::HashOf(absl::string_view()));
}

#if defined(GTEST_HAS_DEATH_TEST) && defined(ABSL_INTERNAL_CORD_HAVE_SANITIZER)

// Returns an expected poison / uninitialized death message expression.
const char* MASanDeathExpr() {
return "(use-after-poison|use-of-uninitialized-value)";
}

TEST(CordSanitizerTest, SanitizesEmptyCord) {
absl::Cord cord;
const char* data = cord.Flatten().data();
EXPECT_DEATH(EXPECT_EQ(data[0], 0), MASanDeathExpr());
}

TEST(CordSanitizerTest, SanitizesSmallCord) {
absl::Cord cord("Hello");
const char* data = cord.Flatten().data();
EXPECT_DEATH(EXPECT_EQ(data[5], 0), MASanDeathExpr());
}

TEST(CordSanitizerTest, SanitizesCordOnSetSSOValue) {
absl::Cord cord("String that is too big to be an SSO value");
cord = "Hello";
const char* data = cord.Flatten().data();
EXPECT_DEATH(EXPECT_EQ(data[5], 0), MASanDeathExpr());
}

TEST(CordSanitizerTest, SanitizesCordOnCopyCtor) {
absl::Cord src("hello");
absl::Cord dst(src);
const char* data = dst.Flatten().data();
EXPECT_DEATH(EXPECT_EQ(data[5], 0), MASanDeathExpr());
}

TEST(CordSanitizerTest, SanitizesCordOnMoveCtor) {
absl::Cord src("hello");
absl::Cord dst(std::move(src));
const char* data = dst.Flatten().data();
EXPECT_DEATH(EXPECT_EQ(data[5], 0), MASanDeathExpr());
}

TEST(CordSanitizerTest, SanitizesCordOnAssign) {
absl::Cord src("hello");
absl::Cord dst;
dst = src;
const char* data = dst.Flatten().data();
EXPECT_DEATH(EXPECT_EQ(data[5], 0), MASanDeathExpr());
}

TEST(CordSanitizerTest, SanitizesCordOnMoveAssign) {
absl::Cord src("hello");
absl::Cord dst;
dst = std::move(src);
const char* data = dst.Flatten().data();
EXPECT_DEATH(EXPECT_EQ(data[5], 0), MASanDeathExpr());
}

TEST(CordSanitizerTest, SanitizesCordOnSsoAssign) {
absl::Cord src("hello");
absl::Cord dst("String that is too big to be an SSO value");
dst = src;
const char* data = dst.Flatten().data();
EXPECT_DEATH(EXPECT_EQ(data[5], 0), MASanDeathExpr());
}

#endif // GTEST_HAS_DEATH_TEST && ABSL_INTERNAL_CORD_HAVE_SANITIZER
191 changes: 161 additions & 30 deletions absl/strings/internal/cord_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,20 @@
#include "absl/base/internal/invoke.h"
#include "absl/base/optimization.h"
#include "absl/container/internal/compressed_tuple.h"
#include "absl/container/internal/container_memory.h"
#include "absl/meta/type_traits.h"
#include "absl/strings/string_view.h"

// We can only add poisoning if we can detect consteval executions.
#if defined(ABSL_HAVE_CONSTANT_EVALUATED) && \
(defined(ABSL_HAVE_ADDRESS_SANITIZER) || \
defined(ABSL_HAVE_MEMORY_SANITIZER))
#define ABSL_INTERNAL_CORD_HAVE_SANITIZER 1
#endif

#define ABSL_CORD_INTERNAL_NO_SANITIZE \
ABSL_ATTRIBUTE_NO_SANITIZE_ADDRESS ABSL_ATTRIBUTE_NO_SANITIZE_MEMORY

namespace absl {
ABSL_NAMESPACE_BEGIN
namespace cord_internal {
Expand Down Expand Up @@ -267,7 +278,7 @@ struct CordRep {
// The following three fields have to be less than 32 bytes since
// that is the smallest supported flat node size. Some code optimizations rely
// on the specific layout of these fields. Notably: the non-trivial field
// `refcount` being preceeded by `length`, and being tailed by POD data
// `refcount` being preceded by `length`, and being tailed by POD data
// members only.
// # LINT.IfChange
size_t length;
Expand Down Expand Up @@ -506,26 +517,57 @@ class InlineData {
// is actively inspected and used by gdb pretty printing code.
static constexpr size_t kTagOffset = 0;

constexpr InlineData() = default;
explicit InlineData(DefaultInitType) : rep_(kDefaultInit) {}
explicit InlineData(CordRep* rep) : rep_(rep) {}
// Implement `~InlineData()` conditionally: we only need this destructor to
// unpoison poisoned instances under *SAN, and it will only compile correctly
// if the current compiler supports `absl::is_constant_evaluated()`.
#ifdef ABSL_INTERNAL_CORD_HAVE_SANITIZER
~InlineData() noexcept { unpoison(); }
#endif

constexpr InlineData() noexcept { poison_this(); }

explicit InlineData(DefaultInitType) noexcept : rep_(kDefaultInit) {
poison_this();
}

explicit InlineData(CordRep* rep) noexcept : rep_(rep) {
ABSL_ASSERT(rep != nullptr);
}

// Explicit constexpr constructor to create a constexpr InlineData
// value. Creates an inlined SSO value if `rep` is null, otherwise
// creates a tree instance value.
constexpr InlineData(absl::string_view sv, CordRep* rep)
: rep_(rep ? Rep(rep) : Rep(sv)) {}
constexpr InlineData(absl::string_view sv, CordRep* rep) noexcept
: rep_(rep ? Rep(rep) : Rep(sv)) {
poison();
}

constexpr InlineData(const InlineData& rhs) = default;
InlineData& operator=(const InlineData& rhs) = default;
constexpr InlineData(const InlineData& rhs) noexcept;
InlineData& operator=(const InlineData& rhs) noexcept;

friend bool operator==(const InlineData& lhs, const InlineData& rhs) {
#ifdef ABSL_INTERNAL_CORD_HAVE_SANITIZER
const Rep l = lhs.rep_.SanitizerSafeCopy();
const Rep r = rhs.rep_.SanitizerSafeCopy();
return memcmp(&l, &r, sizeof(l)) == 0;
#else
return memcmp(&lhs, &rhs, sizeof(lhs)) == 0;
#endif
}
friend bool operator!=(const InlineData& lhs, const InlineData& rhs) {
return !operator==(lhs, rhs);
}

// Poisons the unused inlined SSO data if the current instance
// is inlined, else un-poisons the entire instance.
constexpr void poison();

// Un-poisons this instance.
constexpr void unpoison();

// Poisons the current instance. This is used on default initialization.
constexpr void poison_this();

// Returns true if the current instance is empty.
// The 'empty value' is an inlined data value of zero length.
bool is_empty() const { return rep_.tag() == 0; }
Expand Down Expand Up @@ -610,18 +652,23 @@ class InlineData {

void set_inline_data(const char* data, size_t n) {
ABSL_ASSERT(n <= kMaxInline);
unpoison();
rep_.set_tag(static_cast<int8_t>(n << 1));
SmallMemmove<true>(rep_.as_chars(), data, n);
poison();
}

void copy_max_inline_to(char* dst) const {
assert(!is_tree());
memcpy(dst, as_chars(), kMaxInline);
memcpy(dst, rep_.SanitizerSafeCopy().as_chars(), kMaxInline);
}

// Initialize this instance to holding the tree value `rep`,
// initializing the cordz_info to null, i.e.: 'not profiled'.
void make_tree(CordRep* rep) { rep_.make_tree(rep); }
void make_tree(CordRep* rep) {
unpoison();
rep_.make_tree(rep);
}

// Set the tree value of this instance to 'rep`.
// Requires the current instance to already hold a tree value.
Expand All @@ -633,17 +680,15 @@ class InlineData {

// Returns the size of the inlined character data inside this instance.
// Requires the current instance to hold inline data.
size_t inline_size() const {
assert(!is_tree());
return static_cast<size_t>(rep_.tag()) >> 1;
}
size_t inline_size() const { return rep_.inline_size(); }

// Sets the size of the inlined character data inside this instance.
// Requires `size` to be <= kMaxInline.
// See the documentation on 'as_chars()' for more information and examples.
void set_inline_size(size_t size) {
ABSL_ASSERT(size <= kMaxInline);
rep_.set_tag(static_cast<int8_t>(size << 1));
unpoison();
rep_.set_inline_size(size);
poison();
}

// Compares 'this' inlined data with rhs. The comparison is a straightforward
Expand All @@ -653,20 +698,7 @@ class InlineData {
// 0 the InlineData instances are equal
// 1 'this' InlineData instance larger
int Compare(const InlineData& rhs) const {
uint64_t x, y;
memcpy(&x, as_chars(), sizeof(x));
memcpy(&y, rhs.as_chars(), sizeof(y));
if (x == y) {
memcpy(&x, as_chars() + 7, sizeof(x));
memcpy(&y, rhs.as_chars() + 7, sizeof(y));
if (x == y) {
if (inline_size() == rhs.inline_size()) return 0;
return inline_size() < rhs.inline_size() ? -1 : 1;
}
}
x = absl::big_endian::FromHost64(x);
y = absl::big_endian::FromHost64(y);
return x < y ? -1 : 1;
return Compare(rep_.SanitizerSafeCopy(), rhs.rep_.SanitizerSafeCopy());
}

private:
Expand Down Expand Up @@ -704,12 +736,26 @@ class InlineData {
GetOrNull(chars, 13),
GetOrNull(chars, 14)} {}

// Disable sanitizer as we must always be able to read `tag`.
ABSL_CORD_INTERNAL_NO_SANITIZE
int8_t tag() const { return reinterpret_cast<const int8_t*>(this)[0]; }
void set_tag(int8_t rhs) { reinterpret_cast<int8_t*>(this)[0] = rhs; }

char* as_chars() { return data + 1; }
const char* as_chars() const { return data + 1; }

bool is_tree() const { return (tag() & 1) != 0; }

size_t inline_size() const {
ABSL_ASSERT(!is_tree());
return static_cast<size_t>(tag()) >> 1;
}

void set_inline_size(size_t size) {
ABSL_ASSERT(size <= kMaxInline);
set_tag(static_cast<int8_t>(size << 1));
}

CordRep* tree() const { return as_tree.rep; }
void set_tree(CordRep* rhs) { as_tree.rep = rhs; }

Expand All @@ -721,6 +767,21 @@ class InlineData {
as_tree.cordz_info = kNullCordzInfo;
}

#ifdef ABSL_INTERNAL_CORD_HAVE_SANITIZER
Rep SanitizerSafeCopy() const {
Rep res;
if (is_tree()) {
res = *this;
} else {
res.set_tag(tag());
memcpy(res.as_chars(), as_chars(), inline_size());
}
return res;
}
#else
const Rep& SanitizerSafeCopy() const { return *this; }
#endif

// If the data has length <= kMaxInline, we store it in `data`, and
// store the size in the first char of `data` shifted left + 1.
// Else we store it in a tree and store a pointer to that tree in
Expand All @@ -731,11 +792,81 @@ class InlineData {
};
};

// Private implementation of `Compare()`
static inline int Compare(const Rep& lhs, const Rep& rhs) {
uint64_t x, y;
memcpy(&x, lhs.as_chars(), sizeof(x));
memcpy(&y, rhs.as_chars(), sizeof(y));
if (x == y) {
memcpy(&x, lhs.as_chars() + 7, sizeof(x));
memcpy(&y, rhs.as_chars() + 7, sizeof(y));
if (x == y) {
if (lhs.inline_size() == rhs.inline_size()) return 0;
return lhs.inline_size() < rhs.inline_size() ? -1 : 1;
}
}
x = absl::big_endian::FromHost64(x);
y = absl::big_endian::FromHost64(y);
return x < y ? -1 : 1;
}

Rep rep_;
};

static_assert(sizeof(InlineData) == kMaxInline + 1, "");

#ifdef ABSL_INTERNAL_CORD_HAVE_SANITIZER

constexpr InlineData::InlineData(const InlineData& rhs) noexcept
: rep_(rhs.rep_.SanitizerSafeCopy()) {
poison();
}

inline InlineData& InlineData::operator=(const InlineData& rhs) noexcept {
unpoison();
rep_ = rhs.rep_.SanitizerSafeCopy();
poison();
return *this;
}

constexpr void InlineData::poison_this() {
if (!absl::is_constant_evaluated()) {
container_internal::SanitizerPoisonObject(this);
}
}

constexpr void InlineData::unpoison() {
if (!absl::is_constant_evaluated()) {
container_internal::SanitizerUnpoisonObject(this);
}
}

constexpr void InlineData::poison() {
if (!absl::is_constant_evaluated()) {
if (is_tree()) {
container_internal::SanitizerUnpoisonObject(this);
} else if (const size_t size = inline_size()) {
if (size < kMaxInline) {
const char* end = rep_.as_chars() + size;
container_internal::SanitizerPoisonMemoryRegion(end, kMaxInline - size);
}
} else {
container_internal::SanitizerPoisonObject(this);
}
}
}

#else // ABSL_INTERNAL_CORD_HAVE_SANITIZER

constexpr InlineData::InlineData(const InlineData&) noexcept = default;
inline InlineData& InlineData::operator=(const InlineData&) noexcept = default;

constexpr void InlineData::poison_this() {}
constexpr void InlineData::unpoison() {}
constexpr void InlineData::poison() {}

#endif // ABSL_INTERNAL_CORD_HAVE_SANITIZER

inline CordRepSubstring* CordRep::substring() {
assert(IsSubstring());
return static_cast<CordRepSubstring*>(this);
Expand Down

0 comments on commit f1d1657

Please sign in to comment.