Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Introduce custom hash table data structures. #3940

Merged
merged 23 commits into from
Jun 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .bazelrc
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@ build:clang-tidy --aspects @bazel_clang_tidy//clang_tidy:clang_tidy.bzl%clang_ti
build:clang-tidy --output_groups=report
build:clang-tidy --@bazel_clang_tidy//:clang_tidy_config=//:clang_tidy_config

# This warning seems to incorrectly fire in this build configuration, despite
# not firing in our normal builds.
build:clang-tidy --copt=-Wno-unknown-pragmas

# Default to using a disk cache to minimize re-building LLVM and Clang which we
# try to avoid updating too frequently to minimize rebuild cost. The location
# here can be overridden in the user configuration where needed.
Expand Down
1 change: 1 addition & 0 deletions .codespell_ignore
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ createor
crossreference
falsy
forin
groupt
inout
parameteras
pullrequest
Expand Down
175 changes: 175 additions & 0 deletions common/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,14 @@ cc_binary(
],
)

cc_library(
name = "hashtable_key_context",
hdrs = ["hashtable_key_context.h"],
deps = [
":hashing",
],
)

cc_library(
name = "indirect_value",
hdrs = ["indirect_value.h"],
Expand Down Expand Up @@ -203,6 +211,53 @@ cc_library(
alwayslink = 1,
)

cc_library(
name = "map",
hdrs = ["map.h"],
deps = [
":check",
":hashtable_key_context",
":raw_hashtable",
"@llvm-project//llvm:Support",
],
)

cc_test(
name = "map_test",
srcs = ["map_test.cpp"],
deps = [
":map",
":raw_hashtable_test_helpers",
"//testing/base:gtest_main",
"//testing/base:test_raw_ostream",
"@googletest//:gtest",
],
)

cc_binary(
name = "map_benchmark",
testonly = 1,
srcs = ["map_benchmark.cpp"],
deps = [
":map",
":raw_hashtable_benchmark_helpers",
"@abseil-cpp//absl/container:flat_hash_map",
"@abseil-cpp//absl/random",
"@google_benchmark//:benchmark_main",
"@llvm-project//llvm:Support",
],
)

sh_test(
name = "map_benchmark_test",
# The benchmark allocates a large amount of memory.
size = "enormous",
# We configure the test to run quickly.
timeout = "short",
srcs = ["map_benchmark_test.sh"],
data = [":map_benchmark"],
)

cc_library(
name = "ostream",
hdrs = ["ostream.h"],
Expand All @@ -211,6 +266,126 @@ cc_library(
],
)

cc_library(
name = "raw_hashtable",
srcs = ["raw_hashtable.cpp"],
hdrs = ["raw_hashtable.h"],
deps = [
":check",
":hashing",
":hashtable_key_context",
":raw_hashtable_metadata_group",
"@llvm-project//llvm:Support",
],
)

cc_library(
name = "raw_hashtable_metadata_group",
srcs = ["raw_hashtable_metadata_group.cpp"],
hdrs = ["raw_hashtable_metadata_group.h"],
deps = [
":check",
"@llvm-project//llvm:Support",
],
)

cc_binary(
name = "raw_hashtable_metadata_group_benchmark",
testonly = 1,
srcs = ["raw_hashtable_metadata_group_benchmark.cpp"],
deps = [
":raw_hashtable_metadata_group",
"@abseil-cpp//absl/random",
"@google_benchmark//:benchmark_main",
"@llvm-project//llvm:Support",
],
)

sh_test(
name = "raw_hashtable_metadata_group_benchmark_test",
srcs = ["raw_hashtable_metadata_group_benchmark_test.sh"],
data = [":raw_hashtable_metadata_group_benchmark"],
)

cc_library(
name = "raw_hashtable_benchmark_helpers",
testonly = 1,
srcs = ["raw_hashtable_benchmark_helpers.cpp"],
hdrs = ["raw_hashtable_benchmark_helpers.h"],
copts = [
"-O2", # Always optimize to make testing benchmarks faster.
],
deps = [
":check",
":hashing",
":raw_hashtable",
":set",
"@abseil-cpp//absl/base:no_destructor",
"@abseil-cpp//absl/hash",
"@abseil-cpp//absl/random",
"@google_benchmark//:benchmark",
"@llvm-project//llvm:Support",
],
)

cc_library(
name = "raw_hashtable_test_helpers",
testonly = 1,
hdrs = ["raw_hashtable_test_helpers.h"],
deps = [
":check",
":hashing",
":hashtable_key_context",
":ostream",
],
)

cc_library(
name = "set",
hdrs = ["set.h"],
deps = [
":check",
":hashtable_key_context",
":raw_hashtable",
"@llvm-project//llvm:Support",
],
)

cc_test(
name = "set_test",
srcs = ["set_test.cpp"],
deps = [
":raw_hashtable_test_helpers",
":set",
"//testing/base:gtest_main",
"//testing/base:test_raw_ostream",
"@googletest//:gtest",
],
)

cc_binary(
name = "set_benchmark",
testonly = 1,
srcs = ["set_benchmark.cpp"],
deps = [
":raw_hashtable_benchmark_helpers",
":set",
"@abseil-cpp//absl/container:flat_hash_set",
"@google_benchmark//:benchmark_main",
"@llvm-project//llvm:Support",
],
)

sh_test(
name = "set_benchmark_test",
# The benchmark allocates a large amount of memory.
size = "enormous",
# We configure the test to run quickly.
timeout = "short",
srcs = ["set_benchmark_test.sh"],
data = [":set_benchmark"],
)

cc_library(
name = "string_helpers",
srcs = ["string_helpers.cpp"],
Expand Down
4 changes: 2 additions & 2 deletions common/hashing.h
Original file line number Diff line number Diff line change
Expand Up @@ -573,9 +573,9 @@ constexpr auto HashCode::ExtractIndex() -> ssize_t { return value_; }
template <int N>
constexpr auto HashCode::ExtractIndexAndTag() -> std::pair<ssize_t, uint32_t> {
static_assert(N >= 1);
static_assert(N <= 32);
static_assert(N < 32);
return {static_cast<ssize_t>(value_ >> N),
static_cast<uint32_t>(value_ & ((1U << (N + 1)) - 1))};
static_cast<uint32_t>(value_ & ((1U << N) - 1))};
}

// Building with `-DCARBON_MCA_MARKERS` will enable `llvm-mca` annotations in
Expand Down
6 changes: 6 additions & 0 deletions common/hashing_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,12 @@ TEST(HashingTest, HashCodeAPI) {
EXPECT_THAT(a.ExtractIndex(), Ne(b.ExtractIndex()));
EXPECT_THAT(a.ExtractIndex(), Ne(empty.ExtractIndex()));

// The tag shouldn't have bits set outside the range requested.
EXPECT_THAT(HashValue("a").ExtractIndexAndTag<1>().second & ~0b1, Eq(0));
EXPECT_THAT(HashValue("a").ExtractIndexAndTag<2>().second & ~0b11, Eq(0));
EXPECT_THAT(HashValue("a").ExtractIndexAndTag<3>().second & ~0b111, Eq(0));
EXPECT_THAT(HashValue("a").ExtractIndexAndTag<4>().second & ~0b1111, Eq(0));

// Note that the index produced with a tag may be different from the index
// alone!
EXPECT_THAT(HashValue("a").ExtractIndexAndTag<2>(),
Expand Down
85 changes: 85 additions & 0 deletions common/hashtable_key_context.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
// Part of the Carbon Language project, under the Apache License v2.0 with LLVM
// Exceptions. See /LICENSE for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

#ifndef CARBON_COMMON_HASHTABLE_KEY_CONTEXT_H_
#define CARBON_COMMON_HASHTABLE_KEY_CONTEXT_H_

#include "common/hashing.h"

namespace Carbon {

// Customizable context for keys in hashtables.
//
// This type or customizations matching its API are used with the data
// structures in `map.h` and `set.h`. By providing a custom version of the
// `KeyContext` type parameter to those data structures, users can provide
// either stateless or stateful customization of the two core hashtable key
// operations: hashing and comparison.
//
// The default for hashing uses Carbon's `hashing.h`. Customizations must still
// return a `HashCode` as defined there, and it needs to have the same core
// properties of hashes produced by the `hashing.h` infrastructure.
//
// The default for comparison is `operator==`. The `KeyEq` method is always
// called with a key *stored in the hashtable* as the second or "RHS" parameter.
// This is to allow simplifying the set of overloads needed for heterogeneous
// contexts: only the first, LHS, parameter needs to support different lookup
// key types.
//
// Custom KeyContext types should have the same API as the default type. They
// can choose to use templates to support heterogeneous key types or not as
// appropriate. The default context can also be used as a base class when only
// one of the two APIs needs to be customized.
//
// An important consideration is how the key context is constructed. When the
// key context can be default constructed, hashtable APIs trafficking in keys
// will have overloads that provide a default constructed key context. When the
// context is *not* default constructible, every API that accepts a key will
// also require a context argument to be called, and that argument will be used
// throughout that operation. The intent is to allow callers to provide stateful
// contexts to each API where it would be needed, while managing that state
// outside the hashtable. Often the needed state is trivially part of the
// caller's existing state and needn't be stored separately.
//
// Example for a stateful, customized key context for interned strings:
// ```cpp
// class InternedStringIndexKeyContext {
// public:
// InternedStringIndexKeyContext(
// llvm::ArrayRef<llvm::StringRef> interned_strings)
// : interned_strings_(interned_strings) {}
//
// auto HashKey(llvm::StringRef s, uint64_t seed) const -> HashCode {
// return HashValue(s, seed);
// }
// auto HashKey(int index_key, uint64_t seed) const -> HashCode {
// return HashKey(interned_strings_[index_key], seed);
// }
//
// auto KeyEq(llvm::StringRef lhs, int rhs_index) const -> bool {
// return lhs == interned_strings_[rhs_index];
// }
// auto KeyEq(int lhs_index, int rhs_index) const -> bool {
// return KeyEq(interned_strings_[lhs_index], rhs_index);
// }
//
// private:
// llvm::ArrayRef<llvm::StringRef> interned_strings_;
// };
// ```
struct DefaultKeyContext {
  // Equality hook: reports whether `lhs_key` and `rhs_key` compare equal under
  // `operator==`. The two operands may have different types, which is what
  // permits heterogeneous lookups as long as a matching `operator==` exists.
  template <typename LookupT, typename StoredT>
  auto KeyEq(const LookupT& lhs_key, const StoredT& rhs_key) const -> bool {
    return lhs_key == rhs_key;
  }

  // Hashing hook: forwards `key` and `seed` unchanged to `HashValue` and
  // returns the resulting `HashCode`.
  template <typename T>
  auto HashKey(const T& key, uint64_t seed) const -> HashCode {
    return HashValue(key, seed);
  }
};

} // namespace Carbon

#endif // CARBON_COMMON_HASHTABLE_KEY_CONTEXT_H_
Loading
Loading