From ef07f439793befb99d4f760fca1cbeb48fd04207 Mon Sep 17 00:00:00 2001 From: Colin Leach Date: Fri, 11 Oct 2024 16:04:39 -0700 Subject: [PATCH 1/4] WIP draft of `pairs-and-dicts` concept --- .../pairs-and-dicts/.meta/config.json | 7 ++ concepts.wip/pairs-and-dicts/about.md | 110 ++++++++++++++++++ concepts.wip/pairs-and-dicts/introduction.md | 1 + concepts.wip/pairs-and-dicts/links.json | 10 ++ 4 files changed, 128 insertions(+) create mode 100644 concepts.wip/pairs-and-dicts/.meta/config.json create mode 100644 concepts.wip/pairs-and-dicts/about.md create mode 100644 concepts.wip/pairs-and-dicts/introduction.md create mode 100644 concepts.wip/pairs-and-dicts/links.json diff --git a/concepts.wip/pairs-and-dicts/.meta/config.json b/concepts.wip/pairs-and-dicts/.meta/config.json new file mode 100644 index 00000000..a999f50e --- /dev/null +++ b/concepts.wip/pairs-and-dicts/.meta/config.json @@ -0,0 +1,7 @@ +{ + "authors": [ + "colinleach" + ], + "contributors": [], + "blurb": "A Dict is a collection of key => value Pairs, though the same Pair type has many other uses." +} diff --git a/concepts.wip/pairs-and-dicts/about.md b/concepts.wip/pairs-and-dicts/about.md new file mode 100644 index 00000000..9b13f206 --- /dev/null +++ b/concepts.wip/pairs-and-dicts/about.md @@ -0,0 +1,110 @@ +# About + +## Pairs + +A [`Pair`][pair] is just two items joined together. +The items are then imaginatively called `first` and `second`. + +Create them either with the `=>` operator or the `Pair()` constructor. 
+ +```julia-repl +julia> p1 = "k" => 2 +"k" => 2 + +julia> p2 = Pair("k", 2) +"k" => 2 + +# Both forms of syntax give the same result +julia> p1 == p2 +true + +# Each component has its own separate type +julia> dump(p1) +Pair{String, Int64} + first: String "k" + second: Int64 2 + +# Get a component using dot syntax +julia> p1.first +"k" + +julia> p1.second +2 +``` + +## Dicts + +A `Vector` of Pairs is like any other array: ordered, homogeneous in type, and stored consecutively in memory. + +```julia-repl +julia> pv = ['a' => 1, 'b' => 2, 'c' => 3] +3-element Vector{Pair{Char, Int64}}: + 'a' => 1 + 'b' => 2 + 'c' => 3 + +# Each pair is a single entry +julia> length(pv) +3 +``` + +A [`Dict`][dict] is superficially similar, but storage is now implemented in a way that allows fast retrieval by key, even when the number of entries grows large. + +```julia-repl +julia> pd = Dict(pv) +Dict{Char, Int64} with 3 entries: + 'a' => 1 + 'c' => 3 + 'b' => 2 + +julia> pd['b'] +2 + +# Key must exist +julia> pd['d'] +ERROR: KeyError: key 'd' not found + +# Generators are accepted in the constructor (and note the unordered output) +julia> Dict(x => x^2 for x in 1:5) +julia> Dict(x => 1 / x for x in 1:5) +Dict{Int64, Float64} with 5 entries: + 5 => 0.2 + 4 => 0.25 + 2 => 0.5 + 3 => 0.333333 + 1 => 1.0 + ``` + +Entries can be added with a new key or overwritten with an existing key. + +```julia-repl +# Add +julia> pd['d'] = 4 +4 + +# Overwrite +julia> pd['a'] = 42 +42 + +julia> pd +Dict{Char, Int64} with 4 entries: + 'a' => 42 + 'c' => 3 + 'd' => 4 + 'b' => 2 +``` + +In other languages, something very similar to a `Dict` might be called a dictionary (Python), a Hash (Ruby) or a HashMap (Java). + +For Pairs, whether in isolation or in a Vector, there are few constraints on the type of each component. + +To be valid in a `Dict`, the `Pair` must be a `key => value` pair, where the `key` is "hashable". 
+Most importantly, the `key` should also be _immutable_, so `Char`, `Int`, `String`, `Symbol`, and `Tuple` are all fine; a `Vector` is accepted as a key, but mutating it after insertion changes its hash and breaks lookup, so avoid it. + +If mutable keys are important to you, there is a separate but much less common [`IdDict`][iddict] type that can allow this. +See the [manual][dict] for several other variants on the `Dict` type. + + +[pair]: https://docs.julialang.org/en/v1/base/collections/#Core.Pair +[dict]: https://docs.julialang.org/en/v1/base/collections/#Dictionaries +[iddict]: https://docs.julialang.org/en/v1/base/collections/#Base.IdDict diff --git a/concepts.wip/pairs-and-dicts/introduction.md b/concepts.wip/pairs-and-dicts/introduction.md new file mode 100644 index 00000000..e10b99d0 --- /dev/null +++ b/concepts.wip/pairs-and-dicts/introduction.md @@ -0,0 +1 @@ +# Introduction diff --git a/concepts.wip/pairs-and-dicts/links.json b/concepts.wip/pairs-and-dicts/links.json new file mode 100644 index 00000000..52f5a99e --- /dev/null +++ b/concepts.wip/pairs-and-dicts/links.json @@ -0,0 +1,10 @@ +[ + { + "url": "https://docs.julialang.org/en/v1/base/collections/#Core.Pair", + "description": "Pair section in the manual." + }, + { + "url": "https://docs.julialang.org/en/v1/base/collections/#Dictionaries", + "description": "" + } +] From d8f9c2c823627022dffa09d79fc5311964cad224 Mon Sep 17 00:00:00 2001 From: Colin Leach Date: Fri, 11 Oct 2024 16:27:57 -0700 Subject: [PATCH 2/4] expanded info on modifying and searching --- concepts.wip/pairs-and-dicts/about.md | 49 +++++++++++++++++++++++---- 1 file changed, 43 insertions(+), 6 deletions(-) diff --git a/concepts.wip/pairs-and-dicts/about.md b/concepts.wip/pairs-and-dicts/about.md index 9b13f206..d4c949e8 100644 --- a/concepts.wip/pairs-and-dicts/about.md +++ b/concepts.wip/pairs-and-dicts/about.md @@ -75,6 +75,18 @@ Dict{Int64, Float64} with 5 entries: 1 => 1.0 ``` +In other languages, something very similar to a `Dict` might be called a dictionary (Python), a Hash (Ruby) or a HashMap (Java). 
+ +For Pairs, whether in isolation or in a Vector, there are few constraints on the type of each component. + +To be valid in a `Dict`, the `Pair` must be a `key => value` pair, where the `key` is "hashable". +Most importantly, the `key` should also be _immutable_, so `Char`, `Int`, `String`, `Symbol`, and `Tuple` are all fine; a `Vector` is accepted as a key, but mutating it after insertion changes its hash and breaks lookup, so avoid it. + +If mutable keys are important to you, there is a separate but much less common [`IdDict`][iddict] type that can allow this. +See the [manual][dict] for several other variants on the `Dict` type. + +### Modifying a Dict + Entries can be added with a new key or overwritten with an existing key. ```julia-repl @@ -94,15 +106,40 @@ Dict{Char, Int64} with 4 entries: 'b' => 2 ``` -In other languages, something very similar to a `Dict` might be called a dictionary (Python), a Hash (Ruby) or a HashMap (Java). +To remove an entry, use the `delete!()` function, which will change the Dict if the key exists and silently do nothing otherwise. -For Pairs, whether in isolation or in a Vector, there are few constraints on the type of each component. +```julia-repl +julia> delete!(pd, 'd') +Dict{Char, Int64} with 3 entries: + 'a' => 42 + 'c' => 3 + 'b' => 2 +``` -To be valid in a `Dict`, the `Pair` must be a `key => value` pair, where the `key` is "hashable". -Most importantly, the `key` should also be _immutable_, so `Char`, `Int`, `String`, `Symbol`, and `Tuple` are all fine; a `Vector` is accepted as a key, but mutating it after insertion changes its hash and breaks lookup, so avoid it. +### Checking if a key or value exists -If mutable keys are important to you, there is a separate but much less common [`IdDict`][iddict] type that can allow this. -See the [manual][dict] for several other variants on the `Dict` type. +There are different approaches. 
+To check a key, there is a `haskey()` function: + +```julia-repl +julia> haskey(pd, 'b') +true +``` + +Alternatively, search either the keys or the values: + +```julia-repl +julia> 'b' in keys(pd) +true + +julia> 43 in values(pd) +false + +julia> 42 ∈ values(pd) +true +``` + +Searching the keys remains efficient at scale, because `keys()` returns a view that uses the same hash lookup as `haskey()`; searching `values()` has to scan every entry, so it slows down as the Dict grows. [pair]: https://docs.julialang.org/en/v1/base/collections/#Core.Pair From 40ddebb8b5c63c8199e9a3560b636cd29ba5b755 Mon Sep 17 00:00:00 2001 From: colinleach Date: Fri, 11 Oct 2024 19:14:36 -0500 Subject: [PATCH 3/4] Update about.md --- concepts.wip/pairs-and-dicts/about.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/concepts.wip/pairs-and-dicts/about.md b/concepts.wip/pairs-and-dicts/about.md index d4c949e8..2598e6e7 100644 --- a/concepts.wip/pairs-and-dicts/about.md +++ b/concepts.wip/pairs-and-dicts/about.md @@ -48,7 +48,7 @@ julia> length(pv) 3 ``` -A [`Dict`][dict] is superficially similar, but storage is now implemented in a way that allows fast retrieval by key, even when the number of entries grows large. +A [`Dict`][dict] is superficially similar, but storage is now implemented as a "hash table", which allows fast retrieval by key even when the number of entries grows large. ```julia-repl julia> pd = Dict(pv) From 646ae32d333bd01269f485f797cebf7b0f983b33 Mon Sep 17 00:00:00 2001 From: colinleach Date: Sat, 12 Oct 2024 14:43:22 -0500 Subject: [PATCH 4/4] Update links.json Oops, I forgot to add some description text. 
--- concepts.wip/pairs-and-dicts/links.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/concepts.wip/pairs-and-dicts/links.json b/concepts.wip/pairs-and-dicts/links.json index 52f5a99e..6487b479 100644 --- a/concepts.wip/pairs-and-dicts/links.json +++ b/concepts.wip/pairs-and-dicts/links.json @@ -5,6 +5,6 @@ }, { "url": "https://docs.julialang.org/en/v1/base/collections/#Dictionaries", - "description": "" + "description": "Introduction to dictionaries in the manual." } ]