Skip to content

Commit 3c52d2d

Browse files
authored
Improve the performance of the formatter instability check job (#14471)
We should probably get rid of this entirely and subsume it's functionality in the normal ecosystem checks? I don't think we're using the black comparison tests anymore, but maybe someone wants it? There are a few major parts to this: 1. Making the formatter script idempotent, so it can be run repeatedly and is robust to changing commits 2. Reducing the overhead of the git operations, minimizing the data transfer 3. Parallelizing all the git operations by repository This reduces the setup time from 80s to 16s (locally). The initial motivation for idempotency was to include the repositories in the GitHub Actions cache. I'm not sure it's worth it yet — they're about 1GB and would consume our limited cache space. Regardless, it improves correctness for local invocations. The total runtime of the job is reduced from ~4m to ~3m. I also made some cosmetic changes to the output paths and such.
1 parent 942d6ee commit 3c52d2d

File tree

2 files changed

+76
-45
lines changed

2 files changed

+76
-45
lines changed

.github/workflows/ci.yaml

+3-3
Original file line numberDiff line numberDiff line change
@@ -563,12 +563,12 @@ jobs:
563563
run: rustup show
564564
- name: "Cache rust"
565565
uses: Swatinem/rust-cache@v2
566-
- name: "Formatter progress"
566+
- name: "Run checks"
567567
run: scripts/formatter_ecosystem_checks.sh
568568
- name: "Github step summary"
569-
run: cat target/progress_projects_stats.txt > $GITHUB_STEP_SUMMARY
569+
run: cat target/formatter-ecosystem/stats.txt > $GITHUB_STEP_SUMMARY
570570
- name: "Remove checkouts from cache"
571-
run: rm -r target/progress_projects
571+
run: rm -r target/formatter-ecosystem
572572

573573
check-ruff-lsp:
574574
name: "test ruff-lsp"

scripts/formatter_ecosystem_checks.sh

+73-42
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
# errors.
99
#
1010
# This script will first clone a diverse set of (mostly) black formatted
11-
# repositories with fixed revisions to target/progress_projects. Each project
11+
# repositories with fixed revisions to target/formatter-ecosystem. Each project
1212
# gets formatted (without modifying the files on disk) to check how
1313
# similar our style is to black. It also catches common issues such as
1414
# unstable formatting, internal formatter errors and printing invalid syntax.
@@ -18,72 +18,103 @@
1818
set -e
1919

2020
target=$(git rev-parse --show-toplevel)/target
21-
dir="$target/progress_projects"
21+
dir="$target/formatter-ecosystem"
2222
mkdir -p "$dir"
2323

24+
# Perform an idempotent clone and checkout of a commit
25+
clone_commit() {
26+
local repo="$1"
27+
local name="$2"
28+
local ref="$3"
29+
30+
if [ -z "$repo" ] || [ -z "$name" ] || [ -z "$ref" ]; then
31+
echo "Usage: clone_commit <repo> <name> <ref>"
32+
return 1
33+
fi
34+
35+
local target="$dir/projects/$name"
36+
37+
if [ ! -d "$target/.git" ]; then
38+
echo "Cloning $repo to $name"
39+
# Perform a minimal clone, we only need a single commit
40+
git clone --filter=blob:none --depth=1 --no-tags --no-checkout --single-branch "$repo" "$target"
41+
fi
42+
43+
echo "Using $repo at $ref"
44+
git -C "$target" fetch --filter=blob:none --depth=1 --no-tags origin "$ref"
45+
git -C "$target" checkout -q "$ref"
46+
}
47+
2448
# small util library
25-
if [ ! -d "$dir/twine/.git" ]; then
26-
git clone --filter=tree:0 https://github.com/pypa/twine "$dir/twine"
27-
fi
28-
git -C "$dir/twine" checkout -q ae71822a3cb0478d0f6a0cccb65d6f8e6275ece5
49+
clone_commit \
50+
"https://github.com/pypa/twine" \
51+
"twine" \
52+
"ae71822a3cb0478d0f6a0cccb65d6f8e6275ece5" &
2953

3054
# web framework that implements a lot of magic
31-
if [ ! -d "$dir/django/.git" ]; then
32-
git clone --filter=tree:0 https://github.com/django/django "$dir/django"
33-
fi
34-
git -C "$dir/django" checkout -q ee5147cfd7de2add74a285537a8968ec074e70cd
55+
clone_commit \
56+
"https://github.com/django/django" \
57+
"django" \
58+
"ee5147cfd7de2add74a285537a8968ec074e70cd" &
3559

3660
# an ML project
37-
if [ ! -d "$dir/transformers/.git" ]; then
38-
git clone --filter=tree:0 https://github.com/huggingface/transformers "$dir/transformers"
39-
fi
40-
git -C "$dir/transformers" checkout -q ac5a0556f14dec503b064d5802da1092e0b558ea
61+
clone_commit \
62+
"https://github.com/huggingface/transformers" \
63+
"transformers" \
64+
"ac5a0556f14dec503b064d5802da1092e0b558ea" &
4165

4266
# type annotations
43-
if [ ! -d "$dir/typeshed/.git" ]; then
44-
git clone --filter=tree:0 https://github.com/python/typeshed "$dir/typeshed"
45-
fi
46-
git -C "$dir/typeshed" checkout -q d34ef50754de993d01630883dbcd1d27ba507143
67+
clone_commit \
68+
"https://github.com/python/typeshed" \
69+
"typeshed" \
70+
"d34ef50754de993d01630883dbcd1d27ba507143" &
4771

4872
# python 3.11, typing and 100% test coverage
49-
if [ ! -d "$dir/warehouse/.git" ]; then
50-
git clone --filter=tree:0 https://github.com/pypi/warehouse "$dir/warehouse"
51-
fi
52-
git -C "$dir/warehouse" checkout -q 5a4d2cadec641b5d6a6847d0127940e0f532f184
73+
clone_commit \
74+
"https://github.com/pypi/warehouse" \
75+
"warehouse" \
76+
"5a4d2cadec641b5d6a6847d0127940e0f532f184" &
5377

5478
# zulip, a django user
55-
if [ ! -d "$dir/zulip/.git" ]; then
56-
git clone --filter=tree:0 https://github.com/zulip/zulip "$dir/zulip"
57-
fi
58-
git -C "$dir/zulip" checkout -q ccddbba7a3074283ccaac3bde35fd32b19faf042
79+
clone_commit \
80+
"https://github.com/zulip/zulip" \
81+
"zulip" \
82+
"ccddbba7a3074283ccaac3bde35fd32b19faf042" &
5983

6084
# home-assistant, home automation with 1ok files
61-
if [ ! -d "$dir/home-assistant/.git" ]; then
62-
git clone --filter=tree:0 https://github.com/home-assistant/core "$dir/home-assistant"
63-
fi
64-
git -C "$dir/home-assistant" checkout -q 3601c531f400255d10b82529549e564fbe483a54
85+
clone_commit \
86+
"https://github.com/home-assistant/core" \
87+
"home-assistant" \
88+
"3601c531f400255d10b82529549e564fbe483a54" &
6589

6690
# poetry, a package manager that uses black preview style
67-
if [ ! -d "$dir/poetry/.git" ]; then
68-
git clone --filter=tree:0 https://github.com/python-poetry/poetry "$dir/poetry"
69-
fi
70-
git -C "$dir/poetry" checkout -q 36fedb59b8e655252168055b536ead591068e1e4
91+
clone_commit \
92+
"https://github.com/python-poetry/poetry" \
93+
"poetry" \
94+
"36fedb59b8e655252168055b536ead591068e1e4" &
7195

7296
# cpython itself
73-
if [ ! -d "$dir/cpython/.git" ]; then
74-
git clone --filter=tree:0 https://github.com/python/cpython "$dir/cpython"
75-
fi
76-
git -C "$dir/cpython" checkout -q 28aea5d07d163105b42acd81c1651397ef95ea57
97+
clone_commit \
98+
"https://github.com/python/cpython" \
99+
"cpython" \
100+
"28aea5d07d163105b42acd81c1651397ef95ea57" &
101+
102+
# wait for the concurrent clones to complete
103+
wait
77104

78105
# Uncomment if you want to update the hashes
79106
#for i in "$dir"/*/; do git -C "$i" switch main && git -C "$i" pull; done
80107
#for i in "$dir"/*/; do echo "# $(basename "$i") $(git -C "$i" rev-parse HEAD)"; done
81108

82109
time cargo run --bin ruff_dev -- format-dev --stability-check \
83-
--error-file "$target/progress_projects_errors.txt" --log-file "$target/progress_projects_log.txt" --stats-file "$target/progress_projects_stats.txt" \
84-
--files-with-errors 3 --multi-project "$dir" || (
110+
--error-file "$dir/errors.txt" \
111+
--log-file "$dir/log.txt" \
112+
--stats-file "$dir/stats.txt" \
113+
--files-with-errors 3 --multi-project "$dir/projects" \
114+
|| (
85115
echo "Ecosystem check failed"
86-
cat "$target/progress_projects_log.txt"
116+
cat "$dir/log.txt"
87117
exit 1
88118
)
89-
cat "$target/progress_projects_stats.txt"
119+
120+
cat "$dir/stats.txt"

0 commit comments

Comments
 (0)