Skip to content

Commit 76f4fd8

Browse files
valyala authored and ianlancetaylor committed
runtime: improve timers scalability on multi-CPU systems
Use per-P timers, so each P may work with its own timers.

This CL improves performance on multi-CPU systems in the following cases:

- When serving a high number of concurrent connections with read/write deadlines set (for instance, a highly loaded net/http server).
- When using a high number of concurrent timers. These timers may be implicitly created via context.WithDeadline or context.WithTimeout.

Production servers should usually set timeouts on connections and external requests in order to prevent resource leakage. See https://blog.cloudflare.com/the-complete-guide-to-golang-net-http-timeouts/

Below are relevant benchmark results for various GOMAXPROCS values on linux/amd64:

context package:

name                                old time/op  new time/op  delta
WithTimeout/concurrency=40          4.92µs ± 0%  5.17µs ± 1%   +5.07%  (p=0.000 n=9+9)
WithTimeout/concurrency=4000        6.03µs ± 1%  6.49µs ± 0%   +7.63%  (p=0.000 n=8+10)
WithTimeout/concurrency=400000      8.58µs ± 7%  9.02µs ± 4%   +5.02%  (p=0.019 n=10+10)

name                                old time/op  new time/op  delta
WithTimeout/concurrency=40-2        3.70µs ± 1%  2.78µs ± 4%  -24.90%  (p=0.000 n=8+9)
WithTimeout/concurrency=4000-2      4.49µs ± 4%  3.67µs ± 5%  -18.26%  (p=0.000 n=10+10)
WithTimeout/concurrency=400000-2    6.16µs ±10%  5.15µs ±13%  -16.30%  (p=0.000 n=10+10)

name                                old time/op  new time/op  delta
WithTimeout/concurrency=40-4        3.58µs ± 1%  2.64µs ± 2%  -26.13%  (p=0.000 n=9+10)
WithTimeout/concurrency=4000-4      4.17µs ± 0%  3.32µs ± 1%  -20.36%  (p=0.000 n=10+10)
WithTimeout/concurrency=400000-4    5.57µs ± 9%  4.83µs ±10%  -13.27%  (p=0.001 n=10+10)

time package:

name                      old time/op  new time/op  delta
AfterFunc                 6.15ms ± 3%  6.07ms ± 2%     ~     (p=0.133 n=10+9)
AfterFunc-2               3.43ms ± 1%  3.56ms ± 1%   +3.91%  (p=0.000 n=10+9)
AfterFunc-4               5.04ms ± 2%  2.36ms ± 0%  -53.20%  (p=0.000 n=10+9)
After                     6.54ms ± 2%  6.49ms ± 3%     ~     (p=0.393 n=10+10)
After-2                   3.68ms ± 1%  3.87ms ± 0%   +5.14%  (p=0.000 n=9+9)
After-4                   6.66ms ± 1%  2.87ms ± 1%  -56.89%  (p=0.000 n=10+10)
Stop                       698µs ± 2%   689µs ± 1%   -1.26%  (p=0.011 n=10+10)
Stop-2                     729µs ± 2%   434µs ± 3%  -40.49%  (p=0.000 n=10+10)
Stop-4                     837µs ± 3%   333µs ± 2%  -60.20%  (p=0.000 n=10+10)
SimultaneousAfterFunc      694µs ± 1%   692µs ± 7%     ~     (p=0.481 n=10+10)
SimultaneousAfterFunc-2    714µs ± 3%   569µs ± 2%  -20.33%  (p=0.000 n=10+10)
SimultaneousAfterFunc-4    782µs ± 2%   386µs ± 2%  -50.67%  (p=0.000 n=10+10)
StartStop                  267µs ± 3%   274µs ± 0%   +2.64%  (p=0.000 n=8+9)
StartStop-2                238µs ± 2%   140µs ± 3%  -40.95%  (p=0.000 n=10+8)
StartStop-4                320µs ± 1%   125µs ± 1%  -61.02%  (p=0.000 n=9+9)
Reset                     75.0µs ± 1%  77.5µs ± 2%   +3.38%  (p=0.000 n=10+10)
Reset-2                    150µs ± 2%    40µs ± 5%  -73.09%  (p=0.000 n=10+9)
Reset-4                    226µs ± 1%    33µs ± 1%  -85.42%  (p=0.000 n=10+10)
Sleep                      857µs ± 6%   878µs ± 9%     ~     (p=0.079 n=10+9)
Sleep-2                    617µs ± 4%   585µs ± 2%   -5.21%  (p=0.000 n=10+10)
Sleep-4                    689µs ± 3%   465µs ± 4%  -32.53%  (p=0.000 n=10+10)
Ticker                    55.9ms ± 2%  55.9ms ± 2%     ~     (p=0.971 n=10+10)
Ticker-2                  28.7ms ± 2%  28.1ms ± 1%   -2.06%  (p=0.000 n=10+10)
Ticker-4                  14.6ms ± 0%  13.6ms ± 1%   -6.80%  (p=0.000 n=9+10)

Fixes #15133

Change-Id: I6f4b09d2db8c5bec93146db6501b44dbfe5c0ac4
Reviewed-on: https://go-review.googlesource.com/34784
Reviewed-by: Austin Clements <[email protected]>
1 parent 7377d0c commit 76f4fd8

File tree

9 files changed

+285
-113
lines changed

9 files changed

+285
-113
lines changed

src/context/benchmark_test.go

+55-1
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,64 @@ package context_test
77
import (
88
. "context"
99
"fmt"
10+
"runtime"
11+
"sync"
1012
"testing"
13+
"time"
1114
)
1215

13-
func BenchmarkContextCancelTree(b *testing.B) {
16+
func BenchmarkWithTimeout(b *testing.B) {
17+
for concurrency := 40; concurrency <= 4e5; concurrency *= 100 {
18+
name := fmt.Sprintf("concurrency=%d", concurrency)
19+
b.Run(name, func(b *testing.B) {
20+
benchmarkWithTimeout(b, concurrency)
21+
})
22+
}
23+
}
24+
25+
func benchmarkWithTimeout(b *testing.B, concurrentContexts int) {
26+
gomaxprocs := runtime.GOMAXPROCS(0)
27+
perPContexts := concurrentContexts / gomaxprocs
28+
root := Background()
29+
30+
// Generate concurrent contexts.
31+
var wg sync.WaitGroup
32+
ccf := make([][]CancelFunc, gomaxprocs)
33+
for i := range ccf {
34+
wg.Add(1)
35+
go func(i int) {
36+
defer wg.Done()
37+
cf := make([]CancelFunc, perPContexts)
38+
for j := range cf {
39+
_, cf[j] = WithTimeout(root, time.Hour)
40+
}
41+
ccf[i] = cf
42+
}(i)
43+
}
44+
wg.Wait()
45+
46+
b.ResetTimer()
47+
b.RunParallel(func(pb *testing.PB) {
48+
wcf := make([]CancelFunc, 10)
49+
for pb.Next() {
50+
for i := range wcf {
51+
_, wcf[i] = WithTimeout(root, time.Hour)
52+
}
53+
for _, f := range wcf {
54+
f()
55+
}
56+
}
57+
})
58+
b.StopTimer()
59+
60+
for _, cf := range ccf {
61+
for _, f := range cf {
62+
f()
63+
}
64+
}
65+
}
66+
67+
func BenchmarkCancelTree(b *testing.B) {
1468
depths := []int{1, 10, 100, 1000}
1569
for _, d := range depths {
1670
b.Run(fmt.Sprintf("depth=%d", d), func(b *testing.B) {

src/internal/trace/parser.go

+4-3
Original file line numberDiff line numberDiff line change
@@ -270,8 +270,9 @@ func parseHeader(buf []byte) (int, error) {
270270
// It does analyze and verify per-event-type arguments.
271271
func parseEvents(ver int, rawEvents []rawEvent, strings map[uint64]string) (events []*Event, stacks map[uint64][]*Frame, err error) {
272272
var ticksPerSec, lastSeq, lastTs int64
273-
var lastG, timerGoid uint64
273+
var lastG uint64
274274
var lastP int
275+
timerGoids := make(map[uint64]bool)
275276
lastGs := make(map[int]uint64) // last goroutine running on P
276277
stacks = make(map[uint64][]*Frame)
277278
batches := make(map[int][]*Event) // events by P
@@ -308,7 +309,7 @@ func parseEvents(ver int, rawEvents []rawEvent, strings map[uint64]string) (even
308309
return
309310
}
310311
case EvTimerGoroutine:
311-
timerGoid = raw.args[0]
312+
timerGoids[raw.args[0]] = true
312313
case EvStack:
313314
if len(raw.args) < 2 {
314315
err = fmt.Errorf("EvStack has wrong number of arguments at offset 0x%x: want at least 2, got %v",
@@ -431,7 +432,7 @@ func parseEvents(ver int, rawEvents []rawEvent, strings map[uint64]string) (even
431432
for _, ev := range events {
432433
ev.Ts = int64(float64(ev.Ts-minTs) * freq)
433434
// Move timers and syscalls to separate fake Ps.
434-
if timerGoid != 0 && ev.G == timerGoid && ev.Type == EvGoUnblock {
435+
if timerGoids[ev.G] && ev.Type == EvGoUnblock {
435436
ev.P = TimerP
436437
}
437438
if ev.Type == EvGoSysExit {

src/runtime/proc.go

+4-8
Original file line numberDiff line numberDiff line change
@@ -3863,15 +3863,11 @@ func sysmon() {
38633863
}
38643864
shouldRelax := true
38653865
if osRelaxMinNS > 0 {
3866-
lock(&timers.lock)
3867-
if timers.sleeping {
3868-
now := nanotime()
3869-
next := timers.sleepUntil
3870-
if next-now < osRelaxMinNS {
3871-
shouldRelax = false
3872-
}
3866+
next := timeSleepUntil()
3867+
now := nanotime()
3868+
if next-now < osRelaxMinNS {
3869+
shouldRelax = false
38733870
}
3874-
unlock(&timers.lock)
38753871
}
38763872
if shouldRelax {
38773873
osRelax(true)

0 commit comments

Comments
 (0)