Skip to content

Commit 155d34b

Browse files
[red-knot] Infer precise types for len() calls (#14599)
## Summary Resolves #14598. ## Test Plan Markdown tests. --------- Co-authored-by: Carl Meyer <[email protected]>
1 parent 04c887c commit 155d34b

File tree

4 files changed

+337
-13
lines changed

4 files changed

+337
-13
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,219 @@
1+
# Length (`len()`)
2+
3+
## Literal and constructed iterables
4+
5+
### Strings and bytes literals
6+
7+
```py
8+
reveal_type(len("no\rmal")) # revealed: Literal[6]
9+
reveal_type(len(r"aw stri\ng")) # revealed: Literal[10]
10+
reveal_type(len(r"conca\t" "ena\tion")) # revealed: Literal[14]
11+
reveal_type(len(b"ytes lite" rb"al")) # revealed: Literal[11]
12+
reveal_type(len("𝒰𝕹🄸©🕲𝕕ℇ")) # revealed: Literal[7]
13+
14+
reveal_type( # revealed: Literal[7]
15+
len(
16+
"""foo
17+
bar"""
18+
)
19+
)
20+
reveal_type( # revealed: Literal[9]
21+
len(
22+
r"""foo\r
23+
bar"""
24+
)
25+
)
26+
reveal_type( # revealed: Literal[7]
27+
len(
28+
b"""foo
29+
bar"""
30+
)
31+
)
32+
reveal_type( # revealed: Literal[9]
33+
len(
34+
rb"""foo\r
35+
bar"""
36+
)
37+
)
38+
```
39+
40+
### Tuples
41+
42+
```py
43+
reveal_type(len(())) # revealed: Literal[0]
44+
reveal_type(len((1,))) # revealed: Literal[1]
45+
reveal_type(len((1, 2))) # revealed: Literal[2]
46+
47+
# TODO: Handle constructor calls
48+
reveal_type(len(tuple())) # revealed: int
49+
50+
# TODO: Handle star unpacks; Should be: Literal[0]
51+
reveal_type(len((*[],))) # revealed: Literal[1]
52+
53+
# TODO: Handle star unpacks; Should be: Literal[1]
54+
reveal_type( # revealed: Literal[2]
55+
len(
56+
(
57+
*[],
58+
1,
59+
)
60+
)
61+
)
62+
63+
# TODO: Handle star unpacks; Should be: Literal[2]
64+
reveal_type(len((*[], 1, 2))) # revealed: Literal[3]
65+
66+
# TODO: Handle star unpacks; Should be: Literal[0]
67+
reveal_type(len((*[], *{}))) # revealed: Literal[2]
68+
```
69+
70+
### Lists, sets and dictionaries
71+
72+
```py
73+
reveal_type(len([])) # revealed: int
74+
reveal_type(len([1])) # revealed: int
75+
reveal_type(len([1, 2])) # revealed: int
76+
reveal_type(len([*{}, *dict()])) # revealed: int
77+
78+
reveal_type(len({})) # revealed: int
79+
reveal_type(len({**{}})) # revealed: int
80+
reveal_type(len({**{}, **{}})) # revealed: int
81+
82+
reveal_type(len({1})) # revealed: int
83+
reveal_type(len({1, 2})) # revealed: int
84+
reveal_type(len({*[], 2})) # revealed: int
85+
86+
reveal_type(len(list())) # revealed: int
87+
reveal_type(len(set())) # revealed: int
88+
reveal_type(len(dict())) # revealed: int
89+
reveal_type(len(frozenset())) # revealed: int
90+
```
91+
92+
## `__len__`
93+
94+
The value returned by `__len__` is implicitly converted to `int`; for union return types, the conversion is applied recursively to each member of the union.
95+
96+
### Literal integers
97+
98+
```py
99+
from typing import Literal
100+
101+
class Zero:
102+
def __len__(self) -> Literal[0]: ...
103+
104+
class ZeroOrOne:
105+
def __len__(self) -> Literal[0, 1]: ...
106+
107+
class ZeroOrTrue:
108+
def __len__(self) -> Literal[0, True]: ...
109+
110+
class OneOrFalse:
111+
def __len__(self) -> Literal[1] | Literal[False]: ...
112+
113+
class OneOrFoo:
114+
def __len__(self) -> Literal[1, "foo"]: ...
115+
116+
class ZeroOrStr:
117+
def __len__(self) -> Literal[0] | str: ...
118+
119+
reveal_type(len(Zero())) # revealed: Literal[0]
120+
reveal_type(len(ZeroOrOne())) # revealed: Literal[0, 1]
121+
reveal_type(len(ZeroOrTrue())) # revealed: Literal[0, 1]
122+
reveal_type(len(OneOrFalse())) # revealed: Literal[0, 1]
123+
124+
# TODO: Emit a diagnostic
125+
reveal_type(len(OneOrFoo())) # revealed: int
126+
127+
# TODO: Emit a diagnostic
128+
reveal_type(len(ZeroOrStr())) # revealed: int
129+
```
130+
131+
### Literal booleans
132+
133+
```py
134+
from typing import Literal
135+
136+
class LiteralTrue:
137+
def __len__(self) -> Literal[True]: ...
138+
139+
class LiteralFalse:
140+
def __len__(self) -> Literal[False]: ...
141+
142+
reveal_type(len(LiteralTrue())) # revealed: Literal[1]
143+
reveal_type(len(LiteralFalse())) # revealed: Literal[0]
144+
```
145+
146+
### Enums
147+
148+
```py
149+
from enum import Enum, auto
150+
from typing import Literal
151+
152+
class SomeEnum(Enum):
153+
AUTO = auto()
154+
INT = 2
155+
STR = "4"
156+
TUPLE = (8, "16")
157+
INT_2 = 3_2
158+
159+
class Auto:
160+
def __len__(self) -> Literal[SomeEnum.AUTO]: ...
161+
162+
class Int:
163+
def __len__(self) -> Literal[SomeEnum.INT]: ...
164+
165+
class Str:
166+
def __len__(self) -> Literal[SomeEnum.STR]: ...
167+
168+
class Tuple:
169+
def __len__(self) -> Literal[SomeEnum.TUPLE]: ...
170+
171+
class IntUnion:
172+
def __len__(self) -> Literal[SomeEnum.INT, SomeEnum.INT_2]: ...
173+
174+
reveal_type(len(Auto())) # revealed: int
175+
reveal_type(len(Int())) # revealed: Literal[2]
176+
reveal_type(len(Str())) # revealed: int
177+
reveal_type(len(Tuple())) # revealed: int
178+
reveal_type(len(IntUnion())) # revealed: Literal[2, 32]
179+
```
180+
181+
### Negative integers
182+
183+
```py
184+
from typing import Literal
185+
186+
class Negative:
187+
def __len__(self) -> Literal[-1]: ...
188+
189+
# TODO: Emit a diagnostic
190+
reveal_type(len(Negative())) # revealed: int
191+
```
192+
193+
### Wrong signature
194+
195+
```py
196+
from typing import Literal
197+
198+
class SecondOptionalArgument:
199+
def __len__(self, v: int = 0) -> Literal[0]: ...
200+
201+
class SecondRequiredArgument:
202+
def __len__(self, v: int) -> Literal[1]: ...
203+
204+
# TODO: Emit a diagnostic
205+
reveal_type(len(SecondOptionalArgument())) # revealed: Literal[0]
206+
207+
# TODO: Emit a diagnostic
208+
reveal_type(len(SecondRequiredArgument())) # revealed: Literal[1]
209+
```
210+
211+
### No `__len__`
212+
213+
```py
214+
class NoDunderLen:
215+
pass
216+
217+
# TODO: Emit a diagnostic
218+
reveal_type(len(NoDunderLen())) # revealed: int
219+
```

crates/red_knot_python_semantic/resources/mdtest/unpacking.md

+39
Original file line numberDiff line numberDiff line change
@@ -267,3 +267,42 @@ reveal_type(b) # revealed: LiteralString
267267
# TODO: Should be list[int] once support for assigning to starred expression is added
268268
reveal_type(c) # revealed: @Todo(starred unpacking)
269269
```
270+
271+
### Unicode
272+
273+
```py
274+
# TODO: Add diagnostic (need more values to unpack)
275+
(a, b) = "é"
276+
277+
reveal_type(a) # revealed: LiteralString
278+
reveal_type(b) # revealed: Unknown
279+
```
280+
281+
### Unicode escape (1)
282+
283+
```py
284+
# TODO: Add diagnostic (need more values to unpack)
285+
(a, b) = "\u9E6C"
286+
287+
reveal_type(a) # revealed: LiteralString
288+
reveal_type(b) # revealed: Unknown
289+
```
290+
291+
### Unicode escape (2)
292+
293+
```py
294+
# TODO: Add diagnostic (need more values to unpack)
295+
(a, b) = "\U0010FFFF"
296+
297+
reveal_type(a) # revealed: LiteralString
298+
reveal_type(b) # revealed: Unknown
299+
```
300+
301+
### Surrogates
302+
303+
```py
304+
(a, b) = "\uD800\uDFFF"
305+
306+
reveal_type(a) # revealed: LiteralString
307+
reveal_type(b) # revealed: LiteralString
308+
```

crates/red_knot_python_semantic/src/types.rs

+77-12
Original file line numberDiff line numberDiff line change
@@ -1417,21 +1417,76 @@ impl<'db> Type<'db> {
14171417
}
14181418
}
14191419

1420+
/// Return the type of `len()` on a type if it is known more precisely than `int`,
1421+
/// or `None` otherwise.
1422+
///
1423+
/// In the `None` case, the return type of `len()` in `typeshed` (`int`)
1424+
/// is used as a fallback.
///
/// Bytes literals, string literals and tuples are measured directly; every other
/// type is resolved through the return type of calling its `__len__` dunder.
1425+
fn len(&self, db: &'db dyn Db) -> Option<Type<'db>> {
1426+
// Normalizes a `__len__` return type into a type `len()` can report:
// non-negative int literals pass through unchanged, bool literals are
// converted to the corresponding int literal, and unions are rebuilt
// element by element. Anything else yields `None`.
fn non_negative_int_literal<'db>(db: &'db dyn Db, ty: Type<'db>) -> Option<Type<'db>> {
1427+
match ty {
1428+
// TODO: Emit diagnostic for non-integers and negative integers
1429+
Type::IntLiteral(value) => (value >= 0).then_some(ty),
1430+
Type::BooleanLiteral(value) => Some(Type::IntLiteral(value.into())),
1431+
Type::Union(union) => {
1432+
let mut builder = UnionBuilder::new(db);
1433+
for element in union.elements(db) {
1434+
// `?` makes the whole union resolve to `None` as soon as a single
// element cannot be normalized.
builder = builder.add(non_negative_int_literal(db, *element)?);
1435+
}
1436+
Some(builder.build())
1437+
}
1438+
_ => None,
1439+
}
1440+
}
1441+
1442+
// First stage: types whose length can be read off the type itself.
let usize_len = match self {
1443+
Type::BytesLiteral(bytes) => Some(bytes.python_len(db)),
1444+
Type::StringLiteral(string) => Some(string.python_len(db)),
1445+
Type::Tuple(tuple) => Some(tuple.len(db)),
1446+
_ => None,
1447+
};
1448+
1449+
if let Some(usize_len) = usize_len {
1450+
// A `usize` that does not fit the integer payload of `Type::IntLiteral`
// results in `None` instead of a truncated literal.
return usize_len.try_into().ok().map(Type::IntLiteral);
1451+
}
1452+
1453+
// Second stage: look at what the type's `__len__` is declared to return.
let return_ty = match self.call_dunder(db, "__len__", &[*self]) {
1454+
// TODO: emit a diagnostic
1455+
CallDunderResult::MethodNotAvailable => return None,
1456+
1457+
CallDunderResult::CallOutcome(outcome) | CallDunderResult::PossiblyUnbound(outcome) => {
1458+
// `?` returns `None` from `len` when the outcome carries no return type.
outcome.return_ty(db)?
1459+
}
1460+
};
1461+
1462+
non_negative_int_literal(db, return_ty)
1463+
}
1464+
14201465
/// Return the outcome of calling an object of this type.
14211466
#[must_use]
14221467
fn call(self, db: &'db dyn Db, arg_types: &[Type<'db>]) -> CallOutcome<'db> {
14231468
match self {
14241469
// TODO validate typed call arguments vs callable signature
1425-
Type::FunctionLiteral(function_type) => {
1426-
if function_type.is_known(db, KnownFunction::RevealType) {
1427-
CallOutcome::revealed(
1428-
function_type.signature(db).return_ty,
1429-
*arg_types.first().unwrap_or(&Type::Unknown),
1430-
)
1431-
} else {
1432-
CallOutcome::callable(function_type.signature(db).return_ty)
1470+
Type::FunctionLiteral(function_type) => match function_type.known(db) {
1471+
Some(KnownFunction::RevealType) => CallOutcome::revealed(
1472+
function_type.signature(db).return_ty,
1473+
*arg_types.first().unwrap_or(&Type::Unknown),
1474+
),
1475+
1476+
Some(KnownFunction::Len) => {
1477+
let normal_return_ty = function_type.signature(db).return_ty;
1478+
1479+
let [only_arg] = arg_types else {
1480+
// TODO: Emit a diagnostic
1481+
return CallOutcome::callable(normal_return_ty);
1482+
};
1483+
let len_ty = only_arg.len(db);
1484+
1485+
CallOutcome::callable(len_ty.unwrap_or(normal_return_ty))
14331486
}
1434-
}
1487+
1488+
_ => CallOutcome::callable(function_type.signature(db).return_ty),
1489+
},
14351490

14361491
// TODO annotated return type on `__new__` or metaclass `__call__`
14371492
Type::ClassLiteral(ClassLiteralType { class }) => {
@@ -2597,13 +2652,15 @@ pub enum KnownFunction {
25972652
ConstraintFunction(KnownConstraintFunction),
25982653
/// `builtins.reveal_type`, `typing.reveal_type` or `typing_extensions.reveal_type`
25992654
RevealType,
2655+
/// `builtins.len`
2656+
Len,
26002657
}
26012658

26022659
impl KnownFunction {
26032660
/// Returns the wrapped [`KnownConstraintFunction`] when this is the
/// `ConstraintFunction` variant, and `None` for every other known function.
pub fn constraint_function(self) -> Option<KnownConstraintFunction> {
26042661
match self {
26052662
Self::ConstraintFunction(f) => Some(f),
2606-
Self::RevealType => None,
2663+
Self::RevealType | Self::Len => None,
26072664
}
26082665
}
26092666

@@ -2620,6 +2677,7 @@ impl KnownFunction {
26202677
"issubclass" if definition.is_builtin_definition(db) => Some(
26212678
KnownFunction::ConstraintFunction(KnownConstraintFunction::IsSubclass),
26222679
),
2680+
"len" if definition.is_builtin_definition(db) => Some(KnownFunction::Len),
26232681
_ => None,
26242682
}
26252683
}
@@ -3074,8 +3132,9 @@ pub struct StringLiteralType<'db> {
30743132
}
30753133

30763134
impl<'db> StringLiteralType<'db> {
3077-
pub fn len(&self, db: &'db dyn Db) -> usize {
3078-
self.value(db).len()
3135+
/// The length of the string, as would be returned by Python's `len()`.
///
/// This counts the `char`s yielded by `chars()` rather than taking the
/// value's `len()`, so a character that occupies several bytes still
/// counts once.
3136+
pub fn python_len(&self, db: &'db dyn Db) -> usize {
3137+
self.value(db).chars().count()
30793138
}
30803139
}
30813140

@@ -3085,6 +3144,12 @@ pub struct BytesLiteralType<'db> {
30853144
value: Box<[u8]>,
30863145
}
30873146

3147+
impl<'db> BytesLiteralType<'db> {
3148+
/// The length of the bytes literal, as would be returned by Python's `len()`.
///
/// The value is a byte slice, so this is simply the number of bytes it holds.
pub fn python_len(&self, db: &'db dyn Db) -> usize {
3149+
self.value(db).len()
3150+
}
3151+
}
3152+
30883153
#[salsa::interned]
30893154
pub struct SliceLiteralType<'db> {
30903155
start: Option<i32>,

0 commit comments

Comments
 (0)