Skip to content

Commit 77ae0cc

Browse files
authored
[red-knot] Infer subscript expression types for bytes literals (#13901)
## Summary

Infer subscript expression types for bytes literals:

```py
b = b"\x00abc\xff"

reveal_type(b[0])  # revealed: Literal[b"\x00"]
reveal_type(b[1])  # revealed: Literal[b"a"]
reveal_type(b[-1])  # revealed: Literal[b"\xff"]
reveal_type(b[-2])  # revealed: Literal[b"c"]

reveal_type(b[False])  # revealed: Literal[b"\x00"]
reveal_type(b[True])  # revealed: Literal[b"a"]
```

part of #13689 (#13689 (comment))

## Test Plan

- New Markdown-based tests (see `mdtest/subscript/bytes.md`)
- Added missing test for `string_literal[bool_literal]`
1 parent 73ee72b commit 77ae0cc

File tree

7 files changed

+156
-68
lines changed

7 files changed

+156
-68
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
# Bytes literals
2+
3+
## Simple
4+
5+
```py
6+
reveal_type(b"red" b"knot") # revealed: Literal[b"redknot"]
7+
reveal_type(b"hello") # revealed: Literal[b"hello"]
8+
reveal_type(b"world" + b"!") # revealed: Literal[b"world!"]
9+
reveal_type(b"\xff\x00") # revealed: Literal[b"\xff\x00"]
10+
```

crates/red_knot_python_semantic/resources/mdtest/subscript/bytes.md

+29-4
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,33 @@
33
## Simple
44

55
```py
6-
reveal_type(b"red" b"knot") # revealed: Literal[b"redknot"]
7-
reveal_type(b"hello") # revealed: Literal[b"hello"]
8-
reveal_type(b"world" + b"!") # revealed: Literal[b"world!"]
9-
reveal_type(b"\xff\x00") # revealed: Literal[b"\xff\x00"]
6+
b = b"\x00abc\xff"
7+
8+
reveal_type(b[0]) # revealed: Literal[b"\x00"]
9+
reveal_type(b[1]) # revealed: Literal[b"a"]
10+
reveal_type(b[4]) # revealed: Literal[b"\xff"]
11+
12+
reveal_type(b[-1]) # revealed: Literal[b"\xff"]
13+
reveal_type(b[-2]) # revealed: Literal[b"c"]
14+
reveal_type(b[-5]) # revealed: Literal[b"\x00"]
15+
16+
reveal_type(b[False]) # revealed: Literal[b"\x00"]
17+
reveal_type(b[True]) # revealed: Literal[b"a"]
18+
19+
x = b[5] # error: [index-out-of-bounds] "Index 5 is out of bounds for bytes literal `Literal[b"\x00abc\xff"]` with length 5"
20+
reveal_type(x) # revealed: Unknown
21+
22+
y = b[-6] # error: [index-out-of-bounds] "Index -6 is out of bounds for bytes literal `Literal[b"\x00abc\xff"]` with length 5"
23+
reveal_type(y) # revealed: Unknown
24+
```
25+
26+
## Function return
27+
28+
```py
29+
def int_instance() -> int: ...
30+
31+
32+
a = b"abcde"[int_instance()]
33+
# TODO: Support overloads... Should be `int` (indexing `bytes` with an `int` yields an `int`)
34+
reveal_type(a) # revealed: @Todo
1035
```

crates/red_knot_python_semantic/resources/mdtest/subscript/string.md

+5-3
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,9 @@ reveal_type(s[1]) # revealed: Literal["b"]
1010
reveal_type(s[-1]) # revealed: Literal["e"]
1111
reveal_type(s[-2]) # revealed: Literal["d"]
1212

13+
reveal_type(s[False]) # revealed: Literal["a"]
14+
reveal_type(s[True]) # revealed: Literal["b"]
15+
1316
a = s[8] # error: [index-out-of-bounds] "Index 8 is out of bounds for string `Literal["abcde"]` with length 5"
1417
reveal_type(a) # revealed: Unknown
1518

@@ -20,11 +23,10 @@ reveal_type(b) # revealed: Unknown
2023
## Function return
2124

2225
```py
23-
def add(x: int, y: int) -> int:
24-
return x + y
26+
def int_instance() -> int: ...
2527

2628

27-
a = "abcde"[add(0, 1)]
29+
a = "abcde"[int_instance()]
2830
# TODO: Support overloads... Should be `str`
2931
reveal_type(a) # revealed: @Todo
3032
```

crates/red_knot_python_semantic/src/lib.rs

+1
Original file line numberDiff line numberDiff line change
@@ -21,5 +21,6 @@ mod semantic_model;
2121
pub(crate) mod site_packages;
2222
mod stdlib;
2323
pub mod types;
24+
mod util;
2425

2526
type FxOrderSet<V> = ordermap::set::OrderSet<V, BuildHasherDefault<FxHasher>>;

crates/red_knot_python_semantic/src/types/infer.rs

+28-61
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ use crate::types::{
5555
typing_extensions_symbol_ty, BytesLiteralType, ClassType, FunctionType, KnownFunction,
5656
StringLiteralType, Truthiness, TupleType, Type, TypeArrayDisplay, UnionType,
5757
};
58+
use crate::util::subscript::PythonSubscript;
5859
use crate::Db;
5960

6061
use super::{KnownClass, UnionBuilder};
@@ -1466,8 +1467,9 @@ impl<'db> TypeInferenceBuilder<'db> {
14661467
}
14671468

14681469
/// Emit a diagnostic declaring that an index is out of bounds for a sequence of the given `kind` (tuple, string, or bytes literal).
1469-
pub(super) fn tuple_index_out_of_bounds_diagnostic(
1470+
pub(super) fn index_out_of_bounds_diagnostic(
14701471
&mut self,
1472+
kind: &'static str,
14711473
node: AnyNodeRef,
14721474
tuple_ty: Type<'db>,
14731475
length: usize,
@@ -1477,30 +1479,12 @@ impl<'db> TypeInferenceBuilder<'db> {
14771479
node,
14781480
"index-out-of-bounds",
14791481
format_args!(
1480-
"Index {index} is out of bounds for tuple of type `{}` with length {length}",
1482+
"Index {index} is out of bounds for {kind} `{}` with length {length}",
14811483
tuple_ty.display(self.db)
14821484
),
14831485
);
14841486
}
14851487

1486-
/// Emit a diagnostic declaring that an index is out of bounds for a string.
1487-
pub(super) fn string_index_out_of_bounds_diagnostic(
1488-
&mut self,
1489-
node: AnyNodeRef,
1490-
string_ty: Type<'db>,
1491-
length: usize,
1492-
index: i64,
1493-
) {
1494-
self.add_diagnostic(
1495-
node,
1496-
"index-out-of-bounds",
1497-
format_args!(
1498-
"Index {index} is out of bounds for string `{}` with length {length}",
1499-
string_ty.display(self.db)
1500-
),
1501-
);
1502-
}
1503-
15041488
/// Emit a diagnostic declaring that a type does not support subscripting.
15051489
pub(super) fn non_subscriptable_diagnostic(
15061490
&mut self,
@@ -3192,30 +3176,15 @@ impl<'db> TypeInferenceBuilder<'db> {
31923176
) -> Type<'db> {
31933177
match (value_ty, slice_ty) {
31943178
// Ex) Given `("a", "b", "c", "d")[1]`, return `"b"`
3195-
(Type::Tuple(tuple_ty), Type::IntLiteral(int)) if int >= 0 => {
3179+
(Type::Tuple(tuple_ty), Type::IntLiteral(int)) => {
31963180
let elements = tuple_ty.elements(self.db);
3197-
usize::try_from(int)
3198-
.ok()
3199-
.and_then(|index| elements.get(index).copied())
3200-
.unwrap_or_else(|| {
3201-
self.tuple_index_out_of_bounds_diagnostic(
3202-
value_node.into(),
3203-
value_ty,
3204-
elements.len(),
3205-
int,
3206-
);
3207-
Type::Unknown
3208-
})
3209-
}
3210-
// Ex) Given `("a", "b", "c", "d")[-1]`, return `"c"`
3211-
(Type::Tuple(tuple_ty), Type::IntLiteral(int)) if int < 0 => {
3212-
let elements = tuple_ty.elements(self.db);
3213-
int.checked_neg()
3214-
.and_then(|int| usize::try_from(int).ok())
3215-
.and_then(|index| elements.len().checked_sub(index))
3216-
.and_then(|index| elements.get(index).copied())
3181+
elements
3182+
.iter()
3183+
.python_subscript(int)
3184+
.copied()
32173185
.unwrap_or_else(|| {
3218-
self.tuple_index_out_of_bounds_diagnostic(
3186+
self.index_out_of_bounds_diagnostic(
3187+
"tuple",
32193188
value_node.into(),
32203189
value_ty,
32213190
elements.len(),
@@ -3231,19 +3200,20 @@ impl<'db> TypeInferenceBuilder<'db> {
32313200
Type::IntLiteral(i64::from(bool)),
32323201
),
32333202
// Ex) Given `"value"[1]`, return `"a"`
3234-
(Type::StringLiteral(literal_ty), Type::IntLiteral(int)) if int >= 0 => {
3203+
(Type::StringLiteral(literal_ty), Type::IntLiteral(int)) => {
32353204
let literal_value = literal_ty.value(self.db);
3236-
usize::try_from(int)
3237-
.ok()
3238-
.and_then(|index| literal_value.chars().nth(index))
3205+
literal_value
3206+
.chars()
3207+
.python_subscript(int)
32393208
.map(|ch| {
32403209
Type::StringLiteral(StringLiteralType::new(
32413210
self.db,
32423211
ch.to_string().into_boxed_str(),
32433212
))
32443213
})
32453214
.unwrap_or_else(|| {
3246-
self.string_index_out_of_bounds_diagnostic(
3215+
self.index_out_of_bounds_diagnostic(
3216+
"string",
32473217
value_node.into(),
32483218
value_ty,
32493219
literal_value.chars().count(),
@@ -3252,31 +3222,28 @@ impl<'db> TypeInferenceBuilder<'db> {
32523222
Type::Unknown
32533223
})
32543224
}
3255-
// Ex) Given `"value"[-1]`, return `"e"`
3256-
(Type::StringLiteral(literal_ty), Type::IntLiteral(int)) if int < 0 => {
3225+
// Ex) Given `b"value"[1]`, return `b"a"`
3226+
(Type::BytesLiteral(literal_ty), Type::IntLiteral(int)) => {
32573227
let literal_value = literal_ty.value(self.db);
3258-
int.checked_neg()
3259-
.and_then(|int| usize::try_from(int).ok())
3260-
.and_then(|index| index.checked_sub(1))
3261-
.and_then(|index| literal_value.chars().rev().nth(index))
3262-
.map(|ch| {
3263-
Type::StringLiteral(StringLiteralType::new(
3264-
self.db,
3265-
ch.to_string().into_boxed_str(),
3266-
))
3228+
literal_value
3229+
.iter()
3230+
.python_subscript(int)
3231+
.map(|byte| {
3232+
Type::BytesLiteral(BytesLiteralType::new(self.db, [*byte].as_slice()))
32673233
})
32683234
.unwrap_or_else(|| {
3269-
self.string_index_out_of_bounds_diagnostic(
3235+
self.index_out_of_bounds_diagnostic(
3236+
"bytes literal",
32703237
value_node.into(),
32713238
value_ty,
3272-
literal_value.chars().count(),
3239+
literal_value.len(),
32733240
int,
32743241
);
32753242
Type::Unknown
32763243
})
32773244
}
32783245
// Ex) Given `"value"[True]`, return `"a"`; given `b"value"[True]`, return `b"a"`
3279-
(Type::StringLiteral(_), Type::BooleanLiteral(bool)) => self
3246+
(Type::StringLiteral(_) | Type::BytesLiteral(_), Type::BooleanLiteral(bool)) => self
32803247
.infer_subscript_expression_types(
32813248
value_node,
32823249
value_ty,
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
pub(crate) mod subscript;
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
pub(crate) trait PythonSubscript {
2+
type Item;
3+
4+
fn python_subscript(&mut self, index: i64) -> Option<Self::Item>;
5+
}
6+
7+
impl<I, T: DoubleEndedIterator<Item = I>> PythonSubscript for T {
8+
type Item = I;
9+
10+
fn python_subscript(&mut self, index: i64) -> Option<I> {
11+
if index >= 0 {
12+
self.nth(usize::try_from(index).ok()?)
13+
} else {
14+
let nth_rev = usize::try_from(index.checked_neg()?).ok()?.checked_sub(1)?;
15+
self.rev().nth(nth_rev)
16+
}
17+
}
18+
}
19+
20+
#[cfg(test)]
21+
mod tests {
22+
use super::PythonSubscript;
23+
24+
#[test]
25+
fn python_subscript_basic() {
26+
let iter = 'a'..='e';
27+
28+
assert_eq!(iter.clone().python_subscript(0), Some('a'));
29+
assert_eq!(iter.clone().python_subscript(1), Some('b'));
30+
assert_eq!(iter.clone().python_subscript(4), Some('e'));
31+
assert_eq!(iter.clone().python_subscript(5), None);
32+
33+
assert_eq!(iter.clone().python_subscript(-1), Some('e'));
34+
assert_eq!(iter.clone().python_subscript(-2), Some('d'));
35+
assert_eq!(iter.clone().python_subscript(-5), Some('a'));
36+
assert_eq!(iter.clone().python_subscript(-6), None);
37+
}
38+
39+
#[test]
40+
fn python_subscript_empty() {
41+
let iter = 'a'..'a';
42+
43+
assert_eq!(iter.clone().python_subscript(0), None);
44+
assert_eq!(iter.clone().python_subscript(1), None);
45+
assert_eq!(iter.clone().python_subscript(-1), None);
46+
}
47+
48+
#[test]
49+
fn python_subscript_single_element() {
50+
let iter = 'a'..='a';
51+
52+
assert_eq!(iter.clone().python_subscript(0), Some('a'));
53+
assert_eq!(iter.clone().python_subscript(1), None);
54+
assert_eq!(iter.clone().python_subscript(-1), Some('a'));
55+
assert_eq!(iter.clone().python_subscript(-2), None);
56+
}
57+
58+
#[test]
59+
fn python_subscript_uses_full_index_range() {
60+
let iter = 0..=u64::MAX;
61+
62+
assert_eq!(iter.clone().python_subscript(0), Some(0));
63+
assert_eq!(iter.clone().python_subscript(1), Some(1));
64+
assert_eq!(
65+
iter.clone().python_subscript(i64::MAX),
66+
Some(i64::MAX as u64)
67+
);
68+
69+
assert_eq!(iter.clone().python_subscript(-1), Some(u64::MAX));
70+
assert_eq!(iter.clone().python_subscript(-2), Some(u64::MAX - 1));
71+
72+
// i64::MIN is not representable as a positive number, so it is not
73+
// a valid index:
74+
assert_eq!(iter.clone().python_subscript(i64::MIN), None);
75+
76+
// but i64::MIN + 1 is:
77+
assert_eq!(
78+
iter.clone().python_subscript(i64::MIN + 1),
79+
Some(2u64.pow(63) + 1)
80+
);
81+
}
82+
}

0 commit comments

Comments
 (0)