Skip to content

Commit 7cb2e44

Browse files
committed
Initial implementation of PEP 701
1 parent 094cf39 commit 7cb2e44

17 files changed

+3780
-2019
lines changed

Doc/library/token-list.inc

+10
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Grammar/Tokens

+4
Original file line number | Diff line number | Diff line change
@@ -53,13 +53,17 @@ ATEQUAL '@='
5353
RARROW '->'
5454
ELLIPSIS '...'
5555
COLONEQUAL ':='
56+
EXCLAMATION '!'
5657

5758
OP
5859
AWAIT
5960
ASYNC
6061
TYPE_IGNORE
6162
TYPE_COMMENT
6263
SOFT_KEYWORD
64+
FSTRING_START
65+
FSTRING_MIDDLE
66+
FSTRING_END
6367
ERRORTOKEN
6468

6569
# These aren't used by the C tokenizer but are needed for tokenize.py

Grammar/python.gram

+25-2
Original file line number | Diff line number | Diff line change
@@ -807,7 +807,8 @@ atom[expr_ty]:
807807
| 'True' { _PyAST_Constant(Py_True, NULL, EXTRA) }
808808
| 'False' { _PyAST_Constant(Py_False, NULL, EXTRA) }
809809
| 'None' { _PyAST_Constant(Py_None, NULL, EXTRA) }
810-
| &STRING strings
810+
# | gstring
811+
| &(STRING|FSTRING_START) strings
811812
| NUMBER
812813
| &'(' (tuple | group | genexp)
813814
| &'[' (list | listcomp)
@@ -877,7 +878,29 @@ lambda_param[arg_ty]: a=NAME { _PyAST_arg(a->v.Name.id, NULL, NULL, EXTRA) }
877878
# LITERALS
878879
# ========
879880

880-
strings[expr_ty] (memo): a=STRING+ { _PyPegen_concatenate_strings(p, a) }
881+
gstring_middle[expr_ty]:
882+
| gstring_replacement_field
883+
| t=FSTRING_MIDDLE { _PyPegen_constant_from_token2(p, t) }
884+
# There are some shenanigans with the gstring_format_spec: Don't try to put it in its own rule
885+
# or otherwise it will try to parse the first token with the regular tokenizer mode (due to the EXTRA).
886+
# TODO: (Ideally we need a way similar to 'memo' so the parser can set the tokenize mode on fstring/normal)
887+
gstring_replacement_field[expr_ty]:
888+
| expr_start='{' a=(yield_expr | star_expressions) debug_expr="="? conversion=[
889+
conv_token="!" conv=NAME { _PyPegen_check_fstring_conversion(p, conv_token, conv) ? NULL : conv }
890+
] format=[
891+
':' spec=gstring_format_spec* { spec ? _PyAST_JoinedStr((asdl_expr_seq*)spec, EXTRA) : NULL }
892+
] &&'}' {
893+
_PyPegen_formatted_value(p, a, debug_expr, conversion, format, EXTRA)
894+
}
895+
gstring_format_spec[expr_ty]:
896+
| t=FSTRING_MIDDLE { _PyPegen_constant_from_token2(p, t) }
897+
| gstring_replacement_field
898+
gstring[expr_ty]:
899+
| a=FSTRING_START b=gstring_middle* c=FSTRING_END { deal_with_gstring2(p, a, (asdl_expr_seq*)b, c) }
900+
901+
string[expr_ty]: s[Token*]=STRING { _PyPegen_constant_from_token(p, s) }
902+
strings[expr_ty] (memo): a[asdl_expr_seq*]=(gstring|string)+ { _PyPegen_concatenate_strings2(p, a, EXTRA) }
903+
# strings[expr_ty] (memo): a=STRING+ { _PyPegen_concatenate_strings(p, a) }
881904

882905
list[expr_ty]:
883906
| '[' a=[star_named_expressions] ']' { _PyAST_List(a, Load, EXTRA) }

Include/internal/pycore_token.h

+12-8
Original file line number | Diff line number | Diff line change
@@ -67,14 +67,18 @@ extern "C" {
6767
#define RARROW 51
6868
#define ELLIPSIS 52
6969
#define COLONEQUAL 53
70-
#define OP 54
71-
#define AWAIT 55
72-
#define ASYNC 56
73-
#define TYPE_IGNORE 57
74-
#define TYPE_COMMENT 58
75-
#define SOFT_KEYWORD 59
76-
#define ERRORTOKEN 60
77-
#define N_TOKENS 64
70+
#define EXCLAMATION 54
71+
#define OP 55
72+
#define AWAIT 56
73+
#define ASYNC 57
74+
#define TYPE_IGNORE 58
75+
#define TYPE_COMMENT 59
76+
#define SOFT_KEYWORD 60
77+
#define FSTRING_START 61
78+
#define FSTRING_MIDDLE 62
79+
#define FSTRING_END 63
80+
#define ERRORTOKEN 64
81+
#define N_TOKENS 68
7882
#define NT_OFFSET 256
7983

8084
/* Special definitions for cooperation with parser */

Lib/random.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -468,7 +468,7 @@ def choices(self, population, weights=None, *, cum_weights=None, k=1):
468468
raise
469469
k = weights
470470
raise TypeError(
471-
f'The number of choices must be a keyword argument: {k=}'
471+
f'The number of choices must be a keyword argument: {k}'
472472
) from None
473473
elif weights is not None:
474474
raise TypeError('Cannot specify both weights and cumulative weights')

Lib/test/test_fstring.py

+76-20
Original file line number | Diff line number | Diff line change
@@ -329,13 +329,13 @@ def test_ast_line_numbers_multiline_fstring(self):
329329
self.assertEqual(t.body[1].lineno, 3)
330330
self.assertEqual(t.body[1].value.lineno, 3)
331331
self.assertEqual(t.body[1].value.values[0].lineno, 3)
332-
self.assertEqual(t.body[1].value.values[1].lineno, 3)
333-
self.assertEqual(t.body[1].value.values[2].lineno, 3)
332+
self.assertEqual(t.body[1].value.values[1].lineno, 4)
333+
self.assertEqual(t.body[1].value.values[2].lineno, 6)
334334
self.assertEqual(t.body[1].col_offset, 0)
335335
self.assertEqual(t.body[1].value.col_offset, 0)
336-
self.assertEqual(t.body[1].value.values[0].col_offset, 0)
337-
self.assertEqual(t.body[1].value.values[1].col_offset, 0)
338-
self.assertEqual(t.body[1].value.values[2].col_offset, 0)
336+
self.assertEqual(t.body[1].value.values[0].col_offset, 4)
337+
self.assertEqual(t.body[1].value.values[1].col_offset, 2)
338+
self.assertEqual(t.body[1].value.values[2].col_offset, 11)
339339
# NOTE: the following lineno information and col_offset is correct for
340340
# expressions within FormattedValues.
341341
binop = t.body[1].value.values[1].value
@@ -366,13 +366,13 @@ def test_ast_line_numbers_multiline_fstring(self):
366366
self.assertEqual(t.body[0].lineno, 2)
367367
self.assertEqual(t.body[0].value.lineno, 2)
368368
self.assertEqual(t.body[0].value.values[0].lineno, 2)
369-
self.assertEqual(t.body[0].value.values[1].lineno, 2)
370-
self.assertEqual(t.body[0].value.values[2].lineno, 2)
369+
self.assertEqual(t.body[0].value.values[1].lineno, 3)
370+
self.assertEqual(t.body[0].value.values[2].lineno, 3)
371371
self.assertEqual(t.body[0].col_offset, 0)
372372
self.assertEqual(t.body[0].value.col_offset, 4)
373-
self.assertEqual(t.body[0].value.values[0].col_offset, 4)
374-
self.assertEqual(t.body[0].value.values[1].col_offset, 4)
375-
self.assertEqual(t.body[0].value.values[2].col_offset, 4)
373+
self.assertEqual(t.body[0].value.values[0].col_offset, 8)
374+
self.assertEqual(t.body[0].value.values[1].col_offset, 10)
375+
self.assertEqual(t.body[0].value.values[2].col_offset, 17)
376376
# Check {blech}
377377
self.assertEqual(t.body[0].value.values[1].value.lineno, 3)
378378
self.assertEqual(t.body[0].value.values[1].value.end_lineno, 3)
@@ -387,6 +387,20 @@ def test_ast_line_numbers_with_parentheses(self):
387387
t = ast.parse(expr)
388388
self.assertEqual(type(t), ast.Module)
389389
self.assertEqual(len(t.body), 1)
390+
# check the joinedstr location
391+
joinedstr = t.body[0].value
392+
self.assertEqual(type(joinedstr), ast.JoinedStr)
393+
self.assertEqual(joinedstr.lineno, 3)
394+
self.assertEqual(joinedstr.end_lineno, 3)
395+
self.assertEqual(joinedstr.col_offset, 4)
396+
self.assertEqual(joinedstr.end_col_offset, 17)
397+
# check the formatted value location
398+
fv = t.body[0].value.values[1]
399+
self.assertEqual(type(fv), ast.FormattedValue)
400+
self.assertEqual(fv.lineno, 3)
401+
self.assertEqual(fv.end_lineno, 3)
402+
self.assertEqual(fv.col_offset, 7)
403+
self.assertEqual(fv.end_col_offset, 16)
390404
# check the test(t) location
391405
call = t.body[0].value.values[1].value
392406
self.assertEqual(type(call), ast.Call)
@@ -415,9 +429,9 @@ def test_ast_line_numbers_with_parentheses(self):
415429
# check the first wat
416430
self.assertEqual(type(wat1), ast.Constant)
417431
self.assertEqual(wat1.lineno, 4)
418-
self.assertEqual(wat1.end_lineno, 6)
419-
self.assertEqual(wat1.col_offset, 12)
420-
self.assertEqual(wat1.end_col_offset, 18)
432+
self.assertEqual(wat1.end_lineno, 5)
433+
self.assertEqual(wat1.col_offset, 14)
434+
self.assertEqual(wat1.end_col_offset, 26)
421435
# check the call
422436
call = middle.value
423437
self.assertEqual(type(call), ast.Call)
@@ -427,9 +441,9 @@ def test_ast_line_numbers_with_parentheses(self):
427441
self.assertEqual(call.end_col_offset, 31)
428442
# check the second wat
429443
self.assertEqual(type(wat2), ast.Constant)
430-
self.assertEqual(wat2.lineno, 4)
444+
self.assertEqual(wat2.lineno, 5)
431445
self.assertEqual(wat2.end_lineno, 6)
432-
self.assertEqual(wat2.col_offset, 12)
446+
self.assertEqual(wat2.col_offset, 32)
433447
self.assertEqual(wat2.end_col_offset, 18)
434448

435449
def test_docstring(self):
@@ -618,6 +632,7 @@ def test_format_specifier_expressions(self):
618632
self.assertEqual(f'{-10:-{"#"}1{0}x}', ' -0xa')
619633
self.assertEqual(f'{-10:{"-"}#{1}0{"x"}}', ' -0xa')
620634
self.assertEqual(f'{10:#{3 != {4:5} and width}x}', ' 0xa')
635+
self.assertEqual(f'result: {value:{width:{0}}.{precision:1}}', 'result: 12.35')
621636

622637
self.assertAllRaise(SyntaxError,
623638
"""f-string: invalid conversion character 'r{"': """
@@ -632,11 +647,6 @@ def test_format_specifier_expressions(self):
632647
"f'{4:{/5}}'",
633648
])
634649

635-
self.assertAllRaise(SyntaxError, "f-string: expressions nested too deeply",
636-
[# Can't nest format specifiers.
637-
"f'result: {value:{width:{0}}.{precision:1}}'",
638-
])
639-
640650
self.assertAllRaise(SyntaxError, 'f-string: invalid conversion character',
641651
[# No expansion inside conversion or for
642652
# the : or ! itself.
@@ -848,6 +858,50 @@ def test_lambda(self):
848858
["f'{lambda x:x}'",
849859
])
850860

861+
def test_valid_prefixes(self):
862+
self.assertEqual(F'{1}', "1")
863+
self.assertEqual(FR'{2}', "2")
864+
self.assertEqual(fR'{3}', "3")
865+
866+
def test_roundtrip_raw_quotes(self):
867+
self.assertEqual(fr"\'", "\\'")
868+
self.assertEqual(fr'\"', '\\"')
869+
self.assertEqual(fr'\"\'', '\\"\\\'')
870+
self.assertEqual(fr'\'\"', '\\\'\\"')
871+
self.assertEqual(fr'\"\'\"', '\\"\\\'\\"')
872+
self.assertEqual(fr'\'\"\'', '\\\'\\"\\\'')
873+
self.assertEqual(fr'\"\'\"\'', '\\"\\\'\\"\\\'')
874+
875+
def test_fstring_backslash_before_double_bracket(self):
876+
self.assertEqual(f'\{{\}}', '\\{\\}')
877+
self.assertEqual(f'\{{', '\\{')
878+
self.assertEqual(f'\{{{1+1}', '\\{2')
879+
self.assertEqual(f'\}}{1+1}', '\\}2')
880+
self.assertEqual(f'{1+1}\}}', '2\\}')
881+
self.assertEqual(fr'\{{\}}', '\\{\\}')
882+
self.assertEqual(fr'\{{', '\\{')
883+
self.assertEqual(fr'\{{{1+1}', '\\{2')
884+
self.assertEqual(fr'\}}{1+1}', '\\}2')
885+
self.assertEqual(fr'{1+1}\}}', '2\\}')
886+
887+
def test_fstring_backslash_prefix_raw(self):
888+
self.assertEqual(f'\\', '\\')
889+
self.assertEqual(f'\\\\', '\\\\')
890+
self.assertEqual(fr'\\', r'\\')
891+
self.assertEqual(fr'\\\\', r'\\\\')
892+
self.assertEqual(rf'\\', r'\\')
893+
self.assertEqual(rf'\\\\', r'\\\\')
894+
self.assertEqual(Rf'\\', R'\\')
895+
self.assertEqual(Rf'\\\\', R'\\\\')
896+
self.assertEqual(fR'\\', R'\\')
897+
self.assertEqual(fR'\\\\', R'\\\\')
898+
self.assertEqual(FR'\\', R'\\')
899+
self.assertEqual(FR'\\\\', R'\\\\')
900+
901+
def test_fstring_format_spec_greedy_matching(self):
902+
self.assertEqual(f"{1:}}}", "1}")
903+
self.assertEqual(f"{1:>3{5}}}}", " 1}")
904+
851905
def test_yield(self):
852906
# Not terribly useful, but make sure the yield turns
853907
# a function into a generator
@@ -1314,6 +1368,7 @@ def __repr__(self):
13141368
self.assertEqual(f'X{x =}Y', 'Xx ='+repr(x)+'Y')
13151369
self.assertEqual(f'X{x= }Y', 'Xx= '+repr(x)+'Y')
13161370
self.assertEqual(f'X{x = }Y', 'Xx = '+repr(x)+'Y')
1371+
self.assertEqual(f"sadsd {1 + 1 = :{1 + 1:1d}f}", "sadsd 1 + 1 = 2.000000")
13171372

13181373
# These next lines contains tabs. Backslash escapes don't
13191374
# work in f-strings.
@@ -1324,6 +1379,7 @@ def __repr__(self):
13241379
#self.assertEqual(f'X{x =}Y', 'Xx\t='+repr(x)+'Y')
13251380
#self.assertEqual(f'X{x = }Y', 'Xx\t=\t'+repr(x)+'Y')
13261381

1382+
13271383
def test_walrus(self):
13281384
x = 20
13291385
# This isn't an assignment expression, it's 'x', with a format

Lib/token.py

+16-11
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)