From 62f39e3ce9721c10058aad6811878c38c5b42634 Mon Sep 17 00:00:00 2001 From: mdemello Date: Thu, 27 Feb 2020 15:27:22 -0800 Subject: [PATCH 1/3] FIX: Convert lines to bytestrings before calculating byte offsets. PiperOrigin-RevId: 297702793 --- pytype/tools/traces/CMakeLists.txt | 2 ++ pytype/tools/traces/source.py | 7 ++++++- pytype/tools/traces/source_test.py | 18 ++++++++++++++++++ 3 files changed, 26 insertions(+), 1 deletion(-) diff --git a/pytype/tools/traces/CMakeLists.txt b/pytype/tools/traces/CMakeLists.txt index 338f77bbf..d9f732430 100644 --- a/pytype/tools/traces/CMakeLists.txt +++ b/pytype/tools/traces/CMakeLists.txt @@ -44,6 +44,8 @@ py_library( source SRCS source.py + DEPS + pytype.utils ) py_test( diff --git a/pytype/tools/traces/source.py b/pytype/tools/traces/source.py index cef0c5768..f6142ff16 100644 --- a/pytype/tools/traces/source.py +++ b/pytype/tools/traces/source.py @@ -4,6 +4,9 @@ from __future__ import print_function import collections +from pytype import compat + + Location = collections.namedtuple("Location", ("line", "column")) @@ -50,7 +53,9 @@ def _init_byte_offsets(self): offset = 0 for line in self._lines: self._offsets.append(offset) - offset += len(line) + 1 # account for the \n + # convert line to bytes + bytes_ = compat.bytestring(line) + offset += len(bytes_) + 1 # account for the \n def get_offset(self, location): """Gets the utf-8 byte offset of a source.Location from start of source.""" diff --git a/pytype/tools/traces/source_test.py b/pytype/tools/traces/source_test.py index f49573c9d..75d3c0c3b 100644 --- a/pytype/tools/traces/source_test.py +++ b/pytype/tools/traces/source_test.py @@ -1,3 +1,4 @@ +# coding=utf-8 # Lint as: python2, python3 """Tests for traces.source.""" @@ -52,6 +53,23 @@ def test_get_offset(self): src = source.Code("line1\nline2", [], _FakeTrace, "") self.assertEqual(src.get_offset(source.Location(2, 3)), 9) + def test_get_offset_multibyte(self): + # With single-byte characters + src = source.Code("""\ + # coding=utf-8 + line1 # a + line2 + """, [], _FakeTrace, "") + self.assertEqual(src.get_offset(source.Location(3, 3)), 40) + + # With a multibyte character the byte offset should change + src = source.Code("""\ + # coding=utf-8 + line1 # ツ + line2 + """, [], _FakeTrace, "") + self.assertEqual(src.get_offset(source.Location(3, 3)), 42) + def test_line(self): src = source.Code("line1\nline2", [], _FakeTrace, "") self.assertEqual(src.line(2), "line2") From e6d84530c4c7f9517aaf3fc76c01c4aa8d02f291 Mon Sep 17 00:00:00 2001 From: Rune Tynan Date: Tue, 3 Mar 2020 11:17:22 -0800 Subject: [PATCH 2/3] Add lexing for base 2, 8, and 16 literals (#519) Alters lexer.lex and adds a test for the new numeric types, Closes #494 Resolves #519 PiperOrigin-RevId: 298649098 --- pytype/pyi/lexer.lex | 12 ++++++++++++ pytype/pyi/lexer_test.py | 17 +++++++++++++++++ 2 files changed, 29 insertions(+) diff --git a/pytype/pyi/lexer.lex b/pytype/pyi/lexer.lex index 8841586ef..6e5f804ca 100644 --- a/pytype/pyi/lexer.lex +++ b/pytype/pyi/lexer.lex @@ -119,6 +119,18 @@ typedef pytype::parser::token t; yylval->obj=PyInt_FromString(yytext, NULL, 10); return t::NUMBER; } +[-+]?0b[01]+ { + yylval->obj=PyInt_FromString(yytext, NULL, 2); + return t::NUMBER; +} +[-+]?0o[0-7]+ { + yylval->obj=PyInt_FromString(yytext, NULL, 8); + return t::NUMBER; +} +[-+]?0x[0-9a-fA-F]+ { + yylval->obj=PyInt_FromString(yytext, NULL, 16); + return t::NUMBER; +} [-+]?[0-9]*\.[0-9]+ { yylval->obj=PyFloat_FromDouble(atof(yytext)); return t::NUMBER; diff --git a/pytype/pyi/lexer_test.py b/pytype/pyi/lexer_test.py index 4c226693e..b76d21001 100644 --- a/pytype/pyi/lexer_test.py +++ b/pytype/pyi/lexer_test.py @@ -130,6 +130,23 @@ def test_number(self): self.check([0.5], "+.5") self.check([-0.5], "-.5") + def test_number_base(self): + self.check([0], "0b0") + self.check([1], "0b1") + self.check([42], "0b101010") + self.check([-8], "-0b1000") + + self.check([1], "0o1") + self.check([8], "0o10") + self.check([42], "0o52") + self.check([-7], "-0o7") + + self.check([1], "0x1") + self.check([240], "0xF0") + self.check([-240], "-0xF0") + self.check([15], "0x0f") + self.check([-15], "-0x0f") + def test_line_numbers(self): self.check([("NAME", "a", 1), ("NAME", "b", 2)], "a\nb") From b26e81e0c817c2a52b281e9cd513575d7cd0bbc5 Mon Sep 17 00:00:00 2001 From: rechen Date: Tue, 3 Mar 2020 11:20:00 -0800 Subject: [PATCH 3/3] Cleanup: silence a lint error I noticed while importing PR #519. PiperOrigin-RevId: 298649713 --- pytype/pyi/lexer_test.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pytype/pyi/lexer_test.py b/pytype/pyi/lexer_test.py index b76d21001..701064c23 100644 --- a/pytype/pyi/lexer_test.py +++ b/pytype/pyi/lexer_test.py @@ -6,6 +6,9 @@ import unittest +# We use '\' to make test code more readable: +# pylint: disable=g-backslash-continuation + # Map from token code to name. TOKEN_NAMES = {code: name for name, code in parser_ext.TOKENS.items()}