From 62f39e3ce9721c10058aad6811878c38c5b42634 Mon Sep 17 00:00:00 2001
From: mdemello <mdemello@google.com>
Date: Thu, 27 Feb 2020 15:27:22 -0800
Subject: [PATCH 1/3] FIX: Convert lines to bytestrings before calculating byte
 offsets.

PiperOrigin-RevId: 297702793
---
 pytype/tools/traces/CMakeLists.txt |  2 ++
 pytype/tools/traces/source.py      |  7 ++++++-
 pytype/tools/traces/source_test.py | 18 ++++++++++++++++++
 3 files changed, 26 insertions(+), 1 deletion(-)

diff --git a/pytype/tools/traces/CMakeLists.txt b/pytype/tools/traces/CMakeLists.txt
index 338f77bbf..d9f732430 100644
--- a/pytype/tools/traces/CMakeLists.txt
+++ b/pytype/tools/traces/CMakeLists.txt
@@ -44,6 +44,8 @@ py_library(
     source
   SRCS
     source.py
+  DEPS
+    pytype.utils
 )
 
 py_test(
diff --git a/pytype/tools/traces/source.py b/pytype/tools/traces/source.py
index cef0c5768..f6142ff16 100644
--- a/pytype/tools/traces/source.py
+++ b/pytype/tools/traces/source.py
@@ -4,6 +4,9 @@
 from __future__ import print_function
 import collections
 
+from pytype import compat
+
+
 Location = collections.namedtuple("Location", ("line", "column"))
 
 
@@ -50,7 +53,9 @@ def _init_byte_offsets(self):
     offset = 0
     for line in self._lines:
       self._offsets.append(offset)
-      offset += len(line) + 1  # account for the \n
+      # Offsets are counted in bytes, so measure the line as a bytestring.
+      bytes_ = compat.bytestring(line)
+      offset += len(bytes_) + 1  # account for the \n
 
   def get_offset(self, location):
     """Gets the utf-8 byte offset of a source.Location from start of source."""
diff --git a/pytype/tools/traces/source_test.py b/pytype/tools/traces/source_test.py
index f49573c9d..75d3c0c3b 100644
--- a/pytype/tools/traces/source_test.py
+++ b/pytype/tools/traces/source_test.py
@@ -1,3 +1,4 @@
+# coding=utf-8
 # Lint as: python2, python3
 """Tests for traces.source."""
 
@@ -52,6 +53,23 @@ def test_get_offset(self):
     src = source.Code("line1\nline2", [], _FakeTrace, "")
     self.assertEqual(src.get_offset(source.Location(2, 3)), 9)
 
+  def test_get_offset_multibyte(self):
+    # With single-byte characters
+    src = source.Code("""\
+      # coding=utf-8
+      line1 # a
+      line2
+    """, [], _FakeTrace, "")
+    self.assertEqual(src.get_offset(source.Location(3, 3)), 40)
+
+    # With a multibyte character the byte offset should change
+    src = source.Code("""\
+      # coding=utf-8
+      line1 # ツ
+      line2
+    """, [], _FakeTrace, "")
+    self.assertEqual(src.get_offset(source.Location(3, 3)), 42)
+
   def test_line(self):
     src = source.Code("line1\nline2", [], _FakeTrace, "")
     self.assertEqual(src.line(2), "line2")

From e6d84530c4c7f9517aaf3fc76c01c4aa8d02f291 Mon Sep 17 00:00:00 2001
From: Rune Tynan <runetynan@gmail.com>
Date: Tue, 3 Mar 2020 11:17:22 -0800
Subject: [PATCH 2/3] Add lexing for base 2, 8, and 16 literals (#519)

Alters lexer.lex to lex base 2, 8, and 16 integer literals and adds a test for them. Closes #494.

Resolves #519

PiperOrigin-RevId: 298649098
---
 pytype/pyi/lexer.lex     | 12 ++++++++++++
 pytype/pyi/lexer_test.py | 17 +++++++++++++++++
 2 files changed, 29 insertions(+)

diff --git a/pytype/pyi/lexer.lex b/pytype/pyi/lexer.lex
index 8841586ef..6e5f804ca 100644
--- a/pytype/pyi/lexer.lex
+++ b/pytype/pyi/lexer.lex
@@ -119,6 +119,18 @@ typedef pytype::parser::token t;
   yylval->obj=PyInt_FromString(yytext, NULL, 10);
   return t::NUMBER;
 }
+[-+]?0b[01]+  {
+  yylval->obj=PyInt_FromString(yytext, NULL, 2);
+  return t::NUMBER;
+}
+[-+]?0o[0-7]+  {
+  yylval->obj=PyInt_FromString(yytext, NULL, 8);
+  return t::NUMBER;
+}
+[-+]?0x[0-9a-fA-F]+  {
+  yylval->obj=PyInt_FromString(yytext, NULL, 16);
+  return t::NUMBER;
+}
 [-+]?[0-9]*\.[0-9]+  {
   yylval->obj=PyFloat_FromDouble(atof(yytext));
   return t::NUMBER;
diff --git a/pytype/pyi/lexer_test.py b/pytype/pyi/lexer_test.py
index 4c226693e..b76d21001 100644
--- a/pytype/pyi/lexer_test.py
+++ b/pytype/pyi/lexer_test.py
@@ -130,6 +130,23 @@ def test_number(self):
     self.check([0.5], "+.5")
     self.check([-0.5], "-.5")
 
+  def test_number_base(self):
+    self.check([0], "0b0")
+    self.check([1], "0b1")
+    self.check([42], "0b101010")
+    self.check([-8], "-0b1000")
+
+    self.check([1], "0o1")
+    self.check([8], "0o10")
+    self.check([42], "0o52")
+    self.check([-7], "-0o7")
+
+    self.check([1], "0x1")
+    self.check([240], "0xF0")
+    self.check([-240], "-0xF0")
+    self.check([15], "0x0f")
+    self.check([-15], "-0x0f")
+
   def test_line_numbers(self):
     self.check([("NAME", "a", 1), ("NAME", "b", 2)], "a\nb")
 

From b26e81e0c817c2a52b281e9cd513575d7cd0bbc5 Mon Sep 17 00:00:00 2001
From: rechen <rechen@google.com>
Date: Tue, 3 Mar 2020 11:20:00 -0800
Subject: [PATCH 3/3] Cleanup: silence a lint error I noticed while importing
 PR #519.

PiperOrigin-RevId: 298649713
---
 pytype/pyi/lexer_test.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/pytype/pyi/lexer_test.py b/pytype/pyi/lexer_test.py
index b76d21001..701064c23 100644
--- a/pytype/pyi/lexer_test.py
+++ b/pytype/pyi/lexer_test.py
@@ -6,6 +6,9 @@
 
 import unittest
 
+# We use '\' to make test code more readable:
+# pylint: disable=g-backslash-continuation
+
 # Map from token code to name.
 TOKEN_NAMES = {code: name for name, code in parser_ext.TOKENS.items()}