Merge pull request #1301 from AndrewBasem1/feature/get_records_limit

feature -- adding `worksheet.get_records_subset` to get specific row ranges
burnash · Sep 28, 2023 · 7fe63bf · 7fe63bf
2 parents db06366 + 4f39bef
commit 7fe63bf
Show file tree

Hide file tree

Showing 15 changed files with 6,814 additions and 6,121 deletions.
diff --git a/gspread/utils.py b/gspread/utils.py
@@ -533,12 +533,12 @@ def wid_to_gid(wid):
     return str(int(widval, 36) ^ xorval)
 
 
-def rightpad(row, max_len):
+def rightpad(row, max_len, padding_value=""):
     pad_len = max_len - len(row)
-    return row + ([""] * pad_len) if pad_len != 0 else row
+    return row + ([padding_value] * pad_len) if pad_len != 0 else row
 
 
-def fill_gaps(L, rows=None, cols=None):
+def fill_gaps(L, rows=None, cols=None, padding_value=""):
     """Fill gaps in a list of lists.
     e.g.,::
 
@@ -554,10 +554,12 @@ def fill_gaps(L, rows=None, cols=None):
     :param L: List of lists to fill gaps in.
     :param rows: Number of rows to fill.
     :param cols: Number of columns to fill.
+    :param padding_value: Default value to fill gaps with.
 
     :type L: list[list[T]]
     :type rows: int
     :type cols: int
+    :type padding_value: T
 
     :return: List of lists with gaps filled.
     :rtype: list[list[T]]:
@@ -571,7 +573,7 @@ def fill_gaps(L, rows=None, cols=None):
         if pad_rows:
             L = L + ([[]] * pad_rows)
 
-        return [rightpad(row, max_cols) for row in L]
+        return [rightpad(row, max_cols, padding_value=padding_value) for row in L]
     except ValueError:
         return []
 

diff --git a/gspread/worksheet.py b/gspread/worksheet.py
@@ -501,15 +501,18 @@ def get_all_values(self, **kwargs):
         """
         return self.get_values(**kwargs)
 
-    def get_all_records(
-        self,
+    @accepted_kwargs(
         empty2zero=False,
         head=1,
         default_blank="",
         allow_underscores_in_numeric_literals=False,
         numericise_ignore=[],
         value_render_option=None,
         expected_headers=None,
+    )
+    def get_all_records(
+        self,
+        **kwargs,
     ):
         """Returns a list of dictionaries, all of them having the contents of
         the spreadsheet with the head row as keys and each of these
@@ -542,40 +545,153 @@ def get_all_records(
                 returned dictionaries will contain all headers even if not included in this list
 
         """
-        idx = head - 1
+        return self.get_records(**kwargs)
 
-        data = self.get_all_values(value_render_option=value_render_option)
+    def get_records(  # noqa: C901 # this comment disables the complexity check for this function
+        self,
+        empty2zero=False,
+        head=1,
+        use_index=0,
+        first_index=None,
+        last_index=None,
+        default_blank="",
+        allow_underscores_in_numeric_literals=False,
+        numericise_ignore=[],
+        value_render_option=None,
+        expected_headers=None,
+    ):
+        """Returns a list of dictionaries, all of them having the contents of
+        the spreadsheet range selected with the head row/col as keys and each of these
+        dictionaries holding the contents of subsequent selected rows/cols of cells as
+        values.
 
-        # Return an empty list if the sheet doesn't have enough rows
-        if len(data) <= idx:
-            return []
+        Cell values are numericised (strings that can be read as ints or floats
+        are converted), unless specified in numericise_ignore
 
-        keys = data[idx]
+        Can be used to read data from rows (use_index=0, the default) or columns (use_index=1);
+            check the examples below for more details. Note: use_index=1 currently raises NotImplementedError.
+
+        :param bool empty2zero: (optional) Determines whether empty cells are
+            converted to zeros.
+        :param int head: (optional) Determines which index to use as keys,
+            starting from 1 following the numeration of the spreadsheet.
+        :param int use_index: (optional) Determines whether to read records and headers from rows or columns.
+            0 for rows, 1 for columns.
+        :param int first_index: (optional) row/col (depends on `use_index`) to start reading data from (inclusive) (1-based).
+        :param int last_index: (optional) row/col (depends on `use_index`) to stop reading at (inclusive) (1-based).
+        :param str default_blank: (optional) Determines which value to use for
+            blank cells, defaults to empty string.
+        :param bool allow_underscores_in_numeric_literals: (optional) Allow
+            underscores in numeric literals, as introduced in PEP 515
+        :param list numericise_ignore: (optional) List of ints of indices of
+            the columns (starting at 1) to ignore numericising, special use
+            of ['all'] to ignore numericising on all columns.
+        :param value_render_option: (optional) Determines how values should
+            be rendered in the output. See `ValueRenderOption`_ in
+            the Sheets API.
+        :type value_render_option: :namedtuple:`~gspread.utils.ValueRenderOption`
+
+        :param list expected_headers: (optional) List of expected headers, they must be unique.
+
+            .. note::
+
+                returned dictionaries will contain all headers even if not included in this list
+
+        Examples::
+
+            # Sheet data:
+            #      A    B    C
+            #
+            # 1    A1   B2   C3
+            # 2    A6   B7   C8
+            # 3    A11  B12  C13
+
+            # Read all rows from the sheet
+            >>> worksheet.get_records(use_index=0)
+            [
+                {"A1": "A6", "B2": "B7", "C3": "C8"},
+                {"A1": "A11", "B2": "B12", "C3": "C13"}
+            ]
+
+            >>> worksheet.get_records(use_index=1)
+            [
+                {"A1": "B2", "A6": "B7", "A11": "B12"},
+                {"A1": "C3", "A6": "C8", "A11": "C13"}
+            ]
+        """
+        # some sanity checks
+        if use_index not in [0, 1]:
+            raise ValueError("use_index must be either 0 or 1")
+        if use_index == 1:  # TODO: implement use_index=1
+            raise NotImplementedError("use_index=1 is not implemented yet")
+
+        if first_index is None:
+            first_index = head + 1
+        elif first_index <= head:
+            raise ValueError("first_index must be greater than the head row")
+        elif first_index > self.row_count:
+            raise ValueError(
+                "first_index must be less than or equal to the number of rows in the worksheet"
+            )
+
+        if last_index is None:
+            last_index = self.row_count
+        elif last_index < first_index:
+            raise ValueError("last_index must be greater than or equal to first_index")
+        elif last_index > self.row_count:
+            raise ValueError(
+                "last_index must be an integer less than or equal to the number of rows in the worksheet"
+            )
+
+        keys = self.get_values(
+            f"{head}:{head}", value_render_option=value_render_option
+        )[0]
 
-        # if no given expected headers, expect all of them
         if expected_headers is None:
             expected_headers = keys
+        else:
+            expected_headers_are_unique = len(expected_headers) == len(
+                set(expected_headers)
+            )
+            if not expected_headers_are_unique:
+                raise GSpreadException("the given 'expected_headers' are not uniques")
 
-        # keys must:
-        # - be uniques
-        # - be part of the complete header list
-        # - not contain extra headers
-        expected = set(expected_headers)
-        headers = set(keys)
-
-        # make sure they are uniques
-        if len(expected) != len(expected_headers):
-            raise GSpreadException("the given 'expected_headers' are not uniques")
+        # validating the headers in the worksheet
+        header_row_is_unique = len(keys) == len(set(keys))
+        if not header_row_is_unique:
+            raise GSpreadException("the header row in the worksheet is not unique")
 
-        if not expected & headers == expected:
+        # validating that the expected headers are part of the headers in the worksheet
+        if not all(header in keys for header in expected_headers):
             raise GSpreadException(
                 "the given 'expected_headers' contains unknown headers: {}".format(
-                    expected - headers
+                    set(expected_headers) - set(keys)
                 )
             )
 
+        values = self.get_values(
+            f"{first_index}:{last_index}",
+            value_render_option=value_render_option,
+        )
+
+        values_len = len(values[0])
+        keys_len = len(keys)
+        values_wider_than_keys_by = values_len - keys_len
+        default_blank_in_keys = default_blank in keys
+
+        if ((values_wider_than_keys_by > 0) and default_blank_in_keys) or (
+            values_wider_than_keys_by > 1
+        ):
+            raise GSpreadException(
+                "the header row in the worksheet contains multiple empty cells"
+            )
+        elif values_wider_than_keys_by == 1:
+            keys.append(default_blank)
+        elif values_wider_than_keys_by < 0:
+            values = fill_gaps(values, cols=keys_len, padding_value=default_blank)
+
         if numericise_ignore == ["all"]:
-            values = data[idx + 1 :]
+            pass
         else:
             values = [
                 numericise_all(
@@ -585,10 +701,12 @@ def get_all_records(
                     allow_underscores_in_numeric_literals,
                     numericise_ignore,
                 )
-                for row in data[idx + 1 :]
+                for row in values
             ]
 
-        return [dict(zip(keys, row)) for row in values]
+        formatted_records = [dict(zip(keys, row)) for row in values]
+
+        return formatted_records
 
     def get_all_cells(self):
         """Returns a list of all `Cell` of the current sheet."""