Skip to content

Commit

Permalink
Merge pull request #1301 from AndrewBasem1/feature/get_records_limit
Browse files Browse the repository at this point in the history
feature -- adding `worksheet.get_records_subset` to get specific row ranges
  • Loading branch information
alifeee authored Sep 28, 2023
2 parents db06366 + 4f39bef commit 7fe63bf
Show file tree
Hide file tree
Showing 15 changed files with 6,814 additions and 6,121 deletions.
10 changes: 6 additions & 4 deletions gspread/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -533,12 +533,12 @@ def wid_to_gid(wid):
return str(int(widval, 36) ^ xorval)


def rightpad(row, max_len):
def rightpad(row, max_len, padding_value=""):
pad_len = max_len - len(row)
return row + ([""] * pad_len) if pad_len != 0 else row
return row + ([padding_value] * pad_len) if pad_len != 0 else row


def fill_gaps(L, rows=None, cols=None):
def fill_gaps(L, rows=None, cols=None, padding_value=""):
"""Fill gaps in a list of lists.
e.g.,::
Expand All @@ -554,10 +554,12 @@ def fill_gaps(L, rows=None, cols=None):
:param L: List of lists to fill gaps in.
:param rows: Number of rows to fill.
:param cols: Number of columns to fill.
:param padding_value: Default value to fill gaps with.
:type L: list[list[T]]
:type rows: int
:type cols: int
:type padding_value: T
:return: List of lists with gaps filled.
:rtype: list[list[T]]
Expand All @@ -571,7 +573,7 @@ def fill_gaps(L, rows=None, cols=None):
if pad_rows:
L = L + ([[]] * pad_rows)

return [rightpad(row, max_cols) for row in L]
return [rightpad(row, max_cols, padding_value=padding_value) for row in L]
except ValueError:
return []

Expand Down
166 changes: 142 additions & 24 deletions gspread/worksheet.py
Original file line number Diff line number Diff line change
Expand Up @@ -501,15 +501,18 @@ def get_all_values(self, **kwargs):
"""
return self.get_values(**kwargs)

def get_all_records(
self,
@accepted_kwargs(
empty2zero=False,
head=1,
default_blank="",
allow_underscores_in_numeric_literals=False,
numericise_ignore=[],
value_render_option=None,
expected_headers=None,
)
def get_all_records(
self,
**kwargs,
):
"""Returns a list of dictionaries, all of them having the contents of
the spreadsheet with the head row as keys and each of these
Expand Down Expand Up @@ -542,40 +545,153 @@ def get_all_records(
returned dictionaries will contain all headers even if not included in this list
"""
idx = head - 1
return self.get_records(**kwargs)

data = self.get_all_values(value_render_option=value_render_option)
def get_records( # noqa: C901 # this comment disables the complexity check for this function
self,
empty2zero=False,
head=1,
use_index=0,
first_index=None,
last_index=None,
default_blank="",
allow_underscores_in_numeric_literals=False,
numericise_ignore=[],
value_render_option=None,
expected_headers=None,
):
"""Returns a list of dictionaries, all of them having the contents of
the spreadsheet range selected with the head row/col as keys and each of these
dictionaries holding the contents of subsequent selected rows/cols of cells as
values.
# Return an empty list if the sheet doesn't have enough rows
if len(data) <= idx:
return []
Cell values are numericised (strings that can be read as ints or floats
are converted), unless specified in numericise_ignore
keys = data[idx]
Can be used to read data from rows (use_index=0) or columns (use_index=1) (default is 0),
check the examples below for more details.
:param bool empty2zero: (optional) Determines whether empty cells are
converted to zeros.
:param int head: (optional) Determines which index to use as keys,
starting from 1 following the numeration of the spreadsheet.
:param int use_index: (optional) Determines whether to read records and headers from rows or columns.
0 for rows, 1 for columns.
:param int first_index: (optional) row/col (depends on `use_index`) to start reading data from (inclusive) (1-based).
:param int last_index: (optional) row/col (depends on `use_index`) to stop reading at (inclusive) (1-based).
:param str default_blank: (optional) Determines which value to use for
blank cells, defaults to empty string.
:param bool allow_underscores_in_numeric_literals: (optional) Allow
underscores in numeric literals, as introduced in PEP 515
:param list numericise_ignore: (optional) List of ints of indices of
the columns (starting at 1) to ignore numericising, special use
of ['all'] to ignore numericising on all columns.
:param value_render_option: (optional) Determines how values should
be rendered in the output. See `ValueRenderOption`_ in
the Sheets API.
:type value_render_option: :namedtuple:`~gspread.utils.ValueRenderOption`
:param list expected_headers: (optional) List of expected headers, they must be unique.
.. note::
returned dictionaries will contain all headers even if not included in this list
Examples::
# Sheet data:
# A B C
#
# 1 A1 B2 C3
# 2 A6 B7 C8
# 3 A11 B12 C13
# Read all rows from the sheet
>>> worksheet.get_records(use_index=0)
[
{"A1": "A6", "B2": "B7", "C3": "C8"},
{"A1": "A11", "B2": "B12", "C3": "C13"}
]
>>> worksheet.get_records(use_index=1)
[
{"A1": "B2", "A6": "B7", "A11": "B12"},
{"A1": "C3", "A6": "C8", "A11": "C13"}
]
"""
# some sanity checks
if use_index not in [0, 1]:
raise ValueError("use_index must be either 0 or 1")
if use_index == 1: # TODO: implement use_index=1
raise NotImplementedError("use_index=1 is not implemented yet")

if first_index is None:
first_index = head + 1
elif first_index <= head:
raise ValueError("first_index must be greater than the head row")
elif first_index > self.row_count:
raise ValueError(
"first_index must be less than or equal to the number of rows in the worksheet"
)

if last_index is None:
last_index = self.row_count
elif last_index < first_index:
raise ValueError("last_index must be greater than or equal to first_index")
elif last_index > self.row_count:
raise ValueError(
"last_index must be an integer less than or equal to the number of rows in the worksheet"
)

keys = self.get_values(
f"{head}:{head}", value_render_option=value_render_option
)[0]

# if no given expected headers, expect all of them
if expected_headers is None:
expected_headers = keys
else:
expected_headers_are_unique = len(expected_headers) == len(
set(expected_headers)
)
if not expected_headers_are_unique:
raise GSpreadException("the given 'expected_headers' are not uniques")

# keys must:
# - be unique
# - be part of the complete header list
# - not contain extra headers
expected = set(expected_headers)
headers = set(keys)

# make sure they are unique
if len(expected) != len(expected_headers):
raise GSpreadException("the given 'expected_headers' are not uniques")
# validating the headers in the worksheet
header_row_is_unique = len(keys) == len(set(keys))
if not header_row_is_unique:
raise GSpreadException("the header row in the worksheet is not unique")

if not expected & headers == expected:
# validating that the expected headers are part of the headers in the worksheet
if not all(header in keys for header in expected_headers):
raise GSpreadException(
"the given 'expected_headers' contains unknown headers: {}".format(
expected - headers
set(expected_headers) - set(keys)
)
)

values = self.get_values(
f"{first_index}:{last_index}",
value_render_option=value_render_option,
)

values_len = len(values[0])
keys_len = len(keys)
values_wider_than_keys_by = values_len - keys_len
default_blank_in_keys = default_blank in keys

if ((values_wider_than_keys_by > 0) and default_blank_in_keys) or (
values_wider_than_keys_by > 1
):
raise GSpreadException(
"the header row in the worksheet contains multiple empty cells"
)
elif values_wider_than_keys_by == 1:
keys.append(default_blank)
elif values_wider_than_keys_by < 0:
values = fill_gaps(values, cols=keys_len, padding_value=default_blank)

if numericise_ignore == ["all"]:
values = data[idx + 1 :]
pass
else:
values = [
numericise_all(
Expand All @@ -585,10 +701,12 @@ def get_all_records(
allow_underscores_in_numeric_literals,
numericise_ignore,
)
for row in data[idx + 1 :]
for row in values
]

return [dict(zip(keys, row)) for row in values]
formatted_records = [dict(zip(keys, row)) for row in values]

return formatted_records

def get_all_cells(self):
"""Returns a list of all `Cell` of the current sheet."""
Expand Down
Loading

0 comments on commit 7fe63bf

Please sign in to comment.