Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feature -- adding worksheet.get_records to get specific row ranges #1301

Merged
merged 29 commits into from
Sep 28, 2023
Merged
Show file tree
Hide file tree
Changes from 18 commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
9b7c207
adding skeleton for method
AndrewBasem1 Sep 14, 2023
5d5dd0c
renaming args, and adding some sanity checks
AndrewBasem1 Sep 15, 2023
f6f0658
adding main logic
AndrewBasem1 Sep 15, 2023
bd06c33
fixing some minor issues
AndrewBasem1 Sep 15, 2023
a356c91
updated docstring, added tests, and fixed issues
AndrewBasem1 Sep 21, 2023
96b74a2
rename some tests
AndrewBasem1 Sep 21, 2023
307fa22
adding cassettes
AndrewBasem1 Sep 21, 2023
5c4ca7b
Merge branch 'master' into feature/get_records_limit
AndrewBasem1 Sep 21, 2023
99c8fa1
removing isinstance checks
AndrewBasem1 Sep 24, 2023
4565154
removing unneeded variable
AndrewBasem1 Sep 24, 2023
14ed342
making rows args mandatory
AndrewBasem1 Sep 24, 2023
27eb373
improving padding for get_records
AndrewBasem1 Sep 24, 2023
301666e
renaming method to `get_records`
AndrewBasem1 Sep 24, 2023
0f668f5
fixing broken test functions
AndrewBasem1 Sep 24, 2023
652c497
Revert "making rows args mandatory"
AndrewBasem1 Sep 24, 2023
12a2794
adding kwargs in get_all_records
AndrewBasem1 Sep 24, 2023
415861f
Revert "adding cassettes"
AndrewBasem1 Sep 24, 2023
a574f6b
adding needed cassettes only
AndrewBasem1 Sep 24, 2023
fe43232
fix cassettes
alifeee Sep 25, 2023
63d57c0
moving validations inside the method
AndrewBasem1 Sep 25, 2023
29955c1
moving padding inside
AndrewBasem1 Sep 25, 2023
e31ed99
adding test for fill_gaps with defined value
AndrewBasem1 Sep 25, 2023
aea25a8
adding a default value in fill gaps
AndrewBasem1 Sep 25, 2023
c690971
using fill gaps in method
AndrewBasem1 Sep 25, 2023
0a9aa8c
Merge remote-tracking branch 'upstream/feature/get_records_limit' int…
AndrewBasem1 Sep 25, 2023
342f35f
aligning test_cases with comments, and adding new one
AndrewBasem1 Sep 25, 2023
81c1d4a
ignoring function complexity checker
AndrewBasem1 Sep 25, 2023
b0ea1be
renaming args, and adding examples in docstring
AndrewBasem1 Sep 26, 2023
4f39bef
adding new cassette
AndrewBasem1 Sep 27, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
161 changes: 137 additions & 24 deletions gspread/worksheet.py
Original file line number Diff line number Diff line change
Expand Up @@ -501,15 +501,18 @@ def get_all_values(self, **kwargs):
"""
return self.get_values(**kwargs)

def get_all_records(
self,
@accepted_kwargs(
empty2zero=False,
head=1,
default_blank="",
allow_underscores_in_numeric_literals=False,
numericise_ignore=[],
value_render_option=None,
expected_headers=None,
)
def get_all_records(
self,
**kwargs,
):
"""Returns a list of dictionaries, all of them having the contents of
the spreadsheet with the head row as keys and each of these
Expand Down Expand Up @@ -542,40 +545,148 @@ def get_all_records(
returned dictionaries will contain all headers even if not included in this list

"""
idx = head - 1
return self.get_records(**kwargs)

data = self.get_all_values(value_render_option=value_render_option)
def _validate_rows_ranges_for_get_records(
self,
head,
first_row,
last_row,
):
"""Validate the head, first_row and last_row arguments for `get_records`.

A ``first_row`` of None is replaced by ``head + 1`` and a ``last_row`` of
None is replaced by ``self.row_count``.

:param head: header row number; returned unchanged.
:param first_row: first data row, or None to use the row after ``head``.
:param last_row: last data row, or None to use ``self.row_count``.
:returns: the tuple ``(head, first_row, last_row)`` with None values filled in.
:raises ValueError: if an explicit ``first_row`` is not greater than ``head``
or is greater than ``self.row_count``, or if an explicit ``last_row`` is
less than ``first_row`` or greater than ``self.row_count``.
"""
# Only an explicitly given first_row is range-checked; the defaulted
# value (head + 1) is not compared against self.row_count.
if first_row is None:
first_row = head + 1
elif first_row <= head:
raise ValueError("first_row must be greater than the head row")
elif first_row > self.row_count:
raise ValueError(
"first_row must be less than or equal to the number of rows in the worksheet"
)

# NOTE(review): `data` and `idx` are not assigned anywhere in this method;
# the comment and two lines below look like removed-side lines of the diff
# (old `get_all_records` body) rather than part of this helper — confirm.
# Return an empty list if the sheet doesn't have enough rows
if len(data) <= idx:
return []
# Same pattern as above: only an explicitly given last_row is checked.
if last_row is None:
last_row = self.row_count
elif last_row < first_row:
raise ValueError("last_row must be greater than or equal to first_row")
elif last_row > self.row_count:
raise ValueError(
"last_row must be an integer less than or equal to the number of rows in the worksheet"
)

# NOTE(review): the `keys = data[idx]` line below also references the
# undefined `data`/`idx`; presumably another removed-side diff line — confirm.
keys = data[idx]
return head, first_row, last_row

# if no given expected headers, expect all of them
def _validate_headers_and_keys_for_get_records(self, keys, expected_headers):
"""Validate the header row and the expected headers for `get_records`.

When ``expected_headers`` is None, ``keys`` is used in its place. Nothing is
returned; the method only raises on invalid input.

:param keys: the header row values read from the worksheet.
:param expected_headers: headers the caller expects, or None.
:raises GSpreadException: if ``expected_headers`` contains duplicates, if
``keys`` contains duplicates, or if ``expected_headers`` contains a value
that is not in ``keys``.

NOTE(review): this span interleaves two variants of the same checks: one
built on the locals ``expected``/``headers`` and one built on
``set(expected_headers)``/``set(keys)``. The ``expected``/``headers`` lines
look like removed-side lines of the diff — confirm before relying on them.
"""
if expected_headers is None:
expected_headers = keys
else:
# Duplicates collapse in a set, so differing lengths mean a repeated header.
expected_headers_are_unique = len(expected_headers) == len(
set(expected_headers)
)
if not expected_headers_are_unique:
raise GSpreadException("the given 'expected_headers' are not uniques")

# keys must:
# - be uniques
# - be part of the complete header list
# - not contain extra headers
expected = set(expected_headers)
headers = set(keys)

# make sure they are uniques
# (same check and message as the `expected_headers_are_unique` test above)
if len(expected) != len(expected_headers):
raise GSpreadException("the given 'expected_headers' are not uniques")
# validating the headers in the worksheet
# (uniqueness is checked on the full header row, not only the expected subset)
header_row_is_unique = len(keys) == len(set(keys))
if not header_row_is_unique:
raise GSpreadException("the header row in the worksheet is not unique")

if not expected & headers == expected:
# validating that the expected headers are part of the headers in the worksheet
if not all(header in keys for header in expected_headers):
raise GSpreadException(
"the given 'expected_headers' contains unknown headers: {}".format(
expected - headers
set(expected_headers) - set(keys)
)
)

def _pad_values_and_keys_for_get_records(self, values, keys, default_blank):
"""Pads the given values and keys for `get_records` if needed"""
values_len = len(values[0])
keys_len = len(keys)
values_wider_than_keys_by = values_len - keys_len
default_blank_in_keys = default_blank in keys

if ((values_wider_than_keys_by > 0) and default_blank_in_keys) or (
values_wider_than_keys_by > 1
):
raise GSpreadException(
"the header row in the worksheet contains multiple empty cells"
)
elif values_wider_than_keys_by == 1:
keys.append(default_blank)
elif values_wider_than_keys_by < 0:
values = [
row + [default_blank] * (-values_wider_than_keys_by) for row in values
]

return values, keys

def get_records(
self,
empty2zero=False,
head=1,
first_row=None,
last_row=None,
default_blank="",
allow_underscores_in_numeric_literals=False,
numericise_ignore=[],
value_render_option=None,
expected_headers=None,
):
"""Returns a list of dictionaries, all of them having the contents of
the spreadsheet range selected with the head row as keys and each of these
dictionaries holding the contents of subsequent selected rows of cells as
values.

Cell values are numericised (strings that can be read as ints or floats
are converted), unless specified in numericise_ignore

:param bool empty2zero: (optional) Determines whether empty cells are
converted to zeros.
:param int head: (optional) Determines which row to use as keys,
starting from 1 following the numeration of the spreadsheet.
:param int first_row: (optional) row to start reading data from (inclusive) (1-based).
:param int last_row: (optional) row to stop reading at (inclusive) (1-based).
:param str default_blank: (optional) Determines which value to use for
blank cells, defaults to empty string.
:param bool allow_underscores_in_numeric_literals: (optional) Allow
underscores in numeric literals, as introduced in PEP 515
:param list numericise_ignore: (optional) List of ints of indices of
the columns (starting at 1) to ignore numericising, special use
of ['all'] to ignore numericising on all columns.
:param value_render_option: (optional) Determines how values should
be rendered in the output. See `ValueRenderOption`_ in
the Sheets API.
:type value_render_option: :namedtuple:`~gspread.utils.ValueRenderOption`

:param list expected_headers: (optional) List of expected headers, they must be unique.

.. note::

returned dictionaries will contain all headers even if not included in this list

"""
# some sanity checks
head, first_row, last_row = self._validate_rows_ranges_for_get_records(
head, first_row, last_row
)

keys = self.get_values(
f"{head}:{head}", value_render_option=value_render_option
)[0]

self._validate_headers_and_keys_for_get_records(keys, expected_headers)

values = self.get_values(
f"{first_row}:{last_row}",
value_render_option=value_render_option,
)

values, keys = self._pad_values_and_keys_for_get_records(
values, keys, default_blank
)

if numericise_ignore == ["all"]:
values = data[idx + 1 :]
pass
else:
values = [
numericise_all(
Expand All @@ -585,10 +696,12 @@ def get_all_records(
allow_underscores_in_numeric_literals,
numericise_ignore,
)
for row in data[idx + 1 :]
for row in values
]

return [dict(zip(keys, row)) for row in values]
formatted_records = [dict(zip(keys, row)) for row in values]

return formatted_records

def get_all_cells(self):
"""Returns a list of all `Cell` of the current sheet."""
Expand Down
Loading