Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feature -- adding worksheet.get_records to get specific row ranges #1301

Merged
merged 29 commits into from
Sep 28, 2023
Merged
Show file tree
Hide file tree
Changes from 28 commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
9b7c207
adding skeleton for method
AndrewBasem1 Sep 14, 2023
5d5dd0c
renaming args, and adding some sanity checks
AndrewBasem1 Sep 15, 2023
f6f0658
adding main logic
AndrewBasem1 Sep 15, 2023
bd06c33
fixing some minor issues
AndrewBasem1 Sep 15, 2023
a356c91
updated doctsring, added tests, and fixed issues
AndrewBasem1 Sep 21, 2023
96b74a2
rename some tests
AndrewBasem1 Sep 21, 2023
307fa22
adding cassettes
AndrewBasem1 Sep 21, 2023
5c4ca7b
Merge branch 'master' into feature/get_records_limit
AndrewBasem1 Sep 21, 2023
99c8fa1
removing isinstance checks
AndrewBasem1 Sep 24, 2023
4565154
removing unneeded variable
AndrewBasem1 Sep 24, 2023
14ed342
making rows args mandatory
AndrewBasem1 Sep 24, 2023
27eb373
improving padding for get_records
AndrewBasem1 Sep 24, 2023
301666e
renaming method to `get_records`
AndrewBasem1 Sep 24, 2023
0f668f5
fixing broken test functions
AndrewBasem1 Sep 24, 2023
652c497
Revert "making rows args mandatory"
AndrewBasem1 Sep 24, 2023
12a2794
adding kwargs in get_all_records
AndrewBasem1 Sep 24, 2023
415861f
Revert "adding cassettes"
AndrewBasem1 Sep 24, 2023
a574f6b
adding needed cassettes only
AndrewBasem1 Sep 24, 2023
fe43232
fix cassettes
alifeee Sep 25, 2023
63d57c0
moving validations inside the method
AndrewBasem1 Sep 25, 2023
29955c1
moving padding inside
AndrewBasem1 Sep 25, 2023
e31ed99
adding test for fill_gaps with defined value
AndrewBasem1 Sep 25, 2023
aea25a8
adding a default value in fill gaps
AndrewBasem1 Sep 25, 2023
c690971
using fill gaps in method
AndrewBasem1 Sep 25, 2023
0a9aa8c
Merge remote-tracking branch 'upstream/feature/get_records_limit' int…
AndrewBasem1 Sep 25, 2023
342f35f
aligining test_cases with comments, and adding new one
AndrewBasem1 Sep 25, 2023
81c1d4a
ignoring function complexity checker
AndrewBasem1 Sep 25, 2023
b0ea1be
renaming args, and adding examples in docstring
AndrewBasem1 Sep 26, 2023
4f39bef
adding new cassette
AndrewBasem1 Sep 27, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 6 additions & 4 deletions gspread/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -533,12 +533,12 @@ def wid_to_gid(wid):
return str(int(widval, 36) ^ xorval)


def rightpad(row, max_len):
def rightpad(row, max_len, padding_value=""):
pad_len = max_len - len(row)
return row + ([""] * pad_len) if pad_len != 0 else row
return row + ([padding_value] * pad_len) if pad_len != 0 else row


def fill_gaps(L, rows=None, cols=None):
def fill_gaps(L, rows=None, cols=None, padding_value=""):
"""Fill gaps in a list of lists.
e.g.,::

Expand All @@ -554,10 +554,12 @@ def fill_gaps(L, rows=None, cols=None):
:param L: List of lists to fill gaps in.
:param rows: Number of rows to fill.
:param cols: Number of columns to fill.
:param padding_value: Default value to fill gaps with.

:type L: list[list[T]]
:type rows: int
:type cols: int
:type padding_value: T

:return: List of lists with gaps filled.
:rtype: list[list[T]]
Expand All @@ -571,7 +573,7 @@ def fill_gaps(L, rows=None, cols=None):
if pad_rows:
L = L + ([[]] * pad_rows)

return [rightpad(row, max_cols) for row in L]
return [rightpad(row, max_cols, padding_value=padding_value) for row in L]
except ValueError:
return []

Expand Down
166 changes: 142 additions & 24 deletions gspread/worksheet.py
Original file line number Diff line number Diff line change
Expand Up @@ -501,15 +501,18 @@ def get_all_values(self, **kwargs):
"""
return self.get_values(**kwargs)

def get_all_records(
self,
@accepted_kwargs(
empty2zero=False,
head=1,
default_blank="",
allow_underscores_in_numeric_literals=False,
numericise_ignore=[],
value_render_option=None,
expected_headers=None,
)
def get_all_records(
self,
**kwargs,
):
"""Returns a list of dictionaries, all of them having the contents of
the spreadsheet with the head row as keys and each of these
Expand Down Expand Up @@ -542,40 +545,153 @@ def get_all_records(
returned dictionaries will contain all headers even if not included in this list

"""
idx = head - 1
return self.get_records(**kwargs)

data = self.get_all_values(value_render_option=value_render_option)
def get_records( # noqa: C901 # this comment disables the complexity check for this function
self,
empty2zero=False,
head=1,
use_index=0,
first_index=None,
last_index=None,
default_blank="",
allow_underscores_in_numeric_literals=False,
numericise_ignore=[],
value_render_option=None,
expected_headers=None,
):
"""Returns a list of dictionaries, all of them having the contents of
the spreadsheet range selected with the head row/col as keys and each of these
dictionaries holding the contents of subsequent selected rows/cols of cells as
values.

# Return an empty list if the sheet doesn't have enough rows
if len(data) <= idx:
return []
Cell values are numericised (strings that can be read as ints or floats
are converted), unless specified in numericise_ignore

keys = data[idx]
Can be used to read data from rows (use_index=0) or columns (use_index=1) (default is 0),
check the examples below for more details.

:param bool empty2zero: (optional) Determines whether empty cells are
converted to zeros.
:param int head: (optional) Determines which index to use as keys,
starting from 1 following the numeration of the spreadsheet.
:param int use_index: (optional) Determines whether to read records and headers from rows or columns.
0 for rows, 1 for columns. Only 0 is currently supported; passing 1 raises ``NotImplementedError``.
:param int first_index: (optional) row/col (depends on `use_index`) to start reading data from (inclusive) (1-based).
:param int last_index: (optional) row/col (depends on `use_index`) to stop reading at (inclusive) (1-based).
:param str default_blank: (optional) Determines which value to use for
blank cells, defaults to empty string.
:param bool allow_underscores_in_numeric_literals: (optional) Allow
underscores in numeric literals, as introduced in PEP 515
:param list numericise_ignore: (optional) List of ints of indices of
the columns (starting at 1) to ignore numericising, special use
of ['all'] to ignore numericising on all columns.
:param value_render_option: (optional) Determines how values should
be rendered in the output. See `ValueRenderOption`_ in
the Sheets API.
:type value_render_option: :namedtuple:`~gspread.utils.ValueRenderOption`

:param list expected_headers: (optional) List of expected headers, they must be unique.

.. note::

returned dictionaries will contain all headers even if not included in this list

Examples::

# Sheet data:
# A B C
#
# 1 A1 B2 C3
# 2 A6 B7 C8
# 3 A11 B12 C13

# Read all rows from the sheet
>>> worksheet.get_records(use_index=0)
[
{"A1": "A6", "B2": "B7", "C3": "C8"},
{"A1": "A11", "B2": "B12", "C3": "C13"}
]

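# Read all columns from the sheet
# (planned behaviour: use_index=1 currently raises NotImplementedError)
>>> worksheet.get_records(use_index=1)
[
{"A1": "B2", "A6": "B7", "A11": "B12"},
{"A1": "C3", "A6": "C8", "A11": "C13"}
]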
"""
# some sanity checks
if use_index not in [0, 1]:
raise ValueError("use_index must be either 0 or 1")
if use_index == 1: # TODO: implement use_index=1
raise NotImplementedError("use_index=1 is not implemented yet")

if first_index is None:
first_index = head + 1
elif first_index <= head:
raise ValueError("first_index must be greater than the head row")
elif first_index > self.row_count:
raise ValueError(
"first_index must be less than or equal to the number of rows in the worksheet"
)

if last_index is None:
last_index = self.row_count
elif last_index < first_index:
raise ValueError("last_index must be greater than or equal to first_index")
elif last_index > self.row_count:
raise ValueError(
"last_index must be an integer less than or equal to the number of rows in the worksheet"
)

keys = self.get_values(
f"{head}:{head}", value_render_option=value_render_option
)[0]

# if no given expected headers, expect all of them
if expected_headers is None:
expected_headers = keys
else:
expected_headers_are_unique = len(expected_headers) == len(
set(expected_headers)
)
if not expected_headers_are_unique:
raise GSpreadException("the given 'expected_headers' are not uniques")

# keys must:
# - be uniques
# - be part of the complete header list
# - not contain extra headers
expected = set(expected_headers)
headers = set(keys)

# make sure they are uniques
if len(expected) != len(expected_headers):
raise GSpreadException("the given 'expected_headers' are not uniques")
# validating the headers in the worksheet
header_row_is_unique = len(keys) == len(set(keys))
if not header_row_is_unique:
raise GSpreadException("the header row in the worksheet is not unique")

if not expected & headers == expected:
# validating that the expected headers are part of the headers in the worksheet
if not all(header in keys for header in expected_headers):
raise GSpreadException(
"the given 'expected_headers' contains unknown headers: {}".format(
expected - headers
set(expected_headers) - set(keys)
)
)

values = self.get_values(
f"{first_index}:{last_index}",
value_render_option=value_render_option,
)

values_len = len(values[0])
keys_len = len(keys)
values_wider_than_keys_by = values_len - keys_len
default_blank_in_keys = default_blank in keys

if ((values_wider_than_keys_by > 0) and default_blank_in_keys) or (
values_wider_than_keys_by > 1
):
raise GSpreadException(
"the header row in the worksheet contains multiple empty cells"
)
elif values_wider_than_keys_by == 1:
keys.append(default_blank)
elif values_wider_than_keys_by < 0:
values = fill_gaps(values, cols=keys_len, padding_value=default_blank)

if numericise_ignore == ["all"]:
values = data[idx + 1 :]
pass
else:
values = [
numericise_all(
Expand All @@ -585,10 +701,12 @@ def get_all_records(
allow_underscores_in_numeric_literals,
numericise_ignore,
)
for row in data[idx + 1 :]
for row in values
]

return [dict(zip(keys, row)) for row in values]
formatted_records = [dict(zip(keys, row)) for row in values]

return formatted_records

def get_all_cells(self):
"""Returns a list of all `Cell` of the current sheet."""
Expand Down
Loading