Skip to content

Commit

Permalink
Merge pull request #326 from openfisca/fix_survey_get_values
Browse files Browse the repository at this point in the history
fix get_values
  • Loading branch information
clallemand authored Feb 5, 2025
2 parents 2c8a14a + affb9dd commit aa36366
Show file tree
Hide file tree
Showing 3 changed files with 12 additions and 6 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
# Changelog

### 3.0.4 [#326](https://github.com/openfisca/openfisca-survey-manager/pull/326)

* Technical changes
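- Fix `get_values` of the `Survey` class: select the parquet table whose name exactly equals the requested `table` (instead of any table whose name contains it), and read only the requested `variables`, falling back to the table's configured variables when `variables` is `None`.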

### 3.0.3 [#329](https://github.com/openfisca/openfisca-survey-manager/pull/329)

* Technical changes
Expand Down
11 changes: 6 additions & 5 deletions openfisca_survey_manager/surveys.py
Original file line number Diff line number Diff line change
Expand Up @@ -231,7 +231,7 @@ def get_values(self, variables = None, table = None, lowercase = False, ignoreca
if table is None:
raise Exception("A table name is needed to retrieve data from a parquet file")
for table_name, table_content in self.tables.items():
if table in table_name:
if table == table_name:
parquet_file = table_content.get("parquet_file")
# Is parquet_file a folder or a file?
if os.path.isdir(parquet_file):
Expand All @@ -246,9 +246,10 @@ def get_values(self, variables = None, table = None, lowercase = False, ignoreca
one_parquet_file = parquet_file
parquet_schema = pq.read_schema(one_parquet_file)
assert len(parquet_schema.names) >= 1, f"The parquet file {table_content.get('parquet_file')} is empty"
columns = table_content.get('variables')
if variables is None:
variables = table_content.get('variables')
if filter_by:
df = pq.ParquetDataset(parquet_file, filters=filter_by).read(columns=columns).to_pandas()
df = pq.ParquetDataset(parquet_file, filters=filter_by).read(columns=variables).to_pandas()
elif batch_size:
if os.path.isdir(parquet_file):
parquet_file = glob.glob(os.path.join(parquet_file, '*.parquet'))
Expand All @@ -258,7 +259,7 @@ def get_values(self, variables = None, table = None, lowercase = False, ignoreca
tables = []
# Loop through the file paths and read each Parquet file
for file_path in parquet_file:
table = pq.read_table(file_path, columns=columns)
table = pq.read_table(file_path, columns=variables)
tables.append(table)

# Concatenate the tables if needed
Expand All @@ -283,7 +284,7 @@ def get_values(self, variables = None, table = None, lowercase = False, ignoreca
# break
# index += 1
else:
df = pq.ParquetDataset(parquet_file).read(columns=columns).to_pandas()
df = pq.ParquetDataset(parquet_file).read(columns=variables).to_pandas()
break
else:
raise Exception(f"No table {table} found in {self.parquet_file_path}")
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "OpenFisca-Survey-Manager"
version = "3.0.3"
version = "3.0.4"
description = "A tool for managing survey/administrative data and import them in OpenFisca"
readme = "README.md"
keywords = ["microsimulation", "tax", "benefit", "rac", "rules-as-code", "survey", "data"]
Expand Down

0 comments on commit aa36366

Please sign in to comment.