Example Scripts: Read JSON and CSV data with Polars and databpy #3

kolibril13 opened this issue Dec 20, 2024 · 5 comments

@kolibril13 (Contributor):

For reference, here are two scripts to load data into Blender spreadsheets using polars.
My plan is to incorporate them into https://extensions.blender.org/add-ons/csv-importer/ next month, together with databpy.

Read JSON

import polars as pl
import databpy as db
from io import StringIO
import numpy as np


# Example JSON data
json_file = StringIO(
    """
    {
    "Star": [
        [58.2136, 91.8819, 0.0],
        [58.1961, 92.215, 0.0]
    ],
    "Is_Visible": [[true], [false]],
    "Intensity": [[10], [20]]
    }
"""
)

# here's how you'd load a custom JSON file instead:
# from pathlib import Path
# json_file = Path.cwd() / "data.json"

df = pl.read_json(json_file)
columns_to_explode = [col for col in df.columns if df[col].dtype == pl.List(pl.List)]
df = df.explode(columns_to_explode)

vertices = np.zeros((len(df), 3), dtype=np.float32)
bob = db.create_bob(vertices, name="Hello JSON")

for col in df.columns:
    data = np.vstack(df.get_column(col).to_numpy())
    bob.store_named_attribute(data, col)

print(bob.named_attribute("Star"))
print(bob.named_attribute("Is_Visible"))
print(bob.named_attribute("Intensity"))
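
For context on the explode step above: pl.read_json turns the JSON object into a single row of nested list columns, and explode unnests them so each vertex gets its own row. A minimal polars-only sketch with made-up values:

import polars as pl

df = pl.DataFrame({"Star": [[[1.0, 2.0, 0.0], [3.0, 4.0, 0.0]]]})
print(df.explode("Star"))
# The one row holding two inner lists becomes two rows,
# each holding a single [x, y, z] list.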

Read CSV

import polars as pl
import databpy as db
from io import StringIO
import numpy as np

csv_data = StringIO(
    """MyFloat,Is_Visible,Intensity
42.12,true,10
12.33,false,20
"""
)

# here's how you'd load a custom CSV file instead:
# from pathlib import Path
# csv_data = Path.cwd() / "data.csv"


df = pl.read_csv(csv_data)

# Since we no longer have nested arrays as in read_json, there's no need to explode columns
vertices = np.zeros((len(df), 3), dtype=np.float32)
bob = db.create_bob(vertices, name="Hello CSV")

# Store each column as an attribute
# Note: .to_numpy() returns a 1D array, so we reshape to 2D if needed.
for col in df.columns:
    data = df[col].to_numpy().reshape(-1, 1)
    bob.store_named_attribute(data, col)

# Print the stored attributes
print(bob.named_attribute("MyFloat"))
print(bob.named_attribute("Is_Visible"))
print(bob.named_attribute("Intensity"))
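
To illustrate the reshape note: a quick numpy-only check, using the two values from the CSV sample:

import numpy as np

a = np.array([42.12, 12.33])  # .to_numpy() gives shape (2,)
a = a.reshape(-1, 1)          # shape (2, 1): one value per vertex
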
@BradyAJohnston (Owner):

Thanks for the reference! If you'd like to contribute any documentation / examples, I would welcome it. If you'd like to implement the polars stuff inside databpy, I would welcome PRs!

@kolibril13 (Author):

If you'd like to contribute any documentation / examples

Sure, I might be able to contribute that early next month.
I've already made some attempts to document data_attributes here:
https://kolibril13.github.io/bpy-gallery/n3data_attributes/#adding-a-float
It still needs some refactoring, but maybe I can make a databpy version of this chapter.

If you'd like to implement the polars stuff inside databpy, I would welcome PRs

That's good to hear! I'll see if I find time for this next month as well.

@kolibril13 (Author):

While I'm at it, here are snippets for the Parquet and XLSX data formats (the latter is Excel's format).

Read Parquet

# First script: write sample data to a Parquet file
import polars as pl

data_polars = pl.DataFrame({
    "MyFloat": [42.12, 12.33],
    "Is_Visible": [True, False],
    "Intensity": [10, 20]
})

data_polars.write_parquet("simple_data.parquet")

# Second script: read the Parquet file back into Blender
import polars as pl
import databpy as db
import numpy as np

df = pl.read_parquet("simple_data.parquet")

vertices = np.zeros((len(df), 3), dtype=np.float32)
bob = db.create_bob(vertices, name="Hello Parquet")

for col in df.columns:
    data = df[col].to_numpy().reshape(-1, 1)
    bob.store_named_attribute(data, col)

Read Excel file

# First script: write sample data to an Excel file
import polars as pl

data_polars = pl.DataFrame({
    "MyFloat": [42.12, 12.33],
    "Is_Visible": [True, False],
    "Intensity": [10, 20]
})

# Save the DataFrame to an Excel file
# pip install xlsxwriter
data_polars.write_excel("simple_excel_polars.xlsx")

# Second script: read the Excel file back into Blender
import polars as pl
import databpy as db
import numpy as np

# Read the Excel file using Polars
# pip install fastexcel pyarrow 
df = pl.read_excel("simple_excel_polars.xlsx", sheet_name="Sheet1")

vertices = np.zeros((len(df), 3), dtype=np.float32)
bob = db.create_bob(vertices, name="Hello Excel")

for col in df.columns:
    data = df[col].to_numpy().reshape(-1, 1)
    bob.store_named_attribute(data, col)
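
As in the JSON and CSV examples above, the stored attributes can be read back for a quick sanity check:

print(bob.named_attribute("MyFloat"))
print(bob.named_attribute("Is_Visible"))
print(bob.named_attribute("Intensity"))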

@kolibril13 (Author):

Next iteration, which intentionally skips columns with string data:

Read CSV refined

import polars as pl
import databpy as db
import numpy as np
from io import StringIO
import time

start_time = time.perf_counter()
 
csv_file = StringIO(
    """
Is_Visible,Intensity,My Strings
True,10,A
False,20,B
"""
)

df = pl.read_csv(csv_file)
# Create Bob object
vertices = np.zeros((len(df), 3), dtype=np.float32)
bob = db.create_bob(vertices, name="CSV_MyBob")

for col in df.columns:
    col_dtype = df[col].dtype
    print(col_dtype)
    
    # Skip columns with string data types:
    # pl.Utf8 covers plain string columns (as read from CSV),
    # pl.List(pl.Utf8) covers list-of-string columns (as read from JSON)
    if col_dtype in [pl.Utf8, pl.List(pl.Utf8)]:
        print(f"Skipping column '{col}' as it contains string data.")
        continue

    data = df[col].to_numpy()
    if df[col].dtype == pl.List: # Handle nested lists if necessary
        data = np.vstack(data)
    bob.store_named_attribute(data, col)

elapsed_time_ms = (time.perf_counter() - start_time) * 1000

print(f" 🐻‍❄️ 📥  Added {bob.name} in {elapsed_time_ms:.2f} ms")

Read JSON refined:

import polars as pl
import databpy as db
import numpy as np
from io import StringIO
import time

# Updated JSON data
json_file = StringIO(
"""
{
"Dino": [
    [55.3846, 97.1795, 0.0],
    [51.5385, 96.0256, 0.0]
],
"Star": [
    [58.2136, 91.8819, 0.0],
    [58.1961, 92.215, 0.0]
],
"Is_Visible": [
    [true],
    [false]
],
"Intensity": [
    [10],
    [20]
],
"My Strings": [
    ["A"],
    ["B"]
]
}
"""
)

start_time = time.perf_counter()
 
df = pl.read_json(json_file)

columns_to_explode = [
    col for col in df.columns if df[col].dtype == pl.List(pl.List)
]
df = df.explode(columns_to_explode)

# Create Bob object
vertices = np.zeros((len(df), 3), dtype=np.float32)
bob = db.create_bob(vertices, name="JSON_MyBob")


for col in df.columns:
    col_dtype = df[col].dtype
    print(col_dtype)
    
    # Skip columns with string data types:
    # pl.Utf8 covers plain string columns (as read from CSV),
    # pl.List(pl.Utf8) covers list-of-string columns (as read from JSON)
    if col_dtype in [pl.Utf8, pl.List(pl.Utf8)]:
        print(f"Skipping column '{col}' as it contains string data.")
        continue

    data = df[col].to_numpy()
    if df[col].dtype == pl.List: # Handle nested lists if necessary
        data = np.vstack(data)
        
    bob.store_named_attribute(data, col)

elapsed_time_ms = (time.perf_counter() - start_time) * 1000

print(f" 🐻‍❄️ 📥  Added {bob.name} in {elapsed_time_ms:.2f} ms")

@kolibril13 (Author):

Read polars DataFrame

import polars as pl
import numpy as np
import databpy as db

df = pl.DataFrame({
    "Star": [
        [58.2, 91.8, 0.0],
        [58.1, 92.2, 0.0]
    ],
    "Is_Visible": [True, False],
    "Intensity": [10, 20],
})

vertices = np.zeros((len(df), 3), dtype=np.float32)
bob = db.create_bob(vertices, name="DataWithVector")

for col in df.columns:
    data = np.vstack(df[col].to_numpy())
    bob.store_named_attribute(data, col)
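
As in the earlier scripts, a quick way to verify the result:

print(bob.named_attribute("Star"))
print(bob.named_attribute("Is_Visible"))
print(bob.named_attribute("Intensity"))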
