Skip to content

Commit

Permalink
Merge pull request #58 from kurtmckee/compression
Browse files Browse the repository at this point in the history
Compress all database content using the LZMA algorithm
  • Loading branch information
kurtmckee authored Feb 18, 2025
2 parents c2a97fc + c9f9d64 commit 05de623
Show file tree
Hide file tree
Showing 4 changed files with 41 additions and 92 deletions.
50 changes: 7 additions & 43 deletions assets/test-performance.ps1
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
$env:PYTHONPROFILEIMPORTTIME=$null
$env:PYTHONDONTWRITEBYTECODE=1

Remove-Item -Recurse "build/perftest"
Remove-Item -Recurse "build\perftest"
Remove-Item perf.*
python assets/generate-perftest-directory.py

Expand All @@ -18,7 +18,7 @@ $env:PYTHONPROFILEIMPORTTIME=1
Write-Host
$env:FILE_PREFIX="perf.filesystem.source"
Write-Host "${env:FILE_PREFIX}"
$env:PYTHONPATH="build/perftest"
$env:PYTHONPATH="build\perftest"
Measure-Command {
python -c 'import a; print(a)' 2>"${env:FILE_PREFIX}.import.log" | Write-Host
} > "${env:FILE_PREFIX}.time.log"
Expand All @@ -28,25 +28,7 @@ Measure-Command {
# ------------------

Write-Host
$env:FILE_PREFIX="perf.zip.source.storeonly"
Write-Host "${env:FILE_PREFIX}"
$env:PYTHONPATH="${env:FILE_PREFIX}.zip"
Compress-Archive -CompressionLevel NoCompression -Path "build\perftest\*" -DestinationPath "${env:PYTHONPATH}"
Measure-Command {
python -c 'import a; print(a)' 2>"${env:FILE_PREFIX}.import.log" | Write-Host
} > "${env:FILE_PREFIX}.time.log"

Write-Host
$env:FILE_PREFIX="perf.zip.source.fast"
Write-Host "${env:FILE_PREFIX}"
$env:PYTHONPATH="${env:FILE_PREFIX}.zip"
Compress-Archive -CompressionLevel Fastest -Path "build\perftest\*" -DestinationPath "${env:PYTHONPATH}"
Measure-Command {
python -c 'import a; print(a)' 2>"${env:FILE_PREFIX}.import.log" | Write-Host
} > "${env:FILE_PREFIX}.time.log"

Write-Host
$env:FILE_PREFIX="perf.zip.source.best"
$env:FILE_PREFIX="perf.zip.source"
Write-Host "${env:FILE_PREFIX}"
$env:PYTHONPATH="${env:FILE_PREFIX}.zip"
Compress-Archive -CompressionLevel Optimal -Path "build\perftest\*" -DestinationPath "${env:PYTHONPATH}"
Expand Down Expand Up @@ -75,7 +57,7 @@ Measure-Command {
# ------------------------------

$env:PYTHONPROFILEIMPORTTIME=$null
python -m compileall -q "build/perftest"
python -m compileall -q "build\perftest"
$env:PYTHONPROFILEIMPORTTIME=1


Expand All @@ -85,7 +67,7 @@ $env:PYTHONPROFILEIMPORTTIME=1
Write-Host
$env:FILE_PREFIX="perf.filesystem.bytecode"
Write-Host "${env:FILE_PREFIX}"
$env:PYTHONPATH="build/perftest"
$env:PYTHONPATH="build\perftest"
Measure-Command {
python -c 'import a; print(a)' 2>"${env:FILE_PREFIX}.import.log" | Write-Host
} > "${env:FILE_PREFIX}.time.log"
Expand All @@ -95,25 +77,7 @@ Measure-Command {
# ---------------

Write-Host
$env:FILE_PREFIX="perf.zip.bytecode.storeonly"
Write-Host "${env:FILE_PREFIX}"
$env:PYTHONPATH="${env:FILE_PREFIX}.zip"
Compress-Archive -CompressionLevel NoCompression -Path "build\perftest\*" -DestinationPath "${env:PYTHONPATH}"
Measure-Command {
python -c 'import a; print(a)' 2>"${env:FILE_PREFIX}.import.log" | Write-Host
} > "${env:FILE_PREFIX}.time.log"

Write-Host
$env:FILE_PREFIX="perf.zip.bytecode.fast"
Write-Host "${env:FILE_PREFIX}"
$env:PYTHONPATH="${env:FILE_PREFIX}.zip"
Compress-Archive -CompressionLevel Fastest -Path "build\perftest\*" -DestinationPath "${env:PYTHONPATH}"
Measure-Command {
python -c 'import a; print(a)' 2>"${env:FILE_PREFIX}.import.log" | Write-Host
} > "${env:FILE_PREFIX}.time.log"

Write-Host
$env:FILE_PREFIX="perf.zip.bytecode.best"
$env:FILE_PREFIX="perf.zip.bytecode"
Write-Host "${env:FILE_PREFIX}"
$env:PYTHONPATH="${env:FILE_PREFIX}.zip"
Compress-Archive -CompressionLevel Optimal -Path "build\perftest\*" -DestinationPath "${env:PYTHONPATH}"
Expand All @@ -131,7 +95,7 @@ Write-Host "${env:FILE_PREFIX}"
$env:PYTHONPATH="${env:FILE_PREFIX}.sqlite3"
$env:PYTHONPROFILEIMPORTTIME=$null
sqliteimport bundle "build\perftest" "${env:PYTHONPATH}" | Out-Null
sqliteimport compile "${PYTHONPATH}"
sqliteimport compile "${env:PYTHONPATH}"
$env:PYTHONPROFILEIMPORTTIME=1
Measure-Command {
python -c 'import sqliteimport; import a; print(a)' 2> "${env:FILE_PREFIX}.import.log" | Write-Host
Expand Down
44 changes: 2 additions & 42 deletions assets/test-performance.sh
Original file line number Diff line number Diff line change
Expand Up @@ -28,27 +28,7 @@ command time --portability --output "${FILE_PREFIX}.time.log" \
# ------------------

echo
export FILE_PREFIX="perf.zip.source.storeonly"
echo "${FILE_PREFIX}"
export PYTHONPATH="${FILE_PREFIX}.zip"
cd "build/perftest"
zip -qr0 "../../${PYTHONPATH}" .
cd "../.."
command time --portability --output "${FILE_PREFIX}.time.log" \
python -c 'import a; print(a)' 2> "${FILE_PREFIX}.import.log"

echo
export FILE_PREFIX="perf.zip.source.fast"
echo "${FILE_PREFIX}"
export PYTHONPATH="${FILE_PREFIX}.zip"
cd "build/perftest"
zip -qr1 "../../${PYTHONPATH}" .
cd "../.."
command time --portability --output "${FILE_PREFIX}.time.log" \
python -c 'import a; print(a)' 2> "${FILE_PREFIX}.import.log"

echo
export FILE_PREFIX="perf.zip.source.best"
export FILE_PREFIX="perf.zip.source"
echo "${FILE_PREFIX}"
export PYTHONPATH="${FILE_PREFIX}.zip"
cd "build/perftest"
Expand Down Expand Up @@ -91,27 +71,7 @@ command time --portability --output "${FILE_PREFIX}.time.log" \
# ---------------

echo
export FILE_PREFIX="perf.zip.bytecode.storeonly"
echo "${FILE_PREFIX}"
export PYTHONPATH="${FILE_PREFIX}.zip"
cd "build/perftest"
zip -qr0 "../../${PYTHONPATH}" .
cd "../.."
command time --portability --output "${FILE_PREFIX}.time.log" \
python -c 'import a; print(a)' 2> "${FILE_PREFIX}.import.log"

echo
export FILE_PREFIX="perf.zip.bytecode.fast"
echo "${FILE_PREFIX}"
export PYTHONPATH="${FILE_PREFIX}.zip"
cd "build/perftest"
zip -qr1 "../../${PYTHONPATH}" .
cd "../.."
command time --portability --output "${FILE_PREFIX}.time.log" \
python -c 'import a; print(a)' 2> "${FILE_PREFIX}.import.log"

echo
export FILE_PREFIX="perf.zip.bytecode.best"
export FILE_PREFIX="perf.zip.bytecode"
echo "${FILE_PREFIX}"
export PYTHONPATH="${FILE_PREFIX}.zip"
cd "build/perftest"
Expand Down
4 changes: 4 additions & 0 deletions changelog.d/20250218_070335_kurtmckee_compression.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
Added
-----

* Compress all database content using the LZMA algorithm.
35 changes: 28 additions & 7 deletions src/sqliteimport/accessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

from __future__ import annotations

import lzma
import marshal
import pathlib
import sqlite3
Expand Down Expand Up @@ -117,7 +118,7 @@ def add_file(self, directory: pathlib.Path, file: pathlib.Path) -> None:
fullname.replace("/", ".").replace("\\", "."),
str(pathlib.PurePosixPath(file)),
is_package,
contents,
compress(contents),
),
)

Expand Down Expand Up @@ -160,7 +161,7 @@ def add_bytecode(
fullname,
path,
is_package,
code,
compress(code),
),
)

Expand Down Expand Up @@ -193,13 +194,14 @@ def find_spec(self, fullname: str) -> tuple[bytes | types.CodeType, bool] | None
).fetchone()
if result is None:
return None
code, is_package = result
code = decompress(code)

# Source code
if self.find_spec_table == "code":
return result
return code, is_package

# Byte code
code, is_package = result
return marshal.loads(code, allow_code=True), is_package

def get_file(self, path_like: str) -> bytes:
Expand All @@ -212,7 +214,7 @@ def get_file(self, path_like: str) -> bytes:
""",
(path_like,),
).fetchone()[0]
return contents
return decompress(contents)

def list_directory(self, path_like: str) -> list[str]:
"""List the contents of a directory."""
Expand Down Expand Up @@ -256,7 +258,7 @@ def list_directory(self, path_like: str) -> list[str]:
parsed_results.append(result[0])
return parsed_results

def iter_source_code(self) -> typing.Generator[tuple[str, str, bool, str]]:
def iter_source_code(self) -> typing.Generator[tuple[str, str, bool, bytes]]:
cursor = self.connection.cursor()
iterable = cursor.execute(
"""
Expand All @@ -270,4 +272,23 @@ def iter_source_code(self) -> typing.Generator[tuple[str, str, bool, str]]:
;
"""
)
yield from (row for row in iterable)
row: tuple[str, str, bool, bytes]
for row in iterable:
fullname, path, is_package, contents = row
yield fullname, path, is_package, decompress(contents)


def compress(data: bytes) -> bytes:
    """Compress *data* as a raw LZMA2 stream using preset 0.

    The raw format writes no container header, so the output does not
    describe how it was encoded. It can only be decoded by supplying the
    same filter chain that is used here.

    Args:
        data: The uncompressed bytes to store.

    Returns:
        The compressed payload.
    """

    return lzma.compress(
        data,
        format=lzma.FORMAT_RAW,
        # FORMAT_RAW requires an explicit filter chain.
        filters=[{"id": lzma.FILTER_LZMA2, "preset": 0}],
    )


def decompress(data: bytes) -> bytes:
    """Decompress a raw LZMA2 stream that was encoded using preset 0.

    Raw streams carry no header describing their encoding, so the filter
    chain is stated explicitly here rather than detected from *data*.

    Args:
        data: A compressed payload in raw LZMA2 format.

    Returns:
        The original, uncompressed bytes.

    Raises:
        lzma.LZMAError: If *data* is corrupt, truncated, or was not
            encoded with a compatible filter chain.
    """

    return lzma.decompress(
        data,
        format=lzma.FORMAT_RAW,
        # FORMAT_RAW requires an explicit filter chain.
        filters=[{"id": lzma.FILTER_LZMA2, "preset": 0}],
    )

0 comments on commit 05de623

Please sign in to comment.