Skip to content

Daily refresh

Daily refresh #598

Workflow file for this run

# Workflow header: display name plus the three triggers (cron schedule, pushes
# to the `daily` branch, and manual dispatch with per-stage toggles).
name: Daily refresh
on:
  schedule:
    # Once a day at 15:10 (GitHub evaluates cron expressions in UTC).
    - cron: "10 15 * * *"
  push:
    branches:
      - daily
  workflow_dispatch:
    # Manual-run toggles. Each boolean gates the step(s) in the `refresh_data`
    # job whose `if:` condition references it.
    inputs:
      short_circuit:
        type: boolean
        description: "Short-circuit unchanged stages"
        default: true
      refresh_data:
        type: boolean
        description: "Refresh data (from NJSP)"
        default: true
      update_pqts:
        type: boolean
        description: "Update parquets (in this repo)"
        default: true
      # Free-form commit hash; when set, it is used as the parquet-update SHA
      # even if the "Update parquets" step produced none.
      update_pqts_sha:
        type: string
        description: '"Update NJSP data" commit hash (for posting to Slack and rebuilding www)'
      update_plots:
        type: boolean
        description: "Update plots"
        default: true
      post_to_slack:
        type: boolean
        description: "Post to Slack"
        default: true
      build_www:
        type: boolean
        description: "Build www"
        default: true
      # Free-form channel ID; overrides the SLACK_CHANNEL_ID secret when set.
      slack_channel_id:
        type: string
        description: "Slack channel override"
jobs:
  # Runs the refresh pipeline stage by stage. Each stage is gated on the
  # trigger (schedule, or the matching workflow_dispatch toggle) and, for the
  # later stages, on an output of an earlier stage unless short-circuiting is
  # disabled.
  #
  # NOTE(review): `inputs` is only populated for workflow_dispatch runs, so on
  # scheduled runs `!inputs.short_circuit` evaluates to true and no stage is
  # short-circuited — confirm this is the intended behaviour.
  refresh_data:
    name: Refresh NJSP data, dispatch www rebuild (if necessary)
    runs-on: ubuntu-latest
    # Values consumed by the `rebuild_www` job below.
    outputs:
      update_pqts: ${{ steps.update_pqts.outputs.sha }}
      build_www: ${{ steps.build_www.outputs.run }}
      sha: ${{ steps.build_www.outputs.sha }}
    steps:
      # `needs_python` is non-empty when any stage that uses the checkout and
      # Python environment is requested; the setup steps below are skipped
      # otherwise. The expression is passed through `env:` so that free-form
      # input text is never spliced into the shell script itself.
      - name: Compute steps that need to run
        id: compute_steps
        env:
          NEEDS_PYTHON: ${{ github.event.schedule || inputs.refresh_data || inputs.update_pqts || inputs.update_pqts_sha || inputs.update_plots || inputs.post_to_slack || '' }}
        run: echo "needs_python=$NEEDS_PYTHON" >> "$GITHUB_OUTPUT"

      - uses: actions/checkout@v4
        if: steps.compute_steps.outputs.needs_python
        with:
          ref: ${{ github.ref_name }}

      # NOTE(review): the action reference in the scraped source was mangled to
      # "webfactory/[email protected]" (e-mail obfuscation). It is reconstructed
      # here as webfactory/ssh-agent; confirm the pinned version tag.
      - uses: webfactory/ssh-agent@v0.9.0
        if: steps.compute_steps.outputs.needs_python
        with:
          ssh-private-key: ${{ secrets.GHA_DEPLOY_KEY }}

      - uses: actions/setup-python@v4
        if: steps.compute_steps.outputs.needs_python
        with:
          # Quoted so the version is always read as a string, never a float.
          python-version: "3.9"
          cache: pip

      - run: pip install -e .
        if: steps.compute_steps.outputs.needs_python

      # NOTE(review): the e-mail address in the scraped source was replaced by
      # an "[email protected]" placeholder. The GitHub Actions bot no-reply
      # address is substituted; restore the project's own address if different.
      - name: Configure Git author
        if: steps.compute_steps.outputs.needs_python
        run: |
          git config --global user.name 'GitHub Actions'
          git config --global user.email '41898282+github-actions[bot]@users.noreply.github.com'

      - name: Refresh data
        id: refresh_data
        if: github.event.schedule || inputs.refresh_data
        run: njsp -cc refresh_data

      # Gated on the `sha` output of the "Refresh data" step (presumably set by
      # `njsp` when it commits a change — verify), unless short-circuiting is
      # off.
      - name: Update parquets
        id: update_pqts
        if: (github.event.schedule || inputs.update_pqts) && (steps.refresh_data.outputs.sha || !inputs.short_circuit)
        run: njsp -cc update_pqts

      # Records the effective parquet-update SHA (manual override first, then
      # the "Update parquets" output) and uploads nj_crashes.db and the crashes
      # parquet to the nj-crashes bucket with a public-read ACL.
      - name: AWS sync, update parquets SHA
        id: update_pqts_sha
        if: inputs.update_pqts_sha || steps.update_pqts.outputs.sha
        env:
          AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
          AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          AWS_DEFAULT_REGION: us-east-1
          # Passed via env rather than inlined: the input is free-form text.
          UPDATE_PQTS_SHA: ${{ inputs.update_pqts_sha || steps.update_pqts.outputs.sha }}
        run: |
          echo "sha=$UPDATE_PQTS_SHA" >> "$GITHUB_OUTPUT"
          aws s3 sync --exclude '*' --include 'nj_crashes.db' ./ s3://nj-crashes/
          aws s3api put-object-acl --bucket nj-crashes --key nj_crashes.db --acl public-read
          aws s3 cp data/crashes.pqt s3://nj-crashes/data/crashes.parquet
          aws s3api put-object-acl --bucket nj-crashes --key data/crashes.parquet --acl public-read

      # Single flag that gates the four plot-related steps that follow.
      - name: Compute plot_data.changed
        id: plot_data
        if: (github.event.schedule || inputs.update_plots) && (steps.update_pqts_sha.outputs.sha || !inputs.short_circuit)
        run: echo "changed=1" >> "$GITHUB_OUTPUT"

      - name: Refresh annual summaries
        if: steps.plot_data.outputs.changed
        run: njsp -cc refresh_summaries

      # Deepens the checkout to cover the last 375 days of the current branch.
      - name: "Fetch ≈1yr of history"
        if: steps.plot_data.outputs.changed
        env:
          REF_NAME: ${{ github.ref_name }}
        run: |
          since="$(date --date="$(date +%Y-%m-%d) -375 day" +%Y-%m-%d)"
          echo "Fetching commits since $since"
          git fetch --shallow-since "$since" origin "$REF_NAME"
          # `git rev-list --count` needs a revision to count from.
          echo "Fetched $(git rev-list --count HEAD) revisions"

      - name: Update YTD / ROY projections
        if: steps.plot_data.outputs.changed
        run: njsp -cc update_projections

      - name: Update plot data
        id: update_plots
        if: steps.plot_data.outputs.changed
        run: njsp -cc update_plots

      - name: Post to Slack
        id: post_to_slack
        if: (github.event.schedule || inputs.post_to_slack) && (steps.update_pqts_sha.outputs.sha || !inputs.short_circuit)
        env:
          GH_TOKEN: ${{ secrets.GH_TOKEN }}
          SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
          SLACK_CHANNEL_ID: ${{ inputs.slack_channel_id || secrets.SLACK_CHANNEL_ID }}
          UPDATE_PQTS_SHA: ${{ steps.update_pqts_sha.outputs.sha }}
        # $UPDATE_PQTS_SHA is deliberately unquoted: when the SHA is empty the
        # command line must stay a bare `-c`, exactly as it was when the
        # expression was inlined, rather than gaining an empty-string argument.
        run: njsp slack sync -m500 -c $UPDATE_PQTS_SHA

      # Emits `run=1` plus the abbreviated hash of the current HEAD commit;
      # both are exposed as job outputs for `rebuild_www`.
      - name: Decide whether to rebuild www
        id: build_www
        if: (github.event.schedule || inputs.build_www) && (steps.update_plots.outputs.sha || !inputs.short_circuit)
        run: |
          echo "run=1" >> "$GITHUB_OUTPUT"
          echo "sha=$(git log -1 --format=%h)" >> "$GITHUB_OUTPUT"

  # Calls the reusable www workflow at the commit reported by `refresh_data`,
  # but only when that job set its `build_www` output.
  rebuild_www:
    name: Rebuild www
    needs: refresh_data
    if: needs.refresh_data.outputs.build_www
    uses: ./.github/workflows/www.yml
    with:
      ref: ${{ needs.refresh_data.outputs.sha }}