Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add precipitation histogram to prognostic run report #1271

Merged
merged 20 commits into from
Jun 23, 2021
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion external/report/report/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,10 @@
from .create_report import create_html, insert_report_figure, Metrics, Metadata, Link
from .create_report import (
create_html,
insert_report_figure,
Metrics,
Metadata,
Link,
OrderedList,
)

__version__ = "0.1.0"
11 changes: 10 additions & 1 deletion external/report/report/create_report.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import datetime
import os
from typing import Mapping, Sequence, Union
from typing import Any, Mapping, Sequence, Union

from jinja2 import Template
from pytz import timezone
Expand Down Expand Up @@ -90,6 +90,15 @@ def __repr__(self) -> str:
return f'<a href="{self.url}">{self.tag}</a>'


class OrderedList:
    """HTML ordered list whose ``repr`` is the ``<ol>`` markup of its items.

    Each item is interpolated into its own ``<li>`` element using the item's
    string formatting, one element per line.
    """

    def __init__(self, *items: Any):
        # kept as the tuple of positional arguments, in the order given
        self.items = items

    def __repr__(self) -> str:
        body = "\n".join(map("<li>{}</li>".format, self.items))
        return f"<ol>\n{body}\n</ol>"


def resolve_plot(obj):
if isinstance(obj, str):
return ImagePlot(obj)
Expand Down
7 changes: 6 additions & 1 deletion external/report/tests/test_report.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import os
from report import __version__, create_html
from report import __version__, create_html, OrderedList
from report.create_report import _save_figure, insert_report_figure


Expand Down Expand Up @@ -46,3 +46,8 @@ def test__save_figure(tmpdir):
with open(os.path.join(output_dir, filepath_relative_to_report), "r") as f:
saved_data = f.read()
assert saved_data.replace("\n", "") == fig.content


def test_OrderedList_repr():
    """``str`` of an OrderedList wraps every item in ``<li>`` inside one ``<ol>``."""
    expected = "<ol>\n<li>item1</li>\n<li>item2</li>\n</ol>"
    assert str(OrderedList("item1", "item2")) == expected
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,12 @@
grouping contains outputs from the physics routines (`sfc_dt_atmos.tile*.nc` and
`diags.zarr`).
"""
import os
import sys

import datetime
import tempfile
import intake
import numpy as np
import xarray as xr
import shutil
from dask.diagnostics import ProgressBar
import fsspec

Expand All @@ -37,6 +34,7 @@
from fv3net.diagnostics.prognostic_run import diurnal_cycle
from fv3net.diagnostics.prognostic_run import transform
from fv3net.diagnostics.prognostic_run.constants import (
HISTOGRAM_BINS,
HORIZONTAL_DIMS,
DiagArg,
GLOBAL_AVERAGE_DYCORE_VARS,
Expand Down Expand Up @@ -229,16 +227,6 @@ def _assign_diagnostic_time_attrs(
return diagnostics_ds


def dump_nc(ds: xr.Dataset, f):
    """Write ``ds`` as netCDF into the already-open binary file object ``f``.

    The dataset is first written to a file inside a temporary directory and
    its bytes are then copied into ``f``.

    Review note from the pull request: callers switched to the
    ``vcm.dump_nc`` version of this a while ago, so this function is unused.
    """
    # to_netcdf closes the file it writes to, which would delete a buffer, and
    # seek doesn't work with GCSFS file objects -- hence the intermediate
    # on-disk copy.
    with tempfile.TemporaryDirectory() as scratch_dir:
        local_path = os.path.join(scratch_dir, "tmp.nc")
        ds.to_netcdf(local_path, engine="h5netcdf")
        with open(local_path, "rb") as source:
            shutil.copyfileobj(source, f)


@add_to_diags("dycore")
@diag_finalizer("rms_global")
@transform.apply("resample_time", "3H", inner_join=True)
Expand Down Expand Up @@ -503,6 +491,34 @@ def _diurnal_func(
return _assign_diagnostic_time_attrs(diag, prognostic)


@add_to_diags("physics")
@diag_finalizer("histogram")
@transform.apply("resample_time", "3H", inner_join=True)
@transform.apply("subset_variables", HISTOGRAM_BINS.keys())
def compute_histogram(prognostic, verification, grid):
    """Compute a density-normalized histogram of each variable in ``prognostic``.

    Bin edges for every variable are looked up in ``HISTOGRAM_BINS``. Each
    histogram is stored under the variable's own name and is indexed by a
    ``<varname>_bins`` coordinate holding the bin midpoints; that coordinate
    is given the ``units`` attribute of the source variable.

    ``verification`` and ``grid`` are not referenced in this function body.

    Returns:
        The dataset of histograms after it has been passed, together with
        ``prognostic``, through ``_assign_diagnostic_time_attrs``.
    """
    logger.info("Computing histograms for physics diagnostics")
    counts = xr.Dataset()
    for varname in prognostic.data_vars:
        # np.histogram is much faster (and simpler code-wise) than doing an
        # xarray groupby over the bins.
        count, bins = np.histogram(
            prognostic[varname], bins=HISTOGRAM_BINS[varname], density=True
        )
        bin_midpoints = 0.5 * (bins[:-1] + bins[1:])
        coords = {f"{varname}_bins": bin_midpoints}
        count_da = xr.DataArray(count, coords=coords, dims=list(coords.keys()))
        count_da[f"{varname}_bins"].attrs["units"] = prognostic[varname].units
        counts[varname] = count_da
    return _assign_diagnostic_time_attrs(counts, prognostic)


def register_parser(subparsers):
parser = subparsers.add_parser("save", help="Compute the prognostic run diags.")
parser.add_argument("url", help="Prognostic run output location.")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -274,11 +274,6 @@ def detect_folders(
bucket: str, fs: fsspec.AbstractFileSystem,
) -> Mapping[str, DiagnosticFolder]:
diag_ncs = fs.glob(os.path.join(bucket, "*", "diags.nc"))
if len(diag_ncs) < 2:
raise ValueError(
"Plots require more than 1 diagnostic directory in"
f" {bucket} for holoviews plots to display correctly."
)
return {
Path(url).parent.name: DiagnosticFolder(fs, Path(url).parent.as_posix())
for url in diag_ncs
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import numpy as np
import xarray as xr
from typing import Tuple

Expand Down Expand Up @@ -135,3 +136,5 @@
"dQu",
"dQv",
]

# Histogram bin edges, keyed by variable name: 101 logarithmically spaced
# edges (i.e. 100 bins) running from 0.1 to 500.
# NOTE(review): edges are presumably in the variable's own units -- confirm.
HISTOGRAM_BINS = {"total_precip_to_surface": np.logspace(-1, np.log10(500), 101)}
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,9 @@
}


def fig_to_b64(fig, format="png", dpi=None):
    """Render ``fig`` to a base64 ``data:`` URI suitable for ``<img src=...>``.

    Args:
        fig: object with a matplotlib-style
            ``savefig(buffer, format=..., bbox_inches=..., dpi=...)`` method.
        format: image format handed to ``savefig``. It also selects the MIME
            type written into the URI (previously always ``image/png``, which
            mislabelled any other format).
        dpi: resolution forwarded unchanged to ``savefig``.

    Returns:
        A string of the form ``"data:<mime type>;base64, <payload>"``.
    """
    import mimetypes

    buffer = io.BytesIO()
    fig.savefig(buffer, format=format, bbox_inches="tight", dpi=dpi)
    payload = base64.b64encode(buffer.getvalue()).decode()

    # e.g. "svg" -> "image/svg+xml"; unknown extensions fall back to image/<format>
    mime_type, _ = mimetypes.guess_type(f"figure.{format}")
    if mime_type is None:
        mime_type = f"image/{format}"
    return f"data:{mime_type};base64, " + payload
Expand Down Expand Up @@ -179,6 +179,27 @@ def plot_cubed_sphere_map(
)


def plot_histogram(run_diags: RunDiagnostics, varname: str) -> raw_html:
"""Plot 1D histogram of varname overlaid across runs.

One step line is drawn per entry of ``run_diags.runs`` on log-log axes. The
bin coordinate name is derived from ``varname`` by replacing "histogram" with
"bins". The figure is encoded with ``fig_to_b64`` and returned wrapped in an
``<img>`` tag.
"""

logging.info(f"plotting {varname}")
fig, ax = plt.subplots()
# e.g. "<name>_histogram" -> "<name>_bins"
bin_name = varname.replace("histogram", "bins")
for run in run_diags.runs:
v = run_diags.get_variable(run, varname)
# step line labelled with the run name, so the legend distinguishes runs
ax.step(v[bin_name], v, label=run, where="post", linewidth=1)
# NOTE(review): labels and limits below read ``v`` as bound by the loop above;
# this assumes at least one run is present -- TODO confirm.
ax.set_xlabel(f"{v.long_name} [{v.units}]")
ax.set_ylabel(f"Frequency [({v.units})^-1]")
ax.set_xscale("log")
ax.set_yscale("log")
# x-axis spans from the first to the last value of the bin coordinate
ax.set_xlim([v[bin_name].values[0], v[bin_name].values[-1]])
ax.legend()
fig.tight_layout()
data = fig_to_b64(fig, dpi=150)
# close the figure once it has been encoded
plt.close(fig)
return raw_html(f'<img src="{data}" width="800px" />')


def _render_map_title(
metrics: RunMetrics, variable: str, run: str, metrics_for_title: Mapping[str, str],
) -> str:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,14 @@
RunMetrics,
)

from report import create_html, Link
from report import create_html, Link, OrderedList
from report.holoviews import HVPlot, get_html_header
from .matplotlib import plot_2d_matplotlib, plot_cubed_sphere_map, raw_html
from .matplotlib import (
plot_2d_matplotlib,
plot_cubed_sphere_map,
raw_html,
plot_histogram,
)

import logging

Expand Down Expand Up @@ -75,9 +80,7 @@ def make_plots(self, data) -> Iterable:
yield func(data)


def plot_1d(
run_diags: RunDiagnostics, varfilter: str, run_attr_name: str = "run",
) -> HVPlot:
def plot_1d(run_diags: RunDiagnostics, varfilter: str) -> HVPlot:
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

deleted unused argument run_attr_name

"""Plot all diagnostics whose name includes varfilter. Plot is overlaid across runs.
All matching diagnostics must be 1D."""
p = hv.Cycle("Colorblind")
Expand All @@ -95,10 +98,7 @@ def plot_1d(


def plot_1d_min_max_with_region_bar(
run_diags: RunDiagnostics,
varfilter_min: str,
varfilter_max: str,
run_attr_name: str = "run",
run_diags: RunDiagnostics, varfilter_min: str, varfilter_max: str,
) -> HVPlot:
"""Plot all diagnostics whose name includes varfilter. Plot is overlaid across runs.
All matching diagnostics must be 1D."""
Expand All @@ -123,9 +123,7 @@ def plot_1d_min_max_with_region_bar(
return HVPlot(_set_opts_and_overlay(hmap))


def plot_1d_with_region_bar(
run_diags: RunDiagnostics, varfilter: str, run_attr_name: str = "run"
) -> HVPlot:
def plot_1d_with_region_bar(run_diags: RunDiagnostics, varfilter: str) -> HVPlot:
"""Plot all diagnostics whose name includes varfilter. Plot is overlaid across runs.
Region will be selectable through a drop-down bar. Region is assumed to be part of
variable name after last underscore. All matching diagnostics must be 1D."""
Expand Down Expand Up @@ -189,6 +187,7 @@ def diurnal_component_plot(
hovmoller_plot_manager = PlotManager()
zonal_pressure_plot_manager = PlotManager()
diurnal_plot_manager = PlotManager()
histogram_plot_manager = PlotManager()

# this will be passed the data from the metrics.json files
metrics_plot_manager = PlotManager()
Expand Down Expand Up @@ -291,6 +290,11 @@ def diurnal_cycle_component_plots(diagnostics: Iterable[xr.Dataset]) -> HVPlot:
return diurnal_component_plot(diagnostics)


@histogram_plot_manager.register
def histogram_plots(diagnostics: Iterable[xr.Dataset]) -> raw_html:
    """Plot the ``total_precip_to_surface_histogram`` diagnostic across runs.

    Delegates to ``plot_histogram``, which is declared to return ``raw_html``
    (not an ``HVPlot``), so the return annotation says so.
    """
    # NOTE(review): plot_histogram annotates its first argument as
    # RunDiagnostics; the annotation here follows the other registered
    # plotting routines in this file -- confirm which one is intended.
    return plot_histogram(diagnostics, "total_precip_to_surface_histogram")


# Routines for plotting the "metrics"
# New plotting routines can be registered here.
@metrics_plot_manager.register
Expand Down Expand Up @@ -325,20 +329,21 @@ def generic_metric_plot(metrics: RunMetrics, metric_type: str) -> hv.HoloMap:
return HVPlot(hmap.opts(**bar_opts))


# Links shown at the top of every report page. Each target must match a
# filename key of the ``pages`` mapping built in ``make_report``; the process
# diagnostics page is written as "process_diagnostics.html".
navigation = OrderedList(
    Link("Home", "index.html"),
    Link("Process diagnostics", "process_diagnostics.html"),
    Link("Latitude versus time hovmoller", "hovmoller.html"),
    Link("Time-mean maps", "maps.html"),
    Link("Time-mean zonal-pressure profiles", "zonal_pressure.html"),
)
navigation = [navigation]  # must be iterable for Jinja HTML template


def render_index(metadata, diagnostics, metrics, movie_links):
sections_index = {
"Links": navigation,
"Timeseries": list(timeseries_plot_manager.make_plots(diagnostics)),
"Zonal mean": list(zonal_mean_plot_manager.make_plots(diagnostics)),
"Diurnal cycle": list(diurnal_plot_manager.make_plots(diagnostics)),
}

if not metrics.empty:
Expand Down Expand Up @@ -398,6 +403,20 @@ def render_zonal_pressures(metadata, diagnostics):
)


def render_process_diagnostics(metadata, diagnostics):
    """Build the HTML of the "Process diagnostics" report page.

    The page lists the navigation links, followed by the plots produced from
    ``diagnostics`` by the diurnal-cycle and histogram plot managers.
    """
    diurnal_figures = list(diurnal_plot_manager.make_plots(diagnostics))
    histogram_figures = list(histogram_plot_manager.make_plots(diagnostics))
    sections = {
        "Links": navigation,
        "Diurnal cycle": diurnal_figures,
        "Precipitation histogram": histogram_figures,
    }
    page_options = {
        "title": "Process diagnostics",
        "metadata": metadata,
        "sections": sections,
        "html_header": get_html_header(),
    }
    return create_html(**page_options)


def _html_link(url, tag):
return f"<a href='{url}'>{tag}</a>"

Expand Down Expand Up @@ -427,6 +446,7 @@ def make_report(computed_diagnostics: ComputedDiagnosticsList, output):
"hovmoller.html": render_hovmollers(metadata, diagnostics),
"maps.html": render_maps(metadata, diagnostics, metrics),
"zonal_pressure.html": render_zonal_pressures(metadata, diagnostics),
"process_diagnostics.html": render_process_diagnostics(metadata, diagnostics),
}

for filename, html in pages.items():
Expand Down
3 changes: 0 additions & 3 deletions workflows/prognostic_run_diags/tests/test_integration.sh
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,6 @@ gsutil cp /tmp/$random/metrics.json $OUTPUT/run1/metrics.json
# generate movies for short sample prognostic run
prognostic_run_diags movie --n_jobs 1 --n_timesteps 2 $RUN $OUTPUT/run1

# make a second copy of diags/metrics since generate_report.py needs at least two runs
gsutil -m cp -r $OUTPUT/run1 $OUTPUT/run2

# generate report based on diagnostics computed above
prognostic_run_diags report $OUTPUT $OUTPUT

Expand Down