ai2cm · oliverwm1 · Jun 23, 2021 · Jun 18, 2021 · Jun 18, 2021 · Jun 18, 2021
diff --git a/external/report/report/__init__.py b/external/report/report/__init__.py
@@ -1,3 +1,10 @@
-from .create_report import create_html, insert_report_figure, Metrics, Metadata, Link
+from .create_report import (
+    create_html,
+    insert_report_figure,
+    Metrics,
+    Metadata,
+    Link,
+    OrderedList,
+)
 
 __version__ = "0.1.0"
diff --git a/external/report/report/create_report.py b/external/report/report/create_report.py
@@ -1,6 +1,6 @@
 import datetime
 import os
-from typing import Mapping, Sequence, Union
+from typing import Any, Mapping, Sequence, Union
 
 from jinja2 import Template
 from pytz import timezone
@@ -90,6 +90,20 @@ def __repr__(self) -> str:
         return f'<a href="{self.url}">{self.tag}</a>'
 
 
+class OrderedList:
+    """HTML ordered list whose ``repr`` is the ``<ol>`` markup of its items.
+
+    Each item is formatted into its own ``<li>`` element, in the order given.
+    """
+
+    def __init__(self, *items: Any):
+        self.items = items
+
+    def __repr__(self) -> str:
+        list_body = "\n".join(f"<li>{entry}</li>" for entry in self.items)
+        return f"<ol>\n{list_body}\n</ol>"
+
+
 def resolve_plot(obj):
     if isinstance(obj, str):
         return ImagePlot(obj)

diff --git a/external/report/tests/test_report.py b/external/report/tests/test_report.py
@@ -1,5 +1,5 @@
 import os
-from report import __version__, create_html
+from report import __version__, create_html, OrderedList
 from report.create_report import _save_figure, insert_report_figure
 
 
@@ -46,3 +46,8 @@ def test__save_figure(tmpdir):
     with open(os.path.join(output_dir, filepath_relative_to_report), "r") as f:
         saved_data = f.read()
     assert saved_data.replace("\n", "") == fig.content
+
+
+def test_OrderedList_repr():
+    expected = "<ol>\n<li>item1</li>\n<li>item2</li>\n</ol>"
+    assert str(OrderedList("item1", "item2")) == expected
diff --git a/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/compute.py b/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/compute.py
@@ -12,15 +12,12 @@
 grouping contains outputs from the physics routines (`sfc_dt_atmos.tile*.nc` and
 `diags.zarr`).
 """
-import os
 import sys
 
 import datetime
-import tempfile
 import intake
 import numpy as np
 import xarray as xr
-import shutil
 from dask.diagnostics import ProgressBar
 import fsspec
 
@@ -37,6 +34,7 @@
 from fv3net.diagnostics.prognostic_run import diurnal_cycle
 from fv3net.diagnostics.prognostic_run import transform
 from fv3net.diagnostics.prognostic_run.constants import (
+    HISTOGRAM_BINS,
     HORIZONTAL_DIMS,
     DiagArg,
     GLOBAL_AVERAGE_DYCORE_VARS,
@@ -229,16 +227,6 @@ def _assign_diagnostic_time_attrs(
     return diagnostics_ds
 
 
-def dump_nc(ds: xr.Dataset, f):
-    # to_netcdf closes file, which will delete the buffer
-    # need to use a buffer since seek doesn't work with GCSFS file objects
-    with tempfile.TemporaryDirectory() as dirname:
-        url = os.path.join(dirname, "tmp.nc")
-        ds.to_netcdf(url, engine="h5netcdf")
-        with open(url, "rb") as tmp1:
-            shutil.copyfileobj(tmp1, f)
-
-
 @add_to_diags("dycore")
 @diag_finalizer("rms_global")
 @transform.apply("resample_time", "3H", inner_join=True)
@@ -503,6 +491,34 @@ def _diurnal_func(
             return _assign_diagnostic_time_attrs(diag, prognostic)
 
 
+@add_to_diags("physics")
+@diag_finalizer("histogram")
+@transform.apply("resample_time", "3H", inner_join=True)
+@transform.apply("subset_variables", HISTOGRAM_BINS.keys())
+def compute_histogram(prognostic, verification, grid):
+    """Compute a density-normalized histogram of each prognostic variable.
+
+    Bin edges are taken from HISTOGRAM_BINS. Each histogram is indexed by a
+    ``<varname>_bins`` coordinate of bin midpoints, which is assigned the units
+    of the source variable. ``verification`` and ``grid`` are not used here.
+    """
+    logger.info("Computing histograms for physics diagnostics")
+    counts = xr.Dataset()
+    for varname in prognostic.data_vars:
+        bin_name = f"{varname}_bins"
+        # np.histogram operates on the flattened array, so all dims are pooled
+        count, bins = np.histogram(
+            prognostic[varname], bins=HISTOGRAM_BINS[varname], density=True
+        )
+        bin_midpoints = 0.5 * (bins[:-1] + bins[1:])
+        count_da = xr.DataArray(
+            count, coords={bin_name: bin_midpoints}, dims=[bin_name]
+        )
+        count_da[bin_name].attrs["units"] = prognostic[varname].units
+        counts[varname] = count_da
+    return _assign_diagnostic_time_attrs(counts, prognostic)
+
+
 def register_parser(subparsers):
     parser = subparsers.add_parser("save", help="Compute the prognostic run diags.")
     parser.add_argument("url", help="Prognostic run output location.")

diff --git a/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/computed_diagnostics.py b/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/computed_diagnostics.py
@@ -274,11 +274,6 @@ def detect_folders(
     bucket: str, fs: fsspec.AbstractFileSystem,
 ) -> Mapping[str, DiagnosticFolder]:
     diag_ncs = fs.glob(os.path.join(bucket, "*", "diags.nc"))
-    if len(diag_ncs) < 2:
-        raise ValueError(
-            "Plots require more than 1 diagnostic directory in"
-            f" {bucket} for holoviews plots to display correctly."
-        )
     return {
         Path(url).parent.name: DiagnosticFolder(fs, Path(url).parent.as_posix())
         for url in diag_ncs

diff --git a/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/constants.py b/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/constants.py
@@ -1,3 +1,4 @@
+import numpy as np
 import xarray as xr
 from typing import Tuple
 
@@ -135,3 +136,5 @@
     "dQu",
     "dQv",
 ]
+
+HISTOGRAM_BINS = {"total_precip_to_surface": np.logspace(-1, np.log10(500), 101)}
diff --git a/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/views/matplotlib.py b/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/views/matplotlib.py
@@ -30,9 +30,16 @@
 }
 
 
-def fig_to_b64(fig, format="png"):
+def fig_to_b64(fig, format="png", dpi=None):
+    """Encode a figure as a base64 data URI.
+
+    ``format`` and ``dpi`` are forwarded to ``fig.savefig``. The MIME subtype of
+    the URI follows ``format`` rather than always claiming PNG.
+    """
     pic_IObytes = io.BytesIO()
-    fig.savefig(pic_IObytes, format=format, bbox_inches="tight")
+    fig.savefig(pic_IObytes, format=format, bbox_inches="tight", dpi=dpi)
     pic_IObytes.seek(0)
     pic_hash = base64.b64encode(pic_IObytes.read())
-    return f"data:image/png;base64, " + pic_hash.decode()
+    # MIME subtypes that differ from the matplotlib format name
+    mime_subtype = {"jpg": "jpeg", "svg": "svg+xml"}.get(format, format)
+    return f"data:image/{mime_subtype};base64, " + pic_hash.decode()
@@ -179,6 +179,44 @@ def plot_cubed_sphere_map(
     )
 
 
+def plot_histogram(run_diags: RunDiagnostics, varname: str) -> raw_html:
+    """Plot 1D histogram of varname overlaid across runs.
+
+    Args:
+        run_diags: diagnostics to plot; one step curve is drawn for each entry
+            of ``run_diags.runs``.
+        varname: name of the histogram variable. The name of its bin coordinate
+            is obtained by replacing "histogram" with "bins" in ``varname``.
+
+    Returns:
+        raw_html wrapping an ``<img>`` tag whose source is the encoded figure.
+    """
+
+    logging.info(f"plotting {varname}")
+    fig, ax = plt.subplots()
+    try:
+        bin_name = varname.replace("histogram", "bins")
+        v = None
+        for run in run_diags.runs:
+            v = run_diags.get_variable(run, varname)
+            ax.step(v[bin_name], v, label=run, where="post", linewidth=1)
+        ax.set_xscale("log")
+        ax.set_yscale("log")
+        if v is not None:
+            # Labels and limits are read from the last run plotted. With no
+            # runs there is nothing to read them from, so leave the axes bare.
+            ax.set_xlabel(f"{v.long_name} [{v.units}]")
+            ax.set_ylabel(f"Frequency [({v.units})^-1]")
+            ax.set_xlim([v[bin_name].values[0], v[bin_name].values[-1]])
+            ax.legend()
+        fig.tight_layout()
+        data = fig_to_b64(fig, dpi=150)
+    finally:
+        # close the figure even if plotting or encoding raises
+        plt.close(fig)
+    return raw_html(f'<img src="{data}" width="800px" />')
+
+
 def _render_map_title(
     metrics: RunMetrics, variable: str, run: str, metrics_for_title: Mapping[str, str],
 ) -> str:

diff --git a/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/views/static_report.py b/workflows/prognostic_run_diags/fv3net/diagnostics/prognostic_run/views/static_report.py
@@ -13,9 +13,14 @@
     RunMetrics,
 )
 
-from report import create_html, Link
+from report import create_html, Link, OrderedList
 from report.holoviews import HVPlot, get_html_header
-from .matplotlib import plot_2d_matplotlib, plot_cubed_sphere_map, raw_html
+from .matplotlib import (
+    plot_2d_matplotlib,
+    plot_cubed_sphere_map,
+    raw_html,
+    plot_histogram,
+)
 
 import logging
 
@@ -75,9 +80,7 @@ def make_plots(self, data) -> Iterable:
             yield func(data)
 
 
-def plot_1d(
-    run_diags: RunDiagnostics, varfilter: str, run_attr_name: str = "run",
-) -> HVPlot:
+def plot_1d(run_diags: RunDiagnostics, varfilter: str) -> HVPlot:
     """Plot all diagnostics whose name includes varfilter. Plot is overlaid across runs.
     All matching diagnostics must be 1D."""
     p = hv.Cycle("Colorblind")
@@ -95,10 +98,7 @@ def plot_1d(
 
 
 def plot_1d_min_max_with_region_bar(
-    run_diags: RunDiagnostics,
-    varfilter_min: str,
-    varfilter_max: str,
-    run_attr_name: str = "run",
+    run_diags: RunDiagnostics, varfilter_min: str, varfilter_max: str,
 ) -> HVPlot:
     """Plot all diagnostics whose name includes varfilter. Plot is overlaid across runs.
     All matching diagnostics must be 1D."""
@@ -123,9 +123,7 @@ def plot_1d_min_max_with_region_bar(
     return HVPlot(_set_opts_and_overlay(hmap))
 
 
-def plot_1d_with_region_bar(
-    run_diags: RunDiagnostics, varfilter: str, run_attr_name: str = "run"
-) -> HVPlot:
+def plot_1d_with_region_bar(run_diags: RunDiagnostics, varfilter: str) -> HVPlot:
     """Plot all diagnostics whose name includes varfilter. Plot is overlaid across runs.
     Region will be selectable through a drop-down bar. Region is assumed to be part of
     variable name after last underscore. All matching diagnostics must be 1D."""
@@ -189,6 +187,7 @@ def diurnal_component_plot(
 hovmoller_plot_manager = PlotManager()
 zonal_pressure_plot_manager = PlotManager()
 diurnal_plot_manager = PlotManager()
+histogram_plot_manager = PlotManager()
 
 # this will be passed the data from the metrics.json files
 metrics_plot_manager = PlotManager()
@@ -291,6 +290,11 @@ def diurnal_cycle_component_plots(diagnostics: Iterable[xr.Dataset]) -> HVPlot:
     return diurnal_component_plot(diagnostics)
 
 
+@histogram_plot_manager.register
+def histogram_plots(diagnostics: Iterable[xr.Dataset]) -> raw_html:
+    return plot_histogram(diagnostics, "total_precip_to_surface_histogram")
+
+
 # Routines for plotting the "metrics"
 # New plotting routines can be registered here.
 @metrics_plot_manager.register
@@ -325,20 +329,21 @@ def generic_metric_plot(metrics: RunMetrics, metric_type: str) -> hv.HoloMap:
         return HVPlot(hmap.opts(**bar_opts))
 
 
-navigation = [
+navigation = OrderedList(
     Link("Home", "index.html"),
+    Link("Process diagnostics", "process_diagnostics.html"),
     Link("Latitude versus time hovmoller", "hovmoller.html"),
     Link("Time-mean maps", "maps.html"),
     Link("Time-mean zonal-pressure profiles", "zonal_pressure.html"),
-]
+)
+navigation = [navigation]  # must be iterable for Jinja HTML template
 
 
 def render_index(metadata, diagnostics, metrics, movie_links):
     sections_index = {
         "Links": navigation,
         "Timeseries": list(timeseries_plot_manager.make_plots(diagnostics)),
         "Zonal mean": list(zonal_mean_plot_manager.make_plots(diagnostics)),
-        "Diurnal cycle": list(diurnal_plot_manager.make_plots(diagnostics)),
     }
 
     if not metrics.empty:
@@ -398,6 +403,27 @@ def render_zonal_pressures(metadata, diagnostics):
     )
 
 
+def render_process_diagnostics(metadata, diagnostics):
+    """Render the "Process diagnostics" page.
+
+    The page holds the navigation links, the diurnal cycle plots and the
+    precipitation histogram plots, in that order.
+    """
+    diurnal = list(diurnal_plot_manager.make_plots(diagnostics))
+    precip_histograms = list(histogram_plot_manager.make_plots(diagnostics))
+    page_sections = {
+        "Links": navigation,
+        "Diurnal cycle": diurnal,
+        "Precipitation histogram": precip_histograms,
+    }
+    return create_html(
+        title="Process diagnostics",
+        metadata=metadata,
+        sections=page_sections,
+        html_header=get_html_header(),
+    )
+
+
 def _html_link(url, tag):
     return f"<a href='{url}'>{tag}</a>"
 
@@ -427,6 +446,7 @@ def make_report(computed_diagnostics: ComputedDiagnosticsList, output):
         "hovmoller.html": render_hovmollers(metadata, diagnostics),
         "maps.html": render_maps(metadata, diagnostics, metrics),
         "zonal_pressure.html": render_zonal_pressures(metadata, diagnostics),
+        "process_diagnostics.html": render_process_diagnostics(metadata, diagnostics),
     }
 
     for filename, html in pages.items():

diff --git a/workflows/prognostic_run_diags/tests/test_integration.sh b/workflows/prognostic_run_diags/tests/test_integration.sh
@@ -21,9 +21,6 @@ gsutil cp /tmp/$random/metrics.json $OUTPUT/run1/metrics.json
 # generate movies for short sample prognostic run
 prognostic_run_diags movie --n_jobs 1 --n_timesteps 2 $RUN $OUTPUT/run1
 
-# make a second copy of diags/metrics since generate_report.py needs at least two runs
-gsutil -m cp -r $OUTPUT/run1 $OUTPUT/run2
-
 # generate report based on diagnostics computed above
 prognostic_run_diags report $OUTPUT $OUTPUT