From 45ee495efb9c0cc1b105888a64cf103b5b21721f Mon Sep 17 00:00:00 2001
From: Seth Michael Larson <seth.larson@elastic.co>
Date: Thu, 15 Oct 2020 16:22:15 -0500
Subject: [PATCH 1/2] Document DataFrame.groupby() and rename Field.index ->
 .column

---
 .../reference/api/eland.DataFrame.groupby.rst |  6 ++
 docs/source/reference/dataframe.rst           |  1 +
 eland/dataframe.py                            | 11 +--
 eland/field_mappings.py                       | 24 +++---
 eland/groupby.py                              | 28 +++---
 eland/operations.py                           | 85 +++++++++++--------
 eland/query.py                                | 56 ++++++++----
 eland/query_compiler.py                       | 13 ++-
 eland/tests/dataframe/test_groupby_pytest.py  | 52 +++++++++---
 9 files changed, 173 insertions(+), 103 deletions(-)
 create mode 100644 docs/source/reference/api/eland.DataFrame.groupby.rst

diff --git a/docs/source/reference/api/eland.DataFrame.groupby.rst b/docs/source/reference/api/eland.DataFrame.groupby.rst
new file mode 100644
index 00000000..e31ce1d3
--- /dev/null
+++ b/docs/source/reference/api/eland.DataFrame.groupby.rst
@@ -0,0 +1,6 @@
+eland.DataFrame.groupby
+=======================
+
+.. currentmodule:: eland
+
+.. automethod:: DataFrame.groupby
diff --git a/docs/source/reference/dataframe.rst b/docs/source/reference/dataframe.rst
index ffd9f32e..cc05497c 100644
--- a/docs/source/reference/dataframe.rst
+++ b/docs/source/reference/dataframe.rst
@@ -46,6 +46,7 @@ Function Application, GroupBy & Window
 
    DataFrame.agg
    DataFrame.aggregate
+   DataFrame.groupby
 
 .. _api.dataframe.stats:
 
diff --git a/eland/dataframe.py b/eland/dataframe.py
index 98293961..5eea8762 100644
--- a/eland/dataframe.py
+++ b/eland/dataframe.py
@@ -1442,13 +1442,10 @@ def groupby(
         by:
             column or list of columns used to groupby
             Currently accepts column or list of columns
-            TODO Implement other combinations of by similar to pandas
 
         dropna: default True
             If True, and if group keys contain NA values, NA values together with row/column will be dropped.
-            TODO Implement False
 
-        TODO Implement remainder of pandas arguments
         Returns
         -------
         GroupByDataFrame
@@ -1495,18 +1492,18 @@ def groupby(
         [63 rows x 2 columns]
         """
         if by is None:
-            raise TypeError("by parameter should be specified to groupby")
+            raise ValueError("by parameter should be specified to groupby")
         if isinstance(by, str):
             by = [by]
         if isinstance(by, (list, tuple)):
-            remaining_columns = set(by) - set(self._query_compiler.columns)
+            remaining_columns = sorted(set(by) - set(self._query_compiler.columns))
             if remaining_columns:
                 raise KeyError(
-                    f"Requested columns {remaining_columns} not in the DataFrame."
+                    f"Requested columns {repr(remaining_columns)[1:-1]} not in the DataFrame"
                 )
 
         return GroupByDataFrame(
-            by=by, query_compiler=self._query_compiler, dropna=dropna
+            by=by, query_compiler=self._query_compiler.copy(), dropna=dropna
         )
 
     def query(self, expr) -> "DataFrame":
diff --git a/eland/field_mappings.py b/eland/field_mappings.py
index de7001a3..56018276 100644
--- a/eland/field_mappings.py
+++ b/eland/field_mappings.py
@@ -64,7 +64,7 @@
 class Field(NamedTuple):
     """Holds all information on a particular field in the mapping"""
 
-    index: str
+    column: str
     es_field_name: str
     is_source: bool
     es_dtype: str
@@ -129,7 +129,7 @@ class FieldMappings:
     _mappings_capabilities: pandas.DataFrame
         A data frame summarising the capabilities of the index mapping
 
-        index                       - the eland display name
+        column (index)              - the eland display name
 
         es_field_name               - the Elasticsearch field name
         is_source                   - is top level field (i.e. not a multi-field sub-field)
@@ -537,13 +537,13 @@ def _generate_es_mappings(
         """
 
         mapping_props = {}
-        for field_name_name, dtype in dataframe.dtypes.iteritems():
-            if es_type_overrides is not None and field_name_name in es_type_overrides:
-                es_dtype = es_type_overrides[field_name_name]
+        for column, dtype in dataframe.dtypes.iteritems():
+            if es_type_overrides is not None and column in es_type_overrides:
+                es_dtype = es_type_overrides[column]
             else:
                 es_dtype = FieldMappings._pd_dtype_to_es_dtype(dtype)
 
-            mapping_props[field_name_name] = {"type": es_dtype}
+            mapping_props[column] = {"type": es_dtype}
 
         return {"mappings": {"properties": mapping_props}}
 
@@ -708,9 +708,9 @@ def all_source_fields(self) -> List[Field]:
 
         """
         source_fields: List[Field] = []
-        for index, row in self._mappings_capabilities.iterrows():
+        for column, row in self._mappings_capabilities.iterrows():
             row = row.to_dict()
-            row["index"] = index
+            row["column"] = column
             source_fields.append(Field(**row))
         return source_fields
 
@@ -731,13 +731,13 @@ def groupby_source_fields(self, by: List[str]) -> Tuple[List[Field], List[Field]
         groupby_fields: Dict[str, Field] = {}
         # groupby_fields: Union[List[Field], List[None]] = [None] * len(by)
         aggregatable_fields: List[Field] = []
-        for index_name, row in self._mappings_capabilities.iterrows():
+        for column, row in self._mappings_capabilities.iterrows():
             row = row.to_dict()
-            row["index"] = index_name
-            if index_name not in by:
+            row["column"] = column
+            if column not in by:
                 aggregatable_fields.append(Field(**row))
             else:
-                groupby_fields[index_name] = Field(**row)
+                groupby_fields[column] = Field(**row)
 
         # Maintain groupby order as given input
         return [groupby_fields[column] for column in by], aggregatable_fields
diff --git a/eland/groupby.py b/eland/groupby.py
index 515d351a..b9979dd8 100644
--- a/eland/groupby.py
+++ b/eland/groupby.py
@@ -24,7 +24,7 @@
 
 class GroupBy:
     """
-    This holds all the groupby base methods
+    Base class for calls to X.groupby([...])
 
     Parameters
     ----------
@@ -34,7 +34,6 @@ class GroupBy:
         Query compiler object
     dropna:
         default is true, drop None/NaT/NaN values while grouping
-
     """
 
     def __init__(
@@ -47,9 +46,8 @@ def __init__(
         self._dropna: bool = dropna
         self._by: List[str] = by
 
-    # numeric_only=True by default for all aggs because pandas does the same
     def mean(self, numeric_only: bool = True) -> "pd.DataFrame":
-        return self._query_compiler.groupby(
+        return self._query_compiler.aggs_groupby(
             by=self._by,
             pd_aggs=["mean"],
             dropna=self._dropna,
@@ -57,7 +55,7 @@ def mean(self, numeric_only: bool = True) -> "pd.DataFrame":
         )
 
     def var(self, numeric_only: bool = True) -> "pd.DataFrame":
-        return self._query_compiler.groupby(
+        return self._query_compiler.aggs_groupby(
             by=self._by,
             pd_aggs=["var"],
             dropna=self._dropna,
@@ -65,7 +63,7 @@ def var(self, numeric_only: bool = True) -> "pd.DataFrame":
         )
 
     def std(self, numeric_only: bool = True) -> "pd.DataFrame":
-        return self._query_compiler.groupby(
+        return self._query_compiler.aggs_groupby(
             by=self._by,
             pd_aggs=["std"],
             dropna=self._dropna,
@@ -73,7 +71,7 @@ def std(self, numeric_only: bool = True) -> "pd.DataFrame":
         )
 
     def mad(self, numeric_only: bool = True) -> "pd.DataFrame":
-        return self._query_compiler.groupby(
+        return self._query_compiler.aggs_groupby(
             by=self._by,
             pd_aggs=["mad"],
             dropna=self._dropna,
@@ -81,7 +79,7 @@ def mad(self, numeric_only: bool = True) -> "pd.DataFrame":
         )
 
     def median(self, numeric_only: bool = True) -> "pd.DataFrame":
-        return self._query_compiler.groupby(
+        return self._query_compiler.aggs_groupby(
             by=self._by,
             pd_aggs=["median"],
             dropna=self._dropna,
@@ -89,7 +87,7 @@ def median(self, numeric_only: bool = True) -> "pd.DataFrame":
         )
 
     def sum(self, numeric_only: bool = True) -> "pd.DataFrame":
-        return self._query_compiler.groupby(
+        return self._query_compiler.aggs_groupby(
             by=self._by,
             pd_aggs=["sum"],
             dropna=self._dropna,
@@ -97,7 +95,7 @@ def sum(self, numeric_only: bool = True) -> "pd.DataFrame":
         )
 
     def min(self, numeric_only: bool = True) -> "pd.DataFrame":
-        return self._query_compiler.groupby(
+        return self._query_compiler.aggs_groupby(
             by=self._by,
             pd_aggs=["min"],
             dropna=self._dropna,
@@ -105,7 +103,7 @@ def min(self, numeric_only: bool = True) -> "pd.DataFrame":
         )
 
     def max(self, numeric_only: bool = True) -> "pd.DataFrame":
-        return self._query_compiler.groupby(
+        return self._query_compiler.aggs_groupby(
             by=self._by,
             pd_aggs=["max"],
             dropna=self._dropna,
@@ -113,7 +111,7 @@ def max(self, numeric_only: bool = True) -> "pd.DataFrame":
         )
 
     def nunique(self) -> "pd.DataFrame":
-        return self._query_compiler.groupby(
+        return self._query_compiler.aggs_groupby(
             by=self._by,
             pd_aggs=["nunique"],
             dropna=self._dropna,
@@ -133,7 +131,6 @@ class GroupByDataFrame(GroupBy):
         Query compiler object
     dropna:
         default is true, drop None/NaT/NaN values while grouping
-
     """
 
     def aggregate(self, func: List[str], numeric_only: bool = False) -> "pd.DataFrame":
@@ -157,13 +154,12 @@ def aggregate(self, func: List[str], numeric_only: bool = False) -> "pd.DataFram
         """
         if isinstance(func, str):
             func = [func]
-        # numeric_only is by default False because pandas does the same
-        return self._query_compiler.groupby(
+        return self._query_compiler.aggs_groupby(
             by=self._by,
             pd_aggs=func,
             dropna=self._dropna,
             numeric_only=numeric_only,
-            is_agg=True,
+            is_dataframe_agg=True,
         )
 
     agg = aggregate
diff --git a/eland/operations.py b/eland/operations.py
index 4f64daac..63fcf899 100644
--- a/eland/operations.py
+++ b/eland/operations.py
@@ -271,7 +271,7 @@ def _metric_aggs(
         min    1.000205e+02        0.000000e+00   0.000000e+00               0
         """
 
-        return self._calculate_single_agg(
+        return self._unpack_metric_aggs(
             fields=fields,
             es_aggs=es_aggs,
             pd_aggs=pd_aggs,
@@ -415,7 +415,7 @@ def _hist_aggs(self, query_compiler, num_bins):
         df_weights = pd.DataFrame(data=weights)
         return df_bins, df_weights
 
-    def _calculate_single_agg(
+    def _unpack_metric_aggs(
         self,
         fields: List["Field"],
         es_aggs: Union[List[str], List[Tuple[str, str]]],
@@ -425,8 +425,9 @@ def _calculate_single_agg(
         is_dataframe_agg: bool = False,
     ):
         """
-        This method is used to calculate single agg calculations.
-        Common for both metric aggs and groupby aggs
+        This method unpacks the JSON response of metric aggregations.
+        This can be called either directly on an aggs query
+        or on an individual bucket within a composite aggregation.
 
         Parameters
         ----------
@@ -533,21 +534,21 @@ def _calculate_single_agg(
 
             # If numeric_only is True and We only have a NaN type field then we check for empty.
             if values:
-                results[field.index] = values if len(values) > 1 else values[0]
+                results[field.column] = values if len(values) > 1 else values[0]
 
         return results
 
-    def groupby(
+    def aggs_groupby(
         self,
         query_compiler: "QueryCompiler",
         by: List[str],
         pd_aggs: List[str],
         dropna: bool = True,
-        is_agg: bool = False,
+        is_dataframe_agg: bool = False,
         numeric_only: bool = True,
     ) -> pd.DataFrame:
         """
-        This method is used to construct groupby dataframe
+        This method is used to construct groupby aggregation dataframe
 
         Parameters
         ----------
@@ -560,7 +561,7 @@ def groupby(
         dropna:
             Drop None values if True.
             TODO Not yet implemented
-        is_agg:
+        is_dataframe_agg:
             Know if groupby with aggregation or single agg is called.
         numeric_only:
             return either numeric values or NaN/NaT
@@ -574,13 +575,13 @@ def groupby(
             by=by,
             pd_aggs=pd_aggs,
             dropna=dropna,
-            is_agg=is_agg,
+            is_dataframe_agg=is_dataframe_agg,
             numeric_only=numeric_only,
         )
 
         agg_df = pd.DataFrame(results, columns=results.keys()).set_index(by)
 
-        if is_agg:
+        if is_dataframe_agg:
             # Convert header columns to MultiIndex
             agg_df.columns = pd.MultiIndex.from_product([headers, pd_aggs])
 
@@ -592,7 +593,7 @@ def _groupby_aggs(
         by: List[str],
         pd_aggs: List[str],
         dropna: bool = True,
-        is_agg: bool = False,
+        is_dataframe_agg: bool = False,
         numeric_only: bool = True,
     ) -> Tuple[List[str], Dict[str, Any]]:
         """
@@ -609,8 +610,8 @@ def _groupby_aggs(
         dropna:
             Drop None values if True.
             TODO Not yet implemented
-        is_agg:
-            Know if groupby aggregation or single agg is called.
+        is_dataframe_agg:
+            True for groupby().agg([...]) calls, False for single-agg methods.
         numeric_only:
             return either numeric values or NaN/NaT
 
@@ -627,13 +628,15 @@ def _groupby_aggs(
                 f"Can not count field matches if size is set {size}"
             )
 
-        by, fields = query_compiler._mappings.groupby_source_fields(by=by)
+        by_fields, agg_fields = query_compiler._mappings.groupby_source_fields(by=by)
 
         # Used defaultdict to avoid initialization of columns with lists
         response: Dict[str, List[Any]] = defaultdict(list)
 
         if numeric_only:
-            fields = [field for field in fields if (field.is_numeric or field.is_bool)]
+            agg_fields = [
+                field for field in agg_fields if (field.is_numeric or field.is_bool)
+            ]
 
         body = Query(query_params.query)
 
@@ -641,11 +644,13 @@ def _groupby_aggs(
         es_aggs = self._map_pd_aggs_to_es_aggs(pd_aggs)
 
         # Construct Query
-        for b in by:
+        for by_field in by_fields:
             # groupby fields will be term aggregations
-            body.term_aggs(f"groupby_{b.index}", b.index)
+            body.composite_agg_bucket_terms(
+                name=f"groupby_{by_field.column}", field=by_field.es_field_name
+            )
 
-        for field in fields:
+        for field in agg_fields:
             for es_agg in es_aggs:
                 if not field.is_es_agg_compatible(es_agg):
                     continue
@@ -665,11 +670,11 @@ def _groupby_aggs(
                     )
 
         # Composite aggregation
-        body.composite_agg(
+        body.composite_agg_start(
             size=DEFAULT_PAGINATION_SIZE, name="groupby_buckets", dropna=dropna
         )
 
-        def response_generator() -> Generator[List[str], None, List[str]]:
+        def bucket_generator() -> Generator[List[str], None, List[str]]:
             """
             e.g.
             "aggregations": {
@@ -696,43 +701,51 @@ def response_generator() -> Generator[List[str], None, List[str]]:
                     size=0,
                     body=body.to_search_body(),
                 )
+
                 # Pagination Logic
-                if "after_key" in res["aggregations"]["groupby_buckets"]:
+                composite_buckets = res["aggregations"]["groupby_buckets"]
+                if "after_key" in composite_buckets:
 
                     # yield the bucket which contains the result
-                    yield res["aggregations"]["groupby_buckets"]["buckets"]
+                    yield composite_buckets["buckets"]
 
                     body.composite_agg_after_key(
                         name="groupby_buckets",
-                        after_key=res["aggregations"]["groupby_buckets"]["after_key"],
+                        after_key=composite_buckets["after_key"],
                     )
                 else:
-                    return res["aggregations"]["groupby_buckets"]["buckets"]
+                    return composite_buckets["buckets"]
 
-        for buckets in response_generator():
+        for buckets in bucket_generator():
             # We recieve response row-wise
             for bucket in buckets:
                 # groupby columns are added to result same way they are returned
-                for b in by:
-                    response[b.index].append(bucket["key"][f"groupby_{b.index}"])
+                for by_field in by_fields:
+                    bucket_key = bucket["key"][f"groupby_{by_field.column}"]
+
+                    # Datetimes always come back as integers, convert to pd.Timestamp()
+                    if by_field.is_timestamp and isinstance(bucket_key, int):
+                        bucket_key = pd.to_datetime(bucket_key, unit="ms")
 
-                agg_calculation = self._calculate_single_agg(
-                    fields=fields,
+                    response[by_field.column].append(bucket_key)
+
+                agg_calculation = self._unpack_metric_aggs(
+                    fields=agg_fields,
                     es_aggs=es_aggs,
                     pd_aggs=pd_aggs,
                     response={"aggregations": bucket},
                     numeric_only=numeric_only,
-                    is_dataframe_agg=is_agg,
+                    is_dataframe_agg=is_dataframe_agg,
                 )
                 # Process the calculated agg values to response
                 for key, value in agg_calculation.items():
-                    if not is_agg:
-                        response[key].append(value)
+                    if isinstance(value, list):
+                        for pd_agg, val in zip(pd_aggs, value):
+                            response[f"{key}_{pd_agg}"].append(val)
                     else:
-                        for i in range(0, len(pd_aggs)):
-                            response[f"{key}_{pd_aggs[i]}"].append(value[i])
+                        response[key].append(value)
 
-        return [field.index for field in fields], response
+        return [field.column for field in agg_fields], response
 
     @staticmethod
     def _map_pd_aggs_to_es_aggs(pd_aggs):
diff --git a/eland/query.py b/eland/query.py
index 6961bd1c..3c2ad1b5 100644
--- a/eland/query.py
+++ b/eland/query.py
@@ -38,14 +38,17 @@ def __init__(self, query: Optional["Query"] = None):
         # type defs
         self._query: BooleanFilter
         self._aggs: Dict[str, Any]
+        self._composite_aggs: Dict[str, Any]
 
         if query is None:
             self._query = BooleanFilter()
             self._aggs = {}
+            self._composite_aggs = {}
         else:
             # Deep copy the incoming query so we can change it
             self._query = deepcopy(query._query)
             self._aggs = deepcopy(query._aggs)
+            self._composite_aggs = deepcopy(query._composite_aggs)
 
     def exists(self, field: str, must: bool = True) -> None:
         """
@@ -136,9 +139,9 @@ def metric_aggs(self, name: str, func: str, field: str) -> None:
         agg = {func: {"field": field}}
         self._aggs[name] = agg
 
-    def term_aggs(self, name: str, field: str) -> None:
+    def composite_agg_bucket_terms(self, name: str, field: str) -> None:
         """
-        Add term agg e.g.
+        Add terms agg for composite aggregation
 
         "aggs": {
             "name": {
@@ -148,17 +151,36 @@ def term_aggs(self, name: str, field: str) -> None:
             }
         }
         """
-        agg = {"terms": {"field": field}}
-        self._aggs[name] = agg
+        self._composite_aggs[name] = {"terms": {"field": field}}
 
-    def composite_agg(
+    def composite_agg_bucket_date_histogram(
+        self,
+        name: str,
+        field: str,
+        calendar_interval: Optional[str] = None,
+        fixed_interval: Optional[str] = None,
+    ) -> None:
+        """Add a date_histogram source; exactly one interval kind is required."""
+        if (calendar_interval is None) == (fixed_interval is None):
+            raise ValueError(
+                "Exactly one of calendar_interval or fixed_interval must be specified"
+            )
+        if calendar_interval is not None:
+            interval = {"calendar_interval": calendar_interval}
+        else:
+            interval = {"fixed_interval": fixed_interval}
+        self._composite_aggs[name] = {"date_histogram": {"field": field, **interval}}
+
+    def composite_agg_start(
         self,
         name: str,
         size: int,
         dropna: bool = True,
     ) -> None:
         """
-        Add composite aggregation e.g.
+        Start a composite aggregation. This should be called
+        after calls to composite_agg_bucket_*(), etc.
+
         https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-composite-aggregation.html
 
         "aggs": {
@@ -190,22 +212,22 @@ def composite_agg(
 
         """
         sources: List[Dict[str, Dict[str, str]]] = []
-        aggregations: Dict[str, Dict[str, str]] = {}
 
-        for _name, agg in self._aggs.items():
-            if agg.get("terms"):
-                if not dropna:
-                    agg["terms"]["missing_bucket"] = "true"
-                sources.append({_name: agg})
-            else:
-                aggregations[_name] = agg
+        # Go through all composite source aggregations
+        # and apply dropna if needed.
+        for bucket_agg_name, bucket_agg in self._composite_aggs.items():
+            if bucket_agg.get("terms") and not dropna:
+                bucket_agg = bucket_agg.copy()
+                bucket_agg["terms"]["missing_bucket"] = "true"
+            sources.append({bucket_agg_name: bucket_agg})
+        self._composite_aggs.clear()
 
-        agg = {
+        aggs = {
             "composite": {"size": size, "sources": sources},
-            "aggregations": aggregations,
+            "aggregations": self._aggs.copy(),
         }
         self._aggs.clear()
-        self._aggs[name] = agg
+        self._aggs[name] = aggs
 
     def composite_agg_after_key(self, name: str, after_key: Dict[str, Any]) -> None:
         """
diff --git a/eland/query_compiler.py b/eland/query_compiler.py
index a40c41a1..f1a4240d 100644
--- a/eland/query_compiler.py
+++ b/eland/query_compiler.py
@@ -550,15 +550,22 @@ def nunique(self):
             self, ["nunique"], numeric_only=False
         )
 
-    def groupby(
+    def aggs_groupby(
         self,
         by: List[str],
         pd_aggs: List[str],
         dropna: bool = True,
-        is_agg: bool = False,
+        is_dataframe_agg: bool = False,
         numeric_only: bool = True,
     ) -> pd.DataFrame:
-        return self._operations.groupby(self, by, pd_aggs, dropna, is_agg, numeric_only)
+        return self._operations.aggs_groupby(
+            self,
+            by=by,
+            pd_aggs=pd_aggs,
+            dropna=dropna,
+            is_dataframe_agg=is_dataframe_agg,
+            numeric_only=numeric_only,
+        )
 
     def value_counts(self, es_size):
         return self._operations.value_counts(self, es_size)
diff --git a/eland/tests/dataframe/test_groupby_pytest.py b/eland/tests/dataframe/test_groupby_pytest.py
index a31da862..3cdd48a7 100644
--- a/eland/tests/dataframe/test_groupby_pytest.py
+++ b/eland/tests/dataframe/test_groupby_pytest.py
@@ -25,13 +25,10 @@
 
 class TestGroupbyDataFrame(TestData):
     funcs = ["max", "min", "mean", "sum"]
-    extended_funcs = ["median", "mad", "var", "std"]
     filter_data = [
         "AvgTicketPrice",
         "Cancelled",
         "dayOfWeek",
-        "timestamp",
-        "DestCountry",
     ]
 
     @pytest.mark.parametrize("numeric_only", [True])
@@ -41,14 +38,29 @@ def test_groupby_aggregate(self, numeric_only):
         pd_flights = self.pd_flights().filter(self.filter_data)
         ed_flights = self.ed_flights().filter(self.filter_data)
 
-        pd_groupby = pd_flights.groupby("Cancelled").agg(self.funcs, numeric_only)
-        ed_groupby = ed_flights.groupby("Cancelled").agg(self.funcs, numeric_only)
+        pd_groupby = pd_flights.groupby("Cancelled").agg(
+            self.funcs, numeric_only=numeric_only
+        )
+        ed_groupby = ed_flights.groupby("Cancelled").agg(
+            self.funcs, numeric_only=numeric_only
+        )
+
+        # checking only values because dtypes are checked in aggs tests
+        assert_frame_equal(pd_groupby, ed_groupby, check_exact=False, check_dtype=False)
+
+    @pytest.mark.parametrize("pd_agg", funcs)
+    def test_groupby_aggregate_single_aggs(self, pd_agg):
+        pd_flights = self.pd_flights().filter(self.filter_data)
+        ed_flights = self.ed_flights().filter(self.filter_data)
+
+        pd_groupby = pd_flights.groupby("Cancelled").agg([pd_agg], numeric_only=True)
+        ed_groupby = ed_flights.groupby("Cancelled").agg([pd_agg], numeric_only=True)
 
         # checking only values because dtypes are checked in aggs tests
         assert_frame_equal(pd_groupby, ed_groupby, check_exact=False, check_dtype=False)
 
     @pytest.mark.parametrize("pd_agg", ["max", "min", "mean", "sum", "median"])
-    def test_groupby_aggs_true(self, pd_agg):
+    def test_groupby_aggs_numeric_only_true(self, pd_agg):
         # Pandas has numeric_only  applicable for the above aggs with groupby only.
 
         pd_flights = self.pd_flights().filter(self.filter_data)
@@ -59,7 +71,7 @@ def test_groupby_aggs_true(self, pd_agg):
 
         # checking only values because dtypes are checked in aggs tests
         assert_frame_equal(
-            pd_groupby, ed_groupby, check_exact=False, check_dtype=False, rtol=4
+            pd_groupby, ed_groupby, check_exact=False, check_dtype=False, rtol=2
         )
 
     @pytest.mark.parametrize("pd_agg", ["mad", "var", "std"])
@@ -90,9 +102,9 @@ def test_groupby_aggs_nunique(self, pd_agg):
         )
 
     @pytest.mark.parametrize("pd_agg", ["max", "min", "mean", "median"])
-    def test_groupby_aggs_false(self, pd_agg):
-        pd_flights = self.pd_flights().filter(self.filter_data)
-        ed_flights = self.ed_flights().filter(self.filter_data)
+    def test_groupby_aggs_numeric_only_false(self, pd_agg):
+        pd_flights = self.pd_flights().filter(self.filter_data + ["timestamp"])
+        ed_flights = self.ed_flights().filter(self.filter_data + ["timestamp"])
 
         # pandas numeric_only=False, matches with Eland numeric_only=None
         pd_groupby = getattr(pd_flights.groupby("Cancelled"), pd_agg)(
@@ -114,14 +126,30 @@ def test_groupby_columns(self):
         ed_flights = self.ed_flights().filter(self.filter_data)
 
         match = "by parameter should be specified to groupby"
-        with pytest.raises(TypeError, match=match):
+        with pytest.raises(ValueError, match=match):
             ed_flights.groupby(None).mean()
 
         by = ["ABC", "Cancelled"]
-        match = "Requested columns {'ABC'} not in the DataFrame."
+        match = "Requested columns 'ABC' not in the DataFrame"
         with pytest.raises(KeyError, match=match):
             ed_flights.groupby(by).mean()
 
+    @pytest.mark.parametrize(
+        "by",
+        ["timestamp", "dayOfWeek", "Carrier", "Cancelled", ["dayOfWeek", "Carrier"]],
+    )
+    def test_groupby_different_dtypes(self, by):
+        columns = ["dayOfWeek", "Carrier", "timestamp", "Cancelled"]
+        pd_flights = self.pd_flights_small().filter(columns)
+        ed_flights = self.ed_flights_small().filter(columns)
+
+        pd_groupby = pd_flights.groupby(by).nunique()
+        ed_groupby = ed_flights.groupby(by).nunique()
+
+        assert list(pd_groupby.index) == list(ed_groupby.index)
+        assert pd_groupby.index.dtype == ed_groupby.index.dtype
+        assert list(pd_groupby.columns) == list(ed_groupby.columns)
+
     def test_groupby_dropna(self):
         # TODO Add tests once dropna is implemeted
         pass

From b87e8c0a8c829194e841bc346c698aaa5769bbd1 Mon Sep 17 00:00:00 2001
From: Seth Michael Larson <seth.larson@elastic.co>
Date: Thu, 15 Oct 2020 16:46:21 -0500
Subject: [PATCH 2/2] Add isort, rename Nox session to 'format'

---
 CONTRIBUTING.md                               |  2 +-
 docs/source/conf.py                           |  8 ++---
 eland/__init__.py                             | 22 ++++++-------
 eland/actions.py                              |  4 +--
 eland/arithmetics.py                          |  2 +-
 eland/common.py                               |  2 +-
 eland/dataframe.py                            | 10 +++---
 eland/etl.py                                  |  9 +++---
 eland/field_mappings.py                       | 27 ++++++++--------
 eland/filter.py                               |  2 +-
 eland/groupby.py                              |  3 +-
 eland/index.py                                |  3 +-
 eland/ml/__init__.py                          |  2 +-
 eland/ml/_model_serializer.py                 |  2 +-
 eland/ml/ml_model.py                          | 25 ++++++++++-----
 eland/ml/transformers/__init__.py             | 14 ++++----
 eland/ml/transformers/base.py                 |  3 +-
 eland/ml/transformers/lightgbm.py             |  9 +++---
 eland/ml/transformers/sklearn.py              | 15 ++++++---
 eland/ml/transformers/xgboost.py              | 10 +++---
 eland/ndframe.py                              |  5 +--
 eland/operations.py                           | 32 +++++++++----------
 eland/plotting/__init__.py                    |  5 +--
 eland/plotting/_core.py                       |  2 +-
 eland/plotting/_matplotlib/__init__.py        |  5 +--
 eland/plotting/_matplotlib/hist.py            |  1 +
 eland/query.py                                | 13 ++------
 eland/query_compiler.py                       | 12 +++----
 eland/series.py                               | 15 +++++----
 eland/tasks.py                                |  8 ++---
 eland/tests/__init__.py                       |  1 +
 eland/tests/common.py                         |  4 +--
 eland/tests/conftest.py                       | 23 +++++++------
 eland/tests/dataframe/test_aggs_pytest.py     |  3 +-
 .../dataframe/test_big_mapping_pytest.py      |  3 +-
 eland/tests/dataframe/test_datetime_pytest.py | 10 +++---
 eland/tests/dataframe/test_dtypes_pytest.py   |  1 +
 eland/tests/dataframe/test_es_query_pytest.py |  4 +--
 eland/tests/dataframe/test_filter_pytest.py   |  1 +
 eland/tests/dataframe/test_groupby_pytest.py  |  3 +-
 .../tests/dataframe/test_head_tail_pytest.py  |  3 +-
 eland/tests/dataframe/test_info_pytest.py     |  2 --
 eland/tests/dataframe/test_init_pytest.py     |  3 +-
 eland/tests/dataframe/test_metrics_pytest.py  |  6 ++--
 eland/tests/dataframe/test_query_pytest.py    |  4 +--
 eland/tests/dataframe/test_sample_pytest.py   |  2 +-
 .../dataframe/test_select_dtypes_pytest.py    |  3 +-
 eland/tests/dataframe/test_to_csv_pytest.py   |  6 ++--
 eland/tests/dataframe/test_utils_pytest.py    |  3 +-
 eland/tests/etl/test_pandas_to_eland.py       |  6 ++--
 .../test_aggregatables_pytest.py              |  2 +-
 .../field_mappings/test_datetime_pytest.py    |  3 +-
 .../test_field_name_pd_dtype_pytest.py        |  3 +-
 .../test_get_field_names_pytest.py            |  2 +-
 .../test_metric_source_fields_pytest.py       |  2 +-
 .../test_scripted_fields_pytest.py            |  2 +-
 .../tests/ml/test_imported_ml_model_pytest.py |  9 +++---
 .../tests/operators/test_operators_pytest.py  | 10 +++---
 eland/tests/series/test_describe_pytest.py    |  1 +
 eland/tests/series/test_dtype_pytest.py       |  6 ++--
 eland/tests/series/test_filter_pytest.py      |  4 +--
 eland/tests/series/test_head_tail_pytest.py   |  6 ++--
 eland/tests/series/test_metrics_pytest.py     |  7 ++--
 eland/tests/series/test_na_pytest.py          |  3 +-
 eland/tests/series/test_name_pytest.py        |  6 ++--
 eland/tests/series/test_rename_pytest.py      |  6 ++--
 eland/tests/series/test_repr_pytest.py        |  3 +-
 eland/tests/series/test_sample_pytest.py      |  6 ++--
 eland/tests/setup_tests.py                    | 15 ++++-----
 eland/utils.py                                |  6 ++--
 noxfile.py                                    | 12 ++++---
 setup.cfg                                     |  2 ++
 setup.py                                      |  2 +-
 utils/generate-supported-apis.py              |  7 ++--
 utils/license-headers.py                      |  3 +-
 75 files changed, 250 insertions(+), 241 deletions(-)
 create mode 100644 setup.cfg

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 7d85db3b..e5c0889a 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -160,7 +160,7 @@ currently using a minimum version of PyCharm 2019.2.4.
 -   Run `pytest --nbval` to validate install
 -   To test specific versions of Python use `nox -s test-3.8`
 -   To run the automatic formatter and check for lint issues
-    run `nox -s blacken`
+    run `nox -s format`
 
 
 ### Documentation
diff --git a/docs/source/conf.py b/docs/source/conf.py
index 65560de3..2ad50116 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -23,13 +23,13 @@
 
 # -- Path setup --------------------------------------------------------------
 
+import datetime
+import os
+import sys
+
 # If extensions (or modules to document with autodoc) are in another directory,
 # add these directories to sys.path here. If the directory is relative to the
 # documentation root, use os.path.abspath to make it absolute, like shown here.
-#
-import os
-import sys
-import datetime
 
 sys.path.insert(0, os.path.abspath("../sphinxext"))
 sys.path.extend(
diff --git a/eland/__init__.py b/eland/__init__.py
index 0c9ba02c..640b18a9 100644
--- a/eland/__init__.py
+++ b/eland/__init__.py
@@ -15,22 +15,22 @@
 #  specific language governing permissions and limitations
 #  under the License.
 
-from eland._version import (  # noqa: F401
-    __title__,
-    __description__,
-    __url__,
-    __version__,
+from ._version import (  # noqa: F401
     __author__,
     __author_email__,
+    __description__,
     __maintainer__,
     __maintainer_email__,
+    __title__,
+    __url__,
+    __version__,
 )
-from eland.common import SortOrder
-from eland.index import Index
-from eland.ndframe import NDFrame
-from eland.series import Series
-from eland.dataframe import DataFrame
-from eland.etl import pandas_to_eland, eland_to_pandas, read_es, read_csv, csv_to_eland
+from .common import SortOrder
+from .dataframe import DataFrame
+from .etl import csv_to_eland, eland_to_pandas, pandas_to_eland, read_csv, read_es
+from .index import Index
+from .ndframe import NDFrame
+from .series import Series
 
 __all__ = [
     "DataFrame",
diff --git a/eland/actions.py b/eland/actions.py
index 6da1751a..d9f43069 100644
--- a/eland/actions.py
+++ b/eland/actions.py
@@ -16,9 +16,9 @@
 #  under the License.
 
 from abc import ABC, abstractmethod
-from typing import List, Optional, TYPE_CHECKING, Union
-from eland import SortOrder
+from typing import TYPE_CHECKING, List, Optional, Union
 
+from eland import SortOrder
 
 if TYPE_CHECKING:
     import pandas as pd  # type: ignore
diff --git a/eland/arithmetics.py b/eland/arithmetics.py
index e8d923b0..b9d1b80f 100644
--- a/eland/arithmetics.py
+++ b/eland/arithmetics.py
@@ -17,7 +17,7 @@
 
 from abc import ABC, abstractmethod
 from io import StringIO
-from typing import Union, List, TYPE_CHECKING, Any
+from typing import TYPE_CHECKING, Any, List, Union
 
 import numpy as np  # type: ignore
 
diff --git a/eland/common.py b/eland/common.py
index 772457ab..647d598e 100644
--- a/eland/common.py
+++ b/eland/common.py
@@ -18,7 +18,7 @@
 import re
 import warnings
 from enum import Enum
-from typing import Union, List, Tuple, cast, Callable, Any, Optional, Dict
+from typing import Any, Callable, Dict, List, Optional, Tuple, Union, cast
 
 import numpy as np  # type: ignore
 import pandas as pd  # type: ignore
diff --git a/eland/dataframe.py b/eland/dataframe.py
index 5eea8762..89e110c6 100644
--- a/eland/dataframe.py
+++ b/eland/dataframe.py
@@ -15,11 +15,11 @@
 #  specific language governing permissions and limitations
 #  under the License.
 
+import re
 import sys
 import warnings
 from io import StringIO
-import re
-from typing import List, Optional, Sequence, Union, Tuple
+from typing import List, Optional, Sequence, Tuple, Union
 
 import numpy as np
 import pandas as pd
@@ -34,12 +34,12 @@
 from pandas.util._validators import validate_bool_kwarg
 
 import eland.plotting as gfx
-from eland.ndframe import NDFrame
-from eland.series import Series
 from eland.common import DEFAULT_NUM_ROWS_DISPLAYED, docstring_parameter
 from eland.filter import BooleanFilter
-from eland.utils import deprecated_api, is_valid_attr_name
 from eland.groupby import GroupByDataFrame
+from eland.ndframe import NDFrame
+from eland.series import Series
+from eland.utils import deprecated_api, is_valid_attr_name
 
 
 class DataFrame(NDFrame):
diff --git a/eland/etl.py b/eland/etl.py
index e24b3967..00e6c0f7 100644
--- a/eland/etl.py
+++ b/eland/etl.py
@@ -16,17 +16,18 @@
 #  under the License.
 
 import csv
-from typing import Generator, Union, List, Tuple, Optional, Mapping, Dict, Any
 from collections import deque
+from typing import Any, Dict, Generator, List, Mapping, Optional, Tuple, Union
+
 import pandas as pd  # type: ignore
+from elasticsearch import Elasticsearch  # type: ignore
+from elasticsearch.helpers import parallel_bulk  # type: ignore
 from pandas.io.parsers import _c_parser_defaults  # type: ignore
 
 from eland import DataFrame
+from eland.common import DEFAULT_CHUNK_SIZE, ensure_es_client
 from eland.field_mappings import FieldMappings, verify_mapping_compatibility
-from eland.common import ensure_es_client, DEFAULT_CHUNK_SIZE
 from eland.utils import deprecated_api
-from elasticsearch import Elasticsearch  # type: ignore
-from elasticsearch.helpers import parallel_bulk  # type: ignore
 
 
 @deprecated_api("eland.DataFrame()")
diff --git a/eland/field_mappings.py b/eland/field_mappings.py
index 56018276..7754d298 100644
--- a/eland/field_mappings.py
+++ b/eland/field_mappings.py
@@ -16,31 +16,32 @@
 #  under the License.
 
 import warnings
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    Dict,
+    List,
+    Mapping,
+    NamedTuple,
+    Optional,
+    Set,
+    Tuple,
+)
 
 import numpy as np
 import pandas as pd
 from pandas.core.dtypes.common import (
-    is_float_dtype,
     is_bool_dtype,
-    is_integer_dtype,
     is_datetime_or_timedelta_dtype,
+    is_float_dtype,
+    is_integer_dtype,
     is_string_dtype,
 )
 from pandas.core.dtypes.inference import is_list_like
-from typing import (
-    NamedTuple,
-    Optional,
-    Mapping,
-    Dict,
-    Any,
-    Tuple,
-    TYPE_CHECKING,
-    List,
-    Set,
-)
 
 if TYPE_CHECKING:
     from elasticsearch import Elasticsearch
+
     from eland import DataFrame
 
 
diff --git a/eland/filter.py b/eland/filter.py
index ea77929e..3cc5806d 100644
--- a/eland/filter.py
+++ b/eland/filter.py
@@ -17,7 +17,7 @@
 
 # Originally based on code in MIT-licensed pandasticsearch filters
 
-from typing import Dict, Any, List, Optional, Union, cast
+from typing import Any, Dict, List, Optional, Union, cast
 
 
 class BooleanFilter:
diff --git a/eland/groupby.py b/eland/groupby.py
index b9979dd8..3679a8c1 100644
--- a/eland/groupby.py
+++ b/eland/groupby.py
@@ -15,7 +15,8 @@
 #  specific language governing permissions and limitations
 #  under the License.
 
-from typing import List, TYPE_CHECKING
+from typing import TYPE_CHECKING, List
+
 from eland.query_compiler import QueryCompiler
 
 if TYPE_CHECKING:
diff --git a/eland/index.py b/eland/index.py
index 08588f77..3a130adc 100644
--- a/eland/index.py
+++ b/eland/index.py
@@ -15,7 +15,8 @@
 #  specific language governing permissions and limitations
 #  under the License.
 
-from typing import Optional, TextIO, TYPE_CHECKING
+from typing import TYPE_CHECKING, Optional, TextIO
+
 from eland.utils import deprecated_api
 
 if TYPE_CHECKING:
diff --git a/eland/ml/__init__.py b/eland/ml/__init__.py
index 5326bc32..3b650620 100644
--- a/eland/ml/__init__.py
+++ b/eland/ml/__init__.py
@@ -15,7 +15,7 @@
 #  specific language governing permissions and limitations
 #  under the License.
 
-from eland.ml.ml_model import MLModel, ImportedMLModel
+from eland.ml.ml_model import ImportedMLModel, MLModel
 
 __all__ = [
     "MLModel",
diff --git a/eland/ml/_model_serializer.py b/eland/ml/_model_serializer.py
index 976a85c2..da45a00a 100644
--- a/eland/ml/_model_serializer.py
+++ b/eland/ml/_model_serializer.py
@@ -19,7 +19,7 @@
 import gzip
 import json
 from abc import ABC
-from typing import Sequence, Dict, Any, Optional, List
+from typing import Any, Dict, List, Optional, Sequence
 
 
 def add_if_exists(d: Dict[str, Any], k: str, v: Any) -> None:
diff --git a/eland/ml/ml_model.py b/eland/ml/ml_model.py
index 50d64734..3ca08948 100644
--- a/eland/ml/ml_model.py
+++ b/eland/ml/ml_model.py
@@ -15,31 +15,40 @@
 #  specific language governing permissions and limitations
 #  under the License.
 
-from typing import List, Union, cast, Optional, Dict, TYPE_CHECKING, Any, Tuple
 import warnings
-import numpy as np  # type: ignore
+from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union, cast
+
 import elasticsearch  # type: ignore
-from .common import TYPE_REGRESSION, TYPE_CLASSIFICATION
-from .transformers import get_model_transformer
+import numpy as np  # type: ignore
+
 from eland.common import ensure_es_client, es_version
 from eland.utils import deprecated_api
 
+from .common import TYPE_CLASSIFICATION, TYPE_REGRESSION
+from .transformers import get_model_transformer
+
 if TYPE_CHECKING:
     from elasticsearch import Elasticsearch  # noqa: F401
 
     # Try importing each ML lib separately so mypy users don't have to
     # have both installed to use type-checking.
     try:
-        from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor  # type: ignore # noqa: F401
-        from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor  # type: ignore # noqa: F401
+        from sklearn.ensemble import (  # type: ignore # noqa: F401
+            RandomForestClassifier,
+            RandomForestRegressor,
+        )
+        from sklearn.tree import (  # type: ignore # noqa: F401
+            DecisionTreeClassifier,
+            DecisionTreeRegressor,
+        )
     except ImportError:
         pass
     try:
-        from xgboost import XGBRegressor, XGBClassifier  # type: ignore # noqa: F401
+        from xgboost import XGBClassifier, XGBRegressor  # type: ignore # noqa: F401
     except ImportError:
         pass
     try:
-        from lightgbm import LGBMRegressor, LGBMClassifier  # type: ignore # noqa: F401
+        from lightgbm import LGBMClassifier, LGBMRegressor  # type: ignore # noqa: F401
     except ImportError:
         pass
 
diff --git a/eland/ml/transformers/__init__.py b/eland/ml/transformers/__init__.py
index 76a72bbf..7c56f39a 100644
--- a/eland/ml/transformers/__init__.py
+++ b/eland/ml/transformers/__init__.py
@@ -17,8 +17,8 @@
 
 import inspect
 from typing import Any, Dict, Type
-from .base import ModelTransformer
 
+from .base import ModelTransformer
 
 __all__ = ["get_model_transformer"]
 _MODEL_TRANSFORMERS: Dict[type, Type[ModelTransformer]] = {}
@@ -42,13 +42,13 @@ def get_model_transformer(model: Any, **kwargs: Any) -> ModelTransformer:
 
 
 try:
+    from .sklearn import _MODEL_TRANSFORMERS as _SKLEARN_MODEL_TRANSFORMERS
     from .sklearn import (
         SKLearnDecisionTreeTransformer,
         SKLearnForestClassifierTransformer,
         SKLearnForestRegressorTransformer,
         SKLearnForestTransformer,
         SKLearnTransformer,
-        _MODEL_TRANSFORMERS as _SKLEARN_MODEL_TRANSFORMERS,
     )
 
     __all__ += [
@@ -63,13 +63,13 @@ def get_model_transformer(model: Any, **kwargs: Any) -> ModelTransformer:
     pass
 
 try:
+    from .xgboost import _MODEL_TRANSFORMERS as _XGBOOST_MODEL_TRANSFORMERS
     from .xgboost import (
-        XGBoostClassifierTransformer,
         XGBClassifier,
+        XGBoostClassifierTransformer,
         XGBoostForestTransformer,
         XGBoostRegressorTransformer,
         XGBRegressor,
-        _MODEL_TRANSFORMERS as _XGBOOST_MODEL_TRANSFORMERS,
     )
 
     __all__ += [
@@ -84,13 +84,13 @@ def get_model_transformer(model: Any, **kwargs: Any) -> ModelTransformer:
     pass
 
 try:
+    from .lightgbm import _MODEL_TRANSFORMERS as _LIGHTGBM_MODEL_TRANSFORMERS
     from .lightgbm import (
-        LGBMRegressor,
         LGBMClassifier,
+        LGBMClassifierTransformer,
         LGBMForestTransformer,
+        LGBMRegressor,
         LGBMRegressorTransformer,
-        LGBMClassifierTransformer,
-        _MODEL_TRANSFORMERS as _LIGHTGBM_MODEL_TRANSFORMERS,
     )
 
     __all__ += [
diff --git a/eland/ml/transformers/base.py b/eland/ml/transformers/base.py
index d251108e..a9f4fdc8 100644
--- a/eland/ml/transformers/base.py
+++ b/eland/ml/transformers/base.py
@@ -15,7 +15,8 @@
 #  specific language governing permissions and limitations
 #  under the License.
 
-from typing import Sequence, Optional, Any
+from typing import Any, Optional, Sequence
+
 from .._model_serializer import ModelSerializer
 
 
diff --git a/eland/ml/transformers/lightgbm.py b/eland/ml/transformers/lightgbm.py
index 218249c8..8e96957e 100644
--- a/eland/ml/transformers/lightgbm.py
+++ b/eland/ml/transformers/lightgbm.py
@@ -15,15 +15,16 @@
 #  specific language governing permissions and limitations
 #  under the License.
 
-from typing import Optional, List, Dict, Any, Type
-from .base import ModelTransformer
+from typing import Any, Dict, List, Optional, Type
+
 from .._model_serializer import Ensemble, Tree, TreeNode
-from ..common import TYPE_CLASSIFICATION, TYPE_REGRESSION
 from .._optional import import_optional_dependency
+from ..common import TYPE_CLASSIFICATION, TYPE_REGRESSION
+from .base import ModelTransformer
 
 import_optional_dependency("lightgbm", on_version="warn")
 
-from lightgbm import Booster, LGBMRegressor, LGBMClassifier  # type: ignore
+from lightgbm import Booster, LGBMClassifier, LGBMRegressor  # type: ignore
 
 
 def transform_decider(decider: str) -> str:
diff --git a/eland/ml/transformers/sklearn.py b/eland/ml/transformers/sklearn.py
index 79c999e4..303f5ec4 100644
--- a/eland/ml/transformers/sklearn.py
+++ b/eland/ml/transformers/sklearn.py
@@ -15,16 +15,21 @@
 #  specific language governing permissions and limitations
 #  under the License.
 
+from typing import Any, Dict, Optional, Sequence, Tuple, Type, Union
+
 import numpy as np  # type: ignore
-from typing import Optional, Sequence, Union, Dict, Any, Type, Tuple
-from .base import ModelTransformer
-from ..common import TYPE_CLASSIFICATION, TYPE_REGRESSION
-from .._optional import import_optional_dependency
+
 from .._model_serializer import Ensemble, Tree, TreeNode
+from .._optional import import_optional_dependency
+from ..common import TYPE_CLASSIFICATION, TYPE_REGRESSION
+from .base import ModelTransformer
 
 import_optional_dependency("sklearn", on_version="warn")
 
-from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor  # type: ignore
+from sklearn.ensemble import (  # type: ignore
+    RandomForestClassifier,
+    RandomForestRegressor,
+)
 from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor  # type: ignore
 from sklearn.utils.validation import check_is_fitted  # type: ignore
 
diff --git a/eland/ml/transformers/xgboost.py b/eland/ml/transformers/xgboost.py
index 4adb4d20..5d4e85ea 100644
--- a/eland/ml/transformers/xgboost.py
+++ b/eland/ml/transformers/xgboost.py
@@ -16,16 +16,18 @@
 #  under the License.
 
 import re
-from typing import Optional, List, Dict, Any, Type
-from .base import ModelTransformer
+from typing import Any, Dict, List, Optional, Type
+
 import pandas as pd  # type: ignore
+
 from .._model_serializer import Ensemble, Tree, TreeNode
-from ..common import TYPE_CLASSIFICATION, TYPE_REGRESSION
 from .._optional import import_optional_dependency
+from ..common import TYPE_CLASSIFICATION, TYPE_REGRESSION
+from .base import ModelTransformer
 
 import_optional_dependency("xgboost", on_version="warn")
 
-from xgboost import Booster, XGBRegressor, XGBClassifier  # type: ignore
+from xgboost import Booster, XGBClassifier, XGBRegressor  # type: ignore
 
 
 class XGBoostForestTransformer(ModelTransformer):
diff --git a/eland/ndframe.py b/eland/ndframe.py
index e55f5576..17ef2886 100644
--- a/eland/ndframe.py
+++ b/eland/ndframe.py
@@ -17,10 +17,11 @@
 
 import sys
 from abc import ABC, abstractmethod
-from typing import TYPE_CHECKING, Tuple, Optional
+from typing import TYPE_CHECKING, Optional, Tuple
+
 import pandas as pd
-from eland.query_compiler import QueryCompiler
 
+from eland.query_compiler import QueryCompiler
 
 if TYPE_CHECKING:
     from eland.index import Index
diff --git a/eland/operations.py b/eland/operations.py
index 63fcf899..70251560 100644
--- a/eland/operations.py
+++ b/eland/operations.py
@@ -17,49 +17,49 @@
 
 import copy
 import warnings
+from collections import defaultdict
 from typing import (
+    TYPE_CHECKING,
+    Any,
+    Dict,
     Generator,
+    List,
     Optional,
     Sequence,
     Tuple,
-    List,
-    Dict,
-    Any,
-    TYPE_CHECKING,
     Union,
 )
 
 import numpy as np
 import pandas as pd
-from collections import defaultdict
 from elasticsearch.helpers import scan
 
-from eland.index import Index
+from eland.actions import PostProcessingAction, SortFieldAction
 from eland.common import (
-    SortOrder,
     DEFAULT_CSV_BATCH_OUTPUT_SIZE,
     DEFAULT_ES_MAX_RESULT_WINDOW,
-    elasticsearch_date_to_pandas_date,
-    build_pd_series,
     DEFAULT_PAGINATION_SIZE,
+    SortOrder,
+    build_pd_series,
+    elasticsearch_date_to_pandas_date,
 )
+from eland.index import Index
 from eland.query import Query
-from eland.actions import PostProcessingAction, SortFieldAction
 from eland.tasks import (
-    HeadTask,
     RESOLVED_TASK_TYPE,
-    TailTask,
-    SampleTask,
-    BooleanFilterTask,
     ArithmeticOpFieldsTask,
-    QueryTermsTask,
+    BooleanFilterTask,
+    HeadTask,
     QueryIdsTask,
+    QueryTermsTask,
+    SampleTask,
     SizeTask,
+    TailTask,
 )
 
 if TYPE_CHECKING:
-    from eland.query_compiler import QueryCompiler
     from eland.field_mappings import Field
+    from eland.query_compiler import QueryCompiler
 
 
 class QueryParams:
diff --git a/eland/plotting/__init__.py b/eland/plotting/__init__.py
index 61c16a05..571191f9 100644
--- a/eland/plotting/__init__.py
+++ b/eland/plotting/__init__.py
@@ -22,10 +22,7 @@
 but only supporting a subset of plotting methods (for now).
 """
 
-from eland.plotting._core import (
-    ed_hist_frame,
-    ed_hist_series,
-)
+from eland.plotting._core import ed_hist_frame, ed_hist_series
 
 __all__ = [
     "ed_hist_frame",
diff --git a/eland/plotting/_core.py b/eland/plotting/_core.py
index 26951bb0..2f402fed 100644
--- a/eland/plotting/_core.py
+++ b/eland/plotting/_core.py
@@ -15,7 +15,7 @@
 #  specific language governing permissions and limitations
 #  under the License.
 
-from eland.plotting._matplotlib.hist import hist_series, hist_frame
+from eland.plotting._matplotlib.hist import hist_frame, hist_series
 
 
 def ed_hist_series(
diff --git a/eland/plotting/_matplotlib/__init__.py b/eland/plotting/_matplotlib/__init__.py
index 3af30269..15d74ec8 100644
--- a/eland/plotting/_matplotlib/__init__.py
+++ b/eland/plotting/_matplotlib/__init__.py
@@ -22,10 +22,7 @@
 but only supporting a subset of plotting methods (for now).
 """
 
-from eland.plotting._matplotlib.hist import (
-    hist_frame,
-    hist_series,
-)
+from eland.plotting._matplotlib.hist import hist_frame, hist_series
 
 __all__ = [
     "hist_frame",
diff --git a/eland/plotting/_matplotlib/hist.py b/eland/plotting/_matplotlib/hist.py
index 197d9b95..f3b33419 100644
--- a/eland/plotting/_matplotlib/hist.py
+++ b/eland/plotting/_matplotlib/hist.py
@@ -19,6 +19,7 @@
 from pandas.core.dtypes.generic import ABCIndexClass
 from pandas.plotting._matplotlib import converter
 from pandas.plotting._matplotlib.tools import _flatten, _set_ticks_props, _subplots
+
 from eland.utils import try_sort
 
 
diff --git a/eland/query.py b/eland/query.py
index 3c2ad1b5..8d55fa25 100644
--- a/eland/query.py
+++ b/eland/query.py
@@ -17,16 +17,9 @@
 
 import warnings
 from copy import deepcopy
-from typing import Optional, Dict, List, Any
-
-from eland.filter import (
-    RandomScoreFilter,
-    BooleanFilter,
-    NotNull,
-    IsNull,
-    IsIn,
-    Rlike,
-)
+from typing import Any, Dict, List, Optional
+
+from eland.filter import BooleanFilter, IsIn, IsNull, NotNull, RandomScoreFilter, Rlike
 
 
 class Query:
diff --git a/eland/query_compiler.py b/eland/query_compiler.py
index f1a4240d..956e402d 100644
--- a/eland/query_compiler.py
+++ b/eland/query_compiler.py
@@ -17,20 +17,20 @@
 
 import copy
 from datetime import datetime
-from typing import Optional, Sequence, TYPE_CHECKING, List
+from typing import TYPE_CHECKING, List, Optional, Sequence
 
 import numpy as np  # type: ignore
 import pandas as pd  # type: ignore
 
-from eland.field_mappings import FieldMappings
-from eland.filter import QueryFilter
-from eland.operations import Operations
-from eland.index import Index
 from eland.common import (
-    ensure_es_client,
     DEFAULT_PROGRESS_REPORTING_NUM_ROWS,
     elasticsearch_date_to_pandas_date,
+    ensure_es_client,
 )
+from eland.field_mappings import FieldMappings
+from eland.filter import QueryFilter
+from eland.index import Index
+from eland.operations import Operations
 
 if TYPE_CHECKING:
     from .tasks import ArithmeticOpFieldsTask  # noqa: F401
diff --git a/eland/series.py b/eland/series.py
index fcf21738..3004693b 100644
--- a/eland/series.py
+++ b/eland/series.py
@@ -35,33 +35,34 @@
 import warnings
 from collections.abc import Collection
 from io import StringIO
-from typing import Optional, Union, Sequence, Any, Tuple, TYPE_CHECKING
+from typing import TYPE_CHECKING, Any, Optional, Sequence, Tuple, Union
 
 import numpy as np
 import pandas as pd
 from pandas.io.common import _expand_user, stringify_path
 
 import eland.plotting
-from eland import NDFrame
-from eland.arithmetics import ArithmeticSeries, ArithmeticString, ArithmeticNumber
+from eland.arithmetics import ArithmeticNumber, ArithmeticSeries, ArithmeticString
 from eland.common import DEFAULT_NUM_ROWS_DISPLAYED, docstring_parameter
 from eland.filter import (
     BooleanFilter,
-    NotFilter,
     Equal,
     Greater,
-    Less,
     GreaterEqual,
-    LessEqual,
-    ScriptFilter,
     IsIn,
     IsNull,
+    Less,
+    LessEqual,
+    NotFilter,
     NotNull,
+    ScriptFilter,
 )
+from eland.ndframe import NDFrame
 from eland.utils import deprecated_api, to_list
 
 if TYPE_CHECKING:  # type: ignore
     from elasticsearch import Elasticsearch  # noqa: F401
+
     from eland.query_compiler import QueryCompiler  # noqa: F401
 
 
diff --git a/eland/tasks.py b/eland/tasks.py
index f2b5b397..fff7ec02 100644
--- a/eland/tasks.py
+++ b/eland/tasks.py
@@ -16,18 +16,18 @@
 #  under the License.
 
 from abc import ABC, abstractmethod
-from typing import TYPE_CHECKING, List, Any, Tuple
+from typing import TYPE_CHECKING, Any, List, Tuple
 
 from eland import SortOrder
-from eland.actions import HeadAction, TailAction, SortIndexAction
+from eland.actions import HeadAction, SortIndexAction, TailAction
 from eland.arithmetics import ArithmeticSeries
 
 if TYPE_CHECKING:
     from .actions import PostProcessingAction  # noqa: F401
     from .filter import BooleanFilter  # noqa: F401
-    from .query_compiler import QueryCompiler  # noqa: F401
-    from .operations import QueryParams  # noqa: F401
     from .index import Index  # noqa: F401
+    from .operations import QueryParams  # noqa: F401
+    from .query_compiler import QueryCompiler  # noqa: F401
 
 RESOLVED_TASK_TYPE = Tuple["QueryParams", List["PostProcessingAction"]]
 
diff --git a/eland/tests/__init__.py b/eland/tests/__init__.py
index eb0b155a..7b0ba966 100644
--- a/eland/tests/__init__.py
+++ b/eland/tests/__init__.py
@@ -19,6 +19,7 @@
 
 import pandas as pd
 from elasticsearch import Elasticsearch
+
 from eland.common import es_version
 
 ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
diff --git a/eland/tests/common.py b/eland/tests/common.py
index 8bc4e860..21553cea 100644
--- a/eland/tests/common.py
+++ b/eland/tests/common.py
@@ -26,12 +26,12 @@
 
 # Create pandas and eland data frames
 from eland.tests import (
+    ECOMMERCE_DF_FILE_NAME,
+    ECOMMERCE_INDEX_NAME,
     ES_TEST_CLIENT,
     FLIGHTS_DF_FILE_NAME,
     FLIGHTS_INDEX_NAME,
     FLIGHTS_SMALL_INDEX_NAME,
-    ECOMMERCE_DF_FILE_NAME,
-    ECOMMERCE_INDEX_NAME,
 )
 
 _pd_flights = pd.read_json(FLIGHTS_DF_FILE_NAME).sort_index()
diff --git a/eland/tests/conftest.py b/eland/tests/conftest.py
index 525862c5..680942f1 100644
--- a/eland/tests/conftest.py
+++ b/eland/tests/conftest.py
@@ -16,22 +16,25 @@
 #  under the License.
 
 import inspect
-import pytest
+
 import pandas as pd
+import pytest
+
+import eland as ed
+
 from .common import (
-    assert_pandas_eland_frame_equal,
-    assert_pandas_eland_series_equal,
-    assert_frame_equal,
-    assert_series_equal,
-    _ed_flights,
-    _pd_flights,
+    TestData,
     _ed_ecommerce,
-    _pd_ecommerce,
+    _ed_flights,
     _ed_flights_small,
+    _pd_ecommerce,
+    _pd_flights,
     _pd_flights_small,
-    TestData,
+    assert_frame_equal,
+    assert_pandas_eland_frame_equal,
+    assert_pandas_eland_series_equal,
+    assert_series_equal,
 )
-import eland as ed
 
 
 class SymmetricAPIChecker:
diff --git a/eland/tests/dataframe/test_aggs_pytest.py b/eland/tests/dataframe/test_aggs_pytest.py
index e483f471..54b05a23 100644
--- a/eland/tests/dataframe/test_aggs_pytest.py
+++ b/eland/tests/dataframe/test_aggs_pytest.py
@@ -18,8 +18,9 @@
 # File called _pytest for PyCharm compatability
 
 import numpy as np
-from pandas.testing import assert_frame_equal, assert_series_equal
 import pytest
+from pandas.testing import assert_frame_equal, assert_series_equal
+
 from eland.tests.common import TestData
 
 
diff --git a/eland/tests/dataframe/test_big_mapping_pytest.py b/eland/tests/dataframe/test_big_mapping_pytest.py
index de268f91..425365ce 100644
--- a/eland/tests/dataframe/test_big_mapping_pytest.py
+++ b/eland/tests/dataframe/test_big_mapping_pytest.py
@@ -18,8 +18,7 @@
 # File called _pytest for PyCharm compatability
 
 import eland as ed
-from eland.tests.common import ES_TEST_CLIENT
-from eland.tests.common import TestData
+from eland.tests.common import ES_TEST_CLIENT, TestData
 
 
 class TestDataFrameBigMapping(TestData):
diff --git a/eland/tests/dataframe/test_datetime_pytest.py b/eland/tests/dataframe/test_datetime_pytest.py
index b523e563..99ae51ab 100644
--- a/eland/tests/dataframe/test_datetime_pytest.py
+++ b/eland/tests/dataframe/test_datetime_pytest.py
@@ -24,10 +24,12 @@
 
 import eland as ed
 from eland.field_mappings import FieldMappings
-from eland.tests.common import ES_TEST_CLIENT
-from eland.tests.common import TestData
-from eland.tests.common import assert_pandas_eland_frame_equal
-from eland.tests.common import assert_pandas_eland_series_equal
+from eland.tests.common import (
+    ES_TEST_CLIENT,
+    TestData,
+    assert_pandas_eland_frame_equal,
+    assert_pandas_eland_series_equal,
+)
 
 
 class TestDataFrameDateTime(TestData):
diff --git a/eland/tests/dataframe/test_dtypes_pytest.py b/eland/tests/dataframe/test_dtypes_pytest.py
index fd0630cf..729d9634 100644
--- a/eland/tests/dataframe/test_dtypes_pytest.py
+++ b/eland/tests/dataframe/test_dtypes_pytest.py
@@ -19,6 +19,7 @@
 
 import numpy as np
 import pandas as pd
+
 from eland.tests.common import assert_series_equal
 
 
diff --git a/eland/tests/dataframe/test_es_query_pytest.py b/eland/tests/dataframe/test_es_query_pytest.py
index 95e24dd8..fe9429a8 100644
--- a/eland/tests/dataframe/test_es_query_pytest.py
+++ b/eland/tests/dataframe/test_es_query_pytest.py
@@ -18,8 +18,8 @@
 # File called _pytest for PyCharm compatability
 
 import pytest
-from eland.tests.common import TestData
-from eland.tests.common import assert_eland_frame_equal
+
+from eland.tests.common import TestData, assert_eland_frame_equal
 
 
 class TestDataEsQuery(TestData):
diff --git a/eland/tests/dataframe/test_filter_pytest.py b/eland/tests/dataframe/test_filter_pytest.py
index 9524e17a..9fdc7a27 100644
--- a/eland/tests/dataframe/test_filter_pytest.py
+++ b/eland/tests/dataframe/test_filter_pytest.py
@@ -18,6 +18,7 @@
 # File called _pytest for PyCharm compatability
 
 import pytest
+
 from eland.tests.common import TestData
 
 
diff --git a/eland/tests/dataframe/test_groupby_pytest.py b/eland/tests/dataframe/test_groupby_pytest.py
index 3cdd48a7..3ae95a01 100644
--- a/eland/tests/dataframe/test_groupby_pytest.py
+++ b/eland/tests/dataframe/test_groupby_pytest.py
@@ -17,10 +17,11 @@
 
 # File called _pytest for PyCharm compatability
 
+import pandas as pd
 import pytest
 from pandas.testing import assert_frame_equal, assert_series_equal
+
 from eland.tests.common import TestData
-import pandas as pd
 
 
 class TestGroupbyDataFrame(TestData):
diff --git a/eland/tests/dataframe/test_head_tail_pytest.py b/eland/tests/dataframe/test_head_tail_pytest.py
index 829159b1..ec1f0a42 100644
--- a/eland/tests/dataframe/test_head_tail_pytest.py
+++ b/eland/tests/dataframe/test_head_tail_pytest.py
@@ -17,8 +17,7 @@
 
 # File called _pytest for PyCharm compatability
 
-from eland.tests.common import TestData
-from eland.tests.common import assert_pandas_eland_frame_equal
+from eland.tests.common import TestData, assert_pandas_eland_frame_equal
 
 
 class TestDataFrameHeadTail(TestData):
diff --git a/eland/tests/dataframe/test_info_pytest.py b/eland/tests/dataframe/test_info_pytest.py
index b7151026..6b38a0a9 100644
--- a/eland/tests/dataframe/test_info_pytest.py
+++ b/eland/tests/dataframe/test_info_pytest.py
@@ -19,9 +19,7 @@
 from io import StringIO
 
 import eland as ed
-
 from eland.tests import ES_TEST_CLIENT
-
 from eland.tests.common import TestData
 
 
diff --git a/eland/tests/dataframe/test_init_pytest.py b/eland/tests/dataframe/test_init_pytest.py
index b8c5aa12..e33dd6bf 100644
--- a/eland/tests/dataframe/test_init_pytest.py
+++ b/eland/tests/dataframe/test_init_pytest.py
@@ -21,8 +21,7 @@
 
 import eland as ed
 from eland.query_compiler import QueryCompiler
-from eland.tests import ES_TEST_CLIENT
-from eland.tests import FLIGHTS_INDEX_NAME
+from eland.tests import ES_TEST_CLIENT, FLIGHTS_INDEX_NAME
 
 
 class TestDataFrameInit:
diff --git a/eland/tests/dataframe/test_metrics_pytest.py b/eland/tests/dataframe/test_metrics_pytest.py
index dbef894e..d3d57851 100644
--- a/eland/tests/dataframe/test_metrics_pytest.py
+++ b/eland/tests/dataframe/test_metrics_pytest.py
@@ -15,11 +15,13 @@
 #  specific language governing permissions and limitations
 #  under the License.
 
-# File called _pytest for PyCharm compatibility
-import pytest
 import numpy as np
 import pandas as pd
+
+# File called _pytest for PyCharm compatibility
+import pytest
 from pandas.testing import assert_series_equal
+
 from eland.tests.common import TestData
 
 
diff --git a/eland/tests/dataframe/test_query_pytest.py b/eland/tests/dataframe/test_query_pytest.py
index 97a9eebd..321f351c 100644
--- a/eland/tests/dataframe/test_query_pytest.py
+++ b/eland/tests/dataframe/test_query_pytest.py
@@ -20,9 +20,7 @@
 import pandas as pd
 
 import eland as ed
-from eland.tests.common import ES_TEST_CLIENT
-from eland.tests.common import TestData
-from eland.tests.common import assert_pandas_eland_frame_equal
+from eland.tests.common import ES_TEST_CLIENT, TestData, assert_pandas_eland_frame_equal
 
 
 class TestDataFrameQuery(TestData):
diff --git a/eland/tests/dataframe/test_sample_pytest.py b/eland/tests/dataframe/test_sample_pytest.py
index 9a4a5757..33451261 100644
--- a/eland/tests/dataframe/test_sample_pytest.py
+++ b/eland/tests/dataframe/test_sample_pytest.py
@@ -19,8 +19,8 @@
 import pytest
 from pandas.testing import assert_frame_equal
 
-from eland.tests.common import TestData
 from eland import eland_to_pandas
+from eland.tests.common import TestData
 
 
 class TestDataFrameSample(TestData):
diff --git a/eland/tests/dataframe/test_select_dtypes_pytest.py b/eland/tests/dataframe/test_select_dtypes_pytest.py
index d00ae178..738bf3bd 100644
--- a/eland/tests/dataframe/test_select_dtypes_pytest.py
+++ b/eland/tests/dataframe/test_select_dtypes_pytest.py
@@ -18,8 +18,7 @@
 # File called _pytest for PyCharm compatability
 import numpy as np
 
-from eland.tests.common import TestData
-from eland.tests.common import assert_pandas_eland_frame_equal
+from eland.tests.common import TestData, assert_pandas_eland_frame_equal
 
 
 class TestDataFrameSelectDTypes(TestData):
diff --git a/eland/tests/dataframe/test_to_csv_pytest.py b/eland/tests/dataframe/test_to_csv_pytest.py
index 5ca50482..bb9560f4 100644
--- a/eland/tests/dataframe/test_to_csv_pytest.py
+++ b/eland/tests/dataframe/test_to_csv_pytest.py
@@ -24,10 +24,8 @@
 from pandas.testing import assert_frame_equal
 
 import eland as ed
-from eland.tests import ES_TEST_CLIENT
-from eland.tests import FLIGHTS_INDEX_NAME
-from eland.tests.common import ROOT_DIR
-from eland.tests.common import TestData
+from eland.tests import ES_TEST_CLIENT, FLIGHTS_INDEX_NAME
+from eland.tests.common import ROOT_DIR, TestData
 
 
 class TestDataFrameToCSV(TestData):
diff --git a/eland/tests/dataframe/test_utils_pytest.py b/eland/tests/dataframe/test_utils_pytest.py
index fa5985ae..e1b57c33 100644
--- a/eland/tests/dataframe/test_utils_pytest.py
+++ b/eland/tests/dataframe/test_utils_pytest.py
@@ -22,8 +22,7 @@
 
 import eland as ed
 from eland.field_mappings import FieldMappings
-from eland.tests.common import ES_TEST_CLIENT, assert_pandas_eland_frame_equal
-from eland.tests.common import TestData
+from eland.tests.common import ES_TEST_CLIENT, TestData, assert_pandas_eland_frame_equal
 
 
 class TestDataFrameUtils(TestData):
diff --git a/eland/tests/etl/test_pandas_to_eland.py b/eland/tests/etl/test_pandas_to_eland.py
index 2ffb01ba..e4a054ab 100644
--- a/eland/tests/etl/test_pandas_to_eland.py
+++ b/eland/tests/etl/test_pandas_to_eland.py
@@ -16,10 +16,12 @@
 #  under the License.
 
 from datetime import datetime, timedelta
-import pytest
+
 import pandas as pd
+import pytest
 from elasticsearch.helpers import BulkIndexError
-from eland import pandas_to_eland, DataFrame
+
+from eland import DataFrame, pandas_to_eland
 from eland.tests.common import (
     ES_TEST_CLIENT,
     assert_frame_equal,
diff --git a/eland/tests/field_mappings/test_aggregatables_pytest.py b/eland/tests/field_mappings/test_aggregatables_pytest.py
index 2f4a62f8..c6af0a30 100644
--- a/eland/tests/field_mappings/test_aggregatables_pytest.py
+++ b/eland/tests/field_mappings/test_aggregatables_pytest.py
@@ -19,7 +19,7 @@
 import pytest
 
 from eland.field_mappings import FieldMappings
-from eland.tests import ES_TEST_CLIENT, ECOMMERCE_INDEX_NAME
+from eland.tests import ECOMMERCE_INDEX_NAME, ES_TEST_CLIENT
 from eland.tests.common import TestData
 
 
diff --git a/eland/tests/field_mappings/test_datetime_pytest.py b/eland/tests/field_mappings/test_datetime_pytest.py
index 4ce575aa..447381e5 100644
--- a/eland/tests/field_mappings/test_datetime_pytest.py
+++ b/eland/tests/field_mappings/test_datetime_pytest.py
@@ -19,8 +19,7 @@
 from datetime import datetime
 
 from eland.field_mappings import FieldMappings
-from eland.tests.common import ES_TEST_CLIENT
-from eland.tests.common import TestData
+from eland.tests.common import ES_TEST_CLIENT, TestData
 
 
 class TestDateTime(TestData):
diff --git a/eland/tests/field_mappings/test_field_name_pd_dtype_pytest.py b/eland/tests/field_mappings/test_field_name_pd_dtype_pytest.py
index 30bdc346..94efe2cd 100644
--- a/eland/tests/field_mappings/test_field_name_pd_dtype_pytest.py
+++ b/eland/tests/field_mappings/test_field_name_pd_dtype_pytest.py
@@ -21,8 +21,7 @@
 
 from eland.field_mappings import FieldMappings
 from eland.tests import FLIGHTS_INDEX_NAME, FLIGHTS_MAPPING
-from eland.tests.common import ES_TEST_CLIENT
-from eland.tests.common import TestData
+from eland.tests.common import ES_TEST_CLIENT, TestData
 
 
 class TestFieldNamePDDType(TestData):
diff --git a/eland/tests/field_mappings/test_get_field_names_pytest.py b/eland/tests/field_mappings/test_get_field_names_pytest.py
index c56ccda8..59bdd385 100644
--- a/eland/tests/field_mappings/test_get_field_names_pytest.py
+++ b/eland/tests/field_mappings/test_get_field_names_pytest.py
@@ -21,7 +21,7 @@
 
 # File called _pytest for PyCharm compatability
 from eland.field_mappings import FieldMappings
-from eland.tests import FLIGHTS_INDEX_NAME, ES_TEST_CLIENT
+from eland.tests import ES_TEST_CLIENT, FLIGHTS_INDEX_NAME
 from eland.tests.common import TestData
 
 
diff --git a/eland/tests/field_mappings/test_metric_source_fields_pytest.py b/eland/tests/field_mappings/test_metric_source_fields_pytest.py
index 6cf8001f..9c4e2c94 100644
--- a/eland/tests/field_mappings/test_metric_source_fields_pytest.py
+++ b/eland/tests/field_mappings/test_metric_source_fields_pytest.py
@@ -20,7 +20,7 @@
 import numpy as np
 
 from eland.field_mappings import FieldMappings
-from eland.tests import ES_TEST_CLIENT, ECOMMERCE_INDEX_NAME, FLIGHTS_INDEX_NAME
+from eland.tests import ECOMMERCE_INDEX_NAME, ES_TEST_CLIENT, FLIGHTS_INDEX_NAME
 from eland.tests.common import TestData
 
 
diff --git a/eland/tests/field_mappings/test_scripted_fields_pytest.py b/eland/tests/field_mappings/test_scripted_fields_pytest.py
index f2f276ad..0e3d4f99 100644
--- a/eland/tests/field_mappings/test_scripted_fields_pytest.py
+++ b/eland/tests/field_mappings/test_scripted_fields_pytest.py
@@ -21,7 +21,7 @@
 import numpy as np
 
 from eland.field_mappings import FieldMappings
-from eland.tests import FLIGHTS_INDEX_NAME, ES_TEST_CLIENT
+from eland.tests import ES_TEST_CLIENT, FLIGHTS_INDEX_NAME
 from eland.tests.common import TestData
 
 
diff --git a/eland/tests/ml/test_imported_ml_model_pytest.py b/eland/tests/ml/test_imported_ml_model_pytest.py
index d9ca4a4a..9e8445d4 100644
--- a/eland/tests/ml/test_imported_ml_model_pytest.py
+++ b/eland/tests/ml/test_imported_ml_model_pytest.py
@@ -15,16 +15,15 @@
 #  specific language governing permissions and limitations
 #  under the License.
 
-import pytest
 import numpy as np
+import pytest
 
 from eland.ml import MLModel
 from eland.tests import ES_TEST_CLIENT, ES_VERSION
 
-
 try:
     from sklearn import datasets
-    from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
+    from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
     from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
 
     HAS_SKLEARN = True
@@ -32,14 +31,14 @@
     HAS_SKLEARN = False
 
 try:
-    from xgboost import XGBRegressor, XGBClassifier
+    from xgboost import XGBClassifier, XGBRegressor
 
     HAS_XGBOOST = True
 except ImportError:
     HAS_XGBOOST = False
 
 try:
-    from lightgbm import LGBMRegressor, LGBMClassifier
+    from lightgbm import LGBMClassifier, LGBMRegressor
 
     HAS_LIGHTGBM = True
 except ImportError:
diff --git a/eland/tests/operators/test_operators_pytest.py b/eland/tests/operators/test_operators_pytest.py
index 0b7cb616..7fe602ca 100644
--- a/eland/tests/operators/test_operators_pytest.py
+++ b/eland/tests/operators/test_operators_pytest.py
@@ -16,18 +16,18 @@
 #  under the License.
 
 from eland.filter import (
+    Equal,
     Greater,
     GreaterEqual,
-    Less,
-    LessEqual,
-    Equal,
     IsIn,
     IsNull,
+    Less,
+    LessEqual,
     Like,
-    Rlike,
-    Startswith,
     NotNull,
+    Rlike,
     ScriptFilter,
+    Startswith,
 )
 
 
diff --git a/eland/tests/series/test_describe_pytest.py b/eland/tests/series/test_describe_pytest.py
index 2f6f6ec7..2f255ebf 100644
--- a/eland/tests/series/test_describe_pytest.py
+++ b/eland/tests/series/test_describe_pytest.py
@@ -16,6 +16,7 @@
 #  under the License.
 
 import pandas as pd
+
 from eland.tests.common import TestData, assert_series_equal
 
 
diff --git a/eland/tests/series/test_dtype_pytest.py b/eland/tests/series/test_dtype_pytest.py
index eb00c989..4c45531d 100644
--- a/eland/tests/series/test_dtype_pytest.py
+++ b/eland/tests/series/test_dtype_pytest.py
@@ -15,10 +15,12 @@
 #  specific language governing permissions and limitations
 #  under the License.
 
+import warnings
+
 import numpy as np
 import pandas as pd
-import warnings
-from eland.common import build_pd_series, EMPTY_SERIES_DTYPE
+
+from eland.common import EMPTY_SERIES_DTYPE, build_pd_series
 from eland.tests.common import assert_series_equal
 
 
diff --git a/eland/tests/series/test_filter_pytest.py b/eland/tests/series/test_filter_pytest.py
index 052ef9a8..9da03c7c 100644
--- a/eland/tests/series/test_filter_pytest.py
+++ b/eland/tests/series/test_filter_pytest.py
@@ -18,8 +18,8 @@
 # File called _pytest for PyCharm compatability
 
 import pytest
-from eland.tests.common import TestData
-from eland.tests.common import assert_pandas_eland_series_equal
+
+from eland.tests.common import TestData, assert_pandas_eland_series_equal
 
 
 class TestSeriesFilter(TestData):
diff --git a/eland/tests/series/test_head_tail_pytest.py b/eland/tests/series/test_head_tail_pytest.py
index 49cb83c9..93c1d3f5 100644
--- a/eland/tests/series/test_head_tail_pytest.py
+++ b/eland/tests/series/test_head_tail_pytest.py
@@ -17,10 +17,8 @@
 
 # File called _pytest for PyCharm compatability
 import eland as ed
-from eland.tests import ES_TEST_CLIENT
-from eland.tests import FLIGHTS_INDEX_NAME
-from eland.tests.common import TestData
-from eland.tests.common import assert_pandas_eland_series_equal
+from eland.tests import ES_TEST_CLIENT, FLIGHTS_INDEX_NAME
+from eland.tests.common import TestData, assert_pandas_eland_series_equal
 
 
 class TestSeriesHeadTail(TestData):
diff --git a/eland/tests/series/test_metrics_pytest.py b/eland/tests/series/test_metrics_pytest.py
index d8e213d2..01ce34a8 100644
--- a/eland/tests/series/test_metrics_pytest.py
+++ b/eland/tests/series/test_metrics_pytest.py
@@ -17,11 +17,12 @@
 
 # File called _pytest for PyCharm compatability
 
-import pytest
-import pandas as pd
-import numpy as np
 from datetime import timedelta
 
+import numpy as np
+import pandas as pd
+import pytest
+
 from eland.tests.common import TestData
 
 
diff --git a/eland/tests/series/test_na_pytest.py b/eland/tests/series/test_na_pytest.py
index d9267b92..a11c91c2 100644
--- a/eland/tests/series/test_na_pytest.py
+++ b/eland/tests/series/test_na_pytest.py
@@ -16,8 +16,7 @@
 #  under the License.
 
 from eland import eland_to_pandas
-from eland.tests.common import TestData
-from eland.tests.common import assert_pandas_eland_frame_equal
+from eland.tests.common import TestData, assert_pandas_eland_frame_equal
 
 
 class TestSeriesNA(TestData):
diff --git a/eland/tests/series/test_name_pytest.py b/eland/tests/series/test_name_pytest.py
index 37209165..619aa9c3 100644
--- a/eland/tests/series/test_name_pytest.py
+++ b/eland/tests/series/test_name_pytest.py
@@ -17,10 +17,8 @@
 
 # File called _pytest for PyCharm compatability
 import eland as ed
-from eland.tests import ES_TEST_CLIENT
-from eland.tests import FLIGHTS_INDEX_NAME
-from eland.tests.common import TestData
-from eland.tests.common import assert_pandas_eland_series_equal
+from eland.tests import ES_TEST_CLIENT, FLIGHTS_INDEX_NAME
+from eland.tests.common import TestData, assert_pandas_eland_series_equal
 
 
 class TestSeriesName(TestData):
diff --git a/eland/tests/series/test_rename_pytest.py b/eland/tests/series/test_rename_pytest.py
index ae970c05..b99b9bb6 100644
--- a/eland/tests/series/test_rename_pytest.py
+++ b/eland/tests/series/test_rename_pytest.py
@@ -17,10 +17,8 @@
 
 # File called _pytest for PyCharm compatability
 import eland as ed
-from eland.tests import ES_TEST_CLIENT
-from eland.tests import FLIGHTS_INDEX_NAME
-from eland.tests.common import TestData
-from eland.tests.common import assert_pandas_eland_series_equal
+from eland.tests import ES_TEST_CLIENT, FLIGHTS_INDEX_NAME
+from eland.tests.common import TestData, assert_pandas_eland_series_equal
 
 
 class TestSeriesRename(TestData):
diff --git a/eland/tests/series/test_repr_pytest.py b/eland/tests/series/test_repr_pytest.py
index f1da73d7..61c103c5 100644
--- a/eland/tests/series/test_repr_pytest.py
+++ b/eland/tests/series/test_repr_pytest.py
@@ -17,8 +17,7 @@
 
 # File called _pytest for PyCharm compatability
 import eland as ed
-from eland.tests import ES_TEST_CLIENT
-from eland.tests import FLIGHTS_INDEX_NAME
+from eland.tests import ES_TEST_CLIENT, FLIGHTS_INDEX_NAME
 from eland.tests.common import TestData
 
 
diff --git a/eland/tests/series/test_sample_pytest.py b/eland/tests/series/test_sample_pytest.py
index 8de43e38..184e4b3f 100644
--- a/eland/tests/series/test_sample_pytest.py
+++ b/eland/tests/series/test_sample_pytest.py
@@ -17,10 +17,8 @@
 
 # File called _pytest for PyCharm compatibility
 import eland as ed
-from eland.tests import ES_TEST_CLIENT
-from eland.tests import FLIGHTS_INDEX_NAME
-from eland.tests.common import TestData
-from eland.tests.common import assert_pandas_eland_series_equal
+from eland.tests import ES_TEST_CLIENT, FLIGHTS_INDEX_NAME
+from eland.tests.common import TestData, assert_pandas_eland_series_equal
 
 
 class TestSeriesSample(TestData):
diff --git a/eland/tests/setup_tests.py b/eland/tests/setup_tests.py
index b858c645..a5075237 100644
--- a/eland/tests/setup_tests.py
+++ b/eland/tests/setup_tests.py
@@ -18,25 +18,24 @@
 import pandas as pd
 from elasticsearch import helpers
 
+from eland.common import es_version
 from eland.tests import (
+    ECOMMERCE_FILE_NAME,
+    ECOMMERCE_INDEX_NAME,
+    ECOMMERCE_MAPPING,
+    ELASTICSEARCH_HOST,
+    ES_TEST_CLIENT,
     FLIGHTS_FILE_NAME,
     FLIGHTS_INDEX_NAME,
+    FLIGHTS_MAPPING,
     FLIGHTS_SMALL_FILE_NAME,
     FLIGHTS_SMALL_INDEX_NAME,
-    FLIGHTS_MAPPING,
-    ECOMMERCE_FILE_NAME,
-    ECOMMERCE_INDEX_NAME,
-    ECOMMERCE_MAPPING,
     TEST_MAPPING1,
     TEST_MAPPING1_INDEX_NAME,
     TEST_NESTED_USER_GROUP_DOCS,
     TEST_NESTED_USER_GROUP_INDEX_NAME,
     TEST_NESTED_USER_GROUP_MAPPING,
-    ES_TEST_CLIENT,
-    ELASTICSEARCH_HOST,
 )
-from eland.common import es_version
-
 
 DATA_LIST = [
     (FLIGHTS_FILE_NAME, FLIGHTS_INDEX_NAME, FLIGHTS_MAPPING),
diff --git a/eland/utils.py b/eland/utils.py
index 66bf5862..de63e19e 100644
--- a/eland/utils.py
+++ b/eland/utils.py
@@ -15,13 +15,13 @@
 #  specific language governing permissions and limitations
 #  under the License.
 
-import re
 import functools
+import re
 import warnings
-from typing import Callable, TypeVar, Any, Union, List, cast, Collection, Iterable
 from collections.abc import Collection as ABCCollection
-import pandas as pd  # type: ignore
+from typing import Any, Callable, Collection, Iterable, List, TypeVar, Union, cast
 
+import pandas as pd  # type: ignore
 
 RT = TypeVar("RT")
 
diff --git a/noxfile.py b/noxfile.py
index fe8ec113..52517f53 100644
--- a/noxfile.py
+++ b/noxfile.py
@@ -18,9 +18,9 @@
 import os
 import subprocess
 from pathlib import Path
-import nox
-import elasticsearch
 
+import elasticsearch
+import nox
 
 BASE_DIR = Path(__file__).parent
 SOURCE_FILES = (
@@ -57,18 +57,20 @@
 
 
 @nox.session(reuse_venv=True)
-def blacken(session):
-    session.install("black")
+def format(session):
+    session.install("black", "isort")
     session.run("python", "utils/license-headers.py", "fix", *SOURCE_FILES)
     session.run("black", "--target-version=py36", *SOURCE_FILES)
+    session.run("isort", *SOURCE_FILES)
     lint(session)
 
 
 @nox.session(reuse_venv=True)
 def lint(session):
-    session.install("black", "flake8", "mypy")
+    session.install("black", "flake8", "mypy", "isort")
     session.run("python", "utils/license-headers.py", "check", *SOURCE_FILES)
     session.run("black", "--check", "--target-version=py36", *SOURCE_FILES)
+    session.run("isort", "--check", *SOURCE_FILES)
     session.run("flake8", "--ignore=E501,W503,E402,E712,E203", *SOURCE_FILES)
 
     # TODO: When all files are typed we can change this to .run("mypy", "--strict", "eland/")
diff --git a/setup.cfg b/setup.cfg
new file mode 100644
index 00000000..c76db01f
--- /dev/null
+++ b/setup.cfg
@@ -0,0 +1,2 @@
+[isort]
+profile = black
diff --git a/setup.py b/setup.py
index 450a7be4..8b36258a 100644
--- a/setup.py
+++ b/setup.py
@@ -20,7 +20,7 @@
 from codecs import open
 from os import path
 
-from setuptools import setup, find_packages
+from setuptools import find_packages, setup
 
 here = path.abspath(path.dirname(__file__))
 about = {}
diff --git a/utils/generate-supported-apis.py b/utils/generate-supported-apis.py
index 58020da5..ba4b939a 100644
--- a/utils/generate-supported-apis.py
+++ b/utils/generate-supported-apis.py
@@ -17,12 +17,13 @@
 
 """Script that is used to create the compatibility matrix in the documentation"""
 
-import re
-import eland
-import pandas
 import inspect
+import re
 from pathlib import Path
 
+import pandas
+
+import eland
 
 api_docs_dir = Path(__file__).absolute().parent.parent / "docs/source/reference/api"
 is_supported = []
diff --git a/utils/license-headers.py b/utils/license-headers.py
index 993334d9..d501c85e 100644
--- a/utils/license-headers.py
+++ b/utils/license-headers.py
@@ -23,9 +23,8 @@
 
 import os
 import sys
-from typing import List, Iterator
 from itertools import chain
-
+from typing import Iterator, List
 
 lines_to_keep = ["# -*- coding: utf-8 -*-\n", "#!/usr/bin/env python\n"]
 license_header_lines = [