From 45ee495efb9c0cc1b105888a64cf103b5b21721f Mon Sep 17 00:00:00 2001 From: Seth Michael Larson Date: Thu, 15 Oct 2020 16:22:15 -0500 Subject: [PATCH 1/2] Document DataFrame.groupby() and rename Field.index -> .column --- .../reference/api/eland.DataFrame.groupby.rst | 6 ++ docs/source/reference/dataframe.rst | 1 + eland/dataframe.py | 11 +-- eland/field_mappings.py | 24 +++--- eland/groupby.py | 28 +++--- eland/operations.py | 85 +++++++++++-------- eland/query.py | 56 ++++++++---- eland/query_compiler.py | 13 ++- eland/tests/dataframe/test_groupby_pytest.py | 52 +++++++++--- 9 files changed, 173 insertions(+), 103 deletions(-) create mode 100644 docs/source/reference/api/eland.DataFrame.groupby.rst diff --git a/docs/source/reference/api/eland.DataFrame.groupby.rst b/docs/source/reference/api/eland.DataFrame.groupby.rst new file mode 100644 index 00000000..e31ce1d3 --- /dev/null +++ b/docs/source/reference/api/eland.DataFrame.groupby.rst @@ -0,0 +1,6 @@ +eland.DataFrame.groupby +======================= + +.. currentmodule:: eland + +.. automethod:: DataFrame.groupby diff --git a/docs/source/reference/dataframe.rst b/docs/source/reference/dataframe.rst index ffd9f32e..cc05497c 100644 --- a/docs/source/reference/dataframe.rst +++ b/docs/source/reference/dataframe.rst @@ -46,6 +46,7 @@ Function Application, GroupBy & Window DataFrame.agg DataFrame.aggregate + DataFrame.groupby .. _api.dataframe.stats: diff --git a/eland/dataframe.py b/eland/dataframe.py index 98293961..5eea8762 100644 --- a/eland/dataframe.py +++ b/eland/dataframe.py @@ -1442,13 +1442,10 @@ def groupby( by: column or list of columns used to groupby Currently accepts column or list of columns - TODO Implement other combinations of by similar to pandas dropna: default True If True, and if group keys contain NA values, NA values together with row/column will be dropped. - TODO Implement False - TODO Implement remainder of pandas arguments Returns ------- GroupByDataFrame @@ -1495,18 +1492,18 @@ def groupby( [63 rows x 2 columns] """ if by is None: - raise TypeError("by parameter should be specified to groupby") + raise ValueError("by parameter should be specified to groupby") if isinstance(by, str): by = [by] if isinstance(by, (list, tuple)): - remaining_columns = set(by) - set(self._query_compiler.columns) + remaining_columns = sorted(set(by) - set(self._query_compiler.columns)) if remaining_columns: raise KeyError( - f"Requested columns {remaining_columns} not in the DataFrame." + f"Requested columns {repr(remaining_columns)[1:-1]} not in the DataFrame" ) return GroupByDataFrame( - by=by, query_compiler=self._query_compiler, dropna=dropna + by=by, query_compiler=self._query_compiler.copy(), dropna=dropna ) def query(self, expr) -> "DataFrame": diff --git a/eland/field_mappings.py b/eland/field_mappings.py index de7001a3..56018276 100644 --- a/eland/field_mappings.py +++ b/eland/field_mappings.py @@ -64,7 +64,7 @@ class Field(NamedTuple): """Holds all information on a particular field in the mapping""" - index: str + column: str es_field_name: str is_source: bool es_dtype: str @@ -129,7 +129,7 @@ class FieldMappings: _mappings_capabilities: pandas.DataFrame A data frame summarising the capabilities of the index mapping - index - the eland display name + column (index) - the eland display name es_field_name - the Elasticsearch field name is_source - is top level field (i.e. not a multi-field sub-field) @@ -537,13 +537,13 @@ def _generate_es_mappings( """ mapping_props = {} - for field_name_name, dtype in dataframe.dtypes.iteritems(): - if es_type_overrides is not None and field_name_name in es_type_overrides: - es_dtype = es_type_overrides[field_name_name] + for column, dtype in dataframe.dtypes.iteritems(): + if es_type_overrides is not None and column in es_type_overrides: + es_dtype = es_type_overrides[column] else: es_dtype = FieldMappings._pd_dtype_to_es_dtype(dtype) - mapping_props[field_name_name] = {"type": es_dtype} + mapping_props[column] = {"type": es_dtype} return {"mappings": {"properties": mapping_props}} @@ -708,9 +708,9 @@ def all_source_fields(self) -> List[Field]: """ source_fields: List[Field] = [] - for index, row in self._mappings_capabilities.iterrows(): + for column, row in self._mappings_capabilities.iterrows(): row = row.to_dict() - row["index"] = index + row["column"] = column source_fields.append(Field(**row)) return source_fields @@ -731,13 +731,13 @@ def groupby_source_fields(self, by: List[str]) -> Tuple[List[Field], List[Field] groupby_fields: Dict[str, Field] = {} # groupby_fields: Union[List[Field], List[None]] = [None] * len(by) aggregatable_fields: List[Field] = [] - for index_name, row in self._mappings_capabilities.iterrows(): + for column, row in self._mappings_capabilities.iterrows(): row = row.to_dict() - row["index"] = index_name - if index_name not in by: + row["column"] = column + if column not in by: aggregatable_fields.append(Field(**row)) else: - groupby_fields[index_name] = Field(**row) + groupby_fields[column] = Field(**row) # Maintain groupby order as given input return [groupby_fields[column] for column in by], aggregatable_fields diff --git a/eland/groupby.py b/eland/groupby.py index 515d351a..b9979dd8 100644 --- a/eland/groupby.py +++ b/eland/groupby.py @@ -24,7 +24,7 @@ class GroupBy: """ - This holds all the groupby base methods + Base class for calls to X.groupby([...]) Parameters ---------- @@ -34,7 +34,6 @@ class GroupBy: Query compiler object dropna: default is true, drop None/NaT/NaN values while grouping - """ def __init__( @@ -47,9 +46,8 @@ def __init__( self._dropna: bool = dropna self._by: List[str] = by - # numeric_only=True by default for all aggs because pandas does the same def mean(self, numeric_only: bool = True) -> "pd.DataFrame": - return self._query_compiler.groupby( + return self._query_compiler.aggs_groupby( by=self._by, pd_aggs=["mean"], dropna=self._dropna, @@ -57,7 +55,7 @@ def mean(self, numeric_only: bool = True) -> "pd.DataFrame": ) def var(self, numeric_only: bool = True) -> "pd.DataFrame": - return self._query_compiler.groupby( + return self._query_compiler.aggs_groupby( by=self._by, pd_aggs=["var"], dropna=self._dropna, @@ -65,7 +63,7 @@ def var(self, numeric_only: bool = True) -> "pd.DataFrame": ) def std(self, numeric_only: bool = True) -> "pd.DataFrame": - return self._query_compiler.groupby( + return self._query_compiler.aggs_groupby( by=self._by, pd_aggs=["std"], dropna=self._dropna, @@ -73,7 +71,7 @@ def std(self, numeric_only: bool = True) -> "pd.DataFrame": ) def mad(self, numeric_only: bool = True) -> "pd.DataFrame": - return self._query_compiler.groupby( + return self._query_compiler.aggs_groupby( by=self._by, pd_aggs=["mad"], dropna=self._dropna, @@ -81,7 +79,7 @@ def mad(self, numeric_only: bool = True) -> "pd.DataFrame": ) def median(self, numeric_only: bool = True) -> "pd.DataFrame": - return self._query_compiler.groupby( + return self._query_compiler.aggs_groupby( by=self._by, pd_aggs=["median"], dropna=self._dropna, @@ -89,7 +87,7 @@ def median(self, numeric_only: bool = True) -> "pd.DataFrame": ) def sum(self, numeric_only: bool = True) -> "pd.DataFrame": - return self._query_compiler.groupby( + return self._query_compiler.aggs_groupby( by=self._by, pd_aggs=["sum"], dropna=self._dropna, @@ -97,7 +95,7 @@ def sum(self, numeric_only: bool = True) -> "pd.DataFrame": ) def min(self, numeric_only: bool = True) -> "pd.DataFrame": - return self._query_compiler.groupby( + return self._query_compiler.aggs_groupby( by=self._by, pd_aggs=["min"], dropna=self._dropna, @@ -105,7 +103,7 @@ def min(self, numeric_only: bool = True) -> "pd.DataFrame": ) def max(self, numeric_only: bool = True) -> "pd.DataFrame": - return self._query_compiler.groupby( + return self._query_compiler.aggs_groupby( by=self._by, pd_aggs=["max"], dropna=self._dropna, @@ -113,7 +111,7 @@ def max(self, numeric_only: bool = True) -> "pd.DataFrame": ) def nunique(self) -> "pd.DataFrame": - return self._query_compiler.groupby( + return self._query_compiler.aggs_groupby( by=self._by, pd_aggs=["nunique"], dropna=self._dropna, @@ -133,7 +131,6 @@ class GroupByDataFrame(GroupBy): Query compiler object dropna: default is true, drop None/NaT/NaN values while grouping - """ def aggregate(self, func: List[str], numeric_only: bool = False) -> "pd.DataFrame": @@ -157,13 +154,12 @@ def aggregate(self, func: List[str], numeric_only: bool = False) -> "pd.DataFram """ if isinstance(func, str): func = [func] - # numeric_only is by default False because pandas does the same - return self._query_compiler.groupby( + return self._query_compiler.aggs_groupby( by=self._by, pd_aggs=func, dropna=self._dropna, numeric_only=numeric_only, - is_agg=True, + is_dataframe_agg=True, ) agg = aggregate diff --git a/eland/operations.py b/eland/operations.py index 4f64daac..63fcf899 100644 --- a/eland/operations.py +++ b/eland/operations.py @@ -271,7 +271,7 @@ def _metric_aggs( min 1.000205e+02 0.000000e+00 0.000000e+00 0 """ - return self._calculate_single_agg( + return self._unpack_metric_aggs( fields=fields, es_aggs=es_aggs, pd_aggs=pd_aggs, @@ -415,7 +415,7 @@ def _hist_aggs(self, query_compiler, num_bins): df_weights = pd.DataFrame(data=weights) return df_bins, df_weights - def _calculate_single_agg( + def _unpack_metric_aggs( self, fields: List["Field"], es_aggs: Union[List[str], List[Tuple[str, str]]], @@ -425,8 +425,9 @@ def _calculate_single_agg( is_dataframe_agg: bool = False, ): """ - This method is used to calculate single agg calculations. - Common for both metric aggs and groupby aggs + This method unpacks metric aggregations JSON response. + This can be called either directly on an aggs query + or on an individual bucket within a composite aggregation. Parameters ---------- @@ -533,21 +534,21 @@ def _calculate_single_agg( # If numeric_only is True and We only have a NaN type field then we check for empty. if values: - results[field.index] = values if len(values) > 1 else values[0] + results[field.column] = values if len(values) > 1 else values[0] return results - def groupby( + def aggs_groupby( self, query_compiler: "QueryCompiler", by: List[str], pd_aggs: List[str], dropna: bool = True, - is_agg: bool = False, + is_dataframe_agg: bool = False, numeric_only: bool = True, ) -> pd.DataFrame: """ - This method is used to construct groupby dataframe + This method is used to construct groupby aggregation dataframe Parameters ---------- @@ -560,7 +561,7 @@ def groupby( dropna: Drop None values if True. TODO Not yet implemented - is_agg: + is_dataframe_agg: Know if groupby with aggregation or single agg is called. numeric_only: return either numeric values or NaN/NaT @@ -574,13 +575,13 @@ def groupby( by=by, pd_aggs=pd_aggs, dropna=dropna, - is_agg=is_agg, + is_dataframe_agg=is_dataframe_agg, numeric_only=numeric_only, ) agg_df = pd.DataFrame(results, columns=results.keys()).set_index(by) - if is_agg: + if is_dataframe_agg: # Convert header columns to MultiIndex agg_df.columns = pd.MultiIndex.from_product([headers, pd_aggs]) @@ -592,7 +593,7 @@ def _groupby_aggs( by: List[str], pd_aggs: List[str], dropna: bool = True, - is_agg: bool = False, + is_dataframe_agg: bool = False, numeric_only: bool = True, ) -> Tuple[List[str], Dict[str, Any]]: """ @@ -609,8 +610,8 @@ def _groupby_aggs( dropna: Drop None values if True. TODO Not yet implemented - is_agg: - Know if groupby aggregation or single agg is called. + is_dataframe_agg: + Know if multi aggregation or single agg is called. numeric_only: return either numeric values or NaN/NaT @@ -627,13 +628,15 @@ def _groupby_aggs( f"Can not count field matches if size is set {size}" ) - by, fields = query_compiler._mappings.groupby_source_fields(by=by) + by_fields, agg_fields = query_compiler._mappings.groupby_source_fields(by=by) # Used defaultdict to avoid initialization of columns with lists response: Dict[str, List[Any]] = defaultdict(list) if numeric_only: - fields = [field for field in fields if (field.is_numeric or field.is_bool)] + agg_fields = [ + field for field in agg_fields if (field.is_numeric or field.is_bool) + ] body = Query(query_params.query) @@ -641,11 +644,13 @@ def _groupby_aggs( es_aggs = self._map_pd_aggs_to_es_aggs(pd_aggs) # Construct Query - for b in by: + for by_field in by_fields: # groupby fields will be term aggregations - body.term_aggs(f"groupby_{b.index}", b.index) + body.composite_agg_bucket_terms( + name=f"groupby_{by_field.column}", field=by_field.es_field_name + ) - for field in fields: + for field in agg_fields: for es_agg in es_aggs: if not field.is_es_agg_compatible(es_agg): continue @@ -665,11 +670,11 @@ def _groupby_aggs( ) # Composite aggregation - body.composite_agg( + body.composite_agg_start( size=DEFAULT_PAGINATION_SIZE, name="groupby_buckets", dropna=dropna ) - def response_generator() -> Generator[List[str], None, List[str]]: + def bucket_generator() -> Generator[List[str], None, List[str]]: """ e.g. "aggregations": { @@ -696,43 +701,51 @@ def response_generator() -> Generator[List[str], None, List[str]]: size=0, body=body.to_search_body(), ) + # Pagination Logic - if "after_key" in res["aggregations"]["groupby_buckets"]: + composite_buckets = res["aggregations"]["groupby_buckets"] + if "after_key" in composite_buckets: # yield the bucket which contains the result - yield res["aggregations"]["groupby_buckets"]["buckets"] + yield composite_buckets["buckets"] body.composite_agg_after_key( name="groupby_buckets", - after_key=res["aggregations"]["groupby_buckets"]["after_key"], + after_key=composite_buckets["after_key"], ) else: - return res["aggregations"]["groupby_buckets"]["buckets"] + return composite_buckets["buckets"] - for buckets in response_generator(): + for buckets in bucket_generator(): # We recieve response row-wise for bucket in buckets: # groupby columns are added to result same way they are returned - for b in by: - response[b.index].append(bucket["key"][f"groupby_{b.index}"]) + for by_field in by_fields: + bucket_key = bucket["key"][f"groupby_{by_field.column}"] + + # Datetimes always come back as integers, convert to pd.Timestamp() + if by_field.is_timestamp and isinstance(bucket_key, int): + bucket_key = pd.to_datetime(bucket_key, unit="ms") - agg_calculation = self._calculate_single_agg( - fields=fields, + response[by_field.column].append(bucket_key) + + agg_calculation = self._unpack_metric_aggs( + fields=agg_fields, es_aggs=es_aggs, pd_aggs=pd_aggs, response={"aggregations": bucket}, numeric_only=numeric_only, - is_dataframe_agg=is_agg, + is_dataframe_agg=is_dataframe_agg, ) # Process the calculated agg values to response for key, value in agg_calculation.items(): - if not is_agg: - response[key].append(value) + if isinstance(value, list): + for pd_agg, val in zip(pd_aggs, value): + response[f"{key}_{pd_agg}"].append(val) else: - for i in range(0, len(pd_aggs)): - response[f"{key}_{pd_aggs[i]}"].append(value[i]) + response[key].append(value) - return [field.index for field in fields], response + return [field.column for field in agg_fields], response @staticmethod def _map_pd_aggs_to_es_aggs(pd_aggs): diff --git a/eland/query.py b/eland/query.py index 6961bd1c..3c2ad1b5 100644 --- a/eland/query.py +++ b/eland/query.py @@ -38,14 +38,17 @@ def __init__(self, query: Optional["Query"] = None): # type defs self._query: BooleanFilter self._aggs: Dict[str, Any] + self._composite_aggs: Dict[str, Any] if query is None: self._query = BooleanFilter() self._aggs = {} + self._composite_aggs = {} else: # Deep copy the incoming query so we can change it self._query = deepcopy(query._query) self._aggs = deepcopy(query._aggs) + self._composite_aggs = deepcopy(query._composite_aggs) def exists(self, field: str, must: bool = True) -> None: """ @@ -136,9 +139,9 @@ def metric_aggs(self, name: str, func: str, field: str) -> None: agg = {func: {"field": field}} self._aggs[name] = agg - def term_aggs(self, name: str, field: str) -> None: + def composite_agg_bucket_terms(self, name: str, field: str) -> None: """ - Add term agg e.g. + Add terms agg for composite aggregation "aggs": { "name": { @@ -148,17 +151,36 @@ def term_aggs(self, name: str, field: str) -> None: } } """ - agg = {"terms": {"field": field}} - self._aggs[name] = agg + self._composite_aggs[name] = {"terms": {"field": field}} - def composite_agg( + def composite_agg_bucket_date_histogram( + self, + name: str, + field: str, + calendar_interval: Optional[str] = None, + fixed_interval: Optional[str] = None, + ) -> None: + if (calendar_interval is None) == (fixed_interval is None): + raise ValueError( + "calendar_interval and fixed_interval parmaeters are mutually exclusive" + ) + agg = {"field": field} + if calendar_interval is not None: + agg["calendar_interval"] = calendar_interval + elif fixed_interval is not None: + agg["fixed_interval"] = fixed_interval + self._composite_aggs[name] = {"date_histogram": agg} + + def composite_agg_start( self, name: str, size: int, dropna: bool = True, ) -> None: """ - Add composite aggregation e.g. + Start a composite aggregation. This should be called + after calls to composite_agg_bucket_*(), etc. + https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-composite-aggregation.html "aggs": { @@ -190,22 +212,22 @@ def composite_agg( """ sources: List[Dict[str, Dict[str, str]]] = [] - aggregations: Dict[str, Dict[str, str]] = {} - for _name, agg in self._aggs.items(): - if agg.get("terms"): - if not dropna: - agg["terms"]["missing_bucket"] = "true" - sources.append({_name: agg}) - else: - aggregations[_name] = agg + # Go through all composite source aggregations + # and apply dropna if needed. + for bucket_agg_name, bucket_agg in self._composite_aggs.items(): + if bucket_agg.get("terms") and not dropna: + bucket_agg = bucket_agg.copy() + bucket_agg["terms"]["missing_bucket"] = "true" + sources.append({bucket_agg_name: bucket_agg}) + self._composite_aggs.clear() - agg = { + aggs = { "composite": {"size": size, "sources": sources}, - "aggregations": aggregations, + "aggregations": self._aggs.copy(), } self._aggs.clear() - self._aggs[name] = agg + self._aggs[name] = aggs def composite_agg_after_key(self, name: str, after_key: Dict[str, Any]) -> None: """ diff --git a/eland/query_compiler.py b/eland/query_compiler.py index a40c41a1..f1a4240d 100644 --- a/eland/query_compiler.py +++ b/eland/query_compiler.py @@ -550,15 +550,22 @@ def nunique(self): self, ["nunique"], numeric_only=False ) - def groupby( + def aggs_groupby( self, by: List[str], pd_aggs: List[str], dropna: bool = True, - is_agg: bool = False, + is_dataframe_agg: bool = False, numeric_only: bool = True, ) -> pd.DataFrame: - return self._operations.groupby(self, by, pd_aggs, dropna, is_agg, numeric_only) + return self._operations.aggs_groupby( + self, + by=by, + pd_aggs=pd_aggs, + dropna=dropna, + is_dataframe_agg=is_dataframe_agg, + numeric_only=numeric_only, + ) def value_counts(self, es_size): return self._operations.value_counts(self, es_size) diff --git a/eland/tests/dataframe/test_groupby_pytest.py b/eland/tests/dataframe/test_groupby_pytest.py index a31da862..3cdd48a7 100644 --- a/eland/tests/dataframe/test_groupby_pytest.py +++ b/eland/tests/dataframe/test_groupby_pytest.py @@ -25,13 +25,10 @@ class TestGroupbyDataFrame(TestData): funcs = ["max", "min", "mean", "sum"] - extended_funcs = ["median", "mad", "var", "std"] filter_data = [ "AvgTicketPrice", "Cancelled", "dayOfWeek", - "timestamp", - "DestCountry", ] @pytest.mark.parametrize("numeric_only", [True]) @@ -41,14 +38,29 @@ def test_groupby_aggregate(self, numeric_only): pd_flights = self.pd_flights().filter(self.filter_data) ed_flights = self.ed_flights().filter(self.filter_data) - pd_groupby = pd_flights.groupby("Cancelled").agg(self.funcs, numeric_only) - ed_groupby = ed_flights.groupby("Cancelled").agg(self.funcs, numeric_only) + pd_groupby = pd_flights.groupby("Cancelled").agg( + self.funcs, numeric_only=numeric_only + ) + ed_groupby = ed_flights.groupby("Cancelled").agg( + self.funcs, numeric_only=numeric_only + ) + + # checking only values because dtypes are checked in aggs tests + assert_frame_equal(pd_groupby, ed_groupby, check_exact=False, check_dtype=False) + + @pytest.mark.parametrize("pd_agg", funcs) + def test_groupby_aggregate_single_aggs(self, pd_agg): + pd_flights = self.pd_flights().filter(self.filter_data) + ed_flights = self.ed_flights().filter(self.filter_data) + + pd_groupby = pd_flights.groupby("Cancelled").agg([pd_agg], numeric_only=True) + ed_groupby = ed_flights.groupby("Cancelled").agg([pd_agg], numeric_only=True) # checking only values because dtypes are checked in aggs tests assert_frame_equal(pd_groupby, ed_groupby, check_exact=False, check_dtype=False) @pytest.mark.parametrize("pd_agg", ["max", "min", "mean", "sum", "median"]) - def test_groupby_aggs_true(self, pd_agg): + def test_groupby_aggs_numeric_only_true(self, pd_agg): # Pandas has numeric_only applicable for the above aggs with groupby only. pd_flights = self.pd_flights().filter(self.filter_data) @@ -59,7 +71,7 @@ def test_groupby_aggs_true(self, pd_agg): # checking only values because dtypes are checked in aggs tests assert_frame_equal( - pd_groupby, ed_groupby, check_exact=False, check_dtype=False, rtol=4 + pd_groupby, ed_groupby, check_exact=False, check_dtype=False, rtol=2 ) @pytest.mark.parametrize("pd_agg", ["mad", "var", "std"]) @@ -90,9 +102,9 @@ def test_groupby_aggs_nunique(self, pd_agg): ) @pytest.mark.parametrize("pd_agg", ["max", "min", "mean", "median"]) - def test_groupby_aggs_false(self, pd_agg): - pd_flights = self.pd_flights().filter(self.filter_data) - ed_flights = self.ed_flights().filter(self.filter_data) + def test_groupby_aggs_numeric_only_false(self, pd_agg): + pd_flights = self.pd_flights().filter(self.filter_data + ["timestamp"]) + ed_flights = self.ed_flights().filter(self.filter_data + ["timestamp"]) # pandas numeric_only=False, matches with Eland numeric_only=None pd_groupby = getattr(pd_flights.groupby("Cancelled"), pd_agg)( @@ -114,14 +126,30 @@ def test_groupby_columns(self): ed_flights = self.ed_flights().filter(self.filter_data) match = "by parameter should be specified to groupby" - with pytest.raises(TypeError, match=match): + with pytest.raises(ValueError, match=match): ed_flights.groupby(None).mean() by = ["ABC", "Cancelled"] - match = "Requested columns {'ABC'} not in the DataFrame." + match = "Requested columns 'ABC' not in the DataFrame" with pytest.raises(KeyError, match=match): ed_flights.groupby(by).mean() + @pytest.mark.parametrize( + "by", + ["timestamp", "dayOfWeek", "Carrier", "Cancelled", ["dayOfWeek", "Carrier"]], + ) + def test_groupby_different_dtypes(self, by): + columns = ["dayOfWeek", "Carrier", "timestamp", "Cancelled"] + pd_flights = self.pd_flights_small().filter(columns) + ed_flights = self.ed_flights_small().filter(columns) + + pd_groupby = pd_flights.groupby(by).nunique() + ed_groupby = ed_flights.groupby(by).nunique() + + assert list(pd_groupby.index) == list(ed_groupby.index) + assert pd_groupby.index.dtype == ed_groupby.index.dtype + assert list(pd_groupby.columns) == list(ed_groupby.columns) + def test_groupby_dropna(self): # TODO Add tests once dropna is implemeted pass From b87e8c0a8c829194e841bc346c698aaa5769bbd1 Mon Sep 17 00:00:00 2001 From: Seth Michael Larson Date: Thu, 15 Oct 2020 16:46:21 -0500 Subject: [PATCH 2/2] Add isort, rename Nox session to 'format' --- CONTRIBUTING.md | 2 +- docs/source/conf.py | 8 ++--- eland/__init__.py | 22 ++++++------- eland/actions.py | 4 +-- eland/arithmetics.py | 2 +- eland/common.py | 2 +- eland/dataframe.py | 10 +++--- eland/etl.py | 9 +++--- eland/field_mappings.py | 27 ++++++++-------- eland/filter.py | 2 +- eland/groupby.py | 3 +- eland/index.py | 3 +- eland/ml/__init__.py | 2 +- eland/ml/_model_serializer.py | 2 +- eland/ml/ml_model.py | 25 ++++++++++----- eland/ml/transformers/__init__.py | 14 ++++---- eland/ml/transformers/base.py | 3 +- eland/ml/transformers/lightgbm.py | 9 +++--- eland/ml/transformers/sklearn.py | 15 ++++++--- eland/ml/transformers/xgboost.py | 10 +++--- eland/ndframe.py | 5 +-- eland/operations.py | 32 +++++++++---------- eland/plotting/__init__.py | 5 +-- eland/plotting/_core.py | 2 +- eland/plotting/_matplotlib/__init__.py | 5 +-- eland/plotting/_matplotlib/hist.py | 1 + eland/query.py | 13 ++------ eland/query_compiler.py | 12 +++---- eland/series.py | 15 +++++---- eland/tasks.py | 8 ++--- eland/tests/__init__.py | 1 + eland/tests/common.py | 4 +-- eland/tests/conftest.py | 23 +++++++------ eland/tests/dataframe/test_aggs_pytest.py | 3 +- .../dataframe/test_big_mapping_pytest.py | 3 +- eland/tests/dataframe/test_datetime_pytest.py | 10 +++--- eland/tests/dataframe/test_dtypes_pytest.py | 1 + eland/tests/dataframe/test_es_query_pytest.py | 4 +-- eland/tests/dataframe/test_filter_pytest.py | 1 + eland/tests/dataframe/test_groupby_pytest.py | 3 +- .../tests/dataframe/test_head_tail_pytest.py | 3 +- eland/tests/dataframe/test_info_pytest.py | 2 -- eland/tests/dataframe/test_init_pytest.py | 3 +- eland/tests/dataframe/test_metrics_pytest.py | 6 ++-- eland/tests/dataframe/test_query_pytest.py | 4 +-- eland/tests/dataframe/test_sample_pytest.py | 2 +- .../dataframe/test_select_dtypes_pytest.py | 3 +- eland/tests/dataframe/test_to_csv_pytest.py | 6 ++-- eland/tests/dataframe/test_utils_pytest.py | 3 +- eland/tests/etl/test_pandas_to_eland.py | 6 ++-- .../test_aggregatables_pytest.py | 2 +- .../field_mappings/test_datetime_pytest.py | 3 +- .../test_field_name_pd_dtype_pytest.py | 3 +- .../test_get_field_names_pytest.py | 2 +- .../test_metric_source_fields_pytest.py | 2 +- .../test_scripted_fields_pytest.py | 2 +- .../tests/ml/test_imported_ml_model_pytest.py | 9 +++--- .../tests/operators/test_operators_pytest.py | 10 +++--- eland/tests/series/test_describe_pytest.py | 1 + eland/tests/series/test_dtype_pytest.py | 6 ++-- eland/tests/series/test_filter_pytest.py | 4 +-- eland/tests/series/test_head_tail_pytest.py | 6 ++-- eland/tests/series/test_metrics_pytest.py | 7 ++-- eland/tests/series/test_na_pytest.py | 3 +- eland/tests/series/test_name_pytest.py | 6 ++-- eland/tests/series/test_rename_pytest.py | 6 ++-- eland/tests/series/test_repr_pytest.py | 3 +- eland/tests/series/test_sample_pytest.py | 6 ++-- eland/tests/setup_tests.py | 15 ++++----- eland/utils.py | 6 ++-- noxfile.py | 12 ++++--- setup.cfg | 2 ++ setup.py | 2 +- utils/generate-supported-apis.py | 7 ++-- utils/license-headers.py | 3 +- 75 files changed, 250 insertions(+), 241 deletions(-) create mode 100644 setup.cfg diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 7d85db3b..e5c0889a 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -160,7 +160,7 @@ currently using a minimum version of PyCharm 2019.2.4. - Run `pytest --nbval` to validate install - To test specific versions of Python use `nox -s test-3.8` - To run the automatic formatter and check for lint issues - run `nox -s blacken` + run `nox -s format` ### Documentation diff --git a/docs/source/conf.py b/docs/source/conf.py index 65560de3..2ad50116 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -23,13 +23,13 @@ # -- Path setup -------------------------------------------------------------- +import datetime +import os +import sys + # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. -# -import os -import sys -import datetime sys.path.insert(0, os.path.abspath("../sphinxext")) sys.path.extend( diff --git a/eland/__init__.py b/eland/__init__.py index 0c9ba02c..640b18a9 100644 --- a/eland/__init__.py +++ b/eland/__init__.py @@ -15,22 +15,22 @@ # specific language governing permissions and limitations # under the License. -from eland._version import ( # noqa: F401 - __title__, - __description__, - __url__, - __version__, +from ._version import ( # noqa: F401 __author__, __author_email__, + __description__, __maintainer__, __maintainer_email__, + __title__, + __url__, + __version__, ) -from eland.common import SortOrder -from eland.index import Index -from eland.ndframe import NDFrame -from eland.series import Series -from eland.dataframe import DataFrame -from eland.etl import pandas_to_eland, eland_to_pandas, read_es, read_csv, csv_to_eland +from .common import SortOrder +from .dataframe import DataFrame +from .etl import csv_to_eland, eland_to_pandas, pandas_to_eland, read_csv, read_es +from .index import Index +from .ndframe import NDFrame +from .series import Series __all__ = [ "DataFrame", diff --git a/eland/actions.py b/eland/actions.py index 6da1751a..d9f43069 100644 --- a/eland/actions.py +++ b/eland/actions.py @@ -16,9 +16,9 @@ # under the License. from abc import ABC, abstractmethod -from typing import List, Optional, TYPE_CHECKING, Union -from eland import SortOrder +from typing import TYPE_CHECKING, List, Optional, Union +from eland import SortOrder if TYPE_CHECKING: import pandas as pd # type: ignore diff --git a/eland/arithmetics.py b/eland/arithmetics.py index e8d923b0..b9d1b80f 100644 --- a/eland/arithmetics.py +++ b/eland/arithmetics.py @@ -17,7 +17,7 @@ from abc import ABC, abstractmethod from io import StringIO -from typing import Union, List, TYPE_CHECKING, Any +from typing import TYPE_CHECKING, Any, List, Union import numpy as np # type: ignore diff --git a/eland/common.py b/eland/common.py index 772457ab..647d598e 100644 --- a/eland/common.py +++ b/eland/common.py @@ -18,7 +18,7 @@ import re import warnings from enum import Enum -from typing import Union, List, Tuple, cast, Callable, Any, Optional, Dict +from typing import Any, Callable, Dict, List, Optional, Tuple, Union, cast import numpy as np # type: ignore import pandas as pd # type: ignore diff --git a/eland/dataframe.py b/eland/dataframe.py index 5eea8762..89e110c6 100644 --- a/eland/dataframe.py +++ b/eland/dataframe.py @@ -15,11 +15,11 @@ # specific language governing permissions and limitations # under the License. +import re import sys import warnings from io import StringIO -import re -from typing import List, Optional, Sequence, Union, Tuple +from typing import List, Optional, Sequence, Tuple, Union import numpy as np import pandas as pd @@ -34,12 +34,12 @@ from pandas.util._validators import validate_bool_kwarg import eland.plotting as gfx -from eland.ndframe import NDFrame -from eland.series import Series from eland.common import DEFAULT_NUM_ROWS_DISPLAYED, docstring_parameter from eland.filter import BooleanFilter -from eland.utils import deprecated_api, is_valid_attr_name from eland.groupby import GroupByDataFrame +from eland.ndframe import NDFrame +from eland.series import Series +from eland.utils import deprecated_api, is_valid_attr_name class DataFrame(NDFrame): diff --git a/eland/etl.py b/eland/etl.py index e24b3967..00e6c0f7 100644 --- a/eland/etl.py +++ b/eland/etl.py @@ -16,17 +16,18 @@ # under the License. import csv -from typing import Generator, Union, List, Tuple, Optional, Mapping, Dict, Any from collections import deque +from typing import Any, Dict, Generator, List, Mapping, Optional, Tuple, Union + import pandas as pd # type: ignore +from elasticsearch import Elasticsearch # type: ignore +from elasticsearch.helpers import parallel_bulk # type: ignore from pandas.io.parsers import _c_parser_defaults # type: ignore from eland import DataFrame +from eland.common import DEFAULT_CHUNK_SIZE, ensure_es_client from eland.field_mappings import FieldMappings, verify_mapping_compatibility -from eland.common import ensure_es_client, DEFAULT_CHUNK_SIZE from eland.utils import deprecated_api -from elasticsearch import Elasticsearch # type: ignore -from elasticsearch.helpers import parallel_bulk # type: ignore @deprecated_api("eland.DataFrame()") diff --git a/eland/field_mappings.py b/eland/field_mappings.py index 56018276..7754d298 100644 --- a/eland/field_mappings.py +++ b/eland/field_mappings.py @@ -16,31 +16,32 @@ # under the License. import warnings +from typing import ( + TYPE_CHECKING, + Any, + Dict, + List, + Mapping, + NamedTuple, + Optional, + Set, + Tuple, +) import numpy as np import pandas as pd from pandas.core.dtypes.common import ( - is_float_dtype, is_bool_dtype, - is_integer_dtype, is_datetime_or_timedelta_dtype, + is_float_dtype, + is_integer_dtype, is_string_dtype, ) from pandas.core.dtypes.inference import is_list_like -from typing import ( - NamedTuple, - Optional, - Mapping, - Dict, - Any, - Tuple, - TYPE_CHECKING, - List, - Set, -) if TYPE_CHECKING: from elasticsearch import Elasticsearch + from eland import DataFrame diff --git a/eland/filter.py b/eland/filter.py index ea77929e..3cc5806d 100644 --- a/eland/filter.py +++ b/eland/filter.py @@ -17,7 +17,7 @@ # Originally based on code in MIT-licensed pandasticsearch filters -from typing import Dict, Any, List, Optional, Union, cast +from typing import Any, Dict, List, Optional, Union, cast class BooleanFilter: diff --git a/eland/groupby.py b/eland/groupby.py index b9979dd8..3679a8c1 100644 --- a/eland/groupby.py +++ b/eland/groupby.py @@ -15,7 +15,8 @@ # specific language governing permissions and limitations # under the License. -from typing import List, TYPE_CHECKING +from typing import TYPE_CHECKING, List + from eland.query_compiler import QueryCompiler if TYPE_CHECKING: diff --git a/eland/index.py b/eland/index.py index 08588f77..3a130adc 100644 --- a/eland/index.py +++ b/eland/index.py @@ -15,7 +15,8 @@ # specific language governing permissions and limitations # under the License. -from typing import Optional, TextIO, TYPE_CHECKING +from typing import TYPE_CHECKING, Optional, TextIO + from eland.utils import deprecated_api if TYPE_CHECKING: diff --git a/eland/ml/__init__.py b/eland/ml/__init__.py index 5326bc32..3b650620 100644 --- a/eland/ml/__init__.py +++ b/eland/ml/__init__.py @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. -from eland.ml.ml_model import MLModel, ImportedMLModel +from eland.ml.ml_model import ImportedMLModel, MLModel __all__ = [ "MLModel", diff --git a/eland/ml/_model_serializer.py b/eland/ml/_model_serializer.py index 976a85c2..da45a00a 100644 --- a/eland/ml/_model_serializer.py +++ b/eland/ml/_model_serializer.py @@ -19,7 +19,7 @@ import gzip import json from abc import ABC -from typing import Sequence, Dict, Any, Optional, List +from typing import Any, Dict, List, Optional, Sequence def add_if_exists(d: Dict[str, Any], k: str, v: Any) -> None: diff --git a/eland/ml/ml_model.py b/eland/ml/ml_model.py index 50d64734..3ca08948 100644 --- a/eland/ml/ml_model.py +++ b/eland/ml/ml_model.py @@ -15,31 +15,40 @@ # specific language governing permissions and limitations # under the License. -from typing import List, Union, cast, Optional, Dict, TYPE_CHECKING, Any, Tuple import warnings -import numpy as np # type: ignore +from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union, cast + import elasticsearch # type: ignore -from .common import TYPE_REGRESSION, TYPE_CLASSIFICATION -from .transformers import get_model_transformer +import numpy as np # type: ignore + from eland.common import ensure_es_client, es_version from eland.utils import deprecated_api +from .common import TYPE_CLASSIFICATION, TYPE_REGRESSION +from .transformers import get_model_transformer + if TYPE_CHECKING: from elasticsearch import Elasticsearch # noqa: F401 # Try importing each ML lib separately so mypy users don't have to # have both installed to use type-checking. try: - from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor # type: ignore # noqa: F401 - from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor # type: ignore # noqa: F401 + from sklearn.ensemble import ( # type: ignore # noqa: F401 + RandomForestClassifier, + RandomForestRegressor, + ) + from sklearn.tree import ( # type: ignore # noqa: F401 + DecisionTreeClassifier, + DecisionTreeRegressor, + ) except ImportError: pass try: - from xgboost import XGBRegressor, XGBClassifier # type: ignore # noqa: F401 + from xgboost import XGBClassifier, XGBRegressor # type: ignore # noqa: F401 except ImportError: pass try: - from lightgbm import LGBMRegressor, LGBMClassifier # type: ignore # noqa: F401 + from lightgbm import LGBMClassifier, LGBMRegressor # type: ignore # noqa: F401 except ImportError: pass diff --git a/eland/ml/transformers/__init__.py b/eland/ml/transformers/__init__.py index 76a72bbf..7c56f39a 100644 --- a/eland/ml/transformers/__init__.py +++ b/eland/ml/transformers/__init__.py @@ -17,8 +17,8 @@ import inspect from typing import Any, Dict, Type -from .base import ModelTransformer +from .base import ModelTransformer __all__ = ["get_model_transformer"] _MODEL_TRANSFORMERS: Dict[type, Type[ModelTransformer]] = {} @@ -42,13 +42,13 @@ def get_model_transformer(model: Any, **kwargs: Any) -> ModelTransformer: try: + from .sklearn import _MODEL_TRANSFORMERS as _SKLEARN_MODEL_TRANSFORMERS from .sklearn import ( SKLearnDecisionTreeTransformer, SKLearnForestClassifierTransformer, SKLearnForestRegressorTransformer, SKLearnForestTransformer, SKLearnTransformer, - _MODEL_TRANSFORMERS as _SKLEARN_MODEL_TRANSFORMERS, ) __all__ += [ @@ -63,13 +63,13 @@ def get_model_transformer(model: Any, **kwargs: Any) -> ModelTransformer: pass try: + from .xgboost import _MODEL_TRANSFORMERS as _XGBOOST_MODEL_TRANSFORMERS from .xgboost import ( - XGBoostClassifierTransformer, XGBClassifier, + XGBoostClassifierTransformer, XGBoostForestTransformer, XGBoostRegressorTransformer, XGBRegressor, - _MODEL_TRANSFORMERS as _XGBOOST_MODEL_TRANSFORMERS, ) __all__ += [ @@ -84,13 +84,13 @@ def get_model_transformer(model: Any, **kwargs: Any) -> ModelTransformer: pass try: + from .lightgbm import _MODEL_TRANSFORMERS as _LIGHTGBM_MODEL_TRANSFORMERS from .lightgbm import ( - LGBMRegressor, LGBMClassifier, + LGBMClassifierTransformer, LGBMForestTransformer, + LGBMRegressor, LGBMRegressorTransformer, - LGBMClassifierTransformer, - _MODEL_TRANSFORMERS as _LIGHTGBM_MODEL_TRANSFORMERS, ) __all__ += [ diff --git a/eland/ml/transformers/base.py b/eland/ml/transformers/base.py index d251108e..a9f4fdc8 100644 --- a/eland/ml/transformers/base.py +++ b/eland/ml/transformers/base.py @@ -15,7 +15,8 @@ # specific language governing permissions and limitations # under the License. -from typing import Sequence, Optional, Any +from typing import Any, Optional, Sequence + from .._model_serializer import ModelSerializer diff --git a/eland/ml/transformers/lightgbm.py b/eland/ml/transformers/lightgbm.py index 218249c8..8e96957e 100644 --- a/eland/ml/transformers/lightgbm.py +++ b/eland/ml/transformers/lightgbm.py @@ -15,15 +15,16 @@ # specific language governing permissions and limitations # under the License. -from typing import Optional, List, Dict, Any, Type -from .base import ModelTransformer +from typing import Any, Dict, List, Optional, Type + from .._model_serializer import Ensemble, Tree, TreeNode -from ..common import TYPE_CLASSIFICATION, TYPE_REGRESSION from .._optional import import_optional_dependency +from ..common import TYPE_CLASSIFICATION, TYPE_REGRESSION +from .base import ModelTransformer import_optional_dependency("lightgbm", on_version="warn") -from lightgbm import Booster, LGBMRegressor, LGBMClassifier # type: ignore +from lightgbm import Booster, LGBMClassifier, LGBMRegressor # type: ignore def transform_decider(decider: str) -> str: diff --git a/eland/ml/transformers/sklearn.py b/eland/ml/transformers/sklearn.py index 79c999e4..303f5ec4 100644 --- a/eland/ml/transformers/sklearn.py +++ b/eland/ml/transformers/sklearn.py @@ -15,16 +15,21 @@ # specific language governing permissions and limitations # under the License. +from typing import Any, Dict, Optional, Sequence, Tuple, Type, Union + import numpy as np # type: ignore -from typing import Optional, Sequence, Union, Dict, Any, Type, Tuple -from .base import ModelTransformer -from ..common import TYPE_CLASSIFICATION, TYPE_REGRESSION -from .._optional import import_optional_dependency + from .._model_serializer import Ensemble, Tree, TreeNode +from .._optional import import_optional_dependency +from ..common import TYPE_CLASSIFICATION, TYPE_REGRESSION +from .base import ModelTransformer import_optional_dependency("sklearn", on_version="warn") -from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor # type: ignore +from sklearn.ensemble import ( # type: ignore + RandomForestClassifier, + RandomForestRegressor, +) from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor # type: ignore from sklearn.utils.validation import check_is_fitted # type: ignore diff --git a/eland/ml/transformers/xgboost.py b/eland/ml/transformers/xgboost.py index 4adb4d20..5d4e85ea 100644 --- a/eland/ml/transformers/xgboost.py +++ b/eland/ml/transformers/xgboost.py @@ -16,16 +16,18 @@ # under the License. import re -from typing import Optional, List, Dict, Any, Type -from .base import ModelTransformer +from typing import Any, Dict, List, Optional, Type + import pandas as pd # type: ignore + from .._model_serializer import Ensemble, Tree, TreeNode -from ..common import TYPE_CLASSIFICATION, TYPE_REGRESSION from .._optional import import_optional_dependency +from ..common import TYPE_CLASSIFICATION, TYPE_REGRESSION +from .base import ModelTransformer import_optional_dependency("xgboost", on_version="warn") -from xgboost import Booster, XGBRegressor, XGBClassifier # type: ignore +from xgboost import Booster, XGBClassifier, XGBRegressor # type: ignore class XGBoostForestTransformer(ModelTransformer): diff --git a/eland/ndframe.py b/eland/ndframe.py index e55f5576..17ef2886 100644 --- a/eland/ndframe.py +++ b/eland/ndframe.py @@ -17,10 +17,11 @@ import sys from abc import ABC, abstractmethod -from typing import TYPE_CHECKING, Tuple, Optional +from typing import TYPE_CHECKING, Optional, Tuple + import pandas as pd -from eland.query_compiler import QueryCompiler +from eland.query_compiler import QueryCompiler if TYPE_CHECKING: from eland.index import Index diff --git a/eland/operations.py b/eland/operations.py index 63fcf899..70251560 100644 --- a/eland/operations.py +++ b/eland/operations.py @@ -17,49 +17,49 @@ import copy import warnings +from collections import defaultdict from typing import ( + TYPE_CHECKING, + Any, + Dict, Generator, + List, Optional, Sequence, Tuple, - List, - Dict, - Any, - TYPE_CHECKING, Union, ) import numpy as np import pandas as pd -from collections import defaultdict from elasticsearch.helpers import scan -from eland.index import Index +from eland.actions import PostProcessingAction, SortFieldAction from eland.common import ( - SortOrder, DEFAULT_CSV_BATCH_OUTPUT_SIZE, DEFAULT_ES_MAX_RESULT_WINDOW, - elasticsearch_date_to_pandas_date, - build_pd_series, DEFAULT_PAGINATION_SIZE, + SortOrder, + build_pd_series, + elasticsearch_date_to_pandas_date, ) +from eland.index import Index from eland.query import Query -from eland.actions import PostProcessingAction, SortFieldAction from eland.tasks import ( - HeadTask, RESOLVED_TASK_TYPE, - TailTask, - SampleTask, - BooleanFilterTask, ArithmeticOpFieldsTask, - QueryTermsTask, + BooleanFilterTask, + HeadTask, QueryIdsTask, + QueryTermsTask, + SampleTask, SizeTask, + TailTask, ) if TYPE_CHECKING: - from eland.query_compiler import QueryCompiler from eland.field_mappings import Field + from eland.query_compiler import QueryCompiler class QueryParams: diff --git a/eland/plotting/__init__.py b/eland/plotting/__init__.py index 61c16a05..571191f9 100644 --- a/eland/plotting/__init__.py +++ b/eland/plotting/__init__.py @@ -22,10 +22,7 @@ but only supporting a subset of plotting methods (for now). """ -from eland.plotting._core import ( - ed_hist_frame, - ed_hist_series, -) +from eland.plotting._core import ed_hist_frame, ed_hist_series __all__ = [ "ed_hist_frame", diff --git a/eland/plotting/_core.py b/eland/plotting/_core.py index 26951bb0..2f402fed 100644 --- a/eland/plotting/_core.py +++ b/eland/plotting/_core.py @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. -from eland.plotting._matplotlib.hist import hist_series, hist_frame +from eland.plotting._matplotlib.hist import hist_frame, hist_series def ed_hist_series( diff --git a/eland/plotting/_matplotlib/__init__.py b/eland/plotting/_matplotlib/__init__.py index 3af30269..15d74ec8 100644 --- a/eland/plotting/_matplotlib/__init__.py +++ b/eland/plotting/_matplotlib/__init__.py @@ -22,10 +22,7 @@ but only supporting a subset of plotting methods (for now). """ -from eland.plotting._matplotlib.hist import ( - hist_frame, - hist_series, -) +from eland.plotting._matplotlib.hist import hist_frame, hist_series __all__ = [ "hist_frame", diff --git a/eland/plotting/_matplotlib/hist.py b/eland/plotting/_matplotlib/hist.py index 197d9b95..f3b33419 100644 --- a/eland/plotting/_matplotlib/hist.py +++ b/eland/plotting/_matplotlib/hist.py @@ -19,6 +19,7 @@ from pandas.core.dtypes.generic import ABCIndexClass from pandas.plotting._matplotlib import converter from pandas.plotting._matplotlib.tools import _flatten, _set_ticks_props, _subplots + from eland.utils import try_sort diff --git a/eland/query.py b/eland/query.py index 3c2ad1b5..8d55fa25 100644 --- a/eland/query.py +++ b/eland/query.py @@ -17,16 +17,9 @@ import warnings from copy import deepcopy -from typing import Optional, Dict, List, Any - -from eland.filter import ( - RandomScoreFilter, - BooleanFilter, - NotNull, - IsNull, - IsIn, - Rlike, -) +from typing import Any, Dict, List, Optional + +from eland.filter import BooleanFilter, IsIn, IsNull, NotNull, RandomScoreFilter, Rlike class Query: diff --git a/eland/query_compiler.py b/eland/query_compiler.py index f1a4240d..956e402d 100644 --- a/eland/query_compiler.py +++ b/eland/query_compiler.py @@ -17,20 +17,20 @@ import copy from datetime import datetime -from typing import Optional, Sequence, TYPE_CHECKING, List +from typing import TYPE_CHECKING, List, Optional, Sequence import numpy as np # type: ignore import pandas as pd # type: ignore -from eland.field_mappings import FieldMappings -from eland.filter import QueryFilter -from eland.operations import Operations -from eland.index import Index from eland.common import ( - ensure_es_client, DEFAULT_PROGRESS_REPORTING_NUM_ROWS, elasticsearch_date_to_pandas_date, + ensure_es_client, ) +from eland.field_mappings import FieldMappings +from eland.filter import QueryFilter +from eland.index import Index +from eland.operations import Operations if TYPE_CHECKING: from .tasks import ArithmeticOpFieldsTask # noqa: F401 diff --git a/eland/series.py b/eland/series.py index fcf21738..3004693b 100644 --- a/eland/series.py +++ b/eland/series.py @@ -35,33 +35,34 @@ import warnings from collections.abc import Collection from io import StringIO -from typing import Optional, Union, Sequence, Any, Tuple, TYPE_CHECKING +from typing import TYPE_CHECKING, Any, Optional, Sequence, Tuple, Union import numpy as np import pandas as pd from pandas.io.common import _expand_user, stringify_path import eland.plotting -from eland import NDFrame -from eland.arithmetics import ArithmeticSeries, ArithmeticString, ArithmeticNumber +from eland.arithmetics import ArithmeticNumber, ArithmeticSeries, ArithmeticString from eland.common import DEFAULT_NUM_ROWS_DISPLAYED, docstring_parameter from eland.filter import ( BooleanFilter, - NotFilter, Equal, Greater, - Less, GreaterEqual, - LessEqual, - ScriptFilter, IsIn, IsNull, + Less, + LessEqual, + NotFilter, NotNull, + ScriptFilter, ) +from eland.ndframe import NDFrame from eland.utils import deprecated_api, to_list if TYPE_CHECKING: # type: ignore from elasticsearch import Elasticsearch # noqa: F401 + from eland.query_compiler import QueryCompiler # noqa: F401 diff --git a/eland/tasks.py b/eland/tasks.py index f2b5b397..fff7ec02 100644 --- a/eland/tasks.py +++ b/eland/tasks.py @@ -16,18 +16,18 @@ # under the License. from abc import ABC, abstractmethod -from typing import TYPE_CHECKING, List, Any, Tuple +from typing import TYPE_CHECKING, Any, List, Tuple from eland import SortOrder -from eland.actions import HeadAction, TailAction, SortIndexAction +from eland.actions import HeadAction, SortIndexAction, TailAction from eland.arithmetics import ArithmeticSeries if TYPE_CHECKING: from .actions import PostProcessingAction # noqa: F401 from .filter import BooleanFilter # noqa: F401 - from .query_compiler import QueryCompiler # noqa: F401 - from .operations import QueryParams # noqa: F401 from .index import Index # noqa: F401 + from .operations import QueryParams # noqa: F401 + from .query_compiler import QueryCompiler # noqa: F401 RESOLVED_TASK_TYPE = Tuple["QueryParams", List["PostProcessingAction"]] diff --git a/eland/tests/__init__.py b/eland/tests/__init__.py index eb0b155a..7b0ba966 100644 --- a/eland/tests/__init__.py +++ b/eland/tests/__init__.py @@ -19,6 +19,7 @@ import pandas as pd from elasticsearch import Elasticsearch + from eland.common import es_version ROOT_DIR = os.path.dirname(os.path.abspath(__file__)) diff --git a/eland/tests/common.py b/eland/tests/common.py index 8bc4e860..21553cea 100644 --- a/eland/tests/common.py +++ b/eland/tests/common.py @@ -26,12 +26,12 @@ # Create pandas and eland data frames from eland.tests import ( + ECOMMERCE_DF_FILE_NAME, + ECOMMERCE_INDEX_NAME, ES_TEST_CLIENT, FLIGHTS_DF_FILE_NAME, FLIGHTS_INDEX_NAME, FLIGHTS_SMALL_INDEX_NAME, - ECOMMERCE_DF_FILE_NAME, - ECOMMERCE_INDEX_NAME, ) _pd_flights = pd.read_json(FLIGHTS_DF_FILE_NAME).sort_index() diff --git a/eland/tests/conftest.py b/eland/tests/conftest.py index 525862c5..680942f1 100644 --- a/eland/tests/conftest.py +++ b/eland/tests/conftest.py @@ -16,22 +16,25 @@ # under the License. import inspect -import pytest + import pandas as pd +import pytest + +import eland as ed + from .common import ( - assert_pandas_eland_frame_equal, - assert_pandas_eland_series_equal, - assert_frame_equal, - assert_series_equal, - _ed_flights, - _pd_flights, + TestData, _ed_ecommerce, - _pd_ecommerce, + _ed_flights, _ed_flights_small, + _pd_ecommerce, + _pd_flights, _pd_flights_small, - TestData, + assert_frame_equal, + assert_pandas_eland_frame_equal, + assert_pandas_eland_series_equal, + assert_series_equal, ) -import eland as ed class SymmetricAPIChecker: diff --git a/eland/tests/dataframe/test_aggs_pytest.py b/eland/tests/dataframe/test_aggs_pytest.py index e483f471..54b05a23 100644 --- a/eland/tests/dataframe/test_aggs_pytest.py +++ b/eland/tests/dataframe/test_aggs_pytest.py @@ -18,8 +18,9 @@ # File called _pytest for PyCharm compatability import numpy as np -from pandas.testing import assert_frame_equal, assert_series_equal import pytest +from pandas.testing import assert_frame_equal, assert_series_equal + from eland.tests.common import TestData diff --git a/eland/tests/dataframe/test_big_mapping_pytest.py b/eland/tests/dataframe/test_big_mapping_pytest.py index de268f91..425365ce 100644 --- a/eland/tests/dataframe/test_big_mapping_pytest.py +++ b/eland/tests/dataframe/test_big_mapping_pytest.py @@ -18,8 +18,7 @@ # File called _pytest for PyCharm compatability import eland as ed -from eland.tests.common import ES_TEST_CLIENT -from eland.tests.common import TestData +from eland.tests.common import ES_TEST_CLIENT, TestData class TestDataFrameBigMapping(TestData): diff --git a/eland/tests/dataframe/test_datetime_pytest.py b/eland/tests/dataframe/test_datetime_pytest.py index b523e563..99ae51ab 100644 --- a/eland/tests/dataframe/test_datetime_pytest.py +++ b/eland/tests/dataframe/test_datetime_pytest.py @@ -24,10 +24,12 @@ import eland as ed from eland.field_mappings import FieldMappings -from eland.tests.common import ES_TEST_CLIENT -from eland.tests.common import TestData -from eland.tests.common import assert_pandas_eland_frame_equal -from eland.tests.common import assert_pandas_eland_series_equal +from eland.tests.common import ( + ES_TEST_CLIENT, + TestData, + assert_pandas_eland_frame_equal, + assert_pandas_eland_series_equal, +) class TestDataFrameDateTime(TestData): diff --git a/eland/tests/dataframe/test_dtypes_pytest.py b/eland/tests/dataframe/test_dtypes_pytest.py index fd0630cf..729d9634 100644 --- a/eland/tests/dataframe/test_dtypes_pytest.py +++ b/eland/tests/dataframe/test_dtypes_pytest.py @@ -19,6 +19,7 @@ import numpy as np import pandas as pd + from eland.tests.common import assert_series_equal diff --git a/eland/tests/dataframe/test_es_query_pytest.py b/eland/tests/dataframe/test_es_query_pytest.py index 95e24dd8..fe9429a8 100644 --- a/eland/tests/dataframe/test_es_query_pytest.py +++ b/eland/tests/dataframe/test_es_query_pytest.py @@ -18,8 +18,8 @@ # File called _pytest for PyCharm compatability import pytest -from eland.tests.common import TestData -from eland.tests.common import assert_eland_frame_equal + +from eland.tests.common import TestData, assert_eland_frame_equal class TestDataEsQuery(TestData): diff --git a/eland/tests/dataframe/test_filter_pytest.py b/eland/tests/dataframe/test_filter_pytest.py index 9524e17a..9fdc7a27 100644 --- a/eland/tests/dataframe/test_filter_pytest.py +++ b/eland/tests/dataframe/test_filter_pytest.py @@ -18,6 +18,7 @@ # File called _pytest for PyCharm compatability import pytest + from eland.tests.common import TestData diff --git a/eland/tests/dataframe/test_groupby_pytest.py b/eland/tests/dataframe/test_groupby_pytest.py index 3cdd48a7..3ae95a01 100644 --- a/eland/tests/dataframe/test_groupby_pytest.py +++ b/eland/tests/dataframe/test_groupby_pytest.py @@ -17,10 +17,11 @@ # File called _pytest for PyCharm compatability +import pandas as pd import pytest from pandas.testing import assert_frame_equal, assert_series_equal + from eland.tests.common import TestData -import pandas as pd class TestGroupbyDataFrame(TestData): diff --git a/eland/tests/dataframe/test_head_tail_pytest.py b/eland/tests/dataframe/test_head_tail_pytest.py index 829159b1..ec1f0a42 100644 --- a/eland/tests/dataframe/test_head_tail_pytest.py +++ b/eland/tests/dataframe/test_head_tail_pytest.py @@ -17,8 +17,7 @@ # File called _pytest for PyCharm compatability -from eland.tests.common import TestData -from eland.tests.common import assert_pandas_eland_frame_equal +from eland.tests.common import TestData, assert_pandas_eland_frame_equal class TestDataFrameHeadTail(TestData): diff --git a/eland/tests/dataframe/test_info_pytest.py b/eland/tests/dataframe/test_info_pytest.py index b7151026..6b38a0a9 100644 --- a/eland/tests/dataframe/test_info_pytest.py +++ b/eland/tests/dataframe/test_info_pytest.py @@ -19,9 +19,7 @@ from io import StringIO import eland as ed - from eland.tests import ES_TEST_CLIENT - from eland.tests.common import TestData diff --git a/eland/tests/dataframe/test_init_pytest.py b/eland/tests/dataframe/test_init_pytest.py index b8c5aa12..e33dd6bf 100644 --- a/eland/tests/dataframe/test_init_pytest.py +++ b/eland/tests/dataframe/test_init_pytest.py @@ -21,8 +21,7 @@ import eland as ed from eland.query_compiler import QueryCompiler -from eland.tests import ES_TEST_CLIENT -from eland.tests import FLIGHTS_INDEX_NAME +from eland.tests import ES_TEST_CLIENT, FLIGHTS_INDEX_NAME class TestDataFrameInit: diff --git a/eland/tests/dataframe/test_metrics_pytest.py b/eland/tests/dataframe/test_metrics_pytest.py index dbef894e..d3d57851 100644 --- a/eland/tests/dataframe/test_metrics_pytest.py +++ b/eland/tests/dataframe/test_metrics_pytest.py @@ -15,11 +15,13 @@ # specific language governing permissions and limitations # under the License. -# File called _pytest for PyCharm compatibility -import pytest import numpy as np import pandas as pd + +# File called _pytest for PyCharm compatibility +import pytest from pandas.testing import assert_series_equal + from eland.tests.common import TestData diff --git a/eland/tests/dataframe/test_query_pytest.py b/eland/tests/dataframe/test_query_pytest.py index 97a9eebd..321f351c 100644 --- a/eland/tests/dataframe/test_query_pytest.py +++ b/eland/tests/dataframe/test_query_pytest.py @@ -20,9 +20,7 @@ import pandas as pd import eland as ed -from eland.tests.common import ES_TEST_CLIENT -from eland.tests.common import TestData -from eland.tests.common import assert_pandas_eland_frame_equal +from eland.tests.common import ES_TEST_CLIENT, TestData, assert_pandas_eland_frame_equal class TestDataFrameQuery(TestData): diff --git a/eland/tests/dataframe/test_sample_pytest.py b/eland/tests/dataframe/test_sample_pytest.py index 9a4a5757..33451261 100644 --- a/eland/tests/dataframe/test_sample_pytest.py +++ b/eland/tests/dataframe/test_sample_pytest.py @@ -19,8 +19,8 @@ import pytest from pandas.testing import assert_frame_equal -from eland.tests.common import TestData from eland import eland_to_pandas +from eland.tests.common import TestData class TestDataFrameSample(TestData): diff --git a/eland/tests/dataframe/test_select_dtypes_pytest.py b/eland/tests/dataframe/test_select_dtypes_pytest.py index d00ae178..738bf3bd 100644 --- a/eland/tests/dataframe/test_select_dtypes_pytest.py +++ b/eland/tests/dataframe/test_select_dtypes_pytest.py @@ -18,8 +18,7 @@ # File called _pytest for PyCharm compatability import numpy as np -from eland.tests.common import TestData -from eland.tests.common import assert_pandas_eland_frame_equal +from eland.tests.common import TestData, assert_pandas_eland_frame_equal class TestDataFrameSelectDTypes(TestData): diff --git a/eland/tests/dataframe/test_to_csv_pytest.py b/eland/tests/dataframe/test_to_csv_pytest.py index 5ca50482..bb9560f4 100644 --- a/eland/tests/dataframe/test_to_csv_pytest.py +++ b/eland/tests/dataframe/test_to_csv_pytest.py @@ -24,10 +24,8 @@ from pandas.testing import assert_frame_equal import eland as ed -from eland.tests import ES_TEST_CLIENT -from eland.tests import FLIGHTS_INDEX_NAME -from eland.tests.common import ROOT_DIR -from eland.tests.common import TestData +from eland.tests import ES_TEST_CLIENT, FLIGHTS_INDEX_NAME +from eland.tests.common import ROOT_DIR, TestData class TestDataFrameToCSV(TestData): diff --git a/eland/tests/dataframe/test_utils_pytest.py b/eland/tests/dataframe/test_utils_pytest.py index fa5985ae..e1b57c33 100644 --- a/eland/tests/dataframe/test_utils_pytest.py +++ b/eland/tests/dataframe/test_utils_pytest.py @@ -22,8 +22,7 @@ import eland as ed from eland.field_mappings import FieldMappings -from eland.tests.common import ES_TEST_CLIENT, assert_pandas_eland_frame_equal -from eland.tests.common import TestData +from eland.tests.common import ES_TEST_CLIENT, TestData, assert_pandas_eland_frame_equal class TestDataFrameUtils(TestData): diff --git a/eland/tests/etl/test_pandas_to_eland.py b/eland/tests/etl/test_pandas_to_eland.py index 2ffb01ba..e4a054ab 100644 --- a/eland/tests/etl/test_pandas_to_eland.py +++ b/eland/tests/etl/test_pandas_to_eland.py @@ -16,10 +16,12 @@ # under the License. from datetime import datetime, timedelta -import pytest + import pandas as pd +import pytest from elasticsearch.helpers import BulkIndexError -from eland import pandas_to_eland, DataFrame + +from eland import DataFrame, pandas_to_eland from eland.tests.common import ( ES_TEST_CLIENT, assert_frame_equal, diff --git a/eland/tests/field_mappings/test_aggregatables_pytest.py b/eland/tests/field_mappings/test_aggregatables_pytest.py index 2f4a62f8..c6af0a30 100644 --- a/eland/tests/field_mappings/test_aggregatables_pytest.py +++ b/eland/tests/field_mappings/test_aggregatables_pytest.py @@ -19,7 +19,7 @@ import pytest from eland.field_mappings import FieldMappings -from eland.tests import ES_TEST_CLIENT, ECOMMERCE_INDEX_NAME +from eland.tests import ECOMMERCE_INDEX_NAME, ES_TEST_CLIENT from eland.tests.common import TestData diff --git a/eland/tests/field_mappings/test_datetime_pytest.py b/eland/tests/field_mappings/test_datetime_pytest.py index 4ce575aa..447381e5 100644 --- a/eland/tests/field_mappings/test_datetime_pytest.py +++ b/eland/tests/field_mappings/test_datetime_pytest.py @@ -19,8 +19,7 @@ from datetime import datetime from eland.field_mappings import FieldMappings -from eland.tests.common import ES_TEST_CLIENT -from eland.tests.common import TestData +from eland.tests.common import ES_TEST_CLIENT, TestData class TestDateTime(TestData): diff --git a/eland/tests/field_mappings/test_field_name_pd_dtype_pytest.py b/eland/tests/field_mappings/test_field_name_pd_dtype_pytest.py index 30bdc346..94efe2cd 100644 --- a/eland/tests/field_mappings/test_field_name_pd_dtype_pytest.py +++ b/eland/tests/field_mappings/test_field_name_pd_dtype_pytest.py @@ -21,8 +21,7 @@ from eland.field_mappings import FieldMappings from eland.tests import FLIGHTS_INDEX_NAME, FLIGHTS_MAPPING -from eland.tests.common import ES_TEST_CLIENT -from eland.tests.common import TestData +from eland.tests.common import ES_TEST_CLIENT, TestData class TestFieldNamePDDType(TestData): diff --git a/eland/tests/field_mappings/test_get_field_names_pytest.py b/eland/tests/field_mappings/test_get_field_names_pytest.py index c56ccda8..59bdd385 100644 --- a/eland/tests/field_mappings/test_get_field_names_pytest.py +++ b/eland/tests/field_mappings/test_get_field_names_pytest.py @@ -21,7 +21,7 @@ # File called _pytest for PyCharm compatability from eland.field_mappings import FieldMappings -from eland.tests import FLIGHTS_INDEX_NAME, ES_TEST_CLIENT +from eland.tests import ES_TEST_CLIENT, FLIGHTS_INDEX_NAME from eland.tests.common import TestData diff --git a/eland/tests/field_mappings/test_metric_source_fields_pytest.py b/eland/tests/field_mappings/test_metric_source_fields_pytest.py index 6cf8001f..9c4e2c94 100644 --- a/eland/tests/field_mappings/test_metric_source_fields_pytest.py +++ b/eland/tests/field_mappings/test_metric_source_fields_pytest.py @@ -20,7 +20,7 @@ import numpy as np from eland.field_mappings import FieldMappings -from eland.tests import ES_TEST_CLIENT, ECOMMERCE_INDEX_NAME, FLIGHTS_INDEX_NAME +from eland.tests import ECOMMERCE_INDEX_NAME, ES_TEST_CLIENT, FLIGHTS_INDEX_NAME from eland.tests.common import TestData diff --git a/eland/tests/field_mappings/test_scripted_fields_pytest.py b/eland/tests/field_mappings/test_scripted_fields_pytest.py index f2f276ad..0e3d4f99 100644 --- a/eland/tests/field_mappings/test_scripted_fields_pytest.py +++ b/eland/tests/field_mappings/test_scripted_fields_pytest.py @@ -21,7 +21,7 @@ import numpy as np from eland.field_mappings import FieldMappings -from eland.tests import FLIGHTS_INDEX_NAME, ES_TEST_CLIENT +from eland.tests import ES_TEST_CLIENT, FLIGHTS_INDEX_NAME from eland.tests.common import TestData diff --git a/eland/tests/ml/test_imported_ml_model_pytest.py b/eland/tests/ml/test_imported_ml_model_pytest.py index d9ca4a4a..9e8445d4 100644 --- a/eland/tests/ml/test_imported_ml_model_pytest.py +++ b/eland/tests/ml/test_imported_ml_model_pytest.py @@ -15,16 +15,15 @@ # specific language governing permissions and limitations # under the License. -import pytest import numpy as np +import pytest from eland.ml import MLModel from eland.tests import ES_TEST_CLIENT, ES_VERSION - try: from sklearn import datasets - from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier + from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor HAS_SKLEARN = True @@ -32,14 +31,14 @@ HAS_SKLEARN = False try: - from xgboost import XGBRegressor, XGBClassifier + from xgboost import XGBClassifier, XGBRegressor HAS_XGBOOST = True except ImportError: HAS_XGBOOST = False try: - from lightgbm import LGBMRegressor, LGBMClassifier + from lightgbm import LGBMClassifier, LGBMRegressor HAS_LIGHTGBM = True except ImportError: diff --git a/eland/tests/operators/test_operators_pytest.py b/eland/tests/operators/test_operators_pytest.py index 0b7cb616..7fe602ca 100644 --- a/eland/tests/operators/test_operators_pytest.py +++ b/eland/tests/operators/test_operators_pytest.py @@ -16,18 +16,18 @@ # under the License. from eland.filter import ( + Equal, Greater, GreaterEqual, - Less, - LessEqual, - Equal, IsIn, IsNull, + Less, + LessEqual, Like, - Rlike, - Startswith, NotNull, + Rlike, ScriptFilter, + Startswith, ) diff --git a/eland/tests/series/test_describe_pytest.py b/eland/tests/series/test_describe_pytest.py index 2f6f6ec7..2f255ebf 100644 --- a/eland/tests/series/test_describe_pytest.py +++ b/eland/tests/series/test_describe_pytest.py @@ -16,6 +16,7 @@ # under the License. import pandas as pd + from eland.tests.common import TestData, assert_series_equal diff --git a/eland/tests/series/test_dtype_pytest.py b/eland/tests/series/test_dtype_pytest.py index eb00c989..4c45531d 100644 --- a/eland/tests/series/test_dtype_pytest.py +++ b/eland/tests/series/test_dtype_pytest.py @@ -15,10 +15,12 @@ # specific language governing permissions and limitations # under the License. +import warnings + import numpy as np import pandas as pd -import warnings -from eland.common import build_pd_series, EMPTY_SERIES_DTYPE + +from eland.common import EMPTY_SERIES_DTYPE, build_pd_series from eland.tests.common import assert_series_equal diff --git a/eland/tests/series/test_filter_pytest.py b/eland/tests/series/test_filter_pytest.py index 052ef9a8..9da03c7c 100644 --- a/eland/tests/series/test_filter_pytest.py +++ b/eland/tests/series/test_filter_pytest.py @@ -18,8 +18,8 @@ # File called _pytest for PyCharm compatability import pytest -from eland.tests.common import TestData -from eland.tests.common import assert_pandas_eland_series_equal + +from eland.tests.common import TestData, assert_pandas_eland_series_equal class TestSeriesFilter(TestData): diff --git a/eland/tests/series/test_head_tail_pytest.py b/eland/tests/series/test_head_tail_pytest.py index 49cb83c9..93c1d3f5 100644 --- a/eland/tests/series/test_head_tail_pytest.py +++ b/eland/tests/series/test_head_tail_pytest.py @@ -17,10 +17,8 @@ # File called _pytest for PyCharm compatability import eland as ed -from eland.tests import ES_TEST_CLIENT -from eland.tests import FLIGHTS_INDEX_NAME -from eland.tests.common import TestData -from eland.tests.common import assert_pandas_eland_series_equal +from eland.tests import ES_TEST_CLIENT, FLIGHTS_INDEX_NAME +from eland.tests.common import TestData, assert_pandas_eland_series_equal class TestSeriesHeadTail(TestData): diff --git a/eland/tests/series/test_metrics_pytest.py b/eland/tests/series/test_metrics_pytest.py index d8e213d2..01ce34a8 100644 --- a/eland/tests/series/test_metrics_pytest.py +++ b/eland/tests/series/test_metrics_pytest.py @@ -17,11 +17,12 @@ # File called _pytest for PyCharm compatability -import pytest -import pandas as pd -import numpy as np from datetime import timedelta +import numpy as np +import pandas as pd +import pytest + from eland.tests.common import TestData diff --git a/eland/tests/series/test_na_pytest.py b/eland/tests/series/test_na_pytest.py index d9267b92..a11c91c2 100644 --- a/eland/tests/series/test_na_pytest.py +++ b/eland/tests/series/test_na_pytest.py @@ -16,8 +16,7 @@ # under the License. from eland import eland_to_pandas -from eland.tests.common import TestData -from eland.tests.common import assert_pandas_eland_frame_equal +from eland.tests.common import TestData, assert_pandas_eland_frame_equal class TestSeriesNA(TestData): diff --git a/eland/tests/series/test_name_pytest.py b/eland/tests/series/test_name_pytest.py index 37209165..619aa9c3 100644 --- a/eland/tests/series/test_name_pytest.py +++ b/eland/tests/series/test_name_pytest.py @@ -17,10 +17,8 @@ # File called _pytest for PyCharm compatability import eland as ed -from eland.tests import ES_TEST_CLIENT -from eland.tests import FLIGHTS_INDEX_NAME -from eland.tests.common import TestData -from eland.tests.common import assert_pandas_eland_series_equal +from eland.tests import ES_TEST_CLIENT, FLIGHTS_INDEX_NAME +from eland.tests.common import TestData, assert_pandas_eland_series_equal class TestSeriesName(TestData): diff --git a/eland/tests/series/test_rename_pytest.py b/eland/tests/series/test_rename_pytest.py index ae970c05..b99b9bb6 100644 --- a/eland/tests/series/test_rename_pytest.py +++ b/eland/tests/series/test_rename_pytest.py @@ -17,10 +17,8 @@ # File called _pytest for PyCharm compatability import eland as ed -from eland.tests import ES_TEST_CLIENT -from eland.tests import FLIGHTS_INDEX_NAME -from eland.tests.common import TestData -from eland.tests.common import assert_pandas_eland_series_equal +from eland.tests import ES_TEST_CLIENT, FLIGHTS_INDEX_NAME +from eland.tests.common import TestData, assert_pandas_eland_series_equal class TestSeriesRename(TestData): diff --git a/eland/tests/series/test_repr_pytest.py b/eland/tests/series/test_repr_pytest.py index f1da73d7..61c103c5 100644 --- a/eland/tests/series/test_repr_pytest.py +++ b/eland/tests/series/test_repr_pytest.py @@ -17,8 +17,7 @@ # File called _pytest for PyCharm compatability import eland as ed -from eland.tests import ES_TEST_CLIENT -from eland.tests import FLIGHTS_INDEX_NAME +from eland.tests import ES_TEST_CLIENT, FLIGHTS_INDEX_NAME from eland.tests.common import TestData diff --git a/eland/tests/series/test_sample_pytest.py b/eland/tests/series/test_sample_pytest.py index 8de43e38..184e4b3f 100644 --- a/eland/tests/series/test_sample_pytest.py +++ b/eland/tests/series/test_sample_pytest.py @@ -17,10 +17,8 @@ # File called _pytest for PyCharm compatibility import eland as ed -from eland.tests import ES_TEST_CLIENT -from eland.tests import FLIGHTS_INDEX_NAME -from eland.tests.common import TestData -from eland.tests.common import assert_pandas_eland_series_equal +from eland.tests import ES_TEST_CLIENT, FLIGHTS_INDEX_NAME +from eland.tests.common import TestData, assert_pandas_eland_series_equal class TestSeriesSample(TestData): diff --git a/eland/tests/setup_tests.py b/eland/tests/setup_tests.py index b858c645..a5075237 100644 --- a/eland/tests/setup_tests.py +++ b/eland/tests/setup_tests.py @@ -18,25 +18,24 @@ import pandas as pd from elasticsearch import helpers +from eland.common import es_version from eland.tests import ( + ECOMMERCE_FILE_NAME, + ECOMMERCE_INDEX_NAME, + ECOMMERCE_MAPPING, + ELASTICSEARCH_HOST, + ES_TEST_CLIENT, FLIGHTS_FILE_NAME, FLIGHTS_INDEX_NAME, + FLIGHTS_MAPPING, FLIGHTS_SMALL_FILE_NAME, FLIGHTS_SMALL_INDEX_NAME, - FLIGHTS_MAPPING, - ECOMMERCE_FILE_NAME, - ECOMMERCE_INDEX_NAME, - ECOMMERCE_MAPPING, TEST_MAPPING1, TEST_MAPPING1_INDEX_NAME, TEST_NESTED_USER_GROUP_DOCS, TEST_NESTED_USER_GROUP_INDEX_NAME, TEST_NESTED_USER_GROUP_MAPPING, - ES_TEST_CLIENT, - ELASTICSEARCH_HOST, ) -from eland.common import es_version - DATA_LIST = [ (FLIGHTS_FILE_NAME, FLIGHTS_INDEX_NAME, FLIGHTS_MAPPING), diff --git a/eland/utils.py b/eland/utils.py index 66bf5862..de63e19e 100644 --- a/eland/utils.py +++ b/eland/utils.py @@ -15,13 +15,13 @@ # specific language governing permissions and limitations # under the License. -import re import functools +import re import warnings -from typing import Callable, TypeVar, Any, Union, List, cast, Collection, Iterable from collections.abc import Collection as ABCCollection -import pandas as pd # type: ignore +from typing import Any, Callable, Collection, Iterable, List, TypeVar, Union, cast +import pandas as pd # type: ignore RT = TypeVar("RT") diff --git a/noxfile.py b/noxfile.py index fe8ec113..52517f53 100644 --- a/noxfile.py +++ b/noxfile.py @@ -18,9 +18,9 @@ import os import subprocess from pathlib import Path -import nox -import elasticsearch +import elasticsearch +import nox BASE_DIR = Path(__file__).parent SOURCE_FILES = ( @@ -57,18 +57,20 @@ @nox.session(reuse_venv=True) -def blacken(session): - session.install("black") +def format(session): + session.install("black", "isort") session.run("python", "utils/license-headers.py", "fix", *SOURCE_FILES) session.run("black", "--target-version=py36", *SOURCE_FILES) + session.run("isort", *SOURCE_FILES) lint(session) @nox.session(reuse_venv=True) def lint(session): - session.install("black", "flake8", "mypy") + session.install("black", "flake8", "mypy", "isort") session.run("python", "utils/license-headers.py", "check", *SOURCE_FILES) session.run("black", "--check", "--target-version=py36", *SOURCE_FILES) + session.run("isort", "--check", *SOURCE_FILES) session.run("flake8", "--ignore=E501,W503,E402,E712,E203", *SOURCE_FILES) # TODO: When all files are typed we can change this to .run("mypy", "--strict", "eland/") diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 00000000..c76db01f --- /dev/null +++ b/setup.cfg @@ -0,0 +1,2 @@ +[isort] +profile = black diff --git a/setup.py b/setup.py index 450a7be4..8b36258a 100644 --- a/setup.py +++ b/setup.py @@ -20,7 +20,7 @@ from codecs import open from os import path -from setuptools import setup, find_packages +from setuptools import find_packages, setup here = path.abspath(path.dirname(__file__)) about = {} diff --git a/utils/generate-supported-apis.py b/utils/generate-supported-apis.py index 58020da5..ba4b939a 100644 --- a/utils/generate-supported-apis.py +++ b/utils/generate-supported-apis.py @@ -17,12 +17,13 @@ """Script that is used to create the compatibility matrix in the documentation""" -import re -import eland -import pandas import inspect +import re from pathlib import Path +import pandas + +import eland api_docs_dir = Path(__file__).absolute().parent.parent / "docs/source/reference/api" is_supported = [] diff --git a/utils/license-headers.py b/utils/license-headers.py index 993334d9..d501c85e 100644 --- a/utils/license-headers.py +++ b/utils/license-headers.py @@ -23,9 +23,8 @@ import os import sys -from typing import List, Iterator from itertools import chain - +from typing import Iterator, List lines_to_keep = ["# -*- coding: utf-8 -*-\n", "#!/usr/bin/env python\n"] license_header_lines = [