From e2353fe0d99fbf01fed07e12d4dd996ab144aa30 Mon Sep 17 00:00:00 2001 From: TheMellyBee Date: Tue, 23 Aug 2022 02:35:52 -0700 Subject: [PATCH 01/71] First almost direct port of resolver, dataschema and ColumnSchema --- .../com/whylogs/core/resolvers/Resolver.java | 10 +++ .../whylogs/core/schemas/ColumnSchema.java | 26 ++++++ .../whylogs/core/schemas/DatasetSchema.java | 82 +++++++++++++++++++ 3 files changed, 118 insertions(+) create mode 100644 java/core/src/main/java/com/whylogs/core/resolvers/Resolver.java create mode 100644 java/core/src/main/java/com/whylogs/core/schemas/ColumnSchema.java create mode 100644 java/core/src/main/java/com/whylogs/core/schemas/DatasetSchema.java diff --git a/java/core/src/main/java/com/whylogs/core/resolvers/Resolver.java b/java/core/src/main/java/com/whylogs/core/resolvers/Resolver.java new file mode 100644 index 0000000000..895080ee05 --- /dev/null +++ b/java/core/src/main/java/com/whylogs/core/resolvers/Resolver.java @@ -0,0 +1,10 @@ +package com.whylogs.core.resolvers; + +import com.whylogs.core.metrics.Metric; +import com.whylogs.core.schemas.ColumnSchema; + +import java.util.HashMap; + +public abstract class Resolver { + public abstract HashMap resolve(ColumnSchema schema); +} diff --git a/java/core/src/main/java/com/whylogs/core/schemas/ColumnSchema.java b/java/core/src/main/java/com/whylogs/core/schemas/ColumnSchema.java new file mode 100644 index 0000000000..ba0057e6b9 --- /dev/null +++ b/java/core/src/main/java/com/whylogs/core/schemas/ColumnSchema.java @@ -0,0 +1,26 @@ +package com.whylogs.core.schemas; + +import com.whylogs.core.metrics.Metric; +import com.whylogs.core.metrics.MetricConfig; +import com.whylogs.core.resolvers.Resolver; +import lombok.Data; + +import java.lang.reflect.Type; +import java.util.HashMap; + +@Data +public class ColumnSchema { + // do I need dtype and mapper? 
+ Type type; + MetricConfig config; + Resolver resolver; + + public ColumnSchema(Type type, MetricConfig config, Resolver resolver) { + this.config = config; + this.resolver = resolver; + } + + public HashMap getMetrics(){ + return this.resolver.resolve(this); + } +} diff --git a/java/core/src/main/java/com/whylogs/core/schemas/DatasetSchema.java b/java/core/src/main/java/com/whylogs/core/schemas/DatasetSchema.java new file mode 100644 index 0000000000..4fb1c68de4 --- /dev/null +++ b/java/core/src/main/java/com/whylogs/core/schemas/DatasetSchema.java @@ -0,0 +1,82 @@ +package com.whylogs.core.schemas; + +import com.whylogs.core.metrics.MetricConfig; +import com.whylogs.core.resolvers.Resolver; +import lombok.Data; + +import java.lang.reflect.Type; +import java.util.HashMap; +import java.util.Optional; +import java.util.Set; + +@Data +public class DatasetSchema { + + private HashMap type = new HashMap<>(); + private final int LARGE_CACHE_SIZE_LIMIT = 1024 * 100; + public HashMap columns; + public MetricConfig defaultConfig; + // TODO: typemapper + public Resolver resolver; + public int cache_size = 1024; + public boolean schema_based_automerge = false; + + public DatasetSchema() { + this.columns = new HashMap<>(); + this.defaultConfig = new MetricConfig(); + } + + public DatasetSchema(int cache_size , boolean schema_based_automerge) { + this.columns = new HashMap<>(); + this.defaultConfig = new MetricConfig(); + this.cache_size = cache_size; + this.schema_based_automerge = schema_based_automerge; + + if(cache_size < 0) { + // TODO: log warning + this.cache_size = 0; + } + + if(cache_size > LARGE_CACHE_SIZE_LIMIT) { + // TODO: log warning + } + + if(!this.type.isEmpty()){ + for(String key : this.type.keySet()){ + this.columns.put(key, new ColumnSchema(this.type.get(key), this.defaultConfig, this.resolver)); + } + } + } + + // TODO: java version of post init + + public DatasetSchema copy() { + DatasetSchema copy = new DatasetSchema(); + // TODO: copy over + + return 
copy; + } + + public boolean resolve(HashMap data) { + for (String key : data.keySet()) { + if(this.columns.containsKey(key)) { + continue; + } + + this.columns.put(key, new ColumnSchema( + data.get(key).getClass(), + this.defaultConfig, + this.resolver + )); + } + return true; + } + + public Optional get(String name) { + return Optional.ofNullable(this.columns.get(name)); + } + + public Set getColNames() { + return this.columns.keySet(); + } +} From 37afe48bd03985159cf771d8d9b1e6bc796c94ba Mon Sep 17 00:00:00 2001 From: TheMellyBee Date: Thu, 25 Aug 2022 17:01:49 -0700 Subject: [PATCH 02/71] Add a factory for the StandardMetrics --- .../core/metrics/StandardMetricFactory.java | 35 +++++++++++++++++++ .../metrics/TestStandardMetricFactory.java | 23 ++++++++++++ 2 files changed, 58 insertions(+) create mode 100644 java/core/src/main/java/com/whylogs/core/metrics/StandardMetricFactory.java create mode 100644 java/core/src/test/java/com/whylogs/core/metrics/TestStandardMetricFactory.java diff --git a/java/core/src/main/java/com/whylogs/core/metrics/StandardMetricFactory.java b/java/core/src/main/java/com/whylogs/core/metrics/StandardMetricFactory.java new file mode 100644 index 0000000000..b5a0c862c3 --- /dev/null +++ b/java/core/src/main/java/com/whylogs/core/metrics/StandardMetricFactory.java @@ -0,0 +1,35 @@ +package com.whylogs.core.metrics; + +public enum StandardMetricFactory { + /* + types { + + }, + distribution{ + + }, + counts{ + + },*/ + ints { + @Override + public IntegralMetric zero(MetricConfig config) { + return IntegralMetric.zero(config); + } + }, + /* + cardinality { + + }, + frequent_items { + + }, + unicode_range { + + }, + condition_count{ + + }*/ + ; + abstract T zero(MetricConfig config); +} diff --git a/java/core/src/test/java/com/whylogs/core/metrics/TestStandardMetricFactory.java b/java/core/src/test/java/com/whylogs/core/metrics/TestStandardMetricFactory.java new file mode 100644 index 0000000000..96121544ae --- /dev/null +++ 
b/java/core/src/test/java/com/whylogs/core/metrics/TestStandardMetricFactory.java @@ -0,0 +1,23 @@ +package com.whylogs.core.metrics; + +import org.testng.Assert; +import org.testng.annotations.Test; + +import java.lang.reflect.Array; +import java.util.ArrayList; + +@Test +public class TestStandardMetricFactory { + + @Test + public void test_standardMetric(){ + IntegralMetric ints = StandardMetricFactory.ints.zero(new MetricConfig()); + Assert.assertEquals((int) ints.getMaxComponent().getValue(), Integer.MIN_VALUE); + + ArrayList list = new ArrayList<>(); + list.add(ints); + list.add(StandardMetricFactory.ints.zero(new MetricConfig())); + Assert.assertEquals(list.size(), 2); + } + +} From afa2b766f036dac6f5050d02da2e3a30c7f8395f Mon Sep 17 00:00:00 2001 From: TheMellyBee Date: Fri, 26 Aug 2022 12:09:23 -0700 Subject: [PATCH 03/71] Add data typing to match types to fractional, integer, and string --- .../main/java/com/whylogs/core/DataTypes.java | 69 +++++++++++++++++++ .../whylogs/core/schemas/TestDataTypes.java | 19 +++++ 2 files changed, 88 insertions(+) create mode 100644 java/core/src/main/java/com/whylogs/core/DataTypes.java create mode 100644 java/core/src/test/java/com/whylogs/core/schemas/TestDataTypes.java diff --git a/java/core/src/main/java/com/whylogs/core/DataTypes.java b/java/core/src/main/java/com/whylogs/core/DataTypes.java new file mode 100644 index 0000000000..60dec77ab9 --- /dev/null +++ b/java/core/src/main/java/com/whylogs/core/DataTypes.java @@ -0,0 +1,69 @@ +package com.whylogs.core; + +import java.lang.reflect.Type; +import java.util.HashSet; + +public enum DataTypes { + Numerical { + @Override + public HashSet getTypes() { + HashSet dataTypes = new HashSet<>(); + dataTypes.add(Long.class); + dataTypes.add(Integer.class); + dataTypes.add(Double.class); + dataTypes.add(Float.class); + return dataTypes; + } + + @Override + public boolean includes(Type type) { + return getTypes().contains(type); + } + }, + Integral { + @Override + public 
HashSet getTypes() { + HashSet types = new HashSet(); + types.add(Long.class); + types.add(Integer.class); + return types; + } + + @Override + public boolean includes(Type type) { + return getTypes().contains(type); + } + }, + Fractional { + @Override + public HashSet getTypes() { + HashSet types = new HashSet(); + types.add(Double.class); + types.add(Float.class); + return types; + } + + @Override + public boolean includes(Type type) { + return getTypes().contains(type); + } + + }, + String { + @Override + public HashSet getTypes() { + HashSet types = new HashSet(); + types.add(String.class); + return types; + } + + @Override + public boolean includes(Type type) { + return getTypes().contains(type); + } + } + ; + + public abstract HashSet getTypes(); + public abstract boolean includes(Type type); +} diff --git a/java/core/src/test/java/com/whylogs/core/schemas/TestDataTypes.java b/java/core/src/test/java/com/whylogs/core/schemas/TestDataTypes.java new file mode 100644 index 0000000000..f1235f5450 --- /dev/null +++ b/java/core/src/test/java/com/whylogs/core/schemas/TestDataTypes.java @@ -0,0 +1,19 @@ +package com.whylogs.core.schemas; + +import com.whylogs.core.DataTypes; +import org.testng.Assert; +import org.testng.annotations.Test; + +import javax.xml.crypto.Data; + +@Test +public class TestDataTypes { + + @Test + public void test_enum_datatypes() { + DataTypes dataTypes = DataTypes.Integral; + Assert.assertEquals(dataTypes.name(), "Integral"); + Assert.assertTrue(dataTypes.includes(Integer.class)); + Assert.assertFalse(dataTypes.includes(String.class)); + } +} From d5fab50d37222fbbe0503c8bb2da7a46d5b80e25 Mon Sep 17 00:00:00 2001 From: TheMellyBee Date: Fri, 26 Aug 2022 12:43:33 -0700 Subject: [PATCH 04/71] ColumnSchema and DatasetSchema with tests --- .../whylogs/core/schemas/ColumnSchema.java | 9 ++++--- .../whylogs/core/schemas/DatasetSchema.java | 8 +++--- .../core/schemas/TestColumnSchema.java | 27 +++++++++++++++++++ 
.../core/schemas/TestDatasetSchema.java | 23 ++++++++++++++++ 4 files changed, 60 insertions(+), 7 deletions(-) create mode 100644 java/core/src/test/java/com/whylogs/core/schemas/TestColumnSchema.java create mode 100644 java/core/src/test/java/com/whylogs/core/schemas/TestDatasetSchema.java diff --git a/java/core/src/main/java/com/whylogs/core/schemas/ColumnSchema.java b/java/core/src/main/java/com/whylogs/core/schemas/ColumnSchema.java index ba0057e6b9..2ac4c86023 100644 --- a/java/core/src/main/java/com/whylogs/core/schemas/ColumnSchema.java +++ b/java/core/src/main/java/com/whylogs/core/schemas/ColumnSchema.java @@ -9,18 +9,21 @@ import java.util.HashMap; @Data -public class ColumnSchema { - // do I need dtype and mapper? +public class ColumnSchema{ + // Thoughts: we could have this ColumnSchema instead of having it as a member + // bu this might be easier to use? If we did we would need to use the CRTP again + // like in Metric to be able to see the type but also have them in a collection togehter Type type; MetricConfig config; Resolver resolver; public ColumnSchema(Type type, MetricConfig config, Resolver resolver) { + this.type = type; this.config = config; this.resolver = resolver; } - public HashMap getMetrics(){ + public HashMap getMetrics(){ return this.resolver.resolve(this); } } diff --git a/java/core/src/main/java/com/whylogs/core/schemas/DatasetSchema.java b/java/core/src/main/java/com/whylogs/core/schemas/DatasetSchema.java index 4fb1c68de4..0a520d3814 100644 --- a/java/core/src/main/java/com/whylogs/core/schemas/DatasetSchema.java +++ b/java/core/src/main/java/com/whylogs/core/schemas/DatasetSchema.java @@ -12,7 +12,7 @@ @Data public class DatasetSchema { - private HashMap type = new HashMap<>(); + private HashMap types = new HashMap<>(); private final int LARGE_CACHE_SIZE_LIMIT = 1024 * 100; public HashMap columns; public MetricConfig defaultConfig; @@ -41,9 +41,9 @@ public DatasetSchema(int cache_size , boolean schema_based_automerge) { // TODO: 
log warning } - if(!this.type.isEmpty()){ - for(String key : this.type.keySet()){ - this.columns.put(key, new ColumnSchema(this.type.get(key), this.defaultConfig, this.resolver)); + if(!this.types.isEmpty()){ + for(String key : this.types.keySet()){ + this.columns.put(key, new ColumnSchema(this.types.get(key), this.defaultConfig, this.resolver)); } } } diff --git a/java/core/src/test/java/com/whylogs/core/schemas/TestColumnSchema.java b/java/core/src/test/java/com/whylogs/core/schemas/TestColumnSchema.java new file mode 100644 index 0000000000..45ecdf53ea --- /dev/null +++ b/java/core/src/test/java/com/whylogs/core/schemas/TestColumnSchema.java @@ -0,0 +1,27 @@ +package com.whylogs.core.schemas; + +import com.whylogs.core.DataTypes; +import com.whylogs.core.metrics.IntegralMetric; +import com.whylogs.core.metrics.Metric; +import com.whylogs.core.metrics.MetricConfig; +import com.whylogs.core.metrics.StandardMetric; +import com.whylogs.core.resolvers.StandardResolver; +import org.testng.Assert; +import org.testng.annotations.Test; + +import java.util.HashMap; + +@Test +public class TestColumnSchema { + + @Test + public void test_column_schema() { + ColumnSchema columnSchema = new ColumnSchema(Integer.class, new MetricConfig(), new StandardResolver()); + HashMap metrics = columnSchema.getMetrics(); + + // TODO: I'm not sure I like this. 
Might want to rethink the Metric just a little + Assert.assertEquals(metrics.get("ints").getClass(), IntegralMetric.class); + IntegralMetric ints = (IntegralMetric) metrics.get("ints"); + Assert.assertEquals((int) ints.getMaxComponent().getValue(), Integer.MIN_VALUE); + } +} diff --git a/java/core/src/test/java/com/whylogs/core/schemas/TestDatasetSchema.java b/java/core/src/test/java/com/whylogs/core/schemas/TestDatasetSchema.java new file mode 100644 index 0000000000..745a8c451a --- /dev/null +++ b/java/core/src/test/java/com/whylogs/core/schemas/TestDatasetSchema.java @@ -0,0 +1,23 @@ +package com.whylogs.core.schemas; + +import org.testng.Assert; +import org.testng.annotations.Test; + +import java.util.HashMap; + +@Test +public class TestDatasetSchema { + + @Test + public void test_dataset_schema() { + DatasetSchema datasetSchema = new DatasetSchema(); + Assert.assertEquals(datasetSchema.getCache_size(), 1024); + + HashMap data = new HashMap<>(); + data.put("test", 1); + data.put("test2", "2"); + datasetSchema.resolve(data); + Assert.assertEquals(datasetSchema.getColumns().get("test").getType(), Integer.class); + Assert.assertEquals(datasetSchema.getColumns().get("test2").getType(), String.class); + } +} From 0d71bdff0f149f147f7d777fa9dea256df4a2316 Mon Sep 17 00:00:00 2001 From: TheMellyBee Date: Fri, 26 Aug 2022 12:44:08 -0700 Subject: [PATCH 05/71] Moves file --- .../java/com/whylogs/core/{schemas => }/TestDataTypes.java | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) rename java/core/src/test/java/com/whylogs/core/{schemas => }/TestDataTypes.java (79%) diff --git a/java/core/src/test/java/com/whylogs/core/schemas/TestDataTypes.java b/java/core/src/test/java/com/whylogs/core/TestDataTypes.java similarity index 79% rename from java/core/src/test/java/com/whylogs/core/schemas/TestDataTypes.java rename to java/core/src/test/java/com/whylogs/core/TestDataTypes.java index f1235f5450..1074973821 100644 --- 
a/java/core/src/test/java/com/whylogs/core/schemas/TestDataTypes.java +++ b/java/core/src/test/java/com/whylogs/core/TestDataTypes.java @@ -1,10 +1,8 @@ -package com.whylogs.core.schemas; +package com.whylogs.core; -import com.whylogs.core.DataTypes; import org.testng.Assert; import org.testng.annotations.Test; -import javax.xml.crypto.Data; @Test public class TestDataTypes { From 65340f6507f4758e089a8ce4093f4f7f37c99998 Mon Sep 17 00:00:00 2001 From: TheMellyBee Date: Fri, 26 Aug 2022 12:45:23 -0700 Subject: [PATCH 06/71] Code and testing for StandardMetric (note this is a factory where we will need to add the rest of the metrics that are currently commented out after ints are done) --- .../whylogs/core/metrics/StandardMetric.java | 51 +++++++++++++++++++ .../core/metrics/StandardMetricFactory.java | 35 ------------- .../metrics/TestStandardMetricFactory.java | 6 +-- 3 files changed, 53 insertions(+), 39 deletions(-) create mode 100644 java/core/src/main/java/com/whylogs/core/metrics/StandardMetric.java delete mode 100644 java/core/src/main/java/com/whylogs/core/metrics/StandardMetricFactory.java diff --git a/java/core/src/main/java/com/whylogs/core/metrics/StandardMetric.java b/java/core/src/main/java/com/whylogs/core/metrics/StandardMetric.java new file mode 100644 index 0000000000..6f8ed6f72a --- /dev/null +++ b/java/core/src/main/java/com/whylogs/core/metrics/StandardMetric.java @@ -0,0 +1,51 @@ +package com.whylogs.core.metrics; + +public enum StandardMetric { + /* + types { + + }, + distribution{ + + }, + counts{ + + },*/ + ints { + @Override + public IntegralMetric zero(MetricConfig config) { + return IntegralMetric.zero(config); + } + }, + /* + cardinality { + + }, + frequent_items { + + }, + unicode_range { + + }, + condition_count{ + + }*/ + ; + abstract T zero(MetricConfig config); + + public static T getMetric(String name){ + return StandardMetric.valueOf(name).zero(new MetricConfig()); + } + + public static T getMetric(String name, MetricConfig 
config){ + return StandardMetric.valueOf(name).zero(config); + } + + public static T getMetric(StandardMetric metric){ + return metric.zero(new MetricConfig()); + } + + public static T getMetric(StandardMetric metric, MetricConfig config){ + return metric.zero(config); + } +} diff --git a/java/core/src/main/java/com/whylogs/core/metrics/StandardMetricFactory.java b/java/core/src/main/java/com/whylogs/core/metrics/StandardMetricFactory.java deleted file mode 100644 index b5a0c862c3..0000000000 --- a/java/core/src/main/java/com/whylogs/core/metrics/StandardMetricFactory.java +++ /dev/null @@ -1,35 +0,0 @@ -package com.whylogs.core.metrics; - -public enum StandardMetricFactory { - /* - types { - - }, - distribution{ - - }, - counts{ - - },*/ - ints { - @Override - public IntegralMetric zero(MetricConfig config) { - return IntegralMetric.zero(config); - } - }, - /* - cardinality { - - }, - frequent_items { - - }, - unicode_range { - - }, - condition_count{ - - }*/ - ; - abstract T zero(MetricConfig config); -} diff --git a/java/core/src/test/java/com/whylogs/core/metrics/TestStandardMetricFactory.java b/java/core/src/test/java/com/whylogs/core/metrics/TestStandardMetricFactory.java index 96121544ae..7713b74441 100644 --- a/java/core/src/test/java/com/whylogs/core/metrics/TestStandardMetricFactory.java +++ b/java/core/src/test/java/com/whylogs/core/metrics/TestStandardMetricFactory.java @@ -3,7 +3,6 @@ import org.testng.Assert; import org.testng.annotations.Test; -import java.lang.reflect.Array; import java.util.ArrayList; @Test @@ -11,13 +10,12 @@ public class TestStandardMetricFactory { @Test public void test_standardMetric(){ - IntegralMetric ints = StandardMetricFactory.ints.zero(new MetricConfig()); + IntegralMetric ints = StandardMetric.ints.zero(new MetricConfig()); Assert.assertEquals((int) ints.getMaxComponent().getValue(), Integer.MIN_VALUE); ArrayList list = new ArrayList<>(); list.add(ints); - list.add(StandardMetricFactory.ints.zero(new MetricConfig())); - 
Assert.assertEquals(list.size(), 2); + list.add(StandardMetric.ints.zero(new MetricConfig())); } } From 1eea1e7a12ad0a1c8f886a7b08e637b0334720ad Mon Sep 17 00:00:00 2001 From: TheMellyBee Date: Fri, 26 Aug 2022 12:51:20 -0700 Subject: [PATCH 07/71] Resolver, StandardResolver, and tests --- .../com/whylogs/core/resolvers/Resolver.java | 7 ++- .../core/resolvers/StandardResolver.java | 54 +++++++++++++++++++ .../core/resolvers/TestStandardResolver.java | 24 +++++++++ 3 files changed, 84 insertions(+), 1 deletion(-) create mode 100644 java/core/src/main/java/com/whylogs/core/resolvers/StandardResolver.java create mode 100644 java/core/src/test/java/com/whylogs/core/resolvers/TestStandardResolver.java diff --git a/java/core/src/main/java/com/whylogs/core/resolvers/Resolver.java b/java/core/src/main/java/com/whylogs/core/resolvers/Resolver.java index 895080ee05..1910a67190 100644 --- a/java/core/src/main/java/com/whylogs/core/resolvers/Resolver.java +++ b/java/core/src/main/java/com/whylogs/core/resolvers/Resolver.java @@ -3,8 +3,13 @@ import com.whylogs.core.metrics.Metric; import com.whylogs.core.schemas.ColumnSchema; +import java.lang.reflect.Type; +import java.util.ArrayList; import java.util.HashMap; public abstract class Resolver { - public abstract HashMap resolve(ColumnSchema schema); + + + + public abstract HashMap resolve(ColumnSchema schema); } diff --git a/java/core/src/main/java/com/whylogs/core/resolvers/StandardResolver.java b/java/core/src/main/java/com/whylogs/core/resolvers/StandardResolver.java new file mode 100644 index 0000000000..e8f7a20bc8 --- /dev/null +++ b/java/core/src/main/java/com/whylogs/core/resolvers/StandardResolver.java @@ -0,0 +1,54 @@ +package com.whylogs.core.resolvers; + +import com.whylogs.core.DataTypes; +import com.whylogs.core.metrics.Metric; +import com.whylogs.core.metrics.StandardMetric; +import com.whylogs.core.schemas.ColumnSchema; + +import java.util.ArrayList; +import java.util.HashMap; + +public class StandardResolver 
extends Resolver { + public StandardResolver() { + super(); + } + + // TODO: the rest of the metrics need implmeented + // TODO: does this Metric loose it's typing? + @Override + public HashMap resolve(ColumnSchema schema) { + ArrayList standardMetrics = new ArrayList<>(); + // standardMetrics.add(StandardMetric.counts, StandardMetric.types) + + if(DataTypes.Integral.includes(schema.getType())) { + standardMetrics.add(StandardMetric.ints); + // standardMetrics.add(StandardMetric.distribution); + // standardMetrics.add(StandardMetric.cardinality); + // standardMetrics.add(StandardMetric.frequent_items); + } else if (DataTypes.Fractional.includes(schema.getType())) { + // standardMetrics.add(StandardMetric.distribution); + // standardMetrics.add(StandardMetric.cardinality); + } else if (DataTypes.String.includes(schema.getType())) { + // standardMetrics.add(StandardMetric.cardinality); + // standardMetrics.add(StandardMetric.distribution); + // standardMetrics.add(StandardMetric.frequent_items); + + if(schema.getConfig().isTrack_unicode_ranges()) { + // standardMetrics.add(StandardMetric.unicode_range); + } + } + + if(schema.getConfig().isFi_disabled()){ + //standardMetrics.remove(StandardMetric.frequent_items); + } + + HashMap result = new HashMap<>(); + + for(StandardMetric metric : standardMetrics) { + result.put(metric.name(), StandardMetric.getMetric(metric, schema.getConfig())); + } + + return result; + + } +} diff --git a/java/core/src/test/java/com/whylogs/core/resolvers/TestStandardResolver.java b/java/core/src/test/java/com/whylogs/core/resolvers/TestStandardResolver.java new file mode 100644 index 0000000000..4016936c68 --- /dev/null +++ b/java/core/src/test/java/com/whylogs/core/resolvers/TestStandardResolver.java @@ -0,0 +1,24 @@ +package com.whylogs.core.resolvers; + +import com.whylogs.core.metrics.IntegralMetric; +import com.whylogs.core.metrics.Metric; +import com.whylogs.core.metrics.MetricConfig; +import com.whylogs.core.schemas.ColumnSchema; 
+import org.testng.Assert; +import org.testng.annotations.Test; + +import java.util.HashMap; + +@Test +public class TestStandardResolver { + + @Test + public void test_integralInput() { + StandardResolver resolver = new StandardResolver(); + ColumnSchema columnSchema = new ColumnSchema(Integer.class, new MetricConfig(), resolver); + HashMap metrics = resolver.resolve(columnSchema); + Assert.assertEquals(metrics.get("ints").getClass(), IntegralMetric.class); + } + + // TODO: add tests when other metrics get added +} From 0164e93f99731b40066e5899aebd6ef6d55dde1f Mon Sep 17 00:00:00 2001 From: TheMellyBee Date: Fri, 26 Aug 2022 12:52:23 -0700 Subject: [PATCH 08/71] Java Linter --- .../main/java/com/whylogs/core/DataTypes.java | 111 +++++++++--------- .../whylogs/core/metrics/StandardMetric.java | 73 ++++++------ .../com/whylogs/core/resolvers/Resolver.java | 7 +- .../core/resolvers/StandardResolver.java | 76 ++++++------ .../whylogs/core/schemas/ColumnSchema.java | 33 +++--- .../whylogs/core/schemas/DatasetSchema.java | 111 +++++++++--------- .../java/com/whylogs/core/TestDataTypes.java | 15 ++- .../metrics/TestStandardMetricFactory.java | 20 ++-- .../core/resolvers/TestStandardResolver.java | 19 ++- .../core/schemas/TestColumnSchema.java | 24 ++-- .../core/schemas/TestDatasetSchema.java | 25 ++-- 11 files changed, 248 insertions(+), 266 deletions(-) diff --git a/java/core/src/main/java/com/whylogs/core/DataTypes.java b/java/core/src/main/java/com/whylogs/core/DataTypes.java index 60dec77ab9..13b74c25c9 100644 --- a/java/core/src/main/java/com/whylogs/core/DataTypes.java +++ b/java/core/src/main/java/com/whylogs/core/DataTypes.java @@ -4,66 +4,65 @@ import java.util.HashSet; public enum DataTypes { - Numerical { - @Override - public HashSet getTypes() { - HashSet dataTypes = new HashSet<>(); - dataTypes.add(Long.class); - dataTypes.add(Integer.class); - dataTypes.add(Double.class); - dataTypes.add(Float.class); - return dataTypes; - } - - @Override - public boolean 
includes(Type type) { - return getTypes().contains(type); - } - }, - Integral { - @Override - public HashSet getTypes() { - HashSet types = new HashSet(); - types.add(Long.class); - types.add(Integer.class); - return types; - } + Numerical { + @Override + public HashSet getTypes() { + HashSet dataTypes = new HashSet<>(); + dataTypes.add(Long.class); + dataTypes.add(Integer.class); + dataTypes.add(Double.class); + dataTypes.add(Float.class); + return dataTypes; + } - @Override - public boolean includes(Type type) { - return getTypes().contains(type); - } - }, - Fractional { - @Override - public HashSet getTypes() { - HashSet types = new HashSet(); - types.add(Double.class); - types.add(Float.class); - return types; - } + @Override + public boolean includes(Type type) { + return getTypes().contains(type); + } + }, + Integral { + @Override + public HashSet getTypes() { + HashSet types = new HashSet(); + types.add(Long.class); + types.add(Integer.class); + return types; + } - @Override - public boolean includes(Type type) { - return getTypes().contains(type); - } + @Override + public boolean includes(Type type) { + return getTypes().contains(type); + } + }, + Fractional { + @Override + public HashSet getTypes() { + HashSet types = new HashSet(); + types.add(Double.class); + types.add(Float.class); + return types; + } - }, - String { - @Override - public HashSet getTypes() { - HashSet types = new HashSet(); - types.add(String.class); - return types; - } + @Override + public boolean includes(Type type) { + return getTypes().contains(type); + } + }, + String { + @Override + public HashSet getTypes() { + HashSet types = new HashSet(); + types.add(String.class); + return types; + } - @Override - public boolean includes(Type type) { - return getTypes().contains(type); - } + @Override + public boolean includes(Type type) { + return getTypes().contains(type); } - ; + }; + + public abstract HashSet getTypes(); - public abstract HashSet getTypes(); - public abstract boolean 
includes(Type type); + public abstract boolean includes(Type type); } diff --git a/java/core/src/main/java/com/whylogs/core/metrics/StandardMetric.java b/java/core/src/main/java/com/whylogs/core/metrics/StandardMetric.java index 6f8ed6f72a..d99ebe3594 100644 --- a/java/core/src/main/java/com/whylogs/core/metrics/StandardMetric.java +++ b/java/core/src/main/java/com/whylogs/core/metrics/StandardMetric.java @@ -1,51 +1,52 @@ package com.whylogs.core.metrics; public enum StandardMetric { - /* - types { + /* + types { - }, - distribution{ + }, + distribution{ - }, - counts{ + }, + counts{ - },*/ - ints { - @Override - public IntegralMetric zero(MetricConfig config) { - return IntegralMetric.zero(config); - } - }, - /* - cardinality { + },*/ + ints { + @Override + public IntegralMetric zero(MetricConfig config) { + return IntegralMetric.zero(config); + } + }, +/* +cardinality { - }, - frequent_items { +}, +frequent_items { - }, - unicode_range { +}, +unicode_range { - }, - condition_count{ +}, +condition_count{ - }*/ - ; - abstract T zero(MetricConfig config); +}*/ +; - public static T getMetric(String name){ - return StandardMetric.valueOf(name).zero(new MetricConfig()); - } + abstract T zero(MetricConfig config); - public static T getMetric(String name, MetricConfig config){ - return StandardMetric.valueOf(name).zero(config); - } + public static T getMetric(String name) { + return StandardMetric.valueOf(name).zero(new MetricConfig()); + } - public static T getMetric(StandardMetric metric){ - return metric.zero(new MetricConfig()); - } + public static T getMetric(String name, MetricConfig config) { + return StandardMetric.valueOf(name).zero(config); + } - public static T getMetric(StandardMetric metric, MetricConfig config){ - return metric.zero(config); - } + public static T getMetric(StandardMetric metric) { + return metric.zero(new MetricConfig()); + } + + public static T getMetric(StandardMetric metric, MetricConfig config) { + return metric.zero(config); + } } 
diff --git a/java/core/src/main/java/com/whylogs/core/resolvers/Resolver.java b/java/core/src/main/java/com/whylogs/core/resolvers/Resolver.java index 1910a67190..93e5350d75 100644 --- a/java/core/src/main/java/com/whylogs/core/resolvers/Resolver.java +++ b/java/core/src/main/java/com/whylogs/core/resolvers/Resolver.java @@ -2,14 +2,9 @@ import com.whylogs.core.metrics.Metric; import com.whylogs.core.schemas.ColumnSchema; - -import java.lang.reflect.Type; -import java.util.ArrayList; import java.util.HashMap; public abstract class Resolver { - - - public abstract HashMap resolve(ColumnSchema schema); + public abstract HashMap resolve(ColumnSchema schema); } diff --git a/java/core/src/main/java/com/whylogs/core/resolvers/StandardResolver.java b/java/core/src/main/java/com/whylogs/core/resolvers/StandardResolver.java index e8f7a20bc8..03098acd6e 100644 --- a/java/core/src/main/java/com/whylogs/core/resolvers/StandardResolver.java +++ b/java/core/src/main/java/com/whylogs/core/resolvers/StandardResolver.java @@ -4,51 +4,49 @@ import com.whylogs.core.metrics.Metric; import com.whylogs.core.metrics.StandardMetric; import com.whylogs.core.schemas.ColumnSchema; - import java.util.ArrayList; import java.util.HashMap; public class StandardResolver extends Resolver { - public StandardResolver() { - super(); + public StandardResolver() { + super(); + } + + // TODO: the rest of the metrics need implmeented + // TODO: does this Metric loose it's typing? 
+ @Override + public HashMap resolve(ColumnSchema schema) { + ArrayList standardMetrics = new ArrayList<>(); + // standardMetrics.add(StandardMetric.counts, StandardMetric.types) + + if (DataTypes.Integral.includes(schema.getType())) { + standardMetrics.add(StandardMetric.ints); + // standardMetrics.add(StandardMetric.distribution); + // standardMetrics.add(StandardMetric.cardinality); + // standardMetrics.add(StandardMetric.frequent_items); + } else if (DataTypes.Fractional.includes(schema.getType())) { + // standardMetrics.add(StandardMetric.distribution); + // standardMetrics.add(StandardMetric.cardinality); + } else if (DataTypes.String.includes(schema.getType())) { + // standardMetrics.add(StandardMetric.cardinality); + // standardMetrics.add(StandardMetric.distribution); + // standardMetrics.add(StandardMetric.frequent_items); + + if (schema.getConfig().isTrack_unicode_ranges()) { + // standardMetrics.add(StandardMetric.unicode_range); + } + } + + if (schema.getConfig().isFi_disabled()) { + // standardMetrics.remove(StandardMetric.frequent_items); } - // TODO: the rest of the metrics need implmeented - // TODO: does this Metric loose it's typing? 
- @Override - public HashMap resolve(ColumnSchema schema) { - ArrayList standardMetrics = new ArrayList<>(); - // standardMetrics.add(StandardMetric.counts, StandardMetric.types) - - if(DataTypes.Integral.includes(schema.getType())) { - standardMetrics.add(StandardMetric.ints); - // standardMetrics.add(StandardMetric.distribution); - // standardMetrics.add(StandardMetric.cardinality); - // standardMetrics.add(StandardMetric.frequent_items); - } else if (DataTypes.Fractional.includes(schema.getType())) { - // standardMetrics.add(StandardMetric.distribution); - // standardMetrics.add(StandardMetric.cardinality); - } else if (DataTypes.String.includes(schema.getType())) { - // standardMetrics.add(StandardMetric.cardinality); - // standardMetrics.add(StandardMetric.distribution); - // standardMetrics.add(StandardMetric.frequent_items); - - if(schema.getConfig().isTrack_unicode_ranges()) { - // standardMetrics.add(StandardMetric.unicode_range); - } - } - - if(schema.getConfig().isFi_disabled()){ - //standardMetrics.remove(StandardMetric.frequent_items); - } - - HashMap result = new HashMap<>(); - - for(StandardMetric metric : standardMetrics) { - result.put(metric.name(), StandardMetric.getMetric(metric, schema.getConfig())); - } - - return result; + HashMap result = new HashMap<>(); + for (StandardMetric metric : standardMetrics) { + result.put(metric.name(), StandardMetric.getMetric(metric, schema.getConfig())); } + + return result; + } } diff --git a/java/core/src/main/java/com/whylogs/core/schemas/ColumnSchema.java b/java/core/src/main/java/com/whylogs/core/schemas/ColumnSchema.java index 2ac4c86023..d09a000660 100644 --- a/java/core/src/main/java/com/whylogs/core/schemas/ColumnSchema.java +++ b/java/core/src/main/java/com/whylogs/core/schemas/ColumnSchema.java @@ -3,27 +3,26 @@ import com.whylogs.core.metrics.Metric; import com.whylogs.core.metrics.MetricConfig; import com.whylogs.core.resolvers.Resolver; -import lombok.Data; - import java.lang.reflect.Type; import 
java.util.HashMap; +import lombok.Data; @Data -public class ColumnSchema{ - // Thoughts: we could have this ColumnSchema instead of having it as a member - // bu this might be easier to use? If we did we would need to use the CRTP again - // like in Metric to be able to see the type but also have them in a collection togehter - Type type; - MetricConfig config; - Resolver resolver; +public class ColumnSchema { + // Thoughts: we could have this ColumnSchema instead of having it as a member + // bu this might be easier to use? If we did we would need to use the CRTP again + // like in Metric to be able to see the type but also have them in a collection togehter + Type type; + MetricConfig config; + Resolver resolver; - public ColumnSchema(Type type, MetricConfig config, Resolver resolver) { - this.type = type; - this.config = config; - this.resolver = resolver; - } + public ColumnSchema(Type type, MetricConfig config, Resolver resolver) { + this.type = type; + this.config = config; + this.resolver = resolver; + } - public HashMap getMetrics(){ - return this.resolver.resolve(this); - } + public HashMap getMetrics() { + return this.resolver.resolve(this); + } } diff --git a/java/core/src/main/java/com/whylogs/core/schemas/DatasetSchema.java b/java/core/src/main/java/com/whylogs/core/schemas/DatasetSchema.java index 0a520d3814..cc0295c8c5 100644 --- a/java/core/src/main/java/com/whylogs/core/schemas/DatasetSchema.java +++ b/java/core/src/main/java/com/whylogs/core/schemas/DatasetSchema.java @@ -2,81 +2,78 @@ import com.whylogs.core.metrics.MetricConfig; import com.whylogs.core.resolvers.Resolver; -import lombok.Data; - import java.lang.reflect.Type; import java.util.HashMap; import java.util.Optional; import java.util.Set; +import lombok.Data; @Data public class DatasetSchema { - private HashMap types = new HashMap<>(); - private final int LARGE_CACHE_SIZE_LIMIT = 1024 * 100; - public HashMap columns; - public MetricConfig defaultConfig; - // TODO: typemapper - public 
Resolver resolver; - public int cache_size = 1024; - public boolean schema_based_automerge = false; - - public DatasetSchema() { - this.columns = new HashMap<>(); - this.defaultConfig = new MetricConfig(); + private HashMap types = new HashMap<>(); + private final int LARGE_CACHE_SIZE_LIMIT = 1024 * 100; + public HashMap columns; + public MetricConfig defaultConfig; + // TODO: typemapper + public Resolver resolver; + public int cache_size = 1024; + public boolean schema_based_automerge = false; + + public DatasetSchema() { + this.columns = new HashMap<>(); + this.defaultConfig = new MetricConfig(); + } + + public DatasetSchema(int cache_size, boolean schema_based_automerge) { + this.columns = new HashMap<>(); + this.defaultConfig = new MetricConfig(); + this.cache_size = cache_size; + this.schema_based_automerge = schema_based_automerge; + + if (cache_size < 0) { + // TODO: log warning + this.cache_size = 0; } - public DatasetSchema(int cache_size , boolean schema_based_automerge) { - this.columns = new HashMap<>(); - this.defaultConfig = new MetricConfig(); - this.cache_size = cache_size; - this.schema_based_automerge = schema_based_automerge; - - if(cache_size < 0) { - // TODO: log warning - this.cache_size = 0; - } - - if(cache_size > LARGE_CACHE_SIZE_LIMIT) { - // TODO: log warning - } + if (cache_size > LARGE_CACHE_SIZE_LIMIT) { + // TODO: log warning + } - if(!this.types.isEmpty()){ - for(String key : this.types.keySet()){ - this.columns.put(key, new ColumnSchema(this.types.get(key), this.defaultConfig, this.resolver)); - } - } + if (!this.types.isEmpty()) { + for (String key : this.types.keySet()) { + this.columns.put( + key, new ColumnSchema(this.types.get(key), this.defaultConfig, this.resolver)); + } } + } - // TODO: java version of post init + // TODO: java version of post init - public DatasetSchema copy() { - DatasetSchema copy = new DatasetSchema(); - // TODO: copy over + public DatasetSchema copy() { + DatasetSchema copy = new DatasetSchema(); + // 
TODO: copy over - return copy; - } + return copy; + } - public boolean resolve(HashMap data) { - for (String key : data.keySet()) { - if(this.columns.containsKey(key)) { - continue; - } + public boolean resolve(HashMap data) { + for (String key : data.keySet()) { + if (this.columns.containsKey(key)) { + continue; + } - this.columns.put(key, new ColumnSchema( - data.get(key).getClass(), - this.defaultConfig, - this.resolver - )); - } - return true; + this.columns.put( + key, new ColumnSchema(data.get(key).getClass(), this.defaultConfig, this.resolver)); } + return true; + } - public Optional get(String name) { - return Optional.ofNullable(this.columns.get(name)); - } + public Optional get(String name) { + return Optional.ofNullable(this.columns.get(name)); + } - public Set getColNames() { - return this.columns.keySet(); - } + public Set getColNames() { + return this.columns.keySet(); + } } diff --git a/java/core/src/test/java/com/whylogs/core/TestDataTypes.java b/java/core/src/test/java/com/whylogs/core/TestDataTypes.java index 1074973821..01a39ce49d 100644 --- a/java/core/src/test/java/com/whylogs/core/TestDataTypes.java +++ b/java/core/src/test/java/com/whylogs/core/TestDataTypes.java @@ -3,15 +3,14 @@ import org.testng.Assert; import org.testng.annotations.Test; - @Test public class TestDataTypes { - @Test - public void test_enum_datatypes() { - DataTypes dataTypes = DataTypes.Integral; - Assert.assertEquals(dataTypes.name(), "Integral"); - Assert.assertTrue(dataTypes.includes(Integer.class)); - Assert.assertFalse(dataTypes.includes(String.class)); - } + @Test + public void test_enum_datatypes() { + DataTypes dataTypes = DataTypes.Integral; + Assert.assertEquals(dataTypes.name(), "Integral"); + Assert.assertTrue(dataTypes.includes(Integer.class)); + Assert.assertFalse(dataTypes.includes(String.class)); + } } diff --git a/java/core/src/test/java/com/whylogs/core/metrics/TestStandardMetricFactory.java 
b/java/core/src/test/java/com/whylogs/core/metrics/TestStandardMetricFactory.java index 7713b74441..9b739c0508 100644 --- a/java/core/src/test/java/com/whylogs/core/metrics/TestStandardMetricFactory.java +++ b/java/core/src/test/java/com/whylogs/core/metrics/TestStandardMetricFactory.java @@ -1,21 +1,19 @@ package com.whylogs.core.metrics; +import java.util.ArrayList; import org.testng.Assert; import org.testng.annotations.Test; -import java.util.ArrayList; - @Test public class TestStandardMetricFactory { - @Test - public void test_standardMetric(){ - IntegralMetric ints = StandardMetric.ints.zero(new MetricConfig()); - Assert.assertEquals((int) ints.getMaxComponent().getValue(), Integer.MIN_VALUE); - - ArrayList list = new ArrayList<>(); - list.add(ints); - list.add(StandardMetric.ints.zero(new MetricConfig())); - } + @Test + public void test_standardMetric() { + IntegralMetric ints = StandardMetric.ints.zero(new MetricConfig()); + Assert.assertEquals((int) ints.getMaxComponent().getValue(), Integer.MIN_VALUE); + ArrayList list = new ArrayList<>(); + list.add(ints); + list.add(StandardMetric.ints.zero(new MetricConfig())); + } } diff --git a/java/core/src/test/java/com/whylogs/core/resolvers/TestStandardResolver.java b/java/core/src/test/java/com/whylogs/core/resolvers/TestStandardResolver.java index 4016936c68..b71ada5868 100644 --- a/java/core/src/test/java/com/whylogs/core/resolvers/TestStandardResolver.java +++ b/java/core/src/test/java/com/whylogs/core/resolvers/TestStandardResolver.java @@ -4,21 +4,20 @@ import com.whylogs.core.metrics.Metric; import com.whylogs.core.metrics.MetricConfig; import com.whylogs.core.schemas.ColumnSchema; +import java.util.HashMap; import org.testng.Assert; import org.testng.annotations.Test; -import java.util.HashMap; - @Test public class TestStandardResolver { - @Test - public void test_integralInput() { - StandardResolver resolver = new StandardResolver(); - ColumnSchema columnSchema = new ColumnSchema(Integer.class, new 
MetricConfig(), resolver); - HashMap metrics = resolver.resolve(columnSchema); - Assert.assertEquals(metrics.get("ints").getClass(), IntegralMetric.class); - } + @Test + public void test_integralInput() { + StandardResolver resolver = new StandardResolver(); + ColumnSchema columnSchema = new ColumnSchema(Integer.class, new MetricConfig(), resolver); + HashMap metrics = resolver.resolve(columnSchema); + Assert.assertEquals(metrics.get("ints").getClass(), IntegralMetric.class); + } - // TODO: add tests when other metrics get added + // TODO: add tests when other metrics get added } diff --git a/java/core/src/test/java/com/whylogs/core/schemas/TestColumnSchema.java b/java/core/src/test/java/com/whylogs/core/schemas/TestColumnSchema.java index 45ecdf53ea..9654651a43 100644 --- a/java/core/src/test/java/com/whylogs/core/schemas/TestColumnSchema.java +++ b/java/core/src/test/java/com/whylogs/core/schemas/TestColumnSchema.java @@ -1,27 +1,25 @@ package com.whylogs.core.schemas; -import com.whylogs.core.DataTypes; import com.whylogs.core.metrics.IntegralMetric; import com.whylogs.core.metrics.Metric; import com.whylogs.core.metrics.MetricConfig; -import com.whylogs.core.metrics.StandardMetric; import com.whylogs.core.resolvers.StandardResolver; +import java.util.HashMap; import org.testng.Assert; import org.testng.annotations.Test; -import java.util.HashMap; - @Test public class TestColumnSchema { - @Test - public void test_column_schema() { - ColumnSchema columnSchema = new ColumnSchema(Integer.class, new MetricConfig(), new StandardResolver()); - HashMap metrics = columnSchema.getMetrics(); + @Test + public void test_column_schema() { + ColumnSchema columnSchema = + new ColumnSchema(Integer.class, new MetricConfig(), new StandardResolver()); + HashMap metrics = columnSchema.getMetrics(); - // TODO: I'm not sure I like this. 
Might want to rethink the Metric just a little - Assert.assertEquals(metrics.get("ints").getClass(), IntegralMetric.class); - IntegralMetric ints = (IntegralMetric) metrics.get("ints"); - Assert.assertEquals((int) ints.getMaxComponent().getValue(), Integer.MIN_VALUE); - } + // TODO: I'm not sure I like this. Might want to rethink the Metric just a little + Assert.assertEquals(metrics.get("ints").getClass(), IntegralMetric.class); + IntegralMetric ints = (IntegralMetric) metrics.get("ints"); + Assert.assertEquals((int) ints.getMaxComponent().getValue(), Integer.MIN_VALUE); + } } diff --git a/java/core/src/test/java/com/whylogs/core/schemas/TestDatasetSchema.java b/java/core/src/test/java/com/whylogs/core/schemas/TestDatasetSchema.java index 745a8c451a..ab6a4e1492 100644 --- a/java/core/src/test/java/com/whylogs/core/schemas/TestDatasetSchema.java +++ b/java/core/src/test/java/com/whylogs/core/schemas/TestDatasetSchema.java @@ -1,23 +1,22 @@ package com.whylogs.core.schemas; +import java.util.HashMap; import org.testng.Assert; import org.testng.annotations.Test; -import java.util.HashMap; - @Test public class TestDatasetSchema { - @Test - public void test_dataset_schema() { - DatasetSchema datasetSchema = new DatasetSchema(); - Assert.assertEquals(datasetSchema.getCache_size(), 1024); + @Test + public void test_dataset_schema() { + DatasetSchema datasetSchema = new DatasetSchema(); + Assert.assertEquals(datasetSchema.getCache_size(), 1024); - HashMap data = new HashMap<>(); - data.put("test", 1); - data.put("test2", "2"); - datasetSchema.resolve(data); - Assert.assertEquals(datasetSchema.getColumns().get("test").getType(), Integer.class); - Assert.assertEquals(datasetSchema.getColumns().get("test2").getType(), String.class); - } + HashMap data = new HashMap<>(); + data.put("test", 1); + data.put("test2", "2"); + datasetSchema.resolve(data); + Assert.assertEquals(datasetSchema.getColumns().get("test").getType(), Integer.class); + 
Assert.assertEquals(datasetSchema.getColumns().get("test2").getType(), String.class); + } } From 4e9cf91c61735d56655d141db9ef497ca24267cd Mon Sep 17 00:00:00 2001 From: TheMellyBee Date: Tue, 30 Aug 2022 14:40:49 -0700 Subject: [PATCH 09/71] Adding in privacy and an lombrok constructor instead --- .../com/whylogs/core/schemas/ColumnSchema.java | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/java/core/src/main/java/com/whylogs/core/schemas/ColumnSchema.java b/java/core/src/main/java/com/whylogs/core/schemas/ColumnSchema.java index d09a000660..7a6733e240 100644 --- a/java/core/src/main/java/com/whylogs/core/schemas/ColumnSchema.java +++ b/java/core/src/main/java/com/whylogs/core/schemas/ColumnSchema.java @@ -5,22 +5,18 @@ import com.whylogs.core.resolvers.Resolver; import java.lang.reflect.Type; import java.util.HashMap; + +import lombok.AllArgsConstructor; import lombok.Data; -@Data +@Data @AllArgsConstructor public class ColumnSchema { // Thoughts: we could have this ColumnSchema instead of having it as a member // bu this might be easier to use? 
If we did we would need to use the CRTP again // like in Metric to be able to see the type but also have them in a collection togehter - Type type; - MetricConfig config; - Resolver resolver; - - public ColumnSchema(Type type, MetricConfig config, Resolver resolver) { - this.type = type; - this.config = config; - this.resolver = resolver; - } + private Type type; + private MetricConfig config; + private Resolver resolver; public HashMap getMetrics() { return this.resolver.resolve(this); From f7dc0a91ff54363883c4b5fe279a31146fcf1291 Mon Sep 17 00:00:00 2001 From: TheMellyBee Date: Tue, 30 Aug 2022 15:32:20 -0700 Subject: [PATCH 10/71] Changes based off review --- .../com/whylogs/core/resolvers/Resolver.java | 2 +- .../core/resolvers/StandardResolver.java | 2 +- .../com/whylogs/core/schemas/ColumnSchema.java | 6 +++--- .../whylogs/core/schemas/DatasetSchema.java | 18 ++++++++---------- .../core/resolvers/TestStandardResolver.java | 2 +- 5 files changed, 14 insertions(+), 16 deletions(-) diff --git a/java/core/src/main/java/com/whylogs/core/resolvers/Resolver.java b/java/core/src/main/java/com/whylogs/core/resolvers/Resolver.java index 93e5350d75..25fd280481 100644 --- a/java/core/src/main/java/com/whylogs/core/resolvers/Resolver.java +++ b/java/core/src/main/java/com/whylogs/core/resolvers/Resolver.java @@ -6,5 +6,5 @@ public abstract class Resolver { - public abstract HashMap resolve(ColumnSchema schema); + public abstract HashMap resolve(ColumnSchema schema); } diff --git a/java/core/src/main/java/com/whylogs/core/resolvers/StandardResolver.java b/java/core/src/main/java/com/whylogs/core/resolvers/StandardResolver.java index 03098acd6e..bf2c1f27c0 100644 --- a/java/core/src/main/java/com/whylogs/core/resolvers/StandardResolver.java +++ b/java/core/src/main/java/com/whylogs/core/resolvers/StandardResolver.java @@ -15,7 +15,7 @@ public StandardResolver() { // TODO: the rest of the metrics need implmeented // TODO: does this Metric loose it's typing? 
@Override - public HashMap resolve(ColumnSchema schema) { + public HashMap resolve(ColumnSchema schema) { ArrayList standardMetrics = new ArrayList<>(); // standardMetrics.add(StandardMetric.counts, StandardMetric.types) diff --git a/java/core/src/main/java/com/whylogs/core/schemas/ColumnSchema.java b/java/core/src/main/java/com/whylogs/core/schemas/ColumnSchema.java index 7a6733e240..9a1ef50709 100644 --- a/java/core/src/main/java/com/whylogs/core/schemas/ColumnSchema.java +++ b/java/core/src/main/java/com/whylogs/core/schemas/ColumnSchema.java @@ -5,11 +5,11 @@ import com.whylogs.core.resolvers.Resolver; import java.lang.reflect.Type; import java.util.HashMap; - import lombok.AllArgsConstructor; import lombok.Data; -@Data @AllArgsConstructor +@Data +@AllArgsConstructor public class ColumnSchema { // Thoughts: we could have this ColumnSchema instead of having it as a member // bu this might be easier to use? If we did we would need to use the CRTP again @@ -18,7 +18,7 @@ public class ColumnSchema { private MetricConfig config; private Resolver resolver; - public HashMap getMetrics() { + public HashMap getMetrics() { return this.resolver.resolve(this); } } diff --git a/java/core/src/main/java/com/whylogs/core/schemas/DatasetSchema.java b/java/core/src/main/java/com/whylogs/core/schemas/DatasetSchema.java index cc0295c8c5..c79fa53a28 100644 --- a/java/core/src/main/java/com/whylogs/core/schemas/DatasetSchema.java +++ b/java/core/src/main/java/com/whylogs/core/schemas/DatasetSchema.java @@ -10,12 +10,10 @@ @Data public class DatasetSchema { - private HashMap types = new HashMap<>(); private final int LARGE_CACHE_SIZE_LIMIT = 1024 * 100; public HashMap columns; public MetricConfig defaultConfig; - // TODO: typemapper public Resolver resolver; public int cache_size = 1024; public boolean schema_based_automerge = false; @@ -40,31 +38,31 @@ public DatasetSchema(int cache_size, boolean schema_based_automerge) { // TODO: log warning } + // Type name will be used as the 
column name if (!this.types.isEmpty()) { - for (String key : this.types.keySet()) { + for (String typeName : this.types.keySet()) { this.columns.put( - key, new ColumnSchema(this.types.get(key), this.defaultConfig, this.resolver)); + typeName, + new ColumnSchema(this.types.get(typeName), this.defaultConfig, this.resolver)); } } } - // TODO: java version of post init - public DatasetSchema copy() { DatasetSchema copy = new DatasetSchema(); // TODO: copy over - return copy; } public boolean resolve(HashMap data) { - for (String key : data.keySet()) { - if (this.columns.containsKey(key)) { + for (String columnName : data.keySet()) { + if (this.columns.containsKey(columnName)) { continue; } this.columns.put( - key, new ColumnSchema(data.get(key).getClass(), this.defaultConfig, this.resolver)); + columnName, + new ColumnSchema(data.get(columnName).getClass(), this.defaultConfig, this.resolver)); } return true; } diff --git a/java/core/src/test/java/com/whylogs/core/resolvers/TestStandardResolver.java b/java/core/src/test/java/com/whylogs/core/resolvers/TestStandardResolver.java index b71ada5868..2fd891031c 100644 --- a/java/core/src/test/java/com/whylogs/core/resolvers/TestStandardResolver.java +++ b/java/core/src/test/java/com/whylogs/core/resolvers/TestStandardResolver.java @@ -15,7 +15,7 @@ public class TestStandardResolver { public void test_integralInput() { StandardResolver resolver = new StandardResolver(); ColumnSchema columnSchema = new ColumnSchema(Integer.class, new MetricConfig(), resolver); - HashMap metrics = resolver.resolve(columnSchema); + HashMap metrics = resolver.resolve(columnSchema); Assert.assertEquals(metrics.get("ints").getClass(), IntegralMetric.class); } From 96f02328f79d264d53534252d80ee13af9e27d56 Mon Sep 17 00:00:00 2001 From: TheMellyBee Date: Thu, 1 Sep 2022 14:17:53 -0700 Subject: [PATCH 11/71] Refactor to not use the StandardMetric.java partial factory. 
Allow resolver to directly call zero() as it builds instead of reflection or through another point --- .../whylogs/core/metrics/StandardMetric.java | 52 ------------------- .../core/resolvers/StandardResolver.java | 34 ++++-------- .../metrics/TestStandardMetricFactory.java | 19 ------- 3 files changed, 9 insertions(+), 96 deletions(-) delete mode 100644 java/core/src/main/java/com/whylogs/core/metrics/StandardMetric.java delete mode 100644 java/core/src/test/java/com/whylogs/core/metrics/TestStandardMetricFactory.java diff --git a/java/core/src/main/java/com/whylogs/core/metrics/StandardMetric.java b/java/core/src/main/java/com/whylogs/core/metrics/StandardMetric.java deleted file mode 100644 index d99ebe3594..0000000000 --- a/java/core/src/main/java/com/whylogs/core/metrics/StandardMetric.java +++ /dev/null @@ -1,52 +0,0 @@ -package com.whylogs.core.metrics; - -public enum StandardMetric { - /* - types { - - }, - distribution{ - - }, - counts{ - - },*/ - ints { - @Override - public IntegralMetric zero(MetricConfig config) { - return IntegralMetric.zero(config); - } - }, -/* -cardinality { - -}, -frequent_items { - -}, -unicode_range { - -}, -condition_count{ - -}*/ -; - - abstract T zero(MetricConfig config); - - public static T getMetric(String name) { - return StandardMetric.valueOf(name).zero(new MetricConfig()); - } - - public static T getMetric(String name, MetricConfig config) { - return StandardMetric.valueOf(name).zero(config); - } - - public static T getMetric(StandardMetric metric) { - return metric.zero(new MetricConfig()); - } - - public static T getMetric(StandardMetric metric, MetricConfig config) { - return metric.zero(config); - } -} diff --git a/java/core/src/main/java/com/whylogs/core/resolvers/StandardResolver.java b/java/core/src/main/java/com/whylogs/core/resolvers/StandardResolver.java index bf2c1f27c0..14a16b0a4b 100644 --- a/java/core/src/main/java/com/whylogs/core/resolvers/StandardResolver.java +++ 
b/java/core/src/main/java/com/whylogs/core/resolvers/StandardResolver.java @@ -1,10 +1,11 @@ package com.whylogs.core.resolvers; import com.whylogs.core.DataTypes; +import com.whylogs.core.metrics.IntegralMetric; import com.whylogs.core.metrics.Metric; -import com.whylogs.core.metrics.StandardMetric; import com.whylogs.core.schemas.ColumnSchema; -import java.util.ArrayList; +import org.apache.commons.lang3.NotImplementedException; + import java.util.HashMap; public class StandardResolver extends Resolver { @@ -13,40 +14,23 @@ public StandardResolver() { } // TODO: the rest of the metrics need implmeented - // TODO: does this Metric loose it's typing? @Override public HashMap resolve(ColumnSchema schema) { - ArrayList standardMetrics = new ArrayList<>(); - // standardMetrics.add(StandardMetric.counts, StandardMetric.types) + HashMap resolvedMetrics = new HashMap<>(); if (DataTypes.Integral.includes(schema.getType())) { - standardMetrics.add(StandardMetric.ints); - // standardMetrics.add(StandardMetric.distribution); - // standardMetrics.add(StandardMetric.cardinality); - // standardMetrics.add(StandardMetric.frequent_items); + resolvedMetrics.put(IntegralMetric.NAMESPACE, IntegralMetric.zero(schema.getConfig())); } else if (DataTypes.Fractional.includes(schema.getType())) { - // standardMetrics.add(StandardMetric.distribution); - // standardMetrics.add(StandardMetric.cardinality); + throw new NotImplementedException("Fractional metrics not implemented"); } else if (DataTypes.String.includes(schema.getType())) { - // standardMetrics.add(StandardMetric.cardinality); - // standardMetrics.add(StandardMetric.distribution); - // standardMetrics.add(StandardMetric.frequent_items); - if (schema.getConfig().isTrack_unicode_ranges()) { - // standardMetrics.add(StandardMetric.unicode_range); + throw new NotImplementedException("String & Unicode metrics not implemented"); } } if (schema.getConfig().isFi_disabled()) { - // standardMetrics.remove(StandardMetric.frequent_items); - 
} - - HashMap result = new HashMap<>(); - - for (StandardMetric metric : standardMetrics) { - result.put(metric.name(), StandardMetric.getMetric(metric, schema.getConfig())); + throw new NotImplementedException("Frequent Items metrics not implemented"); } - - return result; + return resolvedMetrics; } } diff --git a/java/core/src/test/java/com/whylogs/core/metrics/TestStandardMetricFactory.java b/java/core/src/test/java/com/whylogs/core/metrics/TestStandardMetricFactory.java deleted file mode 100644 index 9b739c0508..0000000000 --- a/java/core/src/test/java/com/whylogs/core/metrics/TestStandardMetricFactory.java +++ /dev/null @@ -1,19 +0,0 @@ -package com.whylogs.core.metrics; - -import java.util.ArrayList; -import org.testng.Assert; -import org.testng.annotations.Test; - -@Test -public class TestStandardMetricFactory { - - @Test - public void test_standardMetric() { - IntegralMetric ints = StandardMetric.ints.zero(new MetricConfig()); - Assert.assertEquals((int) ints.getMaxComponent().getValue(), Integer.MIN_VALUE); - - ArrayList list = new ArrayList<>(); - list.add(ints); - list.add(StandardMetric.ints.zero(new MetricConfig())); - } -} From 1bb305e908d6d6dc71df5b40f20580c12a782aad Mon Sep 17 00:00:00 2001 From: TheMellyBee Date: Thu, 1 Sep 2022 14:23:58 -0700 Subject: [PATCH 12/71] Updates data schema to show dirty if there is a change to the columns and if not then true for no columns added --- .../java/com/whylogs/core/resolvers/StandardResolver.java | 3 +-- .../main/java/com/whylogs/core/schemas/DatasetSchema.java | 5 ++++- .../java/com/whylogs/core/schemas/TestDatasetSchema.java | 4 +++- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/java/core/src/main/java/com/whylogs/core/resolvers/StandardResolver.java b/java/core/src/main/java/com/whylogs/core/resolvers/StandardResolver.java index 14a16b0a4b..3b8ffede04 100644 --- a/java/core/src/main/java/com/whylogs/core/resolvers/StandardResolver.java +++ 
b/java/core/src/main/java/com/whylogs/core/resolvers/StandardResolver.java @@ -4,9 +4,8 @@ import com.whylogs.core.metrics.IntegralMetric; import com.whylogs.core.metrics.Metric; import com.whylogs.core.schemas.ColumnSchema; -import org.apache.commons.lang3.NotImplementedException; - import java.util.HashMap; +import org.apache.commons.lang3.NotImplementedException; public class StandardResolver extends Resolver { public StandardResolver() { diff --git a/java/core/src/main/java/com/whylogs/core/schemas/DatasetSchema.java b/java/core/src/main/java/com/whylogs/core/schemas/DatasetSchema.java index c79fa53a28..06a514762b 100644 --- a/java/core/src/main/java/com/whylogs/core/schemas/DatasetSchema.java +++ b/java/core/src/main/java/com/whylogs/core/schemas/DatasetSchema.java @@ -55,6 +55,7 @@ public DatasetSchema copy() { } public boolean resolve(HashMap data) { + boolean dirty = false; for (String columnName : data.keySet()) { if (this.columns.containsKey(columnName)) { continue; @@ -63,8 +64,10 @@ public boolean resolve(HashMap data) { this.columns.put( columnName, new ColumnSchema(data.get(columnName).getClass(), this.defaultConfig, this.resolver)); + + dirty = true; } - return true; + return dirty; } public Optional get(String name) { diff --git a/java/core/src/test/java/com/whylogs/core/schemas/TestDatasetSchema.java b/java/core/src/test/java/com/whylogs/core/schemas/TestDatasetSchema.java index ab6a4e1492..3f9970a6ae 100644 --- a/java/core/src/test/java/com/whylogs/core/schemas/TestDatasetSchema.java +++ b/java/core/src/test/java/com/whylogs/core/schemas/TestDatasetSchema.java @@ -15,8 +15,10 @@ public void test_dataset_schema() { HashMap data = new HashMap<>(); data.put("test", 1); data.put("test2", "2"); - datasetSchema.resolve(data); + Assert.assertTrue(datasetSchema.resolve(data)); Assert.assertEquals(datasetSchema.getColumns().get("test").getType(), Integer.class); Assert.assertEquals(datasetSchema.getColumns().get("test2").getType(), String.class); + + 
Assert.assertFalse(datasetSchema.resolve(data)); } } From b1cce16bc3d4d770c38372a120ee0ef01e5c7e06 Mon Sep 17 00:00:00 2001 From: TheMellyBee Date: Tue, 6 Sep 2022 14:15:22 -0700 Subject: [PATCH 13/71] Adding a getter and the Singl --- .../com/whylogs/core/SingleFieldProjector.java | 16 ++++++++++++++++ .../whylogs/core/metrics/OperationResult.java | 2 ++ 2 files changed, 18 insertions(+) create mode 100644 java/core/src/main/java/com/whylogs/core/SingleFieldProjector.java diff --git a/java/core/src/main/java/com/whylogs/core/SingleFieldProjector.java b/java/core/src/main/java/com/whylogs/core/SingleFieldProjector.java new file mode 100644 index 0000000000..56bba0927a --- /dev/null +++ b/java/core/src/main/java/com/whylogs/core/SingleFieldProjector.java @@ -0,0 +1,16 @@ +package com.whylogs.core; + +import lombok.Getter; +import lombok.RequiredArgsConstructor; + +import java.util.HashMap; + +@RequiredArgsConstructor +@Getter +public class SingleFieldProjector { + private final String columnName; + + public T apply(HashMap row) { + return row.get(columnName); + } +} diff --git a/java/core/src/main/java/com/whylogs/core/metrics/OperationResult.java b/java/core/src/main/java/com/whylogs/core/metrics/OperationResult.java index eb87307311..4a23b0b4c3 100644 --- a/java/core/src/main/java/com/whylogs/core/metrics/OperationResult.java +++ b/java/core/src/main/java/com/whylogs/core/metrics/OperationResult.java @@ -1,8 +1,10 @@ package com.whylogs.core.metrics; import lombok.Data; +import lombok.Getter; @Data +@Getter public class OperationResult { private final int successes; private final int failures; From 1e941825ad3d8103071a466ef03ea44463d4148b Mon Sep 17 00:00:00 2001 From: TheMellyBee Date: Tue, 6 Sep 2022 14:17:19 -0700 Subject: [PATCH 14/71] Most of the columnProfile is here. 
There is some needed changes to metric coming --- .../java/com/whylogs/core/ColumnProfile.java | 82 +++++++++ .../whylogs/core/views/ColumnProfileView.java | 171 ++++++++++++++++++ 2 files changed, 253 insertions(+) create mode 100644 java/core/src/main/java/com/whylogs/core/ColumnProfile.java create mode 100644 java/core/src/main/java/com/whylogs/core/views/ColumnProfileView.java diff --git a/java/core/src/main/java/com/whylogs/core/ColumnProfile.java b/java/core/src/main/java/com/whylogs/core/ColumnProfile.java new file mode 100644 index 0000000000..63e179431d --- /dev/null +++ b/java/core/src/main/java/com/whylogs/core/ColumnProfile.java @@ -0,0 +1,82 @@ +package com.whylogs.core; + +import com.whylogs.core.metrics.Metric; +import com.whylogs.core.metrics.OperationResult; +import com.whylogs.core.schemas.ColumnSchema; +import com.whylogs.core.views.ColumnProfileView; +import whylogs.core.message.ColumnMessage; + +import java.util.ArrayList; +import java.util.HashMap; + +public class ColumnProfile { + // Required + private String name; + private ColumnSchema schema; + private int cachedSize; + + // Has Defaults + private HashMap metrics; + private SingleFieldProjector projector; + private int successCount; + private int failureCount; + + private ArrayList cachedValues; + + public ColumnProfile(String name, ColumnSchema schema, int cachedSize){ + this.name = name; + this.schema = schema; + this.cachedSize = cachedSize; // TODO: add logger for size of cache on column + + // Defaulted + this.metrics = new HashMap<>(); + this.projector = new SingleFieldProjector(name); + this.successCount = 0; + this.failureCount = 0; + this.cachedValues = new ArrayList<>(); + } + + public void addMetric(Metric metric){ + if(this.metrics.containsKey(metric.getNamespace())){ + // TODO: Add logger with warning about replacement + } + + this.metrics.put(metric.getNamespace(), metric); + } + + public void track(HashMap row){ + T value = this.projector.apply(row); + 
this.cachedValues.add(value); + + if(this.cachedValues.size() >= this.cachedSize){ + this.flush(); + } + } + + public void flush(){ + // TODO: Logger was initially here, but only for when it was forced, think it through + ArrayList oldCache = this.cachedValues; + this.cachedValues = new ArrayList<>(); + this.trackColumn(oldCache); + } + + public void trackColumn(ArrayList values){ + PreprocessedColumn proccessedColumn = PreprocessedColumn.apply(values); + + for(Metric metric : this.metrics.values()){ + OperationResult result = metric.columnarUpdate(proccessedColumn); + this.successCount += result.getSuccesses(); + this.failureCount += result.getFailures(); + } + } + + public ColumnMessage toProtobuf(){ + // going to view flushes + return this.view().toProtobuf(); + } + + public ColumnProfileView view(){ + this.flush(); + return new ColumnProfileView(this.metrics, this.successCount, this.failureCount); + } +} diff --git a/java/core/src/main/java/com/whylogs/core/views/ColumnProfileView.java b/java/core/src/main/java/com/whylogs/core/views/ColumnProfileView.java new file mode 100644 index 0000000000..73321a6cab --- /dev/null +++ b/java/core/src/main/java/com/whylogs/core/views/ColumnProfileView.java @@ -0,0 +1,171 @@ +package com.whylogs.core.views; + +import com.google.protobuf.InvalidProtocolBufferException; +import com.whylogs.core.SummaryConfig; +import com.whylogs.core.errors.UnsupportedError; +import com.whylogs.core.metrics.Metric; +import lombok.Getter; +import whylogs.core.message.ColumnMessage; +import whylogs.core.message.MetricComponentMessage; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Optional; + +@Getter +public class ColumnProfileView{ + private HashMap metrics; + private int successes = 0; + private int failures = 0; + + public ColumnProfileView(HashMap metrics) { + this.metrics = metrics; + } + + public ColumnProfileView(HashMap metrics, int successes, int failures) { + this.metrics = 
metrics; + this.successes = successes; + this.failures = failures; + } + + // TODO: this needs the Metric Merge fixed + public ColumnProfileView merge(ColumnProfileView otherView){ + HashSet allMetricNames = new HashSet<>(); + allMetricNames.addAll(this.metrics.keySet()); + allMetricNames.addAll(otherView.metrics.keySet()); + + HashMap mergedMetrics = new HashMap<>(); + for(String metricName : allMetricNames){ + Metric thisMetric = this.metrics.get(metricName); + Metric otherMetric = otherView.metrics.get(metricName); + + Metric result = thisMetric; + + if(thisMetric != null && otherMetric != null){ + result = thisMetric.merge(otherMetric); + } else if (otherMetric != null){ + result = otherMetric; + } + + mergedMetrics.put(metricName, result); + } + + return new ColumnProfileView(mergedMetrics, + this.successes + otherView.successes, + this.failures + otherView.failures); + } + + public byte[] serialize(){ + return this.toProtobuf().toByteArray(); + } + + public static ColumnProfileView deserialize(byte[] data) throws InvalidProtocolBufferException { + ColumnMessage columnMessage = ColumnMessage.parseFrom(data); + return ColumnProfileView.fromProtobuf(columnMessage); + } + + public Optional getMetric(String metricName){ + return Optional.ofNullable(this.metrics.get(metricName)); + } + + // TODO: needs to have getComponents added to Metric + public ColumnMessage toProtobuf(){ + HashMap metricMessages = new HashMap<>(); + for(String metricName : this.metrics.keySet()){ + for(String componentName : this.metrics.get(metricName).getComponents().keySet()){ + MetricComponentMessage componentMessage = this.metrics.get(metricName).getComponents().get(componentName).toProtobuf(); + metricMessages.put(metricName + "/" + componentName, componentMessage); + } + } + return ColumnMessage.newBuilder().putAllMetricComponents(metricMessages).build(); + } + + public static ColumnProfileView zero(){ + return new ColumnProfileView(new HashMap<>()); + } + + public static 
ColumnProfileView fromProtobuf(ColumnMessage columnMessage){ + HashMap resultMetrics = new HashMap<>(); + HashMap> metricMessages = new HashMap<>(); + + for(String path : columnMessage.getMetricComponentsMap().keySet()){ + String metricName = path.split("/")[0]; + HashMap metricComponents = new HashMap<>(); + + if(metricMessages.containsKey(metricName)){ + metricComponents = metricMessages.get(metricName); + metricMessages.put(metricName, metricComponents); + } else { + metricMessages.put(metricName, new HashMap()); + } + + // TODO: get the path from the first / on + String componentKey = path.substring(path.indexOf("/") + 1); + metricComponents.put(componentKey, columnMessage.getMetricComponentsMap().get(path)); + } + + // TODO: turn metric into type + // was from StandardMetric + // then Registry + // then Metric.fromProtobuf + + return new ColumnProfileView(resultMetrics); + } + + public static ColumnProfileView fromBytes(byte[] data) throws InvalidProtocolBufferException { + ColumnMessage message = ColumnMessage.parseFrom(data); + return ColumnProfileView.fromProtobuf(message); + } + + // TODO: metric needs a getComponentPath + public ArrayList getMetricComponentPaths(){ + ArrayList paths = new ArrayList<>(); + for(String metricName : this.getMetricNames()){ + for(String componentName :this.getMetric(metricName).getCompnentPaths()){ + paths.add(metricName + "/" + componentName); + } + } + return paths; + } + + public ArrayList getMetricNames(){ + return new ArrayList<>(this.getMetrics().keySet()); + } + + public HashMap toSummaryDict(Optional columnMetric, Optional config) throws UnsupportedError { + SummaryConfig summaryConfig = config.orElse(new SummaryConfig()); + HashMap summary = new HashMap<>(); + + if(columnMetric.isPresent()){ + summary.putAll(getMetricSummaryHelper(summaryConfig, this.getMetric(columnMetric.get()))); + } else { + for(String metricName : this.getMetricNames()){ + summary.putAll(getMetricSummaryHelper(summaryConfig, 
this.getMetric(metricName))); + } + } + + // TODO: there was a logger for when a ssummary couldn't be implmented for a metric + + if(columnMetric.isPresent() && columnMetric.get().length() == 0){ + throw new UnsupportedError("No metric available for requested column metric: " + columnMetric.get()); + } + + return summary; + } + + private HashMap getMetricSummaryHelper(SummaryConfig summaryConfig, + Optional maybeMetric) { + HashMap result = new HashMap<>(); + Metric metric; + if(maybeMetric.isPresent()){ + metric = maybeMetric.get(); + HashMap metricSummary = metric.toSummaryDict(summaryConfig); + for (String componentName : metricSummary.keySet()) { + String fullName = metric.getNamespace() + "/" + componentName; + result.put(fullName, metricSummary.get(componentName)); + } + } + return result; + } +} From 7a0221c327cc3a5916413cd29b3c2248735de8ca Mon Sep 17 00:00:00 2001 From: TheMellyBee Date: Thu, 8 Sep 2022 13:04:12 -0700 Subject: [PATCH 15/71] WIP with the protobuf --- .../java/com/whylogs/core/DatasetProfile.java | 155 +++++++++++++ .../com/whylogs/core/utils/ProtobufUtil.java | 65 ++++++ .../core/views/DatasetProfileView.java | 207 ++++++++++++++++++ .../com/whylogs/core/views/SummaryType.java | 11 + .../core/views/WhylogsMagicUtility.java | 12 + .../com/whylogs/core/metrics/TestMetric.java | 25 +++ 6 files changed, 475 insertions(+) create mode 100644 java/core/src/main/java/com/whylogs/core/DatasetProfile.java create mode 100644 java/core/src/main/java/com/whylogs/core/utils/ProtobufUtil.java create mode 100644 java/core/src/main/java/com/whylogs/core/views/DatasetProfileView.java create mode 100644 java/core/src/main/java/com/whylogs/core/views/SummaryType.java create mode 100644 java/core/src/main/java/com/whylogs/core/views/WhylogsMagicUtility.java create mode 100644 java/core/src/test/java/com/whylogs/core/metrics/TestMetric.java diff --git a/java/core/src/main/java/com/whylogs/core/DatasetProfile.java 
b/java/core/src/main/java/com/whylogs/core/DatasetProfile.java new file mode 100644 index 0000000000..6430178ab2 --- /dev/null +++ b/java/core/src/main/java/com/whylogs/core/DatasetProfile.java @@ -0,0 +1,155 @@ +package com.whylogs.core; + +import com.whylogs.core.metrics.Metric; +import com.whylogs.core.schemas.ColumnSchema; +import com.whylogs.core.schemas.DatasetSchema; +import com.whylogs.core.views.ColumnProfileView; +import com.whylogs.core.views.DatasetProfileView; +import lombok.Getter; +import lombok.ToString; + +import java.time.ZoneId; +import java.time.ZonedDateTime; +import java.util.*; + +// TODO: extend WRITABLE interface +@Getter +@ToString +public class DatasetProfile { + public static int _LARGE_CACHE_SIZE_LIMIT = 1024 * 100; + + private DatasetSchema schema; + // QUESTION: Do we need zones here? Do we just use UTC? + private Date datasetTimestamp; + private Date creationTimestamp; + private HashMap> columns; + private boolean isActive = false; + private int trackCount = 0; + + public DatasetProfile(Optional datasetSchema, + Optional datasetaTimestampe, + Optional creationTimestampe) { + this.schema = datasetSchema.orElse(new DatasetSchema()); + this.datasetTimestamp = datasetaTimestampe.orElse(Date.); + this.creationTimestamp = creationTimestampe.orElse(Date.now()); + + this.columns = new HashMap<>(); + this.initializeNewColumns(schema.getColNames()); + } + + public void addMetric(String colName, Metric metric){ + if(!this.columns.containsKey(colName)){ + throw new InputMismatchException("Column name not found in schema"); + } + this.columns.get(colName).addMetric(metric); + } + + /* + TODO: I don't beleive we need this in Java? 
+ public void track(T obj){ + try{ + this.isActive = true; + this.trackCount += 1; + this.doTrack(obj); + } finally { + this.isActive = false; + } + }*/ + + public void track(HashMap row){ + try{ + this.isActive = true; + this.trackCount += 1; + this.doTrack(row); + } finally { + this.isActive = false; + } + } + + + private void doTrack(HashMap row) { + boolean dirty = this.schema.resolve(row); + if(dirty){ + Set schemaColumnNames = this.schema.getColNames(); + Set newColumnNames = new HashSet<>(); + for(String colName : schemaColumnNames){ + if(!this.columns.containsKey(colName)){ + newColumnNames.add(colName); + } + } + this.initializeNewColumns(newColumnNames); + } + + // QUESTION: what is this section for? Why do we track this if we just did? + /* + for(String col : row.keySet()){ + this.columns.get(col).track(new ArrayList(row.get(col))); + }*/ + + } + + /** + * @return True if the profile tracking code is currently running. + */ + public boolean isEmpty(){ + return this.trackCount == 0; + } + + public void setDatasetTimestamp(ZonedDateTime datasetTimestamp) { + if(datasetTimestamp.getZone() == null){ + // TODO: log warning if it's not there (is that even possible in Java?) 
+ } + this.datasetTimestamp = ZonedDateTime.ofInstant(datasetTimestamp.toInstant(), ZoneId.of("UTC")); + } + + private void initializeNewColumns(Set colNames) { + for(String column : colNames){ + ColumnSchema columnSchema = this.schema.columns.get(column); + if(columnSchema != null){ + this.columns.put(column, new ColumnProfile(column, columnSchema, this.schema.cache_size)); + } + // TODO: log warning 'Encountered a column without schema: %s", col' in an else + } + } + + public DatasetProfileView view(){ + HashMap columns = new HashMap<>(); + + for(String colName : this.columns.keySet()){ + columns.put(colName, this.columns.get(colName).view()); + } + + return new DatasetProfileView(columns, this.datasetTimestamp, this.creationTimestamp); + } + + public void flush(){ + for(String colName : this.columns.keySet()){ + this.columns.get(colName).flush(); + } + } + + public static String getDefaultPath(Optional path){ + String defaultPath = "profile." + (int) System.currentTimeMillis() + ".bin"; + + if(!path.isPresent()){ + return defaultPath; + } + + if(!path.get().endsWith("bin")) { + String finalPath = path.get() + defaultPath; + return finalPath; + } + + return path.get(); + } + + public void write(Optional pathName){ + String outputPath = getDefaultPath(pathName); + this.view().write(outputPath); + // TODO log debug "Wrote profile to path: %s", output_path + } + + public static DatasetProfileView read(String inputPath){ + return DatasetProfileView.read(inputPath); + } +} diff --git a/java/core/src/main/java/com/whylogs/core/utils/ProtobufUtil.java b/java/core/src/main/java/com/whylogs/core/utils/ProtobufUtil.java new file mode 100644 index 0000000000..ceea794206 --- /dev/null +++ b/java/core/src/main/java/com/whylogs/core/utils/ProtobufUtil.java @@ -0,0 +1,65 @@ +package com.whylogs.core.utils; + +import com.google.protobuf.Message; +import com.google.protobuf.Parser; +import lombok.experimental.UtilityClass; + +import java.io.*; +import 
java.lang.reflect.InvocationTargetException; + +@UtilityClass +public class ProtobufUtil { + + public static T readDelimitedProtobuf(InputStream input, Class protoClass, Parser parser) throws IOException { + readDelimitedProtobuf(input, protoClass, parser, 0); + } + + public static T readDelimitedProtobuf(InputStream input, Class protoClass, Parser parser, int offset) throws IOException { + int size = parseFromDelimitedSize(input, offset); + if(size == 0){ + try{ + return protoClass.getConstructor().newInstance(); + } catch (InvocationTargetException | InstantiationException | IllegalAccessException | NoSuchMethodException e) { + e.printStackTrace(); + } + } + + byte[] buffer = new byte[size]; + if(input.read(buffer) == -1){ + throw new EOFException(); + } + + return parser.parseDelimitedFrom(input); + } + + public static int readVarint(InputStream input, int offset) throws IOException { + if(offset > 0){ + input.read(new byte[offset]); + } + + byte[] buffer = new byte[7]; + if(input.read(buffer) == -1){ + return 0; + } + + int i = 0; + while((buffer[i] & 0x80) >> 7 == 1){ + int new_byte = input.read(); + if(new_byte == -1){ + throw new EOFException("Unexpected EOF"); + } + i += 1; + buffer[i] = (byte) new_byte; + } + + return parseDelimitedFrom(buffer, i); + } + + public static void writeDelimitedProtobuf(OutputStream output, Message message) throws IOException { + if(output == null){ + throw new IOException("Output stream is null"); + } + EncodeVarint(output, message.getSerializedSize()); + output.write(message.toByteArray()); + } +} diff --git a/java/core/src/main/java/com/whylogs/core/views/DatasetProfileView.java b/java/core/src/main/java/com/whylogs/core/views/DatasetProfileView.java new file mode 100644 index 0000000000..24d211db53 --- /dev/null +++ b/java/core/src/main/java/com/whylogs/core/views/DatasetProfileView.java @@ -0,0 +1,207 @@ +package com.whylogs.core.views; + +import com.whylogs.core.errors.DeserializationError; +import 
com.whylogs.core.metrics.components.MetricComponent; +import com.whylogs.core.utils.ProtobufUtil; +import lombok.AllArgsConstructor; +import lombok.Getter; +import lombok.ToString; +import whylogs.core.message.*; + +import java.io.*; +import java.lang.reflect.Type; +import java.nio.channels.Channels; +import java.nio.file.Files; +import java.util.*; +import java.util.stream.Collectors; + +import static com.whylogs.core.utils.ProtobufUtil.writeDelimitedProtobuf; +import static java.util.stream.Collectors.toMap; + +// TODO: extend writable +@AllArgsConstructor +@Getter +@ToString +public class DatasetProfileView { + private HashMap columns; + private Date datasetTimestamp; + private Date creationTimestamp; + + public DatasetProfileView merge(DatasetProfileView otherView) { + HashMap mergedColumns = new HashMap<>(); + HashSet allNames = new HashSet<>(); + allNames.addAll(this.columns.keySet()); + allNames.addAll(otherView.columns.keySet()); + + for (String columnName : allNames) { + ColumnProfileView thisColumn = this.columns.get(columnName); + ColumnProfileView otherColumn = otherView.columns.get(columnName); + + ColumnProfileView result = thisColumn; + + if (thisColumn != null && otherColumn != null) { + result = thisColumn.merge(otherColumn); + } else if (otherColumn != null) { + result = otherColumn; + } + + mergedColumns.put(columnName, result); + } + + return new DatasetProfileView(mergedColumns, this.datasetTimestamp, this.creationTimestamp); + } + + public Optional getColumn(String columnName) { + return Optional.ofNullable(this.columns.get(columnName)); + } + + public HashMap getColumns(Optional> colNames) { + if (colNames.isPresent()) { + HashMap result = new HashMap<>(); + for (String colName : colNames.get()) { + result.put(colName, this.columns.get(colName)); + } + return result; + } else { + return this.columns; + } + } + + public String getDefaultPath() { + return "profile_" + this.creationTimestamp + ".bin"; + } + + // TODO: we need get components + 
public void write(Optional path) { + HashSet allComponentNames = new HashSet<>(); + HashMap metricNameToIndex = new HashMap<>(); + HashMap indexToMetricName = new HashMap<>(); + HashMap columnChunkOffsets = new HashMap<>(); + String pathName = path.orElseGet(this::getDefaultPath); + + for (String colName : this.columns.keySet()) { + ColumnProfileView column = this.columns.get(colName); + allComponentNames.addAll(column.getComponents().keySet()); + } + allComponentNames.stream().sorted().forEach(name -> { + int index = metricNameToIndex.size(); + metricNameToIndex.put(name, index); + indexToMetricName.put(index, name); + }); + + String tempPath = System.getProperty("java.io.tmpdir") + File.separator + "whylogs" + File.separator + "temp_" + this.creationTimestamp + ".bin"; + try (RandomAccessFile file = new RandomAccessFile(tempPath, "rw")) { + OutputStream outputStream = Channels.newOutputStream(file.getChannel()); + for (String colName : this.columns.keySet().stream().sorted().collect(Collectors.toList())) { + ColumnProfileView currentColumn = this.columns.get(colName); + columnChunkOffsets.put(colName, ChunkOffsets.newBuilder().addOffsets(file.getFilePointer()).build()); + + // Chunk the column + HashMap indexComponentMetric = new HashMap<>(); + Map metricComponentMap = currentColumn.toProtobuf().getMetricComponentsMap(); + + for (String metricName : metricComponentMap.keySet()) { + if (metricNameToIndex.containsKey(metricName)) { + indexComponentMetric.put(metricNameToIndex.get(metricName), metricComponentMap.get(metricName)); + } else { + throw new InputMismatchException("Missing metric from index map. 
Metric name: " + metricName); + } + } + + ChunkMessage chunkMsg = ChunkMessage.newBuilder().putAllMetricComponents(indexComponentMetric).build(); + ChunkHeader chunkHeader = ChunkHeader.newBuilder().setType(ChunkHeader.ChunkType.COLUMN).setLength(chunkMsg.getSerializedSize()).build(); + writeDelimitedProtobuf(outputStream, chunkHeader); + outputStream.write(chunkMsg.toByteArray()); + } + + long totalLength = file.getFilePointer(); + + DatasetProperties datasetProperties = DatasetProperties.newBuilder() + .setDatasetTimestamp(this.datasetTimestamp.getTime()) + .setCreationTimestamp(this.creationTimestamp.getTime()) + .build(); + + DatasetProfileHeader header = DatasetProfileHeader.newBuilder() + .setProperties(datasetProperties) + .setLength(totalLength) + .putAllColumnOffsets(columnChunkOffsets) + .putAllIndexedMetricPaths(indexToMetricName) + .build(); + + DatasetSegmentHeader segmentHeader = DatasetSegmentHeader.newBuilder() + .setHasSegments(false) + .build(); + + try (RandomAccessFile outFile = new RandomAccessFile(pathName, "rw")) { + file.seek(0); + InputStream inputFromTemp = Channels.newInputStream(file.getChannel()); + OutputStream writeToFile = Channels.newOutputStream(outFile.getChannel()); + outFile.write(WhylogsMagicUtility.WHYLOGS_MAGIC_HEADER_BYTES); + writeDelimitedProtobuf(writeToFile, segmentHeader); + writeDelimitedProtobuf(writeToFile, header); + + int bufferSize = 1024; + int bytesRead = 0; + while (file.getFilePointer() < totalLength) { + byte[] buffer = new byte[bufferSize]; + bytesRead = inputFromTemp.read(buffer, bytesRead, bytesRead+ bufferSize); + writeToFile.write(buffer, 0, bytesRead); // TODO: this offset doesn't seem write. 
Test + } + } + } catch (IOException e) { + e.printStackTrace(); + } finally { + try { + Files.deleteIfExists(new File(tempPath).toPath()); + } catch (IOException e) { + e.printStackTrace(); + } + } + } + + public static DatasetProfileView read(String path) throws FileNotFoundException { + try (RandomAccessFile file = new RandomAccessFile(path, "r")) { + byte[] buffer = new byte[WhylogsMagicUtility.WHYLOGS_MAGIC_HEADER_LENGTH]; + file.read(buffer); + + String decodedHeader; + try{ + decodedHeader = new String(buffer, "UTF-8"); + } catch(Exception e){ + throw new DeserializationError("Invalid magic header. Decoder error: " + e.getMessage()); + } + + if (!WhylogsMagicUtility.WHYLOGS_MAGIC_HEADER.equals(decodedHeader)) { + throw new DeserializationError("Invalid magic header. Expected: " + WhylogsMagicUtility.WHYLOGS_MAGIC_HEADER + " Got: " + decodedHeader); + } + + InputStream inputStream = Channels.newInputStream(file.getChannel()); + DatasetSegmentHeader segmentHeader = DatasetSegmentHeader.parseDelimitedFrom(inputStream); + if(segmentHeader.getHasSegments()){ + throw new DeserializationError("Dataset profile has segments. This is not supported yet."); + } + + DatasetProfileHeader header = DatasetProfileHeader.parseDelimitedFrom(inputStream); + if(header.getSerializedSize() == 0){ + throw new DeserializationError("Missing valid dataset profile header"); + } + + Date datasetTimestamp = new Date(header.getProperties().getDatasetTimestamp()); + Date creationTimestamp = new Date(header.getProperties().getCreationTimestamp()); + Map indexedMetricPath = header.getIndexedMetricPathsMap(); + + // TODO; Log warning if it's less than 1 "Name index in the header is empty. 
Possible data corruption" + long startOffset = file.getFilePointer(); + + HashMap columns = new HashMap<>(); + ArrayList sortedColNames = new ArrayList<>(header.getColumnOffsetsMap().keySet()); + sortedColNames.sort(Comparator.naturalOrder()); + for(String colName: sortedColNames){ + + } + + } catch (IOException | DeserializationError e) { + e.printStackTrace(); + } + } +} \ No newline at end of file diff --git a/java/core/src/main/java/com/whylogs/core/views/SummaryType.java b/java/core/src/main/java/com/whylogs/core/views/SummaryType.java new file mode 100644 index 0000000000..1fee260d2c --- /dev/null +++ b/java/core/src/main/java/com/whylogs/core/views/SummaryType.java @@ -0,0 +1,11 @@ +package com.whylogs.core.views; + +public enum SummaryType { + COLUMN("COLUMN"), + DATASET("DATASET"); + + public final String label; + private SummaryType(String label) { + this.label = label; + } +} diff --git a/java/core/src/main/java/com/whylogs/core/views/WhylogsMagicUtility.java b/java/core/src/main/java/com/whylogs/core/views/WhylogsMagicUtility.java new file mode 100644 index 0000000000..0254024481 --- /dev/null +++ b/java/core/src/main/java/com/whylogs/core/views/WhylogsMagicUtility.java @@ -0,0 +1,12 @@ +package com.whylogs.core.views; + +import lombok.experimental.UtilityClass; + +import java.nio.charset.StandardCharsets; + +@UtilityClass +public class WhylogsMagicUtility { + public static final String WHYLOGS_MAGIC_HEADER = "WHY1"; + public static final int WHYLOGS_MAGIC_HEADER_LENGTH = WHYLOGS_MAGIC_HEADER.length(); + public static final byte[] WHYLOGS_MAGIC_HEADER_BYTES = WHYLOGS_MAGIC_HEADER.getBytes(StandardCharsets.UTF_8);; +} diff --git a/java/core/src/test/java/com/whylogs/core/metrics/TestMetric.java b/java/core/src/test/java/com/whylogs/core/metrics/TestMetric.java new file mode 100644 index 0000000000..35004ec2a5 --- /dev/null +++ b/java/core/src/test/java/com/whylogs/core/metrics/TestMetric.java @@ -0,0 +1,25 @@ +package com.whylogs.core.metrics; + +import 
com.whylogs.core.metrics.components.MaxIntegralComponent; +import org.junit.Assert; +import org.testng.annotations.Test; + +import java.util.ArrayList; + +@Test +public class TestMetric { + + @Test + public void testMetrics(){ + ArrayList metrics = new ArrayList<>(); + metrics.add(IntegralMetric.zero(new MetricConfig())); + metrics.add(IntegralMetric.zero(new MetricConfig())); + + for(Metric metric : metrics){ + Assert.assertTrue(metric instanceof IntegralMetric); + metric.merge(new IntegralMetric()); + + ((IntegralMetric) metric).getMaxComponent() + } + } +} From fba212cb5c70889a0ae263790c7dcd626e7b2cda Mon Sep 17 00:00:00 2001 From: TheMellyBee Date: Thu, 8 Sep 2022 13:04:36 -0700 Subject: [PATCH 16/71] Removes protobuf as I don't think I need it in java --- .../com/whylogs/core/utils/ProtobufUtil.java | 65 ------------------- 1 file changed, 65 deletions(-) delete mode 100644 java/core/src/main/java/com/whylogs/core/utils/ProtobufUtil.java diff --git a/java/core/src/main/java/com/whylogs/core/utils/ProtobufUtil.java b/java/core/src/main/java/com/whylogs/core/utils/ProtobufUtil.java deleted file mode 100644 index ceea794206..0000000000 --- a/java/core/src/main/java/com/whylogs/core/utils/ProtobufUtil.java +++ /dev/null @@ -1,65 +0,0 @@ -package com.whylogs.core.utils; - -import com.google.protobuf.Message; -import com.google.protobuf.Parser; -import lombok.experimental.UtilityClass; - -import java.io.*; -import java.lang.reflect.InvocationTargetException; - -@UtilityClass -public class ProtobufUtil { - - public static T readDelimitedProtobuf(InputStream input, Class protoClass, Parser parser) throws IOException { - readDelimitedProtobuf(input, protoClass, parser, 0); - } - - public static T readDelimitedProtobuf(InputStream input, Class protoClass, Parser parser, int offset) throws IOException { - int size = parseFromDelimitedSize(input, offset); - if(size == 0){ - try{ - return protoClass.getConstructor().newInstance(); - } catch (InvocationTargetException | 
InstantiationException | IllegalAccessException | NoSuchMethodException e) { - e.printStackTrace(); - } - } - - byte[] buffer = new byte[size]; - if(input.read(buffer) == -1){ - throw new EOFException(); - } - - return parser.parseDelimitedFrom(input); - } - - public static int readVarint(InputStream input, int offset) throws IOException { - if(offset > 0){ - input.read(new byte[offset]); - } - - byte[] buffer = new byte[7]; - if(input.read(buffer) == -1){ - return 0; - } - - int i = 0; - while((buffer[i] & 0x80) >> 7 == 1){ - int new_byte = input.read(); - if(new_byte == -1){ - throw new EOFException("Unexpected EOF"); - } - i += 1; - buffer[i] = (byte) new_byte; - } - - return parseDelimitedFrom(buffer, i); - } - - public static void writeDelimitedProtobuf(OutputStream output, Message message) throws IOException { - if(output == null){ - throw new IOException("Output stream is null"); - } - EncodeVarint(output, message.getSerializedSize()); - output.write(message.toByteArray()); - } -} From 5f9e81087b0242bd87a0ef5ae666606ebd34e542 Mon Sep 17 00:00:00 2001 From: TheMellyBee Date: Thu, 8 Sep 2022 16:21:51 -0700 Subject: [PATCH 17/71] Finished first take of the dataset and column profile/view. 
Still erroring due to metric refactor needed --- .../whylogs/core/views/ColumnProfileView.java | 14 +++-- .../core/views/DatasetProfileView.java | 51 ++++++++++++++----- 2 files changed, 48 insertions(+), 17 deletions(-) diff --git a/java/core/src/main/java/com/whylogs/core/views/ColumnProfileView.java b/java/core/src/main/java/com/whylogs/core/views/ColumnProfileView.java index 73321a6cab..9489068ff9 100644 --- a/java/core/src/main/java/com/whylogs/core/views/ColumnProfileView.java +++ b/java/core/src/main/java/com/whylogs/core/views/ColumnProfileView.java @@ -4,14 +4,12 @@ import com.whylogs.core.SummaryConfig; import com.whylogs.core.errors.UnsupportedError; import com.whylogs.core.metrics.Metric; +import com.whylogs.core.metrics.components.MetricComponent; import lombok.Getter; import whylogs.core.message.ColumnMessage; import whylogs.core.message.MetricComponentMessage; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Optional; +import java.util.*; @Getter public class ColumnProfileView{ @@ -168,4 +166,12 @@ private HashMap getMetricSummaryHelper(SummaryConfig summaryConf } return result; } + + public Map getComponents() { + HashMap result = new HashMap<>(); + for(String metricName : this.getMetricNames()){ + result.putAll(this.getMetric(metricName).getComponents()); + } + return result; + } } diff --git a/java/core/src/main/java/com/whylogs/core/views/DatasetProfileView.java b/java/core/src/main/java/com/whylogs/core/views/DatasetProfileView.java index 24d211db53..72df2a7632 100644 --- a/java/core/src/main/java/com/whylogs/core/views/DatasetProfileView.java +++ b/java/core/src/main/java/com/whylogs/core/views/DatasetProfileView.java @@ -1,8 +1,6 @@ package com.whylogs.core.views; import com.whylogs.core.errors.DeserializationError; -import com.whylogs.core.metrics.components.MetricComponent; -import com.whylogs.core.utils.ProtobufUtil; import lombok.AllArgsConstructor; import lombok.Getter; import 
lombok.ToString; @@ -15,9 +13,6 @@ import java.util.*; import java.util.stream.Collectors; -import static com.whylogs.core.utils.ProtobufUtil.writeDelimitedProtobuf; -import static java.util.stream.Collectors.toMap; - // TODO: extend writable @AllArgsConstructor @Getter @@ -44,7 +39,6 @@ public DatasetProfileView merge(DatasetProfileView otherView) { } else if (otherColumn != null) { result = otherColumn; } - mergedColumns.put(columnName, result); } @@ -110,7 +104,7 @@ public void write(Optional path) { ChunkMessage chunkMsg = ChunkMessage.newBuilder().putAllMetricComponents(indexComponentMetric).build(); ChunkHeader chunkHeader = ChunkHeader.newBuilder().setType(ChunkHeader.ChunkType.COLUMN).setLength(chunkMsg.getSerializedSize()).build(); - writeDelimitedProtobuf(outputStream, chunkHeader); + chunkHeader.writeDelimitedTo(outputStream); outputStream.write(chunkMsg.toByteArray()); } @@ -137,8 +131,8 @@ public void write(Optional path) { InputStream inputFromTemp = Channels.newInputStream(file.getChannel()); OutputStream writeToFile = Channels.newOutputStream(outFile.getChannel()); outFile.write(WhylogsMagicUtility.WHYLOGS_MAGIC_HEADER_BYTES); - writeDelimitedProtobuf(writeToFile, segmentHeader); - writeDelimitedProtobuf(writeToFile, header); + segmentHeader.writeDelimitedTo(writeToFile); + header.writeDelimitedTo(writeToFile); int bufferSize = 1024; int bytesRead = 0; @@ -160,6 +154,10 @@ public void write(Optional path) { } public static DatasetProfileView read(String path) throws FileNotFoundException { + ColumnMessage columnMessage; + HashMap columns = new HashMap<>(); + Date datasetTimestamp = null; + Date creationTimestamp = null; try (RandomAccessFile file = new RandomAccessFile(path, "r")) { byte[] buffer = new byte[WhylogsMagicUtility.WHYLOGS_MAGIC_HEADER_LENGTH]; file.read(buffer); @@ -186,22 +184,49 @@ public static DatasetProfileView read(String path) throws FileNotFoundException throw new DeserializationError("Missing valid dataset profile header"); } - 
Date datasetTimestamp = new Date(header.getProperties().getDatasetTimestamp()); - Date creationTimestamp = new Date(header.getProperties().getCreationTimestamp()); + datasetTimestamp = new Date(header.getProperties().getDatasetTimestamp()); + creationTimestamp = new Date(header.getProperties().getCreationTimestamp()); Map indexedMetricPath = header.getIndexedMetricPathsMap(); // TODO; Log warning if it's less than 1 "Name index in the header is empty. Possible data corruption" long startOffset = file.getFilePointer(); - HashMap columns = new HashMap<>(); ArrayList sortedColNames = new ArrayList<>(header.getColumnOffsetsMap().keySet()); sortedColNames.sort(Comparator.naturalOrder()); for(String colName: sortedColNames){ + ChunkOffsets offsets = header.getColumnOffsetsMap().get(colName); + HashMap metricComponents = new HashMap<>(); - } + for(long offset: offsets.getOffsetsList()){ + long actualOffset = offset + startOffset; + ChunkHeader chunkHeader = ChunkHeader.parseDelimitedFrom(inputStream); + + if(chunkHeader == null){ + throw new DeserializationError("Missing chunk header at offset: " + actualOffset); + } + + if (chunkHeader.getType() != ChunkHeader.ChunkType.COLUMN) { + throw new DeserializationError("Invalid chunk type. Expected: " + ChunkHeader.ChunkType.COLUMN + " Got: " + chunkHeader.getType()); + } + + // TODO: does this need to first grab the chunkHeader.length? + ChunkMessage chunkMessage = ChunkMessage.parseFrom(inputStream); + + for(Integer index: chunkMessage.getMetricComponentsMap().keySet()){ + if(indexedMetricPath.containsKey(index)){ + metricComponents.put(indexedMetricPath.get(index), chunkMessage.getMetricComponentsMap().get(index)); + } else { + throw new DeserializationError("Missing metric from index map. 
Index: " + index); + } + } + } + columnMessage = ColumnMessage.newBuilder().putAllMetricComponents(metricComponents).build(); + columns.put(colName, ColumnProfileView.fromProtobuf(columnMessage)); + } } catch (IOException | DeserializationError e) { e.printStackTrace(); } + return new DatasetProfileView(columns, datasetTimestamp, creationTimestamp); } } \ No newline at end of file From 8d34448a11705c943df39d2e7bcbd29affb04c4a Mon Sep 17 00:00:00 2001 From: TheMellyBee Date: Thu, 8 Sep 2022 16:41:02 -0700 Subject: [PATCH 18/71] Refactor Metric to be able to merge on main. It's not my favorite design ... but it's a step --- .../whylogs/core/metrics/IntegralMetric.java | 10 +++++++++- .../java/com/whylogs/core/metrics/Metric.java | 16 ++++++++++++++++ .../whylogs/core/views/ColumnProfileView.java | 17 +++++++++++++---- .../com/whylogs/core/metrics/TestMetric.java | 4 +--- 4 files changed, 39 insertions(+), 8 deletions(-) diff --git a/java/core/src/main/java/com/whylogs/core/metrics/IntegralMetric.java b/java/core/src/main/java/com/whylogs/core/metrics/IntegralMetric.java index e1a34a6f7c..edd35cbe96 100644 --- a/java/core/src/main/java/com/whylogs/core/metrics/IntegralMetric.java +++ b/java/core/src/main/java/com/whylogs/core/metrics/IntegralMetric.java @@ -3,6 +3,7 @@ import com.whylogs.core.PreprocessedColumn; import com.whylogs.core.SummaryConfig; import com.whylogs.core.metrics.components.MaxIntegralComponent; +import com.whylogs.core.metrics.components.MetricComponent; import com.whylogs.core.metrics.components.MinIntegralComponent; import java.util.ArrayList; import java.util.Collections; @@ -62,7 +63,14 @@ public OperationResult columnarUpdate(PreprocessedColumn data) { return OperationResult.ok(successes); } - // @Override // TODO: move this to a factory so it can be accessed for building + @Override + public HashMap getComponents() { + HashMap components = new HashMap<>(); + components.put(this.maxComponent.getTypeName(), this.maxComponent); + 
components.put(this.minComponent.getTypeName(), this.minComponent); + return components; + } + public static IntegralMetric zero(MetricConfig config) { return new IntegralMetric(); } diff --git a/java/core/src/main/java/com/whylogs/core/metrics/Metric.java b/java/core/src/main/java/com/whylogs/core/metrics/Metric.java index 70af2da41e..86df8951b0 100644 --- a/java/core/src/main/java/com/whylogs/core/metrics/Metric.java +++ b/java/core/src/main/java/com/whylogs/core/metrics/Metric.java @@ -3,6 +3,8 @@ import com.whylogs.core.PreprocessedColumn; import com.whylogs.core.SummaryConfig; import java.util.HashMap; + +import com.whylogs.core.metrics.components.MetricComponent; import lombok.*; @EqualsAndHashCode @@ -19,6 +21,20 @@ public abstract class Metric { public abstract OperationResult columnarUpdate(PreprocessedColumn data); + public abstract HashMap getComponents(); + + public Metric merge(Metric other){ + Metric merged = this; + if(!this.namespace.equals(other.namespace)){ + throw new IllegalArgumentException("Cannot merge metrics with different namespaces"); + } + + if(this instanceof IntegralMetric){ + ((IntegralMetric) merged).merge((IntegralMetric) other); + } + return merged; + } + public @NonNull String getNamespace() { return namespace; } diff --git a/java/core/src/main/java/com/whylogs/core/views/ColumnProfileView.java b/java/core/src/main/java/com/whylogs/core/views/ColumnProfileView.java index 9489068ff9..e044e9c706 100644 --- a/java/core/src/main/java/com/whylogs/core/views/ColumnProfileView.java +++ b/java/core/src/main/java/com/whylogs/core/views/ColumnProfileView.java @@ -1,10 +1,12 @@ package com.whylogs.core.views; import com.google.protobuf.InvalidProtocolBufferException; +import com.google.protobuf.Option; import com.whylogs.core.SummaryConfig; import com.whylogs.core.errors.UnsupportedError; import com.whylogs.core.metrics.Metric; import com.whylogs.core.metrics.components.MetricComponent; +import com.whylogs.v0.core.constraint.Op; import 
lombok.Getter; import whylogs.core.message.ColumnMessage; import whylogs.core.message.MetricComponentMessage; @@ -72,7 +74,8 @@ public ColumnMessage toProtobuf(){ HashMap metricMessages = new HashMap<>(); for(String metricName : this.metrics.keySet()){ for(String componentName : this.metrics.get(metricName).getComponents().keySet()){ - MetricComponentMessage componentMessage = this.metrics.get(metricName).getComponents().get(componentName).toProtobuf(); + Metric metric = this.metrics.get(metricName); + MetricComponentMessage componentMessage = metric.getComponents().get(componentName). metricMessages.put(metricName + "/" + componentName, componentMessage); } } @@ -120,8 +123,11 @@ public static ColumnProfileView fromBytes(byte[] data) throws InvalidProtocolBuf public ArrayList getMetricComponentPaths(){ ArrayList paths = new ArrayList<>(); for(String metricName : this.getMetricNames()){ - for(String componentName :this.getMetric(metricName).getCompnentPaths()){ - paths.add(metricName + "/" + componentName); + Optional metric = this.getMetric(metricName); + if(metric.isPresent()){ + for(String componentName : metric.get().getComponents().keySet()){ + paths.add(metricName + "/" + componentName); + } } } return paths; @@ -170,7 +176,10 @@ private HashMap getMetricSummaryHelper(SummaryConfig summaryConf public Map getComponents() { HashMap result = new HashMap<>(); for(String metricName : this.getMetricNames()){ - result.putAll(this.getMetric(metricName).getComponents()); + Optional metric = this.getMetric(metricName); + if(metric.isPresent()){ + result.putAll(metric.get().getComponents()); + } } return result; } diff --git a/java/core/src/test/java/com/whylogs/core/metrics/TestMetric.java b/java/core/src/test/java/com/whylogs/core/metrics/TestMetric.java index 35004ec2a5..ef4a2427c5 100644 --- a/java/core/src/test/java/com/whylogs/core/metrics/TestMetric.java +++ b/java/core/src/test/java/com/whylogs/core/metrics/TestMetric.java @@ -11,15 +11,13 @@ public class 
TestMetric { @Test public void testMetrics(){ - ArrayList metrics = new ArrayList<>(); + ArrayList metrics = new ArrayList<>(); metrics.add(IntegralMetric.zero(new MetricConfig())); metrics.add(IntegralMetric.zero(new MetricConfig())); for(Metric metric : metrics){ Assert.assertTrue(metric instanceof IntegralMetric); metric.merge(new IntegralMetric()); - - ((IntegralMetric) metric).getMaxComponent() } } } From 6cbf871bd5d8c5e0749b0e00f04fb3646d5db41d Mon Sep 17 00:00:00 2001 From: TheMellyBee Date: Mon, 12 Sep 2022 11:30:19 -0700 Subject: [PATCH 19/71] Remove all protobuf and write (it's on the protobuf branch now). Update some errors, make spotless, build works --- .../java/com/whylogs/core/ColumnProfile.java | 121 ++++---- .../java/com/whylogs/core/DatasetProfile.java | 224 +++++++-------- .../whylogs/core/SingleFieldProjector.java | 11 +- .../java/com/whylogs/core/metrics/Metric.java | 9 +- .../whylogs/core/views/ColumnProfileView.java | 269 +++++++----------- .../core/views/DatasetProfileView.java | 261 +++-------------- .../com/whylogs/core/views/SummaryType.java | 13 +- .../core/views/WhylogsMagicUtility.java | 10 +- .../com/whylogs/core/metrics/TestMetric.java | 22 +- 9 files changed, 340 insertions(+), 600 deletions(-) diff --git a/java/core/src/main/java/com/whylogs/core/ColumnProfile.java b/java/core/src/main/java/com/whylogs/core/ColumnProfile.java index 63e179431d..38916cc866 100644 --- a/java/core/src/main/java/com/whylogs/core/ColumnProfile.java +++ b/java/core/src/main/java/com/whylogs/core/ColumnProfile.java @@ -4,79 +4,72 @@ import com.whylogs.core.metrics.OperationResult; import com.whylogs.core.schemas.ColumnSchema; import com.whylogs.core.views.ColumnProfileView; -import whylogs.core.message.ColumnMessage; - import java.util.ArrayList; import java.util.HashMap; public class ColumnProfile { - // Required - private String name; - private ColumnSchema schema; - private int cachedSize; - - // Has Defaults - private HashMap metrics; - private 
SingleFieldProjector projector; - private int successCount; - private int failureCount; - - private ArrayList cachedValues; - - public ColumnProfile(String name, ColumnSchema schema, int cachedSize){ - this.name = name; - this.schema = schema; - this.cachedSize = cachedSize; // TODO: add logger for size of cache on column - - // Defaulted - this.metrics = new HashMap<>(); - this.projector = new SingleFieldProjector(name); - this.successCount = 0; - this.failureCount = 0; - this.cachedValues = new ArrayList<>(); - } - - public void addMetric(Metric metric){ - if(this.metrics.containsKey(metric.getNamespace())){ - // TODO: Add logger with warning about replacement - } - - this.metrics.put(metric.getNamespace(), metric); + // Required + private String name; + private ColumnSchema schema; + private int cachedSize; + + // Has Defaults + private HashMap metrics; + private SingleFieldProjector projector; + private int successCount; + private int failureCount; + + private ArrayList cachedValues; + + public ColumnProfile(String name, ColumnSchema schema, int cachedSize) { + this.name = name; + this.schema = schema; + this.cachedSize = cachedSize; // TODO: add logger for size of cache on column + + // Defaulted + this.metrics = new HashMap<>(); + this.projector = new SingleFieldProjector(name); + this.successCount = 0; + this.failureCount = 0; + this.cachedValues = new ArrayList<>(); + } + + public void addMetric(Metric metric) { + if (this.metrics.containsKey(metric.getNamespace())) { + // TODO: Add logger with warning about replacement } - public void track(HashMap row){ - T value = this.projector.apply(row); - this.cachedValues.add(value); + this.metrics.put(metric.getNamespace(), metric); + } - if(this.cachedValues.size() >= this.cachedSize){ - this.flush(); - } - } + public void track(HashMap row) { + T value = this.projector.apply(row); + this.cachedValues.add(value); - public void flush(){ - // TODO: Logger was initially here, but only for when it was forced, think it 
through - ArrayList oldCache = this.cachedValues; - this.cachedValues = new ArrayList<>(); - this.trackColumn(oldCache); + if (this.cachedValues.size() >= this.cachedSize) { + this.flush(); } - - public void trackColumn(ArrayList values){ - PreprocessedColumn proccessedColumn = PreprocessedColumn.apply(values); - - for(Metric metric : this.metrics.values()){ - OperationResult result = metric.columnarUpdate(proccessedColumn); - this.successCount += result.getSuccesses(); - this.failureCount += result.getFailures(); - } + } + + public void flush() { + // TODO: Logger was initially here, but only for when it was forced, think it through + ArrayList oldCache = this.cachedValues; + this.cachedValues = new ArrayList<>(); + this.trackColumn(oldCache); + } + + public void trackColumn(ArrayList values) { + PreprocessedColumn proccessedColumn = PreprocessedColumn.apply(values); + + for (Metric metric : this.metrics.values()) { + OperationResult result = metric.columnarUpdate(proccessedColumn); + this.successCount += result.getSuccesses(); + this.failureCount += result.getFailures(); } + } - public ColumnMessage toProtobuf(){ - // going to view flushes - return this.view().toProtobuf(); - } - - public ColumnProfileView view(){ - this.flush(); - return new ColumnProfileView(this.metrics, this.successCount, this.failureCount); - } + public ColumnProfileView view() { + this.flush(); + return new ColumnProfileView(this.metrics, this.successCount, this.failureCount); + } } diff --git a/java/core/src/main/java/com/whylogs/core/DatasetProfile.java b/java/core/src/main/java/com/whylogs/core/DatasetProfile.java index 6430178ab2..e3701c0c8c 100644 --- a/java/core/src/main/java/com/whylogs/core/DatasetProfile.java +++ b/java/core/src/main/java/com/whylogs/core/DatasetProfile.java @@ -5,151 +5,137 @@ import com.whylogs.core.schemas.DatasetSchema; import com.whylogs.core.views.ColumnProfileView; import com.whylogs.core.views.DatasetProfileView; -import lombok.Getter; -import 
lombok.ToString; - -import java.time.ZoneId; import java.time.ZonedDateTime; import java.util.*; +import lombok.Getter; +import lombok.ToString; // TODO: extend WRITABLE interface @Getter @ToString public class DatasetProfile { - public static int _LARGE_CACHE_SIZE_LIMIT = 1024 * 100; - - private DatasetSchema schema; - // QUESTION: Do we need zones here? Do we just use UTC? - private Date datasetTimestamp; - private Date creationTimestamp; - private HashMap> columns; - private boolean isActive = false; - private int trackCount = 0; - - public DatasetProfile(Optional datasetSchema, - Optional datasetaTimestampe, - Optional creationTimestampe) { - this.schema = datasetSchema.orElse(new DatasetSchema()); - this.datasetTimestamp = datasetaTimestampe.orElse(Date.); - this.creationTimestamp = creationTimestampe.orElse(Date.now()); - - this.columns = new HashMap<>(); - this.initializeNewColumns(schema.getColNames()); + // TODO: Time zone is all mixed up. Fix + public static int _LARGE_CACHE_SIZE_LIMIT = 1024 * 100; + + private DatasetSchema schema; + // QUESTION: Do we need zones here? Do we just use UTC? 
+ private Date datasetTimestamp; + private Date creationTimestamp; + private HashMap> columns; + private boolean isActive = false; + private int trackCount = 0; + + public DatasetProfile( + Optional datasetSchema, + Optional datasetaTimestampe, + Optional creationTimestampe) { + this.schema = datasetSchema.orElse(new DatasetSchema()); + this.datasetTimestamp = datasetaTimestampe.orElse(new Date()); + this.creationTimestamp = creationTimestampe.orElse(new Date()); + + this.columns = new HashMap<>(); + this.initializeNewColumns(schema.getColNames()); + } + + public void addMetric(String colName, Metric metric) { + if (!this.columns.containsKey(colName)) { + throw new InputMismatchException("Column name not found in schema"); } - - public void addMetric(String colName, Metric metric){ - if(!this.columns.containsKey(colName)){ - throw new InputMismatchException("Column name not found in schema"); - } - this.columns.get(colName).addMetric(metric); + this.columns.get(colName).addMetric(metric); + } + + /* + TODO: I don't beleive we need this in Java? (with the T Object) + public void track(T obj){ + try{ + this.isActive = true; + this.trackCount += 1; + this.doTrack(obj); + } finally { + this.isActive = false; + } + }*/ + + public void track(HashMap row) { + try { + this.isActive = true; + this.trackCount += 1; + this.doTrack(row); + } finally { + this.isActive = false; } - - /* - TODO: I don't beleive we need this in Java? 
- public void track(T obj){ - try{ - this.isActive = true; - this.trackCount += 1; - this.doTrack(obj); - } finally { - this.isActive = false; - } - }*/ - - public void track(HashMap row){ - try{ - this.isActive = true; - this.trackCount += 1; - this.doTrack(row); - } finally { - this.isActive = false; + } + + private void doTrack(HashMap row) { + boolean dirty = this.schema.resolve(row); + if (dirty) { + Set schemaColumnNames = this.schema.getColNames(); + Set newColumnNames = new HashSet<>(); + for (String colName : schemaColumnNames) { + if (!this.columns.containsKey(colName)) { + newColumnNames.add(colName); } + } + this.initializeNewColumns(newColumnNames); } - - private void doTrack(HashMap row) { - boolean dirty = this.schema.resolve(row); - if(dirty){ - Set schemaColumnNames = this.schema.getColNames(); - Set newColumnNames = new HashSet<>(); - for(String colName : schemaColumnNames){ - if(!this.columns.containsKey(colName)){ - newColumnNames.add(colName); - } - } - this.initializeNewColumns(newColumnNames); - } - - // QUESTION: what is this section for? Why do we track this if we just did? - /* - for(String col : row.keySet()){ - this.columns.get(col).track(new ArrayList(row.get(col))); - }*/ - + for (String col : row.keySet()) { + ArrayList values = new ArrayList<>(); + values.add(row.get(col)); + this.columns.get(col).trackColumn(values); } + } - /** - * @return True if the profile tracking code is currently running. - */ - public boolean isEmpty(){ - return this.trackCount == 0; - } + /** @return True if the profile tracking code is currently running. */ + public boolean isEmpty() { + return this.trackCount == 0; + } - public void setDatasetTimestamp(ZonedDateTime datasetTimestamp) { - if(datasetTimestamp.getZone() == null){ - // TODO: log warning if it's not there (is that even possible in Java?) 
- } - this.datasetTimestamp = ZonedDateTime.ofInstant(datasetTimestamp.toInstant(), ZoneId.of("UTC")); + public void setDatasetTimestamp(ZonedDateTime datasetTimestamp) { + if (datasetTimestamp.getZone() == null) { + // TODO: log warning if it's not there } - - private void initializeNewColumns(Set colNames) { - for(String column : colNames){ - ColumnSchema columnSchema = this.schema.columns.get(column); - if(columnSchema != null){ - this.columns.put(column, new ColumnProfile(column, columnSchema, this.schema.cache_size)); - } - // TODO: log warning 'Encountered a column without schema: %s", col' in an else - } + this.datasetTimestamp = Date.from(datasetTimestamp.toInstant()); + } + + private void initializeNewColumns(Set colNames) { + for (String column : colNames) { + ColumnSchema columnSchema = this.schema.columns.get(column); + if (columnSchema != null) { + this.columns.put(column, new ColumnProfile(column, columnSchema, this.schema.cache_size)); + } + // TODO: log warning 'Encountered a column without schema: %s", col' in an else } + } - public DatasetProfileView view(){ - HashMap columns = new HashMap<>(); - - for(String colName : this.columns.keySet()){ - columns.put(colName, this.columns.get(colName).view()); - } - - return new DatasetProfileView(columns, this.datasetTimestamp, this.creationTimestamp); - } + public DatasetProfileView view() { + HashMap columns = new HashMap<>(); - public void flush(){ - for(String colName : this.columns.keySet()){ - this.columns.get(colName).flush(); - } + for (String colName : this.columns.keySet()) { + columns.put(colName, this.columns.get(colName).view()); } - public static String getDefaultPath(Optional path){ - String defaultPath = "profile." 
+ (int) System.currentTimeMillis() + ".bin"; + return new DatasetProfileView(columns, this.datasetTimestamp, this.creationTimestamp); + } - if(!path.isPresent()){ - return defaultPath; - } + public void flush() { + for (String colName : this.columns.keySet()) { + this.columns.get(colName).flush(); + } + } - if(!path.get().endsWith("bin")) { - String finalPath = path.get() + defaultPath; - return finalPath; - } + public static String getDefaultPath(Optional path) { + String defaultPath = "profile." + (int) System.currentTimeMillis() + ".bin"; - return path.get(); + if (!path.isPresent()) { + return defaultPath; } - public void write(Optional pathName){ - String outputPath = getDefaultPath(pathName); - this.view().write(outputPath); - // TODO log debug "Wrote profile to path: %s", output_path + if (!path.get().endsWith("bin")) { + String finalPath = path.get() + defaultPath; + return finalPath; } - public static DatasetProfileView read(String inputPath){ - return DatasetProfileView.read(inputPath); - } + return path.get(); + } } diff --git a/java/core/src/main/java/com/whylogs/core/SingleFieldProjector.java b/java/core/src/main/java/com/whylogs/core/SingleFieldProjector.java index 56bba0927a..34c2d0f08a 100644 --- a/java/core/src/main/java/com/whylogs/core/SingleFieldProjector.java +++ b/java/core/src/main/java/com/whylogs/core/SingleFieldProjector.java @@ -1,16 +1,15 @@ package com.whylogs.core; +import java.util.HashMap; import lombok.Getter; import lombok.RequiredArgsConstructor; -import java.util.HashMap; - @RequiredArgsConstructor @Getter public class SingleFieldProjector { - private final String columnName; + private final String columnName; - public T apply(HashMap row) { - return row.get(columnName); - } + public T apply(HashMap row) { + return row.get(columnName); + } } diff --git a/java/core/src/main/java/com/whylogs/core/metrics/Metric.java b/java/core/src/main/java/com/whylogs/core/metrics/Metric.java index 86df8951b0..6fc4a231cf 100644 --- 
a/java/core/src/main/java/com/whylogs/core/metrics/Metric.java +++ b/java/core/src/main/java/com/whylogs/core/metrics/Metric.java @@ -2,9 +2,8 @@ import com.whylogs.core.PreprocessedColumn; import com.whylogs.core.SummaryConfig; -import java.util.HashMap; - import com.whylogs.core.metrics.components.MetricComponent; +import java.util.HashMap; import lombok.*; @EqualsAndHashCode @@ -23,13 +22,13 @@ public abstract class Metric { public abstract HashMap getComponents(); - public Metric merge(Metric other){ + public Metric merge(Metric other) { Metric merged = this; - if(!this.namespace.equals(other.namespace)){ + if (!this.namespace.equals(other.namespace)) { throw new IllegalArgumentException("Cannot merge metrics with different namespaces"); } - if(this instanceof IntegralMetric){ + if (this instanceof IntegralMetric) { ((IntegralMetric) merged).merge((IntegralMetric) other); } return merged; diff --git a/java/core/src/main/java/com/whylogs/core/views/ColumnProfileView.java b/java/core/src/main/java/com/whylogs/core/views/ColumnProfileView.java index e044e9c706..3b9bb7439a 100644 --- a/java/core/src/main/java/com/whylogs/core/views/ColumnProfileView.java +++ b/java/core/src/main/java/com/whylogs/core/views/ColumnProfileView.java @@ -1,186 +1,125 @@ package com.whylogs.core.views; -import com.google.protobuf.InvalidProtocolBufferException; -import com.google.protobuf.Option; import com.whylogs.core.SummaryConfig; import com.whylogs.core.errors.UnsupportedError; import com.whylogs.core.metrics.Metric; import com.whylogs.core.metrics.components.MetricComponent; -import com.whylogs.v0.core.constraint.Op; -import lombok.Getter; -import whylogs.core.message.ColumnMessage; -import whylogs.core.message.MetricComponentMessage; - import java.util.*; +import lombok.Getter; @Getter -public class ColumnProfileView{ - private HashMap metrics; - private int successes = 0; - private int failures = 0; - - public ColumnProfileView(HashMap metrics) { - this.metrics = metrics; - } - - 
public ColumnProfileView(HashMap metrics, int successes, int failures) { - this.metrics = metrics; - this.successes = successes; - this.failures = failures; - } - - // TODO: this needs the Metric Merge fixed - public ColumnProfileView merge(ColumnProfileView otherView){ - HashSet allMetricNames = new HashSet<>(); - allMetricNames.addAll(this.metrics.keySet()); - allMetricNames.addAll(otherView.metrics.keySet()); - - HashMap mergedMetrics = new HashMap<>(); - for(String metricName : allMetricNames){ - Metric thisMetric = this.metrics.get(metricName); - Metric otherMetric = otherView.metrics.get(metricName); - - Metric result = thisMetric; - - if(thisMetric != null && otherMetric != null){ - result = thisMetric.merge(otherMetric); - } else if (otherMetric != null){ - result = otherMetric; - } - - mergedMetrics.put(metricName, result); - } - - return new ColumnProfileView(mergedMetrics, - this.successes + otherView.successes, - this.failures + otherView.failures); - } - - public byte[] serialize(){ - return this.toProtobuf().toByteArray(); - } - - public static ColumnProfileView deserialize(byte[] data) throws InvalidProtocolBufferException { - ColumnMessage columnMessage = ColumnMessage.parseFrom(data); - return ColumnProfileView.fromProtobuf(columnMessage); - } - - public Optional getMetric(String metricName){ - return Optional.ofNullable(this.metrics.get(metricName)); - } - - // TODO: needs to have getComponents added to Metric - public ColumnMessage toProtobuf(){ - HashMap metricMessages = new HashMap<>(); - for(String metricName : this.metrics.keySet()){ - for(String componentName : this.metrics.get(metricName).getComponents().keySet()){ - Metric metric = this.metrics.get(metricName); - MetricComponentMessage componentMessage = metric.getComponents().get(componentName). 
- metricMessages.put(metricName + "/" + componentName, componentMessage); - } - } - return ColumnMessage.newBuilder().putAllMetricComponents(metricMessages).build(); - } - - public static ColumnProfileView zero(){ - return new ColumnProfileView(new HashMap<>()); - } - - public static ColumnProfileView fromProtobuf(ColumnMessage columnMessage){ - HashMap resultMetrics = new HashMap<>(); - HashMap> metricMessages = new HashMap<>(); - - for(String path : columnMessage.getMetricComponentsMap().keySet()){ - String metricName = path.split("/")[0]; - HashMap metricComponents = new HashMap<>(); - - if(metricMessages.containsKey(metricName)){ - metricComponents = metricMessages.get(metricName); - metricMessages.put(metricName, metricComponents); - } else { - metricMessages.put(metricName, new HashMap()); - } - - // TODO: get the path from the first / on - String componentKey = path.substring(path.indexOf("/") + 1); - metricComponents.put(componentKey, columnMessage.getMetricComponentsMap().get(path)); - } - - // TODO: turn metric into type - // was from StandardMetric - // then Registry - // then Metric.fromProtobuf - - return new ColumnProfileView(resultMetrics); +public class ColumnProfileView { + private HashMap metrics; + private int successes = 0; + private int failures = 0; + + public ColumnProfileView(HashMap metrics) { + this.metrics = metrics; + } + + public ColumnProfileView(HashMap metrics, int successes, int failures) { + this.metrics = metrics; + this.successes = successes; + this.failures = failures; + } + + public ColumnProfileView merge(ColumnProfileView otherView) { + HashSet allMetricNames = new HashSet<>(); + allMetricNames.addAll(this.metrics.keySet()); + allMetricNames.addAll(otherView.metrics.keySet()); + + HashMap mergedMetrics = new HashMap<>(); + for (String metricName : allMetricNames) { + Metric thisMetric = this.metrics.get(metricName); + Metric otherMetric = otherView.metrics.get(metricName); + + Metric result = thisMetric; + + if (thisMetric != 
null && otherMetric != null) { + result = thisMetric.merge(otherMetric); + } else if (otherMetric != null) { + result = otherMetric; + } + + mergedMetrics.put(metricName, result); } - public static ColumnProfileView fromBytes(byte[] data) throws InvalidProtocolBufferException { - ColumnMessage message = ColumnMessage.parseFrom(data); - return ColumnProfileView.fromProtobuf(message); - } - - // TODO: metric needs a getComponentPath - public ArrayList getMetricComponentPaths(){ - ArrayList paths = new ArrayList<>(); - for(String metricName : this.getMetricNames()){ - Optional metric = this.getMetric(metricName); - if(metric.isPresent()){ - for(String componentName : metric.get().getComponents().keySet()){ - paths.add(metricName + "/" + componentName); - } - } + return new ColumnProfileView( + mergedMetrics, this.successes + otherView.successes, this.failures + otherView.failures); + } + + public Optional getMetric(String metricName) { + return Optional.ofNullable(this.metrics.get(metricName)); + } + + public static ColumnProfileView zero() { + return new ColumnProfileView(new HashMap<>()); + } + + // TODO: metric needs a getComponentPath + public ArrayList getMetricComponentPaths() { + ArrayList paths = new ArrayList<>(); + for (String metricName : this.getMetricNames()) { + Optional metric = this.getMetric(metricName); + if (metric.isPresent()) { + for (String componentName : metric.get().getComponents().keySet()) { + paths.add(metricName + "/" + componentName); } - return paths; + } } - - public ArrayList getMetricNames(){ - return new ArrayList<>(this.getMetrics().keySet()); + return paths; + } + + public ArrayList getMetricNames() { + return new ArrayList<>(this.getMetrics().keySet()); + } + + public HashMap toSummaryDict( + Optional columnMetric, Optional config) throws UnsupportedError { + SummaryConfig summaryConfig = config.orElse(new SummaryConfig()); + HashMap summary = new HashMap<>(); + + if (columnMetric.isPresent()) { + 
summary.putAll(getMetricSummaryHelper(summaryConfig, this.getMetric(columnMetric.get()))); + } else { + for (String metricName : this.getMetricNames()) { + summary.putAll(getMetricSummaryHelper(summaryConfig, this.getMetric(metricName))); + } } - public HashMap toSummaryDict(Optional columnMetric, Optional config) throws UnsupportedError { - SummaryConfig summaryConfig = config.orElse(new SummaryConfig()); - HashMap summary = new HashMap<>(); - - if(columnMetric.isPresent()){ - summary.putAll(getMetricSummaryHelper(summaryConfig, this.getMetric(columnMetric.get()))); - } else { - for(String metricName : this.getMetricNames()){ - summary.putAll(getMetricSummaryHelper(summaryConfig, this.getMetric(metricName))); - } - } - - // TODO: there was a logger for when a ssummary couldn't be implmented for a metric + // TODO: there was a logger for when a ssummary couldn't be implmented for a metric - if(columnMetric.isPresent() && columnMetric.get().length() == 0){ - throw new UnsupportedError("No metric available for requested column metric: " + columnMetric.get()); - } - - return summary; + if (columnMetric.isPresent() && columnMetric.get().length() == 0) { + throw new UnsupportedError( + "No metric available for requested column metric: " + columnMetric.get()); } - private HashMap getMetricSummaryHelper(SummaryConfig summaryConfig, - Optional maybeMetric) { - HashMap result = new HashMap<>(); - Metric metric; - if(maybeMetric.isPresent()){ - metric = maybeMetric.get(); - HashMap metricSummary = metric.toSummaryDict(summaryConfig); - for (String componentName : metricSummary.keySet()) { - String fullName = metric.getNamespace() + "/" + componentName; - result.put(fullName, metricSummary.get(componentName)); - } - } - return result; + return summary; + } + + private HashMap getMetricSummaryHelper( + SummaryConfig summaryConfig, Optional maybeMetric) { + HashMap result = new HashMap<>(); + Metric metric; + if (maybeMetric.isPresent()) { + metric = maybeMetric.get(); + 
HashMap metricSummary = metric.toSummaryDict(summaryConfig); + for (String componentName : metricSummary.keySet()) { + String fullName = metric.getNamespace() + "/" + componentName; + result.put(fullName, metricSummary.get(componentName)); + } } - - public Map getComponents() { - HashMap result = new HashMap<>(); - for(String metricName : this.getMetricNames()){ - Optional metric = this.getMetric(metricName); - if(metric.isPresent()){ - result.putAll(metric.get().getComponents()); - } - } - return result; + return result; + } + + public Map getComponents() { + HashMap result = new HashMap<>(); + for (String metricName : this.getMetricNames()) { + Optional metric = this.getMetric(metricName); + if (metric.isPresent()) { + result.putAll(metric.get().getComponents()); + } } + return result; + } } diff --git a/java/core/src/main/java/com/whylogs/core/views/DatasetProfileView.java b/java/core/src/main/java/com/whylogs/core/views/DatasetProfileView.java index 72df2a7632..bd69057306 100644 --- a/java/core/src/main/java/com/whylogs/core/views/DatasetProfileView.java +++ b/java/core/src/main/java/com/whylogs/core/views/DatasetProfileView.java @@ -1,232 +1,57 @@ package com.whylogs.core.views; -import com.whylogs.core.errors.DeserializationError; +import java.io.*; +import java.util.*; import lombok.AllArgsConstructor; import lombok.Getter; import lombok.ToString; import whylogs.core.message.*; -import java.io.*; -import java.lang.reflect.Type; -import java.nio.channels.Channels; -import java.nio.file.Files; -import java.util.*; -import java.util.stream.Collectors; - -// TODO: extend writable +// TODO: extend writable when we do Protobuf @AllArgsConstructor @Getter @ToString public class DatasetProfileView { - private HashMap columns; - private Date datasetTimestamp; - private Date creationTimestamp; - - public DatasetProfileView merge(DatasetProfileView otherView) { - HashMap mergedColumns = new HashMap<>(); - HashSet allNames = new HashSet<>(); - 
allNames.addAll(this.columns.keySet()); - allNames.addAll(otherView.columns.keySet()); - - for (String columnName : allNames) { - ColumnProfileView thisColumn = this.columns.get(columnName); - ColumnProfileView otherColumn = otherView.columns.get(columnName); - - ColumnProfileView result = thisColumn; - - if (thisColumn != null && otherColumn != null) { - result = thisColumn.merge(otherColumn); - } else if (otherColumn != null) { - result = otherColumn; - } - mergedColumns.put(columnName, result); - } - - return new DatasetProfileView(mergedColumns, this.datasetTimestamp, this.creationTimestamp); - } - - public Optional getColumn(String columnName) { - return Optional.ofNullable(this.columns.get(columnName)); - } - - public HashMap getColumns(Optional> colNames) { - if (colNames.isPresent()) { - HashMap result = new HashMap<>(); - for (String colName : colNames.get()) { - result.put(colName, this.columns.get(colName)); - } - return result; - } else { - return this.columns; - } - } - - public String getDefaultPath() { - return "profile_" + this.creationTimestamp + ".bin"; - } - - // TODO: we need get components - public void write(Optional path) { - HashSet allComponentNames = new HashSet<>(); - HashMap metricNameToIndex = new HashMap<>(); - HashMap indexToMetricName = new HashMap<>(); - HashMap columnChunkOffsets = new HashMap<>(); - String pathName = path.orElseGet(this::getDefaultPath); - - for (String colName : this.columns.keySet()) { - ColumnProfileView column = this.columns.get(colName); - allComponentNames.addAll(column.getComponents().keySet()); - } - allComponentNames.stream().sorted().forEach(name -> { - int index = metricNameToIndex.size(); - metricNameToIndex.put(name, index); - indexToMetricName.put(index, name); - }); - - String tempPath = System.getProperty("java.io.tmpdir") + File.separator + "whylogs" + File.separator + "temp_" + this.creationTimestamp + ".bin"; - try (RandomAccessFile file = new RandomAccessFile(tempPath, "rw")) { - OutputStream 
outputStream = Channels.newOutputStream(file.getChannel()); - for (String colName : this.columns.keySet().stream().sorted().collect(Collectors.toList())) { - ColumnProfileView currentColumn = this.columns.get(colName); - columnChunkOffsets.put(colName, ChunkOffsets.newBuilder().addOffsets(file.getFilePointer()).build()); - - // Chunk the column - HashMap indexComponentMetric = new HashMap<>(); - Map metricComponentMap = currentColumn.toProtobuf().getMetricComponentsMap(); - - for (String metricName : metricComponentMap.keySet()) { - if (metricNameToIndex.containsKey(metricName)) { - indexComponentMetric.put(metricNameToIndex.get(metricName), metricComponentMap.get(metricName)); - } else { - throw new InputMismatchException("Missing metric from index map. Metric name: " + metricName); - } - } - - ChunkMessage chunkMsg = ChunkMessage.newBuilder().putAllMetricComponents(indexComponentMetric).build(); - ChunkHeader chunkHeader = ChunkHeader.newBuilder().setType(ChunkHeader.ChunkType.COLUMN).setLength(chunkMsg.getSerializedSize()).build(); - chunkHeader.writeDelimitedTo(outputStream); - outputStream.write(chunkMsg.toByteArray()); - } - - long totalLength = file.getFilePointer(); - - DatasetProperties datasetProperties = DatasetProperties.newBuilder() - .setDatasetTimestamp(this.datasetTimestamp.getTime()) - .setCreationTimestamp(this.creationTimestamp.getTime()) - .build(); - - DatasetProfileHeader header = DatasetProfileHeader.newBuilder() - .setProperties(datasetProperties) - .setLength(totalLength) - .putAllColumnOffsets(columnChunkOffsets) - .putAllIndexedMetricPaths(indexToMetricName) - .build(); - - DatasetSegmentHeader segmentHeader = DatasetSegmentHeader.newBuilder() - .setHasSegments(false) - .build(); - - try (RandomAccessFile outFile = new RandomAccessFile(pathName, "rw")) { - file.seek(0); - InputStream inputFromTemp = Channels.newInputStream(file.getChannel()); - OutputStream writeToFile = Channels.newOutputStream(outFile.getChannel()); - 
outFile.write(WhylogsMagicUtility.WHYLOGS_MAGIC_HEADER_BYTES); - segmentHeader.writeDelimitedTo(writeToFile); - header.writeDelimitedTo(writeToFile); - - int bufferSize = 1024; - int bytesRead = 0; - while (file.getFilePointer() < totalLength) { - byte[] buffer = new byte[bufferSize]; - bytesRead = inputFromTemp.read(buffer, bytesRead, bytesRead+ bufferSize); - writeToFile.write(buffer, 0, bytesRead); // TODO: this offset doesn't seem write. Test - } - } - } catch (IOException e) { - e.printStackTrace(); - } finally { - try { - Files.deleteIfExists(new File(tempPath).toPath()); - } catch (IOException e) { - e.printStackTrace(); - } - } + private HashMap columns; + private Date datasetTimestamp; + private Date creationTimestamp; + + public DatasetProfileView merge(DatasetProfileView otherView) { + HashMap mergedColumns = new HashMap<>(); + HashSet allNames = new HashSet<>(); + allNames.addAll(this.columns.keySet()); + allNames.addAll(otherView.columns.keySet()); + + for (String columnName : allNames) { + ColumnProfileView thisColumn = this.columns.get(columnName); + ColumnProfileView otherColumn = otherView.columns.get(columnName); + + ColumnProfileView result = thisColumn; + + if (thisColumn != null && otherColumn != null) { + result = thisColumn.merge(otherColumn); + } else if (otherColumn != null) { + result = otherColumn; + } + mergedColumns.put(columnName, result); } - public static DatasetProfileView read(String path) throws FileNotFoundException { - ColumnMessage columnMessage; - HashMap columns = new HashMap<>(); - Date datasetTimestamp = null; - Date creationTimestamp = null; - try (RandomAccessFile file = new RandomAccessFile(path, "r")) { - byte[] buffer = new byte[WhylogsMagicUtility.WHYLOGS_MAGIC_HEADER_LENGTH]; - file.read(buffer); - - String decodedHeader; - try{ - decodedHeader = new String(buffer, "UTF-8"); - } catch(Exception e){ - throw new DeserializationError("Invalid magic header. 
Decoder error: " + e.getMessage()); - } - - if (!WhylogsMagicUtility.WHYLOGS_MAGIC_HEADER.equals(decodedHeader)) { - throw new DeserializationError("Invalid magic header. Expected: " + WhylogsMagicUtility.WHYLOGS_MAGIC_HEADER + " Got: " + decodedHeader); - } - - InputStream inputStream = Channels.newInputStream(file.getChannel()); - DatasetSegmentHeader segmentHeader = DatasetSegmentHeader.parseDelimitedFrom(inputStream); - if(segmentHeader.getHasSegments()){ - throw new DeserializationError("Dataset profile has segments. This is not supported yet."); - } - - DatasetProfileHeader header = DatasetProfileHeader.parseDelimitedFrom(inputStream); - if(header.getSerializedSize() == 0){ - throw new DeserializationError("Missing valid dataset profile header"); - } - - datasetTimestamp = new Date(header.getProperties().getDatasetTimestamp()); - creationTimestamp = new Date(header.getProperties().getCreationTimestamp()); - Map indexedMetricPath = header.getIndexedMetricPathsMap(); - - // TODO; Log warning if it's less than 1 "Name index in the header is empty. Possible data corruption" - long startOffset = file.getFilePointer(); - - ArrayList sortedColNames = new ArrayList<>(header.getColumnOffsetsMap().keySet()); - sortedColNames.sort(Comparator.naturalOrder()); - for(String colName: sortedColNames){ - ChunkOffsets offsets = header.getColumnOffsetsMap().get(colName); - HashMap metricComponents = new HashMap<>(); - - for(long offset: offsets.getOffsetsList()){ - long actualOffset = offset + startOffset; - ChunkHeader chunkHeader = ChunkHeader.parseDelimitedFrom(inputStream); - - if(chunkHeader == null){ - throw new DeserializationError("Missing chunk header at offset: " + actualOffset); - } - - if (chunkHeader.getType() != ChunkHeader.ChunkType.COLUMN) { - throw new DeserializationError("Invalid chunk type. Expected: " + ChunkHeader.ChunkType.COLUMN + " Got: " + chunkHeader.getType()); - } - - // TODO: does this need to first grab the chunkHeader.length? 
- ChunkMessage chunkMessage = ChunkMessage.parseFrom(inputStream); - - for(Integer index: chunkMessage.getMetricComponentsMap().keySet()){ - if(indexedMetricPath.containsKey(index)){ - metricComponents.put(indexedMetricPath.get(index), chunkMessage.getMetricComponentsMap().get(index)); - } else { - throw new DeserializationError("Missing metric from index map. Index: " + index); - } - } - } - - columnMessage = ColumnMessage.newBuilder().putAllMetricComponents(metricComponents).build(); - columns.put(colName, ColumnProfileView.fromProtobuf(columnMessage)); - } - } catch (IOException | DeserializationError e) { - e.printStackTrace(); - } - return new DatasetProfileView(columns, datasetTimestamp, creationTimestamp); + return new DatasetProfileView(mergedColumns, this.datasetTimestamp, this.creationTimestamp); + } + + public Optional getColumn(String columnName) { + return Optional.ofNullable(this.columns.get(columnName)); + } + + public HashMap getColumns(Optional> colNames) { + if (colNames.isPresent()) { + HashMap result = new HashMap<>(); + for (String colName : colNames.get()) { + result.put(colName, this.columns.get(colName)); + } + return result; + } else { + return this.columns; } -} \ No newline at end of file + } +} diff --git a/java/core/src/main/java/com/whylogs/core/views/SummaryType.java b/java/core/src/main/java/com/whylogs/core/views/SummaryType.java index 1fee260d2c..d27d0e9fff 100644 --- a/java/core/src/main/java/com/whylogs/core/views/SummaryType.java +++ b/java/core/src/main/java/com/whylogs/core/views/SummaryType.java @@ -1,11 +1,12 @@ package com.whylogs.core.views; public enum SummaryType { - COLUMN("COLUMN"), - DATASET("DATASET"); + COLUMN("COLUMN"), + DATASET("DATASET"); - public final String label; - private SummaryType(String label) { - this.label = label; - } + public final String label; + + private SummaryType(String label) { + this.label = label; + } } diff --git a/java/core/src/main/java/com/whylogs/core/views/WhylogsMagicUtility.java 
b/java/core/src/main/java/com/whylogs/core/views/WhylogsMagicUtility.java index 0254024481..d0f43420ff 100644 --- a/java/core/src/main/java/com/whylogs/core/views/WhylogsMagicUtility.java +++ b/java/core/src/main/java/com/whylogs/core/views/WhylogsMagicUtility.java @@ -1,12 +1,12 @@ package com.whylogs.core.views; -import lombok.experimental.UtilityClass; - import java.nio.charset.StandardCharsets; +import lombok.experimental.UtilityClass; @UtilityClass public class WhylogsMagicUtility { - public static final String WHYLOGS_MAGIC_HEADER = "WHY1"; - public static final int WHYLOGS_MAGIC_HEADER_LENGTH = WHYLOGS_MAGIC_HEADER.length(); - public static final byte[] WHYLOGS_MAGIC_HEADER_BYTES = WHYLOGS_MAGIC_HEADER.getBytes(StandardCharsets.UTF_8);; + public static final String WHYLOGS_MAGIC_HEADER = "WHY1"; + public static final int WHYLOGS_MAGIC_HEADER_LENGTH = WHYLOGS_MAGIC_HEADER.length(); + public static final byte[] WHYLOGS_MAGIC_HEADER_BYTES = + WHYLOGS_MAGIC_HEADER.getBytes(StandardCharsets.UTF_8);; } diff --git a/java/core/src/test/java/com/whylogs/core/metrics/TestMetric.java b/java/core/src/test/java/com/whylogs/core/metrics/TestMetric.java index ef4a2427c5..afc72942b7 100644 --- a/java/core/src/test/java/com/whylogs/core/metrics/TestMetric.java +++ b/java/core/src/test/java/com/whylogs/core/metrics/TestMetric.java @@ -1,23 +1,21 @@ package com.whylogs.core.metrics; -import com.whylogs.core.metrics.components.MaxIntegralComponent; +import java.util.ArrayList; import org.junit.Assert; import org.testng.annotations.Test; -import java.util.ArrayList; - @Test public class TestMetric { - @Test - public void testMetrics(){ - ArrayList metrics = new ArrayList<>(); - metrics.add(IntegralMetric.zero(new MetricConfig())); - metrics.add(IntegralMetric.zero(new MetricConfig())); + @Test + public void testMetrics() { + ArrayList metrics = new ArrayList<>(); + metrics.add(IntegralMetric.zero(new MetricConfig())); + metrics.add(IntegralMetric.zero(new MetricConfig())); - 
for(Metric metric : metrics){ - Assert.assertTrue(metric instanceof IntegralMetric); - metric.merge(new IntegralMetric()); - } + for (Metric metric : metrics) { + Assert.assertTrue(metric instanceof IntegralMetric); + metric.merge(new IntegralMetric()); } + } } From 7e61b1ed02867582229e58bb43715f8a2f8c0932 Mon Sep 17 00:00:00 2001 From: TheMellyBee Date: Mon, 12 Sep 2022 11:36:30 -0700 Subject: [PATCH 20/71] Tests for simple utiltiies --- .../com/whylogs/core/views/TestSummaryType.java | 14 ++++++++++++++ .../core/views/TestWhylogsMagicUtility.java | 15 +++++++++++++++ 2 files changed, 29 insertions(+) create mode 100644 java/core/src/test/java/com/whylogs/core/views/TestSummaryType.java create mode 100644 java/core/src/test/java/com/whylogs/core/views/TestWhylogsMagicUtility.java diff --git a/java/core/src/test/java/com/whylogs/core/views/TestSummaryType.java b/java/core/src/test/java/com/whylogs/core/views/TestSummaryType.java new file mode 100644 index 0000000000..12fa04fcc8 --- /dev/null +++ b/java/core/src/test/java/com/whylogs/core/views/TestSummaryType.java @@ -0,0 +1,14 @@ +package com.whylogs.core.views; + +import org.testng.Assert; +import org.testng.annotations.Test; + +@Test +public class TestSummaryType { + + @Test + public void testSummaryType() { + Assert.assertEquals(SummaryType.COLUMN.label, "COLUMN"); + Assert.assertEquals(SummaryType.DATASET.label, "DATASET"); + } +} diff --git a/java/core/src/test/java/com/whylogs/core/views/TestWhylogsMagicUtility.java b/java/core/src/test/java/com/whylogs/core/views/TestWhylogsMagicUtility.java new file mode 100644 index 0000000000..7052c85062 --- /dev/null +++ b/java/core/src/test/java/com/whylogs/core/views/TestWhylogsMagicUtility.java @@ -0,0 +1,15 @@ +package com.whylogs.core.views; + +import org.testng.Assert; +import org.testng.annotations.Test; + +@Test +public class TestWhylogsMagicUtility { + + @Test + public void testWhylogsMagicUtility() { + 
Assert.assertEquals(WhylogsMagicUtility.WHYLOGS_MAGIC_HEADER, "WHY1"); + Assert.assertEquals(WhylogsMagicUtility.WHYLOGS_MAGIC_HEADER_LENGTH, 4); + Assert.assertEquals(WhylogsMagicUtility.WHYLOGS_MAGIC_HEADER_BYTES, new byte[] {87, 72, 89, 49}); + } +} From aea9c3125091de517c20703fe66a39524d71ae55 Mon Sep 17 00:00:00 2001 From: TheMellyBee Date: Mon, 12 Sep 2022 15:20:22 -0700 Subject: [PATCH 21/71] tests for ColumnProfileView --- .../whylogs/core/views/ColumnProfileView.java | 10 +- .../core/views/TestColumnProfileView.java | 123 ++++++++++++++++++ 2 files changed, 130 insertions(+), 3 deletions(-) create mode 100644 java/core/src/test/java/com/whylogs/core/views/TestColumnProfileView.java diff --git a/java/core/src/main/java/com/whylogs/core/views/ColumnProfileView.java b/java/core/src/main/java/com/whylogs/core/views/ColumnProfileView.java index 3b9bb7439a..a1a7eebb85 100644 --- a/java/core/src/main/java/com/whylogs/core/views/ColumnProfileView.java +++ b/java/core/src/main/java/com/whylogs/core/views/ColumnProfileView.java @@ -24,6 +24,11 @@ public ColumnProfileView(HashMap metrics, int successes, int fai } public ColumnProfileView merge(ColumnProfileView otherView) { + if(otherView == null){ + // TODO: log warning that otehrwas null and this returns original + return this; + } + HashSet allMetricNames = new HashSet<>(); allMetricNames.addAll(this.metrics.keySet()); allMetricNames.addAll(otherView.metrics.keySet()); @@ -75,7 +80,7 @@ public ArrayList getMetricNames() { } public HashMap toSummaryDict( - Optional columnMetric, Optional config) throws UnsupportedError { + Optional columnMetric, Optional config) throws UnsupportedError { SummaryConfig summaryConfig = config.orElse(new SummaryConfig()); HashMap summary = new HashMap<>(); @@ -87,13 +92,12 @@ public HashMap toSummaryDict( } } - // TODO: there was a logger for when a ssummary couldn't be implmented for a metric + // TODO: there was a logger for when a summary couldn't be implmented for a metric if 
(columnMetric.isPresent() && columnMetric.get().length() == 0) { throw new UnsupportedError( "No metric available for requested column metric: " + columnMetric.get()); } - return summary; } diff --git a/java/core/src/test/java/com/whylogs/core/views/TestColumnProfileView.java b/java/core/src/test/java/com/whylogs/core/views/TestColumnProfileView.java new file mode 100644 index 0000000000..6b52d558fc --- /dev/null +++ b/java/core/src/test/java/com/whylogs/core/views/TestColumnProfileView.java @@ -0,0 +1,123 @@ +package com.whylogs.core.views; + +import com.whylogs.core.SummaryConfig; +import com.whylogs.core.errors.UnsupportedError; +import com.whylogs.core.metrics.IntegralMetric; +import com.whylogs.core.metrics.Metric; +import com.whylogs.core.metrics.MetricConfig; +import com.whylogs.core.metrics.components.MetricComponent; +import org.testng.Assert; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import java.util.*; + +@Test +public class TestColumnProfileView { + + private ColumnProfileView getDefaultColumnProfile(){ + Metric integralMetric = IntegralMetric.zero(new MetricConfig()); + HashMap metrics = new HashMap<>(); + metrics.put(integralMetric.getNamespace(), integralMetric); + + return new ColumnProfileView(metrics); + } + + private ColumnProfileView getChangedSuccessFailProfile(int success, int fail){ + Metric integralMetric = IntegralMetric.zero(new MetricConfig()); + HashMap metrics = new HashMap<>(); + metrics.put(integralMetric.getNamespace(), integralMetric); + + return new ColumnProfileView(metrics, success, fail); + } + + @Test + public void testColumnProfileInit(){ + ColumnProfileView columnProfileView = getDefaultColumnProfile(); + + Assert.assertEquals(columnProfileView.getMetric("ints").get().getClass(), IntegralMetric.class); + Assert.assertEquals(columnProfileView.getFailures(), 0); + Assert.assertEquals(columnProfileView.getSuccesses(), 0); + + + columnProfileView = getChangedSuccessFailProfile(1, 2); + 
Assert.assertEquals(columnProfileView.getFailures(), 2); + Assert.assertEquals(columnProfileView.getSuccesses(), 1); + + columnProfileView = ColumnProfileView.zero(); + Assert.assertEquals(columnProfileView.getFailures(), 0); + Assert.assertEquals(columnProfileView.getSuccesses(), 0); + } + + @Test + public void testMerge(){ + ColumnProfileView columnProfileView = getDefaultColumnProfile(); + ColumnProfileView columnProfileView2 = getChangedSuccessFailProfile(1, 2); + + ColumnProfileView result = columnProfileView.merge(columnProfileView2); + + Assert.assertEquals(result.getMetric("ints").get().getClass(), IntegralMetric.class); + Assert.assertEquals(result.getFailures(), 2); + Assert.assertEquals(result.getSuccesses(), 1); + } + + @Test + public void testMergeWithNull(){ + ColumnProfileView columnProfileView = getDefaultColumnProfile(); + ColumnProfileView result = columnProfileView.merge(null); + + Assert.assertEquals(result.getMetric("ints").get().getClass(), IntegralMetric.class); + Assert.assertEquals(result.getFailures(), 0); + Assert.assertEquals(result.getSuccesses(), 0); + } + + @Test + public void testGetMetricComponentPaths(){ + ColumnProfileView columnProfileView = getDefaultColumnProfile(); + ArrayList paths = columnProfileView.getMetricComponentPaths(); + Assert.assertEquals(paths.size(), 2); + Assert.assertEquals(paths.get(0), "ints/MaxIntegralComponent"); + Assert.assertEquals(paths.get(1), "ints/MinIntegralComponent"); + } + + @Test + public void testGetMetricComponentPathsEmpty(){ + ColumnProfileView columnProfileView = ColumnProfileView.zero(); + ArrayList paths = columnProfileView.getMetricComponentPaths(); + Assert.assertEquals(paths.size(), 0); + } + + @Test + public void testGetMetricComponentPathsNull(){ + ColumnProfileView columnProfileView = getDefaultColumnProfile(); + columnProfileView = columnProfileView.merge(null); + ArrayList paths = columnProfileView.getMetricComponentPaths(); + Assert.assertEquals(paths.size(), 2); + 
Assert.assertEquals(paths.get(0), "ints/MaxIntegralComponent"); + Assert.assertEquals(paths.get(1), "ints/MinIntegralComponent"); + } + + @Test + public void testToSummaryDict() throws UnsupportedError { + ColumnProfileView columnProfileView = getDefaultColumnProfile(); + HashMap summary = columnProfileView.toSummaryDict(Optional.ofNullable("ints"), Optional.ofNullable(new SummaryConfig())); + Assert.assertEquals(summary.size(), 2); + Assert.assertEquals(summary.get("ints/min"), Integer.MAX_VALUE); + Assert.assertEquals(summary.get("ints/max"), Integer.MIN_VALUE); + + summary = columnProfileView.toSummaryDict(Optional.empty(), Optional.empty()); + Assert.assertEquals(summary.size(), 2); + Assert.assertEquals(summary.get("ints/min"), Integer.MAX_VALUE); + Assert.assertEquals(summary.get("ints/max"), Integer.MIN_VALUE); + } + + @Test + public void testGetComponents(){ + ColumnProfileView columnProfileView = getDefaultColumnProfile(); + Map components = columnProfileView.getComponents(); + Assert.assertEquals(components.size(), 2); + Assert.assertEquals(components.get("MinIntegralComponent").getValue(), Integer.MAX_VALUE); + Assert.assertEquals(components.get("MaxIntegralComponent").getValue(), Integer.MIN_VALUE); + } + +} From 9e412555733ad997a7a6c3418e7158e474bcc21c Mon Sep 17 00:00:00 2001 From: TheMellyBee Date: Mon, 12 Sep 2022 15:30:38 -0700 Subject: [PATCH 22/71] tests for DatasetProfileView --- .../core/views/DatasetProfileView.java | 4 ++ .../core/views/TestDatasetProfileView.java | 68 +++++++++++++++++++ 2 files changed, 72 insertions(+) create mode 100644 java/core/src/test/java/com/whylogs/core/views/TestDatasetProfileView.java diff --git a/java/core/src/main/java/com/whylogs/core/views/DatasetProfileView.java b/java/core/src/main/java/com/whylogs/core/views/DatasetProfileView.java index bd69057306..d235a2c4e7 100644 --- a/java/core/src/main/java/com/whylogs/core/views/DatasetProfileView.java +++ 
b/java/core/src/main/java/com/whylogs/core/views/DatasetProfileView.java @@ -17,6 +17,10 @@ public class DatasetProfileView { private Date creationTimestamp; public DatasetProfileView merge(DatasetProfileView otherView) { + if(otherView == null) { + return this; + } + HashMap mergedColumns = new HashMap<>(); HashSet allNames = new HashSet<>(); allNames.addAll(this.columns.keySet()); diff --git a/java/core/src/test/java/com/whylogs/core/views/TestDatasetProfileView.java b/java/core/src/test/java/com/whylogs/core/views/TestDatasetProfileView.java new file mode 100644 index 0000000000..0c9e3c5382 --- /dev/null +++ b/java/core/src/test/java/com/whylogs/core/views/TestDatasetProfileView.java @@ -0,0 +1,68 @@ +package com.whylogs.core.views; + +import com.whylogs.core.metrics.IntegralMetric; +import com.whylogs.core.metrics.Metric; +import com.whylogs.core.metrics.MetricConfig; +import org.testng.Assert; +import org.testng.annotations.Test; + +import java.util.Date; +import java.util.HashMap; + +public class TestDatasetProfileView { + + private DatasetProfileView getDefaultDatasetProfile(){ + HashMap columnProfileViews = new HashMap<>(); + HashMap testMetrics = new HashMap<>(); + testMetrics.put("ints", IntegralMetric.zero(new MetricConfig())); + columnProfileViews.put("test", new ColumnProfileView(testMetrics)); + return new DatasetProfileView(columnProfileViews, new Date(), new Date()); + } + + @Test + public void testDatasetProfileViewInit(){ + DatasetProfileView view = new DatasetProfileView(new HashMap(), new Date(), new Date()); + Assert.assertEquals(view.getColumns().size(), 0); + + view = getDefaultDatasetProfile(); + Assert.assertEquals(view.getColumns().size(), 1); + Assert.assertNotNull(view.getColumns().get("test").getMetric("ints")); + } + + @Test + public void testMerge(){ + DatasetProfileView view = getDefaultDatasetProfile(); + DatasetProfileView view2 = getDefaultDatasetProfile(); + DatasetProfileView result = view.merge(view2); + 
Assert.assertEquals(result.getColumns().size(), 1); + Assert.assertNotNull(result.getColumns().get("test").getMetric("ints")); + } + + @Test + public void testMergeWithNull(){ + DatasetProfileView view = getDefaultDatasetProfile(); + DatasetProfileView result = view.merge(null); + Assert.assertEquals(result.getColumns().size(), 1); + Assert.assertNotNull(result.getColumns().get("test").getMetric("ints")); + } + + @Test + public void testMergeWithEmpty(){ + DatasetProfileView view = getDefaultDatasetProfile(); + DatasetProfileView result = view.merge(new DatasetProfileView(new HashMap(), new Date(), new Date())); + Assert.assertEquals(result.getColumns().size(), 1); + Assert.assertNotNull(result.getColumns().get("test").getMetric("ints")); + } + + @Test + public void testGetColumn(){ + DatasetProfileView view = getDefaultDatasetProfile(); + Assert.assertNotNull(view.getColumn("test")); + } + + @Test + public void testGetColumns(){ + DatasetProfileView view = getDefaultDatasetProfile(); + Assert.assertNotNull(view.getColumns()); + } +} From 3059ae534d6e7844889c73c75d23b2b29cb338c4 Mon Sep 17 00:00:00 2001 From: TheMellyBee Date: Tue, 13 Sep 2022 10:47:11 -0700 Subject: [PATCH 23/71] place holder for TestColumnProfile --- .../src/test/java/com/whylogs/core/TestColumnProfile.java | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 java/core/src/test/java/com/whylogs/core/TestColumnProfile.java diff --git a/java/core/src/test/java/com/whylogs/core/TestColumnProfile.java b/java/core/src/test/java/com/whylogs/core/TestColumnProfile.java new file mode 100644 index 0000000000..c2b10d0b2f --- /dev/null +++ b/java/core/src/test/java/com/whylogs/core/TestColumnProfile.java @@ -0,0 +1,4 @@ +package com.whylogs.core; + +public class TestColumnProfile { +} From c6e04e30e821b2fd8bd9a3cdfdb6b8f6b602c691 Mon Sep 17 00:00:00 2001 From: TheMellyBee Date: Wed, 14 Sep 2022 15:45:46 -0700 Subject: [PATCH 24/71] Tests for ColumnProfile --- 
.../java/com/whylogs/core/ColumnProfile.java | 6 + .../com/whylogs/core/TestColumnProfile.java | 142 ++++++++++++++++++ 2 files changed, 148 insertions(+) diff --git a/java/core/src/main/java/com/whylogs/core/ColumnProfile.java b/java/core/src/main/java/com/whylogs/core/ColumnProfile.java index 38916cc866..4cf403322b 100644 --- a/java/core/src/main/java/com/whylogs/core/ColumnProfile.java +++ b/java/core/src/main/java/com/whylogs/core/ColumnProfile.java @@ -4,9 +4,14 @@ import com.whylogs.core.metrics.OperationResult; import com.whylogs.core.schemas.ColumnSchema; import com.whylogs.core.views.ColumnProfileView; +import lombok.Getter; +import lombok.ToString; + import java.util.ArrayList; import java.util.HashMap; +@Getter +@ToString public class ColumnProfile { // Required private String name; @@ -42,6 +47,7 @@ public void addMetric(Metric metric) { this.metrics.put(metric.getNamespace(), metric); } + // TODO: this only gets one not every part of the row. Should projector actually do it multiple? 
public void track(HashMap row) { T value = this.projector.apply(row); this.cachedValues.add(value); diff --git a/java/core/src/test/java/com/whylogs/core/TestColumnProfile.java b/java/core/src/test/java/com/whylogs/core/TestColumnProfile.java index c2b10d0b2f..d635612b3e 100644 --- a/java/core/src/test/java/com/whylogs/core/TestColumnProfile.java +++ b/java/core/src/test/java/com/whylogs/core/TestColumnProfile.java @@ -1,4 +1,146 @@ package com.whylogs.core; +import com.whylogs.core.metrics.IntegralMetric; +import com.whylogs.core.metrics.Metric; +import com.whylogs.core.metrics.MetricConfig; +import com.whylogs.core.metrics.components.MaxIntegralComponent; +import com.whylogs.core.metrics.components.MinIntegralComponent; +import com.whylogs.core.resolvers.StandardResolver; +import com.whylogs.core.schemas.ColumnSchema; +import com.whylogs.core.views.ColumnProfileView; +import org.testng.Assert; +import org.testng.annotations.Test; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Optional; + +@Test public class TestColumnProfile { + private String columnName = "testColumn"; + private int CACHE_SIZE = 2; + + private ColumnProfile getDefaultColumnProfile(){ + ColumnSchema standardSchema = new ColumnSchema(Integer.class, new MetricConfig(), new StandardResolver()); + ColumnProfile result = new ColumnProfile<>(columnName, standardSchema, CACHE_SIZE ); + result.addMetric(IntegralMetric.zero(new MetricConfig())); + return result; + } + + @Test + public void testColumnProfileInit(){ + ColumnProfile profile = getDefaultColumnProfile(); + Assert.assertEquals(profile.getName(), columnName); + Assert.assertEquals(profile.getSchema().getType(), IntegralMetric.class); + Assert.assertEquals(profile.getCachedSize(), CACHE_SIZE); + } + + @Test + public void testAddMetric(){ + ColumnProfile profile = getDefaultColumnProfile(); + profile.addMetric(IntegralMetric.zero(new MetricConfig())); + 
Assert.assertEquals(profile.getMetrics().size(), 1); + Assert.assertEquals(profile.getMetrics().get("ints").getClass(), IntegralMetric.class); + IntegralMetric metric = (IntegralMetric) profile.getMetrics().get("ints"); + Assert.assertEquals((int) metric.getMaxComponent().getValue(), Integer.MIN_VALUE); + + IntegralMetric metric2 = new IntegralMetric(new MaxIntegralComponent(22), new MinIntegralComponent(20)); + profile.addMetric(metric2); + Assert.assertEquals(profile.getMetrics().size(), 1); + IntegralMetric result = (IntegralMetric) profile.getMetrics().get("ints"); + Assert.assertEquals((int) result.getMaxComponent().getValue(), 22); + } + + @Test + public void testTrack(){ + ColumnProfile profile = getDefaultColumnProfile(); + Assert.assertEquals(profile.getCachedSize(), CACHE_SIZE); + + HashMap row = new HashMap<>(); + row.put(columnName, 5); + row.put("test2", 2); + + profile.track(row); + Assert.assertEquals(profile.getCachedValues().size(), 1); + Assert.assertEquals((int) profile.getCachedValues().get(0), 5); + + row.put(columnName, 2); + profile.track(row); + // With cache size of 2 this should have forced a flush + Assert.assertEquals(profile.getCachedValues().size(), 0); + Assert.assertEquals(profile.getSuccessCount(), 2); + } + + @Test + public void testTrackNull(){ + ColumnProfile profile = getDefaultColumnProfile(); + Assert.assertEquals(profile.getCachedSize(), CACHE_SIZE); + + HashMap row = new HashMap<>(); + row.put(columnName, 1); + profile.track(row); + + row.put(columnName, null); + profile.track(row); + + Assert.assertEquals(profile.getCachedValues().size(), 0); + Assert.assertEquals(profile.getSuccessCount(), 1); + Assert.assertEquals(profile.getFailureCount(), 0); + // There is a null count in the columnar update, but we don't store it in the profile + } + + // Because of the typing in Java, how do we trigger the failure? 
+ + @Test + public void testFlush(){ + ColumnProfile profile = getDefaultColumnProfile(); + + HashMap row = new HashMap<>(); + row.put(columnName, 5); + + profile.track(row); + Assert.assertEquals(profile.getCachedValues().size(), 1); + + profile.flush(); + Assert.assertEquals(profile.getCachedValues().size(), 0); + Assert.assertEquals(profile.getSuccessCount(), 1); + } + + @Test + public void testTrackColumn(){ + ColumnProfile profile = getDefaultColumnProfile(); + ArrayList column = new ArrayList<>(); + column.add("1"); + + profile.trackColumn(column); + Assert.assertEquals(profile.getSuccessCount(), 0); + Assert.assertEquals(profile.getFailureCount(), 0); + + ArrayList column2 = new ArrayList<>(); + column2.add(1); + column2.add(2); + column2.add(null); + profile.trackColumn(column2); + Assert.assertEquals(profile.getSuccessCount(), 2); + Assert.assertEquals(profile.getFailureCount(), 0); + } + + @Test + public void testView(){ + ColumnProfile profile = getDefaultColumnProfile(); + ArrayList column = new ArrayList<>(); + column.add(1); + column.add(2); + column.add(null); + profile.trackColumn(column); + + ColumnProfileView view = profile.view(); + Assert.assertEquals(view.getSuccesses(), 3); + Assert.assertEquals(view.getFailures(), 0); + Assert.assertEquals(view.getMetrics().size(), 1); + Assert.assertEquals(view.getMetrics().get("ints").getClass(), IntegralMetric.class); + IntegralMetric metric = (IntegralMetric) view.getMetrics().get("ints"); + Assert.assertEquals((int) metric.getMaxComponent().getValue(), 2); + } } From 31445302c365993710eb9f9d50324e65949736bc Mon Sep 17 00:00:00 2001 From: TheMellyBee Date: Thu, 22 Sep 2022 10:32:25 -0700 Subject: [PATCH 25/71] skeleton --- .../core/src/main/java/com/whylogs/api/logger/BasicCache.java | 4 ++++ java/core/src/main/java/com/whylogs/api/logger/Logger.java | 4 ++++ .../src/main/java/com/whylogs/api/logger/TransientLogger.java | 4 ++++ .../com/whylogs/api/logger/resultSets/ProfileResultSet.java | 4 ++++ 
.../java/com/whylogs/api/logger/resultSets/ResultSet.java | 4 ++++ .../java/com/whylogs/api/logger/resultSets/ViewResultSet.java | 4 ++++ 6 files changed, 24 insertions(+) create mode 100644 java/core/src/main/java/com/whylogs/api/logger/BasicCache.java create mode 100644 java/core/src/main/java/com/whylogs/api/logger/Logger.java create mode 100644 java/core/src/main/java/com/whylogs/api/logger/TransientLogger.java create mode 100644 java/core/src/main/java/com/whylogs/api/logger/resultSets/ProfileResultSet.java create mode 100644 java/core/src/main/java/com/whylogs/api/logger/resultSets/ResultSet.java create mode 100644 java/core/src/main/java/com/whylogs/api/logger/resultSets/ViewResultSet.java diff --git a/java/core/src/main/java/com/whylogs/api/logger/BasicCache.java b/java/core/src/main/java/com/whylogs/api/logger/BasicCache.java new file mode 100644 index 0000000000..cdc84047fd --- /dev/null +++ b/java/core/src/main/java/com/whylogs/api/logger/BasicCache.java @@ -0,0 +1,4 @@ +package com.whylogs.api.logger; + +public class BasicCache { +} diff --git a/java/core/src/main/java/com/whylogs/api/logger/Logger.java b/java/core/src/main/java/com/whylogs/api/logger/Logger.java new file mode 100644 index 0000000000..4b4ec081c6 --- /dev/null +++ b/java/core/src/main/java/com/whylogs/api/logger/Logger.java @@ -0,0 +1,4 @@ +package com.whylogs.api.logger; + +public class Logger { +} diff --git a/java/core/src/main/java/com/whylogs/api/logger/TransientLogger.java b/java/core/src/main/java/com/whylogs/api/logger/TransientLogger.java new file mode 100644 index 0000000000..8ce9ea86ee --- /dev/null +++ b/java/core/src/main/java/com/whylogs/api/logger/TransientLogger.java @@ -0,0 +1,4 @@ +package com.whylogs.api.logger; + +public class TransientLogger { +} diff --git a/java/core/src/main/java/com/whylogs/api/logger/resultSets/ProfileResultSet.java b/java/core/src/main/java/com/whylogs/api/logger/resultSets/ProfileResultSet.java new file mode 100644 index 0000000000..ec22a19b42 
--- /dev/null +++ b/java/core/src/main/java/com/whylogs/api/logger/resultSets/ProfileResultSet.java @@ -0,0 +1,4 @@ +package com.whylogs.api.logger.resultSets; + +public class ProfileResultSet { +} diff --git a/java/core/src/main/java/com/whylogs/api/logger/resultSets/ResultSet.java b/java/core/src/main/java/com/whylogs/api/logger/resultSets/ResultSet.java new file mode 100644 index 0000000000..25342c3f0f --- /dev/null +++ b/java/core/src/main/java/com/whylogs/api/logger/resultSets/ResultSet.java @@ -0,0 +1,4 @@ +package com.whylogs.api.logger.resultSets; + +public class ResultSet { +} diff --git a/java/core/src/main/java/com/whylogs/api/logger/resultSets/ViewResultSet.java b/java/core/src/main/java/com/whylogs/api/logger/resultSets/ViewResultSet.java new file mode 100644 index 0000000000..25ad956074 --- /dev/null +++ b/java/core/src/main/java/com/whylogs/api/logger/resultSets/ViewResultSet.java @@ -0,0 +1,4 @@ +package com.whylogs.api.logger.resultSets; + +public class ViewResultSet { +} From 169e904bf73027e03d6643f6e84e7a7289e92b6f Mon Sep 17 00:00:00 2001 From: TheMellyBee Date: Thu, 22 Sep 2022 11:55:47 -0700 Subject: [PATCH 26/71] Holding place - figuring out why the view isn't visible --- .../whylogs/api/logger/resultSets/ResultSet.java | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/java/core/src/main/java/com/whylogs/api/logger/resultSets/ResultSet.java b/java/core/src/main/java/com/whylogs/api/logger/resultSets/ResultSet.java index 25342c3f0f..c38741589d 100644 --- a/java/core/src/main/java/com/whylogs/api/logger/resultSets/ResultSet.java +++ b/java/core/src/main/java/com/whylogs/api/logger/resultSets/ResultSet.java @@ -1,4 +1,18 @@ package com.whylogs.api.logger.resultSets; -public class ResultSet { +/** + A holder object for profiling results. + + A whylogs.log call can result in more than one profile. This wrapper class + simplifies the navigation among these profiles. 
+ + Note that currently we only hold one profile but we're planning to add other + kinds of profiles such as segmented profiles here. +**/ +public abstract class ResultSet { + + public static ResultSet read(String multiProfileFile){ + + } + } From 4267f7c5210b6b048bb6840cf0684c2e8124cac8 Mon Sep 17 00:00:00 2001 From: TheMellyBee Date: Thu, 22 Sep 2022 12:17:03 -0700 Subject: [PATCH 27/71] removes unused import and does spotless Java --- .../java/com/whylogs/core/ColumnProfile.java | 5 +- .../whylogs/core/views/ColumnProfileView.java | 4 +- .../core/views/DatasetProfileView.java | 4 +- .../com/whylogs/core/TestColumnProfile.java | 260 +++++++++--------- .../core/views/TestColumnProfileView.java | 214 +++++++------- .../core/views/TestDatasetProfileView.java | 103 +++---- .../whylogs/core/views/TestSummaryType.java | 10 +- .../core/views/TestWhylogsMagicUtility.java | 13 +- 8 files changed, 305 insertions(+), 308 deletions(-) diff --git a/java/core/src/main/java/com/whylogs/core/ColumnProfile.java b/java/core/src/main/java/com/whylogs/core/ColumnProfile.java index 4cf403322b..9e57a5a881 100644 --- a/java/core/src/main/java/com/whylogs/core/ColumnProfile.java +++ b/java/core/src/main/java/com/whylogs/core/ColumnProfile.java @@ -4,11 +4,10 @@ import com.whylogs.core.metrics.OperationResult; import com.whylogs.core.schemas.ColumnSchema; import com.whylogs.core.views.ColumnProfileView; -import lombok.Getter; -import lombok.ToString; - import java.util.ArrayList; import java.util.HashMap; +import lombok.Getter; +import lombok.ToString; @Getter @ToString diff --git a/java/core/src/main/java/com/whylogs/core/views/ColumnProfileView.java b/java/core/src/main/java/com/whylogs/core/views/ColumnProfileView.java index a1a7eebb85..d383177aad 100644 --- a/java/core/src/main/java/com/whylogs/core/views/ColumnProfileView.java +++ b/java/core/src/main/java/com/whylogs/core/views/ColumnProfileView.java @@ -24,7 +24,7 @@ public ColumnProfileView(HashMap metrics, int successes, int fai 
} public ColumnProfileView merge(ColumnProfileView otherView) { - if(otherView == null){ + if (otherView == null) { // TODO: log warning that otehrwas null and this returns original return this; } @@ -80,7 +80,7 @@ public ArrayList getMetricNames() { } public HashMap toSummaryDict( - Optional columnMetric, Optional config) throws UnsupportedError { + Optional columnMetric, Optional config) throws UnsupportedError { SummaryConfig summaryConfig = config.orElse(new SummaryConfig()); HashMap summary = new HashMap<>(); diff --git a/java/core/src/main/java/com/whylogs/core/views/DatasetProfileView.java b/java/core/src/main/java/com/whylogs/core/views/DatasetProfileView.java index d235a2c4e7..6a73fde75d 100644 --- a/java/core/src/main/java/com/whylogs/core/views/DatasetProfileView.java +++ b/java/core/src/main/java/com/whylogs/core/views/DatasetProfileView.java @@ -1,11 +1,9 @@ package com.whylogs.core.views; -import java.io.*; import java.util.*; import lombok.AllArgsConstructor; import lombok.Getter; import lombok.ToString; -import whylogs.core.message.*; // TODO: extend writable when we do Protobuf @AllArgsConstructor @@ -17,7 +15,7 @@ public class DatasetProfileView { private Date creationTimestamp; public DatasetProfileView merge(DatasetProfileView otherView) { - if(otherView == null) { + if (otherView == null) { return this; } diff --git a/java/core/src/test/java/com/whylogs/core/TestColumnProfile.java b/java/core/src/test/java/com/whylogs/core/TestColumnProfile.java index d635612b3e..abece1da16 100644 --- a/java/core/src/test/java/com/whylogs/core/TestColumnProfile.java +++ b/java/core/src/test/java/com/whylogs/core/TestColumnProfile.java @@ -1,146 +1,144 @@ package com.whylogs.core; import com.whylogs.core.metrics.IntegralMetric; -import com.whylogs.core.metrics.Metric; import com.whylogs.core.metrics.MetricConfig; import com.whylogs.core.metrics.components.MaxIntegralComponent; import com.whylogs.core.metrics.components.MinIntegralComponent; import 
com.whylogs.core.resolvers.StandardResolver; import com.whylogs.core.schemas.ColumnSchema; import com.whylogs.core.views.ColumnProfileView; -import org.testng.Assert; -import org.testng.annotations.Test; - import java.util.ArrayList; import java.util.HashMap; -import java.util.List; -import java.util.Optional; +import org.testng.Assert; +import org.testng.annotations.Test; @Test public class TestColumnProfile { - private String columnName = "testColumn"; - private int CACHE_SIZE = 2; - - private ColumnProfile getDefaultColumnProfile(){ - ColumnSchema standardSchema = new ColumnSchema(Integer.class, new MetricConfig(), new StandardResolver()); - ColumnProfile result = new ColumnProfile<>(columnName, standardSchema, CACHE_SIZE ); - result.addMetric(IntegralMetric.zero(new MetricConfig())); - return result; - } - - @Test - public void testColumnProfileInit(){ - ColumnProfile profile = getDefaultColumnProfile(); - Assert.assertEquals(profile.getName(), columnName); - Assert.assertEquals(profile.getSchema().getType(), IntegralMetric.class); - Assert.assertEquals(profile.getCachedSize(), CACHE_SIZE); - } - - @Test - public void testAddMetric(){ - ColumnProfile profile = getDefaultColumnProfile(); - profile.addMetric(IntegralMetric.zero(new MetricConfig())); - Assert.assertEquals(profile.getMetrics().size(), 1); - Assert.assertEquals(profile.getMetrics().get("ints").getClass(), IntegralMetric.class); - IntegralMetric metric = (IntegralMetric) profile.getMetrics().get("ints"); - Assert.assertEquals((int) metric.getMaxComponent().getValue(), Integer.MIN_VALUE); - - IntegralMetric metric2 = new IntegralMetric(new MaxIntegralComponent(22), new MinIntegralComponent(20)); - profile.addMetric(metric2); - Assert.assertEquals(profile.getMetrics().size(), 1); - IntegralMetric result = (IntegralMetric) profile.getMetrics().get("ints"); - Assert.assertEquals((int) result.getMaxComponent().getValue(), 22); - } - - @Test - public void testTrack(){ - ColumnProfile profile = 
getDefaultColumnProfile(); - Assert.assertEquals(profile.getCachedSize(), CACHE_SIZE); - - HashMap row = new HashMap<>(); - row.put(columnName, 5); - row.put("test2", 2); - - profile.track(row); - Assert.assertEquals(profile.getCachedValues().size(), 1); - Assert.assertEquals((int) profile.getCachedValues().get(0), 5); - - row.put(columnName, 2); - profile.track(row); - // With cache size of 2 this should have forced a flush - Assert.assertEquals(profile.getCachedValues().size(), 0); - Assert.assertEquals(profile.getSuccessCount(), 2); - } - - @Test - public void testTrackNull(){ - ColumnProfile profile = getDefaultColumnProfile(); - Assert.assertEquals(profile.getCachedSize(), CACHE_SIZE); - - HashMap row = new HashMap<>(); - row.put(columnName, 1); - profile.track(row); - - row.put(columnName, null); - profile.track(row); - - Assert.assertEquals(profile.getCachedValues().size(), 0); - Assert.assertEquals(profile.getSuccessCount(), 1); - Assert.assertEquals(profile.getFailureCount(), 0); - // There is a null count in the columnar update, but we don't store it in the profile - } - - // Because of the typing in Java, how do we trigger the failure? 
- - @Test - public void testFlush(){ - ColumnProfile profile = getDefaultColumnProfile(); - - HashMap row = new HashMap<>(); - row.put(columnName, 5); - - profile.track(row); - Assert.assertEquals(profile.getCachedValues().size(), 1); - - profile.flush(); - Assert.assertEquals(profile.getCachedValues().size(), 0); - Assert.assertEquals(profile.getSuccessCount(), 1); - } - - @Test - public void testTrackColumn(){ - ColumnProfile profile = getDefaultColumnProfile(); - ArrayList column = new ArrayList<>(); - column.add("1"); - - profile.trackColumn(column); - Assert.assertEquals(profile.getSuccessCount(), 0); - Assert.assertEquals(profile.getFailureCount(), 0); - - ArrayList column2 = new ArrayList<>(); - column2.add(1); - column2.add(2); - column2.add(null); - profile.trackColumn(column2); - Assert.assertEquals(profile.getSuccessCount(), 2); - Assert.assertEquals(profile.getFailureCount(), 0); - } - - @Test - public void testView(){ - ColumnProfile profile = getDefaultColumnProfile(); - ArrayList column = new ArrayList<>(); - column.add(1); - column.add(2); - column.add(null); - profile.trackColumn(column); - - ColumnProfileView view = profile.view(); - Assert.assertEquals(view.getSuccesses(), 3); - Assert.assertEquals(view.getFailures(), 0); - Assert.assertEquals(view.getMetrics().size(), 1); - Assert.assertEquals(view.getMetrics().get("ints").getClass(), IntegralMetric.class); - IntegralMetric metric = (IntegralMetric) view.getMetrics().get("ints"); - Assert.assertEquals((int) metric.getMaxComponent().getValue(), 2); - } + private String columnName = "testColumn"; + private int CACHE_SIZE = 2; + + private ColumnProfile getDefaultColumnProfile() { + ColumnSchema standardSchema = + new ColumnSchema(Integer.class, new MetricConfig(), new StandardResolver()); + ColumnProfile result = new ColumnProfile<>(columnName, standardSchema, CACHE_SIZE); + result.addMetric(IntegralMetric.zero(new MetricConfig())); + return result; + } + + @Test + public void 
testColumnProfileInit() { + ColumnProfile profile = getDefaultColumnProfile(); + Assert.assertEquals(profile.getName(), columnName); + Assert.assertEquals(profile.getSchema().getType(), IntegralMetric.class); + Assert.assertEquals(profile.getCachedSize(), CACHE_SIZE); + } + + @Test + public void testAddMetric() { + ColumnProfile profile = getDefaultColumnProfile(); + profile.addMetric(IntegralMetric.zero(new MetricConfig())); + Assert.assertEquals(profile.getMetrics().size(), 1); + Assert.assertEquals(profile.getMetrics().get("ints").getClass(), IntegralMetric.class); + IntegralMetric metric = (IntegralMetric) profile.getMetrics().get("ints"); + Assert.assertEquals((int) metric.getMaxComponent().getValue(), Integer.MIN_VALUE); + + IntegralMetric metric2 = + new IntegralMetric(new MaxIntegralComponent(22), new MinIntegralComponent(20)); + profile.addMetric(metric2); + Assert.assertEquals(profile.getMetrics().size(), 1); + IntegralMetric result = (IntegralMetric) profile.getMetrics().get("ints"); + Assert.assertEquals((int) result.getMaxComponent().getValue(), 22); + } + + @Test + public void testTrack() { + ColumnProfile profile = getDefaultColumnProfile(); + Assert.assertEquals(profile.getCachedSize(), CACHE_SIZE); + + HashMap row = new HashMap<>(); + row.put(columnName, 5); + row.put("test2", 2); + + profile.track(row); + Assert.assertEquals(profile.getCachedValues().size(), 1); + Assert.assertEquals((int) profile.getCachedValues().get(0), 5); + + row.put(columnName, 2); + profile.track(row); + // With cache size of 2 this should have forced a flush + Assert.assertEquals(profile.getCachedValues().size(), 0); + Assert.assertEquals(profile.getSuccessCount(), 2); + } + + @Test + public void testTrackNull() { + ColumnProfile profile = getDefaultColumnProfile(); + Assert.assertEquals(profile.getCachedSize(), CACHE_SIZE); + + HashMap row = new HashMap<>(); + row.put(columnName, 1); + profile.track(row); + + row.put(columnName, null); + profile.track(row); + + 
Assert.assertEquals(profile.getCachedValues().size(), 0); + Assert.assertEquals(profile.getSuccessCount(), 1); + Assert.assertEquals(profile.getFailureCount(), 0); + // There is a null count in the columnar update, but we don't store it in the profile + } + + // Because of the typing in Java, how do we trigger the failure? + + @Test + public void testFlush() { + ColumnProfile profile = getDefaultColumnProfile(); + + HashMap row = new HashMap<>(); + row.put(columnName, 5); + + profile.track(row); + Assert.assertEquals(profile.getCachedValues().size(), 1); + + profile.flush(); + Assert.assertEquals(profile.getCachedValues().size(), 0); + Assert.assertEquals(profile.getSuccessCount(), 1); + } + + @Test + public void testTrackColumn() { + ColumnProfile profile = getDefaultColumnProfile(); + ArrayList column = new ArrayList<>(); + column.add("1"); + + profile.trackColumn(column); + Assert.assertEquals(profile.getSuccessCount(), 0); + Assert.assertEquals(profile.getFailureCount(), 0); + + ArrayList column2 = new ArrayList<>(); + column2.add(1); + column2.add(2); + column2.add(null); + profile.trackColumn(column2); + Assert.assertEquals(profile.getSuccessCount(), 2); + Assert.assertEquals(profile.getFailureCount(), 0); + } + + @Test + public void testView() { + ColumnProfile profile = getDefaultColumnProfile(); + ArrayList column = new ArrayList<>(); + column.add(1); + column.add(2); + column.add(null); + profile.trackColumn(column); + + ColumnProfileView view = profile.view(); + Assert.assertEquals(view.getSuccesses(), 3); + Assert.assertEquals(view.getFailures(), 0); + Assert.assertEquals(view.getMetrics().size(), 1); + Assert.assertEquals(view.getMetrics().get("ints").getClass(), IntegralMetric.class); + IntegralMetric metric = (IntegralMetric) view.getMetrics().get("ints"); + Assert.assertEquals((int) metric.getMaxComponent().getValue(), 2); + } } diff --git a/java/core/src/test/java/com/whylogs/core/views/TestColumnProfileView.java 
b/java/core/src/test/java/com/whylogs/core/views/TestColumnProfileView.java index 6b52d558fc..bbd9077ead 100644 --- a/java/core/src/test/java/com/whylogs/core/views/TestColumnProfileView.java +++ b/java/core/src/test/java/com/whylogs/core/views/TestColumnProfileView.java @@ -6,118 +6,116 @@ import com.whylogs.core.metrics.Metric; import com.whylogs.core.metrics.MetricConfig; import com.whylogs.core.metrics.components.MetricComponent; +import java.util.*; import org.testng.Assert; -import org.testng.annotations.DataProvider; import org.testng.annotations.Test; -import java.util.*; - @Test public class TestColumnProfileView { - private ColumnProfileView getDefaultColumnProfile(){ - Metric integralMetric = IntegralMetric.zero(new MetricConfig()); - HashMap metrics = new HashMap<>(); - metrics.put(integralMetric.getNamespace(), integralMetric); - - return new ColumnProfileView(metrics); - } - - private ColumnProfileView getChangedSuccessFailProfile(int success, int fail){ - Metric integralMetric = IntegralMetric.zero(new MetricConfig()); - HashMap metrics = new HashMap<>(); - metrics.put(integralMetric.getNamespace(), integralMetric); - - return new ColumnProfileView(metrics, success, fail); - } - - @Test - public void testColumnProfileInit(){ - ColumnProfileView columnProfileView = getDefaultColumnProfile(); - - Assert.assertEquals(columnProfileView.getMetric("ints").get().getClass(), IntegralMetric.class); - Assert.assertEquals(columnProfileView.getFailures(), 0); - Assert.assertEquals(columnProfileView.getSuccesses(), 0); - - - columnProfileView = getChangedSuccessFailProfile(1, 2); - Assert.assertEquals(columnProfileView.getFailures(), 2); - Assert.assertEquals(columnProfileView.getSuccesses(), 1); - - columnProfileView = ColumnProfileView.zero(); - Assert.assertEquals(columnProfileView.getFailures(), 0); - Assert.assertEquals(columnProfileView.getSuccesses(), 0); - } - - @Test - public void testMerge(){ - ColumnProfileView columnProfileView = 
getDefaultColumnProfile(); - ColumnProfileView columnProfileView2 = getChangedSuccessFailProfile(1, 2); - - ColumnProfileView result = columnProfileView.merge(columnProfileView2); - - Assert.assertEquals(result.getMetric("ints").get().getClass(), IntegralMetric.class); - Assert.assertEquals(result.getFailures(), 2); - Assert.assertEquals(result.getSuccesses(), 1); - } - - @Test - public void testMergeWithNull(){ - ColumnProfileView columnProfileView = getDefaultColumnProfile(); - ColumnProfileView result = columnProfileView.merge(null); - - Assert.assertEquals(result.getMetric("ints").get().getClass(), IntegralMetric.class); - Assert.assertEquals(result.getFailures(), 0); - Assert.assertEquals(result.getSuccesses(), 0); - } - - @Test - public void testGetMetricComponentPaths(){ - ColumnProfileView columnProfileView = getDefaultColumnProfile(); - ArrayList paths = columnProfileView.getMetricComponentPaths(); - Assert.assertEquals(paths.size(), 2); - Assert.assertEquals(paths.get(0), "ints/MaxIntegralComponent"); - Assert.assertEquals(paths.get(1), "ints/MinIntegralComponent"); - } - - @Test - public void testGetMetricComponentPathsEmpty(){ - ColumnProfileView columnProfileView = ColumnProfileView.zero(); - ArrayList paths = columnProfileView.getMetricComponentPaths(); - Assert.assertEquals(paths.size(), 0); - } - - @Test - public void testGetMetricComponentPathsNull(){ - ColumnProfileView columnProfileView = getDefaultColumnProfile(); - columnProfileView = columnProfileView.merge(null); - ArrayList paths = columnProfileView.getMetricComponentPaths(); - Assert.assertEquals(paths.size(), 2); - Assert.assertEquals(paths.get(0), "ints/MaxIntegralComponent"); - Assert.assertEquals(paths.get(1), "ints/MinIntegralComponent"); - } - - @Test - public void testToSummaryDict() throws UnsupportedError { - ColumnProfileView columnProfileView = getDefaultColumnProfile(); - HashMap summary = columnProfileView.toSummaryDict(Optional.ofNullable("ints"), Optional.ofNullable(new 
SummaryConfig())); - Assert.assertEquals(summary.size(), 2); - Assert.assertEquals(summary.get("ints/min"), Integer.MAX_VALUE); - Assert.assertEquals(summary.get("ints/max"), Integer.MIN_VALUE); - - summary = columnProfileView.toSummaryDict(Optional.empty(), Optional.empty()); - Assert.assertEquals(summary.size(), 2); - Assert.assertEquals(summary.get("ints/min"), Integer.MAX_VALUE); - Assert.assertEquals(summary.get("ints/max"), Integer.MIN_VALUE); - } - - @Test - public void testGetComponents(){ - ColumnProfileView columnProfileView = getDefaultColumnProfile(); - Map components = columnProfileView.getComponents(); - Assert.assertEquals(components.size(), 2); - Assert.assertEquals(components.get("MinIntegralComponent").getValue(), Integer.MAX_VALUE); - Assert.assertEquals(components.get("MaxIntegralComponent").getValue(), Integer.MIN_VALUE); - } - + private ColumnProfileView getDefaultColumnProfile() { + Metric integralMetric = IntegralMetric.zero(new MetricConfig()); + HashMap metrics = new HashMap<>(); + metrics.put(integralMetric.getNamespace(), integralMetric); + + return new ColumnProfileView(metrics); + } + + private ColumnProfileView getChangedSuccessFailProfile(int success, int fail) { + Metric integralMetric = IntegralMetric.zero(new MetricConfig()); + HashMap metrics = new HashMap<>(); + metrics.put(integralMetric.getNamespace(), integralMetric); + + return new ColumnProfileView(metrics, success, fail); + } + + @Test + public void testColumnProfileInit() { + ColumnProfileView columnProfileView = getDefaultColumnProfile(); + + Assert.assertEquals(columnProfileView.getMetric("ints").get().getClass(), IntegralMetric.class); + Assert.assertEquals(columnProfileView.getFailures(), 0); + Assert.assertEquals(columnProfileView.getSuccesses(), 0); + + columnProfileView = getChangedSuccessFailProfile(1, 2); + Assert.assertEquals(columnProfileView.getFailures(), 2); + Assert.assertEquals(columnProfileView.getSuccesses(), 1); + + columnProfileView = 
ColumnProfileView.zero(); + Assert.assertEquals(columnProfileView.getFailures(), 0); + Assert.assertEquals(columnProfileView.getSuccesses(), 0); + } + + @Test + public void testMerge() { + ColumnProfileView columnProfileView = getDefaultColumnProfile(); + ColumnProfileView columnProfileView2 = getChangedSuccessFailProfile(1, 2); + + ColumnProfileView result = columnProfileView.merge(columnProfileView2); + + Assert.assertEquals(result.getMetric("ints").get().getClass(), IntegralMetric.class); + Assert.assertEquals(result.getFailures(), 2); + Assert.assertEquals(result.getSuccesses(), 1); + } + + @Test + public void testMergeWithNull() { + ColumnProfileView columnProfileView = getDefaultColumnProfile(); + ColumnProfileView result = columnProfileView.merge(null); + + Assert.assertEquals(result.getMetric("ints").get().getClass(), IntegralMetric.class); + Assert.assertEquals(result.getFailures(), 0); + Assert.assertEquals(result.getSuccesses(), 0); + } + + @Test + public void testGetMetricComponentPaths() { + ColumnProfileView columnProfileView = getDefaultColumnProfile(); + ArrayList paths = columnProfileView.getMetricComponentPaths(); + Assert.assertEquals(paths.size(), 2); + Assert.assertEquals(paths.get(0), "ints/MaxIntegralComponent"); + Assert.assertEquals(paths.get(1), "ints/MinIntegralComponent"); + } + + @Test + public void testGetMetricComponentPathsEmpty() { + ColumnProfileView columnProfileView = ColumnProfileView.zero(); + ArrayList paths = columnProfileView.getMetricComponentPaths(); + Assert.assertEquals(paths.size(), 0); + } + + @Test + public void testGetMetricComponentPathsNull() { + ColumnProfileView columnProfileView = getDefaultColumnProfile(); + columnProfileView = columnProfileView.merge(null); + ArrayList paths = columnProfileView.getMetricComponentPaths(); + Assert.assertEquals(paths.size(), 2); + Assert.assertEquals(paths.get(0), "ints/MaxIntegralComponent"); + Assert.assertEquals(paths.get(1), "ints/MinIntegralComponent"); + } + + @Test + 
public void testToSummaryDict() throws UnsupportedError { + ColumnProfileView columnProfileView = getDefaultColumnProfile(); + HashMap summary = + columnProfileView.toSummaryDict( + Optional.ofNullable("ints"), Optional.ofNullable(new SummaryConfig())); + Assert.assertEquals(summary.size(), 2); + Assert.assertEquals(summary.get("ints/min"), Integer.MAX_VALUE); + Assert.assertEquals(summary.get("ints/max"), Integer.MIN_VALUE); + + summary = columnProfileView.toSummaryDict(Optional.empty(), Optional.empty()); + Assert.assertEquals(summary.size(), 2); + Assert.assertEquals(summary.get("ints/min"), Integer.MAX_VALUE); + Assert.assertEquals(summary.get("ints/max"), Integer.MIN_VALUE); + } + + @Test + public void testGetComponents() { + ColumnProfileView columnProfileView = getDefaultColumnProfile(); + Map components = columnProfileView.getComponents(); + Assert.assertEquals(components.size(), 2); + Assert.assertEquals(components.get("MinIntegralComponent").getValue(), Integer.MAX_VALUE); + Assert.assertEquals(components.get("MaxIntegralComponent").getValue(), Integer.MIN_VALUE); + } } diff --git a/java/core/src/test/java/com/whylogs/core/views/TestDatasetProfileView.java b/java/core/src/test/java/com/whylogs/core/views/TestDatasetProfileView.java index 0c9e3c5382..1d10adc0a0 100644 --- a/java/core/src/test/java/com/whylogs/core/views/TestDatasetProfileView.java +++ b/java/core/src/test/java/com/whylogs/core/views/TestDatasetProfileView.java @@ -3,66 +3,69 @@ import com.whylogs.core.metrics.IntegralMetric; import com.whylogs.core.metrics.Metric; import com.whylogs.core.metrics.MetricConfig; -import org.testng.Assert; -import org.testng.annotations.Test; - import java.util.Date; import java.util.HashMap; +import org.testng.Assert; +import org.testng.annotations.Test; public class TestDatasetProfileView { - private DatasetProfileView getDefaultDatasetProfile(){ - HashMap columnProfileViews = new HashMap<>(); - HashMap testMetrics = new HashMap<>(); - 
testMetrics.put("ints", IntegralMetric.zero(new MetricConfig())); - columnProfileViews.put("test", new ColumnProfileView(testMetrics)); - return new DatasetProfileView(columnProfileViews, new Date(), new Date()); - } + private DatasetProfileView getDefaultDatasetProfile() { + HashMap columnProfileViews = new HashMap<>(); + HashMap testMetrics = new HashMap<>(); + testMetrics.put("ints", IntegralMetric.zero(new MetricConfig())); + columnProfileViews.put("test", new ColumnProfileView(testMetrics)); + return new DatasetProfileView(columnProfileViews, new Date(), new Date()); + } - @Test - public void testDatasetProfileViewInit(){ - DatasetProfileView view = new DatasetProfileView(new HashMap(), new Date(), new Date()); - Assert.assertEquals(view.getColumns().size(), 0); + @Test + public void testDatasetProfileViewInit() { + DatasetProfileView view = + new DatasetProfileView(new HashMap(), new Date(), new Date()); + Assert.assertEquals(view.getColumns().size(), 0); - view = getDefaultDatasetProfile(); - Assert.assertEquals(view.getColumns().size(), 1); - Assert.assertNotNull(view.getColumns().get("test").getMetric("ints")); - } + view = getDefaultDatasetProfile(); + Assert.assertEquals(view.getColumns().size(), 1); + Assert.assertNotNull(view.getColumns().get("test").getMetric("ints")); + } - @Test - public void testMerge(){ - DatasetProfileView view = getDefaultDatasetProfile(); - DatasetProfileView view2 = getDefaultDatasetProfile(); - DatasetProfileView result = view.merge(view2); - Assert.assertEquals(result.getColumns().size(), 1); - Assert.assertNotNull(result.getColumns().get("test").getMetric("ints")); - } + @Test + public void testMerge() { + DatasetProfileView view = getDefaultDatasetProfile(); + DatasetProfileView view2 = getDefaultDatasetProfile(); + DatasetProfileView result = view.merge(view2); + Assert.assertEquals(result.getColumns().size(), 1); + Assert.assertNotNull(result.getColumns().get("test").getMetric("ints")); + } - @Test - public void 
testMergeWithNull(){ - DatasetProfileView view = getDefaultDatasetProfile(); - DatasetProfileView result = view.merge(null); - Assert.assertEquals(result.getColumns().size(), 1); - Assert.assertNotNull(result.getColumns().get("test").getMetric("ints")); - } + @Test + public void testMergeWithNull() { + DatasetProfileView view = getDefaultDatasetProfile(); + DatasetProfileView result = view.merge(null); + Assert.assertEquals(result.getColumns().size(), 1); + Assert.assertNotNull(result.getColumns().get("test").getMetric("ints")); + } - @Test - public void testMergeWithEmpty(){ - DatasetProfileView view = getDefaultDatasetProfile(); - DatasetProfileView result = view.merge(new DatasetProfileView(new HashMap(), new Date(), new Date())); - Assert.assertEquals(result.getColumns().size(), 1); - Assert.assertNotNull(result.getColumns().get("test").getMetric("ints")); - } + @Test + public void testMergeWithEmpty() { + DatasetProfileView view = getDefaultDatasetProfile(); + DatasetProfileView result = + view.merge( + new DatasetProfileView( + new HashMap(), new Date(), new Date())); + Assert.assertEquals(result.getColumns().size(), 1); + Assert.assertNotNull(result.getColumns().get("test").getMetric("ints")); + } - @Test - public void testGetColumn(){ - DatasetProfileView view = getDefaultDatasetProfile(); - Assert.assertNotNull(view.getColumn("test")); - } + @Test + public void testGetColumn() { + DatasetProfileView view = getDefaultDatasetProfile(); + Assert.assertNotNull(view.getColumn("test")); + } - @Test - public void testGetColumns(){ - DatasetProfileView view = getDefaultDatasetProfile(); - Assert.assertNotNull(view.getColumns()); - } + @Test + public void testGetColumns() { + DatasetProfileView view = getDefaultDatasetProfile(); + Assert.assertNotNull(view.getColumns()); + } } diff --git a/java/core/src/test/java/com/whylogs/core/views/TestSummaryType.java b/java/core/src/test/java/com/whylogs/core/views/TestSummaryType.java index 12fa04fcc8..744c57498c 100644 --- 
a/java/core/src/test/java/com/whylogs/core/views/TestSummaryType.java +++ b/java/core/src/test/java/com/whylogs/core/views/TestSummaryType.java @@ -6,9 +6,9 @@ @Test public class TestSummaryType { - @Test - public void testSummaryType() { - Assert.assertEquals(SummaryType.COLUMN.label, "COLUMN"); - Assert.assertEquals(SummaryType.DATASET.label, "DATASET"); - } + @Test + public void testSummaryType() { + Assert.assertEquals(SummaryType.COLUMN.label, "COLUMN"); + Assert.assertEquals(SummaryType.DATASET.label, "DATASET"); + } } diff --git a/java/core/src/test/java/com/whylogs/core/views/TestWhylogsMagicUtility.java b/java/core/src/test/java/com/whylogs/core/views/TestWhylogsMagicUtility.java index 7052c85062..45287d0c36 100644 --- a/java/core/src/test/java/com/whylogs/core/views/TestWhylogsMagicUtility.java +++ b/java/core/src/test/java/com/whylogs/core/views/TestWhylogsMagicUtility.java @@ -6,10 +6,11 @@ @Test public class TestWhylogsMagicUtility { - @Test - public void testWhylogsMagicUtility() { - Assert.assertEquals(WhylogsMagicUtility.WHYLOGS_MAGIC_HEADER, "WHY1"); - Assert.assertEquals(WhylogsMagicUtility.WHYLOGS_MAGIC_HEADER_LENGTH, 4); - Assert.assertEquals(WhylogsMagicUtility.WHYLOGS_MAGIC_HEADER_BYTES, new byte[] {87, 72, 89, 49}); - } + @Test + public void testWhylogsMagicUtility() { + Assert.assertEquals(WhylogsMagicUtility.WHYLOGS_MAGIC_HEADER, "WHY1"); + Assert.assertEquals(WhylogsMagicUtility.WHYLOGS_MAGIC_HEADER_LENGTH, 4); + Assert.assertEquals( + WhylogsMagicUtility.WHYLOGS_MAGIC_HEADER_BYTES, new byte[] {87, 72, 89, 49}); + } } From 85c357558d2b9fd200653d9326cd650958c21d5c Mon Sep 17 00:00:00 2001 From: TheMellyBee Date: Thu, 22 Sep 2022 12:52:46 -0700 Subject: [PATCH 28/71] fixes a test --- java/core/src/test/java/com/whylogs/core/TestColumnProfile.java | 2 +- .../test/java/com/whylogs/core/schemas/TestColumnSchema.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git 
a/java/core/src/test/java/com/whylogs/core/TestColumnProfile.java b/java/core/src/test/java/com/whylogs/core/TestColumnProfile.java index abece1da16..b019d615b5 100644 --- a/java/core/src/test/java/com/whylogs/core/TestColumnProfile.java +++ b/java/core/src/test/java/com/whylogs/core/TestColumnProfile.java @@ -29,7 +29,7 @@ private ColumnProfile getDefaultColumnProfile() { public void testColumnProfileInit() { ColumnProfile profile = getDefaultColumnProfile(); Assert.assertEquals(profile.getName(), columnName); - Assert.assertEquals(profile.getSchema().getType(), IntegralMetric.class); + Assert.assertEquals(profile.getSchema().getType(), Integer.class); Assert.assertEquals(profile.getCachedSize(), CACHE_SIZE); } diff --git a/java/core/src/test/java/com/whylogs/core/schemas/TestColumnSchema.java b/java/core/src/test/java/com/whylogs/core/schemas/TestColumnSchema.java index 9654651a43..96f3ab1b67 100644 --- a/java/core/src/test/java/com/whylogs/core/schemas/TestColumnSchema.java +++ b/java/core/src/test/java/com/whylogs/core/schemas/TestColumnSchema.java @@ -17,8 +17,8 @@ public void test_column_schema() { new ColumnSchema(Integer.class, new MetricConfig(), new StandardResolver()); HashMap metrics = columnSchema.getMetrics(); - // TODO: I'm not sure I like this. 
Might want to rethink the Metric just a little Assert.assertEquals(metrics.get("ints").getClass(), IntegralMetric.class); + Assert.assertEquals(columnSchema.getType(), Integer.class); IntegralMetric ints = (IntegralMetric) metrics.get("ints"); Assert.assertEquals((int) ints.getMaxComponent().getValue(), Integer.MIN_VALUE); } From 808c5a32130a5f99632fe30169814574e2eaa9c4 Mon Sep 17 00:00:00 2001 From: TheMellyBee Date: Thu, 22 Sep 2022 13:14:01 -0700 Subject: [PATCH 29/71] Unused import --- .../core/src/test/java/com/whylogs/core/TestColumnProfile.java | 3 --- 1 file changed, 3 deletions(-) diff --git a/java/core/src/test/java/com/whylogs/core/TestColumnProfile.java b/java/core/src/test/java/com/whylogs/core/TestColumnProfile.java index d635612b3e..6dbce4dc1f 100644 --- a/java/core/src/test/java/com/whylogs/core/TestColumnProfile.java +++ b/java/core/src/test/java/com/whylogs/core/TestColumnProfile.java @@ -1,7 +1,6 @@ package com.whylogs.core; import com.whylogs.core.metrics.IntegralMetric; -import com.whylogs.core.metrics.Metric; import com.whylogs.core.metrics.MetricConfig; import com.whylogs.core.metrics.components.MaxIntegralComponent; import com.whylogs.core.metrics.components.MinIntegralComponent; @@ -13,8 +12,6 @@ import java.util.ArrayList; import java.util.HashMap; -import java.util.List; -import java.util.Optional; @Test public class TestColumnProfile { From b2d57161117e092057e487667ab3caa3570ea193 Mon Sep 17 00:00:00 2001 From: TheMellyBee Date: Thu, 22 Sep 2022 14:03:56 -0700 Subject: [PATCH 30/71] Implements resultsSet and does spotless --- .../com/whylogs/api/logger/BasicCache.java | 3 +- .../java/com/whylogs/api/logger/Logger.java | 3 +- .../whylogs/api/logger/TransientLogger.java | 3 +- .../logger/resultSets/ProfileResultSet.java | 25 +- .../api/logger/resultSets/ResultSet.java | 39 ++- .../api/logger/resultSets/ViewResultSet.java | 30 +- .../java/com/whylogs/core/ColumnProfile.java | 5 +- .../java/com/whylogs/core/DatasetProfile.java | 5 + 
.../whylogs/core/views/ColumnProfileView.java | 4 +- .../core/views/DatasetProfileView.java | 4 +- .../api/resultsets/TestViewResultSet.java | 32 +++ .../com/whylogs/core/TestColumnProfile.java | 257 +++++++++--------- .../core/views/TestColumnProfileView.java | 214 ++++++++------- .../core/views/TestDatasetProfileView.java | 103 +++---- .../whylogs/core/views/TestSummaryType.java | 10 +- .../core/views/TestWhylogsMagicUtility.java | 13 +- 16 files changed, 429 insertions(+), 321 deletions(-) create mode 100644 java/core/src/test/java/com/whylogs/api/resultsets/TestViewResultSet.java diff --git a/java/core/src/main/java/com/whylogs/api/logger/BasicCache.java b/java/core/src/main/java/com/whylogs/api/logger/BasicCache.java index cdc84047fd..c3949032d1 100644 --- a/java/core/src/main/java/com/whylogs/api/logger/BasicCache.java +++ b/java/core/src/main/java/com/whylogs/api/logger/BasicCache.java @@ -1,4 +1,3 @@ package com.whylogs.api.logger; -public class BasicCache { -} +public class BasicCache {} diff --git a/java/core/src/main/java/com/whylogs/api/logger/Logger.java b/java/core/src/main/java/com/whylogs/api/logger/Logger.java index 4b4ec081c6..b8ebb9e2c7 100644 --- a/java/core/src/main/java/com/whylogs/api/logger/Logger.java +++ b/java/core/src/main/java/com/whylogs/api/logger/Logger.java @@ -1,4 +1,3 @@ package com.whylogs.api.logger; -public class Logger { -} +public class Logger {} diff --git a/java/core/src/main/java/com/whylogs/api/logger/TransientLogger.java b/java/core/src/main/java/com/whylogs/api/logger/TransientLogger.java index 8ce9ea86ee..a5986a5e19 100644 --- a/java/core/src/main/java/com/whylogs/api/logger/TransientLogger.java +++ b/java/core/src/main/java/com/whylogs/api/logger/TransientLogger.java @@ -1,4 +1,3 @@ package com.whylogs.api.logger; -public class TransientLogger { -} +public class TransientLogger {} diff --git a/java/core/src/main/java/com/whylogs/api/logger/resultSets/ProfileResultSet.java 
b/java/core/src/main/java/com/whylogs/api/logger/resultSets/ProfileResultSet.java index ec22a19b42..ff5ab53a5a 100644 --- a/java/core/src/main/java/com/whylogs/api/logger/resultSets/ProfileResultSet.java +++ b/java/core/src/main/java/com/whylogs/api/logger/resultSets/ProfileResultSet.java @@ -1,4 +1,27 @@ package com.whylogs.api.logger.resultSets; -public class ProfileResultSet { +import com.whylogs.core.DatasetProfile; +import com.whylogs.core.views.DatasetProfileView; +import java.util.Optional; +import lombok.Data; +import lombok.EqualsAndHashCode; +import lombok.NonNull; + +@EqualsAndHashCode(callSuper = true) +@Data +public class ProfileResultSet extends ResultSet { + @NonNull private DatasetProfile profile; + + public ProfileResultSet(DatasetProfile profile) { + super(); + this.profile = profile; + } + + public Optional profile() { + return Optional.of(this.profile); + } + + public Optional view() { + return Optional.of(this.profile.view()); + } } diff --git a/java/core/src/main/java/com/whylogs/api/logger/resultSets/ResultSet.java b/java/core/src/main/java/com/whylogs/api/logger/resultSets/ResultSet.java index c38741589d..10cb25011c 100644 --- a/java/core/src/main/java/com/whylogs/api/logger/resultSets/ResultSet.java +++ b/java/core/src/main/java/com/whylogs/api/logger/resultSets/ResultSet.java @@ -1,18 +1,41 @@ package com.whylogs.api.logger.resultSets; +import com.whylogs.core.DatasetProfile; +import com.whylogs.core.errors.Error; +import com.whylogs.core.metrics.Metric; +import com.whylogs.core.views.DatasetProfileView; +import java.util.Optional; +import lombok.Data; +import lombok.NoArgsConstructor; + /** - A holder object for profiling results. + * A holder object for profiling results. + * + *

A whylogs.log call can result in more than one profile. This wrapper class simplifies the + * navigation among these profiles. + * + *

Note that currently we only hold one profile but we're planning to add other kinds of profiles + * such as segmented profiles here. + */ +@Data +@NoArgsConstructor +public abstract class ResultSet { - A whylogs.log call can result in more than one profile. This wrapper class - simplifies the navigation among these profiles. + // TODO: implement read and write when I make the reader and writer - Note that currently we only hold one profile but we're planning to add other - kinds of profiles such as segmented profiles here. -**/ -public abstract class ResultSet { + public abstract Optional view(); + + public abstract Optional profile(); - public static ResultSet read(String multiProfileFile){ + // TODO: Come back for ModelPerformanceMetrics + // Question: why is the python addMetrics when it only adds the one? + public void addMetric(String name, Metric metric) throws Error { + if (!this.profile().isPresent()) { + throw new Error( + "Cannot add " + name + " metric " + metric + " to a result set without a profile"); } + this.profile().get().addMetric(name, metric); + } } diff --git a/java/core/src/main/java/com/whylogs/api/logger/resultSets/ViewResultSet.java b/java/core/src/main/java/com/whylogs/api/logger/resultSets/ViewResultSet.java index 25ad956074..17d4aef7ce 100644 --- a/java/core/src/main/java/com/whylogs/api/logger/resultSets/ViewResultSet.java +++ b/java/core/src/main/java/com/whylogs/api/logger/resultSets/ViewResultSet.java @@ -1,4 +1,32 @@ package com.whylogs.api.logger.resultSets; -public class ViewResultSet { +import com.whylogs.core.DatasetProfile; +import com.whylogs.core.views.DatasetProfileView; +import java.util.Optional; +import lombok.*; + +@EqualsAndHashCode(callSuper = true) +@Data +public class ViewResultSet extends ResultSet { + @NonNull private final DatasetProfileView view; + + public ViewResultSet(@NonNull DatasetProfileView view) { + super(); + this.view = view; + } + + public ViewResultSet(DatasetProfile profile) { + super(); + this.view 
= profile.view(); + } + + @Override + public Optional view() { + return Optional.of(this.view); + } + + @Override + public Optional profile() { + throw new Error("No profile available for a view result set"); + } } diff --git a/java/core/src/main/java/com/whylogs/core/ColumnProfile.java b/java/core/src/main/java/com/whylogs/core/ColumnProfile.java index 4cf403322b..9e57a5a881 100644 --- a/java/core/src/main/java/com/whylogs/core/ColumnProfile.java +++ b/java/core/src/main/java/com/whylogs/core/ColumnProfile.java @@ -4,11 +4,10 @@ import com.whylogs.core.metrics.OperationResult; import com.whylogs.core.schemas.ColumnSchema; import com.whylogs.core.views.ColumnProfileView; -import lombok.Getter; -import lombok.ToString; - import java.util.ArrayList; import java.util.HashMap; +import lombok.Getter; +import lombok.ToString; @Getter @ToString diff --git a/java/core/src/main/java/com/whylogs/core/DatasetProfile.java b/java/core/src/main/java/com/whylogs/core/DatasetProfile.java index e3701c0c8c..830cc04348 100644 --- a/java/core/src/main/java/com/whylogs/core/DatasetProfile.java +++ b/java/core/src/main/java/com/whylogs/core/DatasetProfile.java @@ -24,6 +24,7 @@ public class DatasetProfile { private HashMap> columns; private boolean isActive = false; private int trackCount = 0; + private HashMap metrics = new HashMap<>(); public DatasetProfile( Optional datasetSchema, @@ -44,6 +45,10 @@ public void addMetric(String colName, Metric metric) { this.columns.get(colName).addMetric(metric); } + public void addDatasetMetric(String name, Metric metric) { + this.metrics.put(name, metric); + } + /* TODO: I don't beleive we need this in Java? 
(with the T Object) public void track(T obj){ diff --git a/java/core/src/main/java/com/whylogs/core/views/ColumnProfileView.java b/java/core/src/main/java/com/whylogs/core/views/ColumnProfileView.java index a1a7eebb85..d383177aad 100644 --- a/java/core/src/main/java/com/whylogs/core/views/ColumnProfileView.java +++ b/java/core/src/main/java/com/whylogs/core/views/ColumnProfileView.java @@ -24,7 +24,7 @@ public ColumnProfileView(HashMap metrics, int successes, int fai } public ColumnProfileView merge(ColumnProfileView otherView) { - if(otherView == null){ + if (otherView == null) { // TODO: log warning that otehrwas null and this returns original return this; } @@ -80,7 +80,7 @@ public ArrayList getMetricNames() { } public HashMap toSummaryDict( - Optional columnMetric, Optional config) throws UnsupportedError { + Optional columnMetric, Optional config) throws UnsupportedError { SummaryConfig summaryConfig = config.orElse(new SummaryConfig()); HashMap summary = new HashMap<>(); diff --git a/java/core/src/main/java/com/whylogs/core/views/DatasetProfileView.java b/java/core/src/main/java/com/whylogs/core/views/DatasetProfileView.java index d235a2c4e7..6a73fde75d 100644 --- a/java/core/src/main/java/com/whylogs/core/views/DatasetProfileView.java +++ b/java/core/src/main/java/com/whylogs/core/views/DatasetProfileView.java @@ -1,11 +1,9 @@ package com.whylogs.core.views; -import java.io.*; import java.util.*; import lombok.AllArgsConstructor; import lombok.Getter; import lombok.ToString; -import whylogs.core.message.*; // TODO: extend writable when we do Protobuf @AllArgsConstructor @@ -17,7 +15,7 @@ public class DatasetProfileView { private Date creationTimestamp; public DatasetProfileView merge(DatasetProfileView otherView) { - if(otherView == null) { + if (otherView == null) { return this; } diff --git a/java/core/src/test/java/com/whylogs/api/resultsets/TestViewResultSet.java b/java/core/src/test/java/com/whylogs/api/resultsets/TestViewResultSet.java new file mode 
100644 index 0000000000..bae4a89476 --- /dev/null +++ b/java/core/src/test/java/com/whylogs/api/resultsets/TestViewResultSet.java @@ -0,0 +1,32 @@ +package com.whylogs.api.resultsets; + +import com.whylogs.api.logger.resultSets.ViewResultSet; +import com.whylogs.core.metrics.IntegralMetric; +import com.whylogs.core.metrics.Metric; +import com.whylogs.core.metrics.MetricConfig; +import com.whylogs.core.views.ColumnProfileView; +import com.whylogs.core.views.DatasetProfileView; +import java.util.Date; +import java.util.HashMap; +import org.testng.Assert; +import org.testng.annotations.Test; + +@Test +public class TestViewResultSet { + + private DatasetProfileView getDefaultDatasetProfile() { + HashMap columnProfileViews = new HashMap<>(); + HashMap testMetrics = new HashMap<>(); + testMetrics.put("ints", IntegralMetric.zero(new MetricConfig())); + columnProfileViews.put("test", new ColumnProfileView(testMetrics)); + return new DatasetProfileView(columnProfileViews, new Date(), new Date()); + } + + @Test + public void testViewResultSet() { + DatasetProfileView view = getDefaultDatasetProfile(); + ViewResultSet viewResultSet = new ViewResultSet(view); + Assert.assertNotNull(viewResultSet); + Assert.assertEquals(viewResultSet.view().get().getColumns().size(), 1); + } +} diff --git a/java/core/src/test/java/com/whylogs/core/TestColumnProfile.java b/java/core/src/test/java/com/whylogs/core/TestColumnProfile.java index 6dbce4dc1f..abece1da16 100644 --- a/java/core/src/test/java/com/whylogs/core/TestColumnProfile.java +++ b/java/core/src/test/java/com/whylogs/core/TestColumnProfile.java @@ -7,137 +7,138 @@ import com.whylogs.core.resolvers.StandardResolver; import com.whylogs.core.schemas.ColumnSchema; import com.whylogs.core.views.ColumnProfileView; -import org.testng.Assert; -import org.testng.annotations.Test; - import java.util.ArrayList; import java.util.HashMap; +import org.testng.Assert; +import org.testng.annotations.Test; @Test public class TestColumnProfile { - 
private String columnName = "testColumn"; - private int CACHE_SIZE = 2; - - private ColumnProfile getDefaultColumnProfile(){ - ColumnSchema standardSchema = new ColumnSchema(Integer.class, new MetricConfig(), new StandardResolver()); - ColumnProfile result = new ColumnProfile<>(columnName, standardSchema, CACHE_SIZE ); - result.addMetric(IntegralMetric.zero(new MetricConfig())); - return result; - } - - @Test - public void testColumnProfileInit(){ - ColumnProfile profile = getDefaultColumnProfile(); - Assert.assertEquals(profile.getName(), columnName); - Assert.assertEquals(profile.getSchema().getType(), IntegralMetric.class); - Assert.assertEquals(profile.getCachedSize(), CACHE_SIZE); - } - - @Test - public void testAddMetric(){ - ColumnProfile profile = getDefaultColumnProfile(); - profile.addMetric(IntegralMetric.zero(new MetricConfig())); - Assert.assertEquals(profile.getMetrics().size(), 1); - Assert.assertEquals(profile.getMetrics().get("ints").getClass(), IntegralMetric.class); - IntegralMetric metric = (IntegralMetric) profile.getMetrics().get("ints"); - Assert.assertEquals((int) metric.getMaxComponent().getValue(), Integer.MIN_VALUE); - - IntegralMetric metric2 = new IntegralMetric(new MaxIntegralComponent(22), new MinIntegralComponent(20)); - profile.addMetric(metric2); - Assert.assertEquals(profile.getMetrics().size(), 1); - IntegralMetric result = (IntegralMetric) profile.getMetrics().get("ints"); - Assert.assertEquals((int) result.getMaxComponent().getValue(), 22); - } - - @Test - public void testTrack(){ - ColumnProfile profile = getDefaultColumnProfile(); - Assert.assertEquals(profile.getCachedSize(), CACHE_SIZE); - - HashMap row = new HashMap<>(); - row.put(columnName, 5); - row.put("test2", 2); - - profile.track(row); - Assert.assertEquals(profile.getCachedValues().size(), 1); - Assert.assertEquals((int) profile.getCachedValues().get(0), 5); - - row.put(columnName, 2); - profile.track(row); - // With cache size of 2 this should have forced a flush 
- Assert.assertEquals(profile.getCachedValues().size(), 0); - Assert.assertEquals(profile.getSuccessCount(), 2); - } - - @Test - public void testTrackNull(){ - ColumnProfile profile = getDefaultColumnProfile(); - Assert.assertEquals(profile.getCachedSize(), CACHE_SIZE); - - HashMap row = new HashMap<>(); - row.put(columnName, 1); - profile.track(row); - - row.put(columnName, null); - profile.track(row); - - Assert.assertEquals(profile.getCachedValues().size(), 0); - Assert.assertEquals(profile.getSuccessCount(), 1); - Assert.assertEquals(profile.getFailureCount(), 0); - // There is a null count in the columnar update, but we don't store it in the profile - } - - // Because of the typing in Java, how do we trigger the failure? - - @Test - public void testFlush(){ - ColumnProfile profile = getDefaultColumnProfile(); - - HashMap row = new HashMap<>(); - row.put(columnName, 5); - - profile.track(row); - Assert.assertEquals(profile.getCachedValues().size(), 1); - - profile.flush(); - Assert.assertEquals(profile.getCachedValues().size(), 0); - Assert.assertEquals(profile.getSuccessCount(), 1); - } - - @Test - public void testTrackColumn(){ - ColumnProfile profile = getDefaultColumnProfile(); - ArrayList column = new ArrayList<>(); - column.add("1"); - - profile.trackColumn(column); - Assert.assertEquals(profile.getSuccessCount(), 0); - Assert.assertEquals(profile.getFailureCount(), 0); - - ArrayList column2 = new ArrayList<>(); - column2.add(1); - column2.add(2); - column2.add(null); - profile.trackColumn(column2); - Assert.assertEquals(profile.getSuccessCount(), 2); - Assert.assertEquals(profile.getFailureCount(), 0); - } - - @Test - public void testView(){ - ColumnProfile profile = getDefaultColumnProfile(); - ArrayList column = new ArrayList<>(); - column.add(1); - column.add(2); - column.add(null); - profile.trackColumn(column); - - ColumnProfileView view = profile.view(); - Assert.assertEquals(view.getSuccesses(), 3); - Assert.assertEquals(view.getFailures(), 0); - 
Assert.assertEquals(view.getMetrics().size(), 1); - Assert.assertEquals(view.getMetrics().get("ints").getClass(), IntegralMetric.class); - IntegralMetric metric = (IntegralMetric) view.getMetrics().get("ints"); - Assert.assertEquals((int) metric.getMaxComponent().getValue(), 2); - } + private String columnName = "testColumn"; + private int CACHE_SIZE = 2; + + private ColumnProfile getDefaultColumnProfile() { + ColumnSchema standardSchema = + new ColumnSchema(Integer.class, new MetricConfig(), new StandardResolver()); + ColumnProfile result = new ColumnProfile<>(columnName, standardSchema, CACHE_SIZE); + result.addMetric(IntegralMetric.zero(new MetricConfig())); + return result; + } + + @Test + public void testColumnProfileInit() { + ColumnProfile profile = getDefaultColumnProfile(); + Assert.assertEquals(profile.getName(), columnName); + Assert.assertEquals(profile.getSchema().getType(), IntegralMetric.class); + Assert.assertEquals(profile.getCachedSize(), CACHE_SIZE); + } + + @Test + public void testAddMetric() { + ColumnProfile profile = getDefaultColumnProfile(); + profile.addMetric(IntegralMetric.zero(new MetricConfig())); + Assert.assertEquals(profile.getMetrics().size(), 1); + Assert.assertEquals(profile.getMetrics().get("ints").getClass(), IntegralMetric.class); + IntegralMetric metric = (IntegralMetric) profile.getMetrics().get("ints"); + Assert.assertEquals((int) metric.getMaxComponent().getValue(), Integer.MIN_VALUE); + + IntegralMetric metric2 = + new IntegralMetric(new MaxIntegralComponent(22), new MinIntegralComponent(20)); + profile.addMetric(metric2); + Assert.assertEquals(profile.getMetrics().size(), 1); + IntegralMetric result = (IntegralMetric) profile.getMetrics().get("ints"); + Assert.assertEquals((int) result.getMaxComponent().getValue(), 22); + } + + @Test + public void testTrack() { + ColumnProfile profile = getDefaultColumnProfile(); + Assert.assertEquals(profile.getCachedSize(), CACHE_SIZE); + + HashMap row = new HashMap<>(); + 
row.put(columnName, 5); + row.put("test2", 2); + + profile.track(row); + Assert.assertEquals(profile.getCachedValues().size(), 1); + Assert.assertEquals((int) profile.getCachedValues().get(0), 5); + + row.put(columnName, 2); + profile.track(row); + // With cache size of 2 this should have forced a flush + Assert.assertEquals(profile.getCachedValues().size(), 0); + Assert.assertEquals(profile.getSuccessCount(), 2); + } + + @Test + public void testTrackNull() { + ColumnProfile profile = getDefaultColumnProfile(); + Assert.assertEquals(profile.getCachedSize(), CACHE_SIZE); + + HashMap row = new HashMap<>(); + row.put(columnName, 1); + profile.track(row); + + row.put(columnName, null); + profile.track(row); + + Assert.assertEquals(profile.getCachedValues().size(), 0); + Assert.assertEquals(profile.getSuccessCount(), 1); + Assert.assertEquals(profile.getFailureCount(), 0); + // There is a null count in the columnar update, but we don't store it in the profile + } + + // Because of the typing in Java, how do we trigger the failure? 
+ + @Test + public void testFlush() { + ColumnProfile profile = getDefaultColumnProfile(); + + HashMap row = new HashMap<>(); + row.put(columnName, 5); + + profile.track(row); + Assert.assertEquals(profile.getCachedValues().size(), 1); + + profile.flush(); + Assert.assertEquals(profile.getCachedValues().size(), 0); + Assert.assertEquals(profile.getSuccessCount(), 1); + } + + @Test + public void testTrackColumn() { + ColumnProfile profile = getDefaultColumnProfile(); + ArrayList column = new ArrayList<>(); + column.add("1"); + + profile.trackColumn(column); + Assert.assertEquals(profile.getSuccessCount(), 0); + Assert.assertEquals(profile.getFailureCount(), 0); + + ArrayList column2 = new ArrayList<>(); + column2.add(1); + column2.add(2); + column2.add(null); + profile.trackColumn(column2); + Assert.assertEquals(profile.getSuccessCount(), 2); + Assert.assertEquals(profile.getFailureCount(), 0); + } + + @Test + public void testView() { + ColumnProfile profile = getDefaultColumnProfile(); + ArrayList column = new ArrayList<>(); + column.add(1); + column.add(2); + column.add(null); + profile.trackColumn(column); + + ColumnProfileView view = profile.view(); + Assert.assertEquals(view.getSuccesses(), 3); + Assert.assertEquals(view.getFailures(), 0); + Assert.assertEquals(view.getMetrics().size(), 1); + Assert.assertEquals(view.getMetrics().get("ints").getClass(), IntegralMetric.class); + IntegralMetric metric = (IntegralMetric) view.getMetrics().get("ints"); + Assert.assertEquals((int) metric.getMaxComponent().getValue(), 2); + } } diff --git a/java/core/src/test/java/com/whylogs/core/views/TestColumnProfileView.java b/java/core/src/test/java/com/whylogs/core/views/TestColumnProfileView.java index 6b52d558fc..bbd9077ead 100644 --- a/java/core/src/test/java/com/whylogs/core/views/TestColumnProfileView.java +++ b/java/core/src/test/java/com/whylogs/core/views/TestColumnProfileView.java @@ -6,118 +6,116 @@ import com.whylogs.core.metrics.Metric; import 
com.whylogs.core.metrics.MetricConfig; import com.whylogs.core.metrics.components.MetricComponent; +import java.util.*; import org.testng.Assert; -import org.testng.annotations.DataProvider; import org.testng.annotations.Test; -import java.util.*; - @Test public class TestColumnProfileView { - private ColumnProfileView getDefaultColumnProfile(){ - Metric integralMetric = IntegralMetric.zero(new MetricConfig()); - HashMap metrics = new HashMap<>(); - metrics.put(integralMetric.getNamespace(), integralMetric); - - return new ColumnProfileView(metrics); - } - - private ColumnProfileView getChangedSuccessFailProfile(int success, int fail){ - Metric integralMetric = IntegralMetric.zero(new MetricConfig()); - HashMap metrics = new HashMap<>(); - metrics.put(integralMetric.getNamespace(), integralMetric); - - return new ColumnProfileView(metrics, success, fail); - } - - @Test - public void testColumnProfileInit(){ - ColumnProfileView columnProfileView = getDefaultColumnProfile(); - - Assert.assertEquals(columnProfileView.getMetric("ints").get().getClass(), IntegralMetric.class); - Assert.assertEquals(columnProfileView.getFailures(), 0); - Assert.assertEquals(columnProfileView.getSuccesses(), 0); - - - columnProfileView = getChangedSuccessFailProfile(1, 2); - Assert.assertEquals(columnProfileView.getFailures(), 2); - Assert.assertEquals(columnProfileView.getSuccesses(), 1); - - columnProfileView = ColumnProfileView.zero(); - Assert.assertEquals(columnProfileView.getFailures(), 0); - Assert.assertEquals(columnProfileView.getSuccesses(), 0); - } - - @Test - public void testMerge(){ - ColumnProfileView columnProfileView = getDefaultColumnProfile(); - ColumnProfileView columnProfileView2 = getChangedSuccessFailProfile(1, 2); - - ColumnProfileView result = columnProfileView.merge(columnProfileView2); - - Assert.assertEquals(result.getMetric("ints").get().getClass(), IntegralMetric.class); - Assert.assertEquals(result.getFailures(), 2); - 
Assert.assertEquals(result.getSuccesses(), 1); - } - - @Test - public void testMergeWithNull(){ - ColumnProfileView columnProfileView = getDefaultColumnProfile(); - ColumnProfileView result = columnProfileView.merge(null); - - Assert.assertEquals(result.getMetric("ints").get().getClass(), IntegralMetric.class); - Assert.assertEquals(result.getFailures(), 0); - Assert.assertEquals(result.getSuccesses(), 0); - } - - @Test - public void testGetMetricComponentPaths(){ - ColumnProfileView columnProfileView = getDefaultColumnProfile(); - ArrayList paths = columnProfileView.getMetricComponentPaths(); - Assert.assertEquals(paths.size(), 2); - Assert.assertEquals(paths.get(0), "ints/MaxIntegralComponent"); - Assert.assertEquals(paths.get(1), "ints/MinIntegralComponent"); - } - - @Test - public void testGetMetricComponentPathsEmpty(){ - ColumnProfileView columnProfileView = ColumnProfileView.zero(); - ArrayList paths = columnProfileView.getMetricComponentPaths(); - Assert.assertEquals(paths.size(), 0); - } - - @Test - public void testGetMetricComponentPathsNull(){ - ColumnProfileView columnProfileView = getDefaultColumnProfile(); - columnProfileView = columnProfileView.merge(null); - ArrayList paths = columnProfileView.getMetricComponentPaths(); - Assert.assertEquals(paths.size(), 2); - Assert.assertEquals(paths.get(0), "ints/MaxIntegralComponent"); - Assert.assertEquals(paths.get(1), "ints/MinIntegralComponent"); - } - - @Test - public void testToSummaryDict() throws UnsupportedError { - ColumnProfileView columnProfileView = getDefaultColumnProfile(); - HashMap summary = columnProfileView.toSummaryDict(Optional.ofNullable("ints"), Optional.ofNullable(new SummaryConfig())); - Assert.assertEquals(summary.size(), 2); - Assert.assertEquals(summary.get("ints/min"), Integer.MAX_VALUE); - Assert.assertEquals(summary.get("ints/max"), Integer.MIN_VALUE); - - summary = columnProfileView.toSummaryDict(Optional.empty(), Optional.empty()); - Assert.assertEquals(summary.size(), 2); - 
Assert.assertEquals(summary.get("ints/min"), Integer.MAX_VALUE); - Assert.assertEquals(summary.get("ints/max"), Integer.MIN_VALUE); - } - - @Test - public void testGetComponents(){ - ColumnProfileView columnProfileView = getDefaultColumnProfile(); - Map components = columnProfileView.getComponents(); - Assert.assertEquals(components.size(), 2); - Assert.assertEquals(components.get("MinIntegralComponent").getValue(), Integer.MAX_VALUE); - Assert.assertEquals(components.get("MaxIntegralComponent").getValue(), Integer.MIN_VALUE); - } - + private ColumnProfileView getDefaultColumnProfile() { + Metric integralMetric = IntegralMetric.zero(new MetricConfig()); + HashMap metrics = new HashMap<>(); + metrics.put(integralMetric.getNamespace(), integralMetric); + + return new ColumnProfileView(metrics); + } + + private ColumnProfileView getChangedSuccessFailProfile(int success, int fail) { + Metric integralMetric = IntegralMetric.zero(new MetricConfig()); + HashMap metrics = new HashMap<>(); + metrics.put(integralMetric.getNamespace(), integralMetric); + + return new ColumnProfileView(metrics, success, fail); + } + + @Test + public void testColumnProfileInit() { + ColumnProfileView columnProfileView = getDefaultColumnProfile(); + + Assert.assertEquals(columnProfileView.getMetric("ints").get().getClass(), IntegralMetric.class); + Assert.assertEquals(columnProfileView.getFailures(), 0); + Assert.assertEquals(columnProfileView.getSuccesses(), 0); + + columnProfileView = getChangedSuccessFailProfile(1, 2); + Assert.assertEquals(columnProfileView.getFailures(), 2); + Assert.assertEquals(columnProfileView.getSuccesses(), 1); + + columnProfileView = ColumnProfileView.zero(); + Assert.assertEquals(columnProfileView.getFailures(), 0); + Assert.assertEquals(columnProfileView.getSuccesses(), 0); + } + + @Test + public void testMerge() { + ColumnProfileView columnProfileView = getDefaultColumnProfile(); + ColumnProfileView columnProfileView2 = getChangedSuccessFailProfile(1, 2); + + 
ColumnProfileView result = columnProfileView.merge(columnProfileView2); + + Assert.assertEquals(result.getMetric("ints").get().getClass(), IntegralMetric.class); + Assert.assertEquals(result.getFailures(), 2); + Assert.assertEquals(result.getSuccesses(), 1); + } + + @Test + public void testMergeWithNull() { + ColumnProfileView columnProfileView = getDefaultColumnProfile(); + ColumnProfileView result = columnProfileView.merge(null); + + Assert.assertEquals(result.getMetric("ints").get().getClass(), IntegralMetric.class); + Assert.assertEquals(result.getFailures(), 0); + Assert.assertEquals(result.getSuccesses(), 0); + } + + @Test + public void testGetMetricComponentPaths() { + ColumnProfileView columnProfileView = getDefaultColumnProfile(); + ArrayList paths = columnProfileView.getMetricComponentPaths(); + Assert.assertEquals(paths.size(), 2); + Assert.assertEquals(paths.get(0), "ints/MaxIntegralComponent"); + Assert.assertEquals(paths.get(1), "ints/MinIntegralComponent"); + } + + @Test + public void testGetMetricComponentPathsEmpty() { + ColumnProfileView columnProfileView = ColumnProfileView.zero(); + ArrayList paths = columnProfileView.getMetricComponentPaths(); + Assert.assertEquals(paths.size(), 0); + } + + @Test + public void testGetMetricComponentPathsNull() { + ColumnProfileView columnProfileView = getDefaultColumnProfile(); + columnProfileView = columnProfileView.merge(null); + ArrayList paths = columnProfileView.getMetricComponentPaths(); + Assert.assertEquals(paths.size(), 2); + Assert.assertEquals(paths.get(0), "ints/MaxIntegralComponent"); + Assert.assertEquals(paths.get(1), "ints/MinIntegralComponent"); + } + + @Test + public void testToSummaryDict() throws UnsupportedError { + ColumnProfileView columnProfileView = getDefaultColumnProfile(); + HashMap summary = + columnProfileView.toSummaryDict( + Optional.ofNullable("ints"), Optional.ofNullable(new SummaryConfig())); + Assert.assertEquals(summary.size(), 2); + 
Assert.assertEquals(summary.get("ints/min"), Integer.MAX_VALUE); + Assert.assertEquals(summary.get("ints/max"), Integer.MIN_VALUE); + + summary = columnProfileView.toSummaryDict(Optional.empty(), Optional.empty()); + Assert.assertEquals(summary.size(), 2); + Assert.assertEquals(summary.get("ints/min"), Integer.MAX_VALUE); + Assert.assertEquals(summary.get("ints/max"), Integer.MIN_VALUE); + } + + @Test + public void testGetComponents() { + ColumnProfileView columnProfileView = getDefaultColumnProfile(); + Map components = columnProfileView.getComponents(); + Assert.assertEquals(components.size(), 2); + Assert.assertEquals(components.get("MinIntegralComponent").getValue(), Integer.MAX_VALUE); + Assert.assertEquals(components.get("MaxIntegralComponent").getValue(), Integer.MIN_VALUE); + } } diff --git a/java/core/src/test/java/com/whylogs/core/views/TestDatasetProfileView.java b/java/core/src/test/java/com/whylogs/core/views/TestDatasetProfileView.java index 0c9e3c5382..1d10adc0a0 100644 --- a/java/core/src/test/java/com/whylogs/core/views/TestDatasetProfileView.java +++ b/java/core/src/test/java/com/whylogs/core/views/TestDatasetProfileView.java @@ -3,66 +3,69 @@ import com.whylogs.core.metrics.IntegralMetric; import com.whylogs.core.metrics.Metric; import com.whylogs.core.metrics.MetricConfig; -import org.testng.Assert; -import org.testng.annotations.Test; - import java.util.Date; import java.util.HashMap; +import org.testng.Assert; +import org.testng.annotations.Test; public class TestDatasetProfileView { - private DatasetProfileView getDefaultDatasetProfile(){ - HashMap columnProfileViews = new HashMap<>(); - HashMap testMetrics = new HashMap<>(); - testMetrics.put("ints", IntegralMetric.zero(new MetricConfig())); - columnProfileViews.put("test", new ColumnProfileView(testMetrics)); - return new DatasetProfileView(columnProfileViews, new Date(), new Date()); - } + private DatasetProfileView getDefaultDatasetProfile() { + HashMap columnProfileViews = new 
HashMap<>(); + HashMap testMetrics = new HashMap<>(); + testMetrics.put("ints", IntegralMetric.zero(new MetricConfig())); + columnProfileViews.put("test", new ColumnProfileView(testMetrics)); + return new DatasetProfileView(columnProfileViews, new Date(), new Date()); + } - @Test - public void testDatasetProfileViewInit(){ - DatasetProfileView view = new DatasetProfileView(new HashMap(), new Date(), new Date()); - Assert.assertEquals(view.getColumns().size(), 0); + @Test + public void testDatasetProfileViewInit() { + DatasetProfileView view = + new DatasetProfileView(new HashMap(), new Date(), new Date()); + Assert.assertEquals(view.getColumns().size(), 0); - view = getDefaultDatasetProfile(); - Assert.assertEquals(view.getColumns().size(), 1); - Assert.assertNotNull(view.getColumns().get("test").getMetric("ints")); - } + view = getDefaultDatasetProfile(); + Assert.assertEquals(view.getColumns().size(), 1); + Assert.assertNotNull(view.getColumns().get("test").getMetric("ints")); + } - @Test - public void testMerge(){ - DatasetProfileView view = getDefaultDatasetProfile(); - DatasetProfileView view2 = getDefaultDatasetProfile(); - DatasetProfileView result = view.merge(view2); - Assert.assertEquals(result.getColumns().size(), 1); - Assert.assertNotNull(result.getColumns().get("test").getMetric("ints")); - } + @Test + public void testMerge() { + DatasetProfileView view = getDefaultDatasetProfile(); + DatasetProfileView view2 = getDefaultDatasetProfile(); + DatasetProfileView result = view.merge(view2); + Assert.assertEquals(result.getColumns().size(), 1); + Assert.assertNotNull(result.getColumns().get("test").getMetric("ints")); + } - @Test - public void testMergeWithNull(){ - DatasetProfileView view = getDefaultDatasetProfile(); - DatasetProfileView result = view.merge(null); - Assert.assertEquals(result.getColumns().size(), 1); - Assert.assertNotNull(result.getColumns().get("test").getMetric("ints")); - } + @Test + public void testMergeWithNull() { + 
DatasetProfileView view = getDefaultDatasetProfile(); + DatasetProfileView result = view.merge(null); + Assert.assertEquals(result.getColumns().size(), 1); + Assert.assertNotNull(result.getColumns().get("test").getMetric("ints")); + } - @Test - public void testMergeWithEmpty(){ - DatasetProfileView view = getDefaultDatasetProfile(); - DatasetProfileView result = view.merge(new DatasetProfileView(new HashMap(), new Date(), new Date())); - Assert.assertEquals(result.getColumns().size(), 1); - Assert.assertNotNull(result.getColumns().get("test").getMetric("ints")); - } + @Test + public void testMergeWithEmpty() { + DatasetProfileView view = getDefaultDatasetProfile(); + DatasetProfileView result = + view.merge( + new DatasetProfileView( + new HashMap(), new Date(), new Date())); + Assert.assertEquals(result.getColumns().size(), 1); + Assert.assertNotNull(result.getColumns().get("test").getMetric("ints")); + } - @Test - public void testGetColumn(){ - DatasetProfileView view = getDefaultDatasetProfile(); - Assert.assertNotNull(view.getColumn("test")); - } + @Test + public void testGetColumn() { + DatasetProfileView view = getDefaultDatasetProfile(); + Assert.assertNotNull(view.getColumn("test")); + } - @Test - public void testGetColumns(){ - DatasetProfileView view = getDefaultDatasetProfile(); - Assert.assertNotNull(view.getColumns()); - } + @Test + public void testGetColumns() { + DatasetProfileView view = getDefaultDatasetProfile(); + Assert.assertNotNull(view.getColumns()); + } } diff --git a/java/core/src/test/java/com/whylogs/core/views/TestSummaryType.java b/java/core/src/test/java/com/whylogs/core/views/TestSummaryType.java index 12fa04fcc8..744c57498c 100644 --- a/java/core/src/test/java/com/whylogs/core/views/TestSummaryType.java +++ b/java/core/src/test/java/com/whylogs/core/views/TestSummaryType.java @@ -6,9 +6,9 @@ @Test public class TestSummaryType { - @Test - public void testSummaryType() { - Assert.assertEquals(SummaryType.COLUMN.label, "COLUMN"); - 
Assert.assertEquals(SummaryType.DATASET.label, "DATASET"); - } + @Test + public void testSummaryType() { + Assert.assertEquals(SummaryType.COLUMN.label, "COLUMN"); + Assert.assertEquals(SummaryType.DATASET.label, "DATASET"); + } } diff --git a/java/core/src/test/java/com/whylogs/core/views/TestWhylogsMagicUtility.java b/java/core/src/test/java/com/whylogs/core/views/TestWhylogsMagicUtility.java index 7052c85062..45287d0c36 100644 --- a/java/core/src/test/java/com/whylogs/core/views/TestWhylogsMagicUtility.java +++ b/java/core/src/test/java/com/whylogs/core/views/TestWhylogsMagicUtility.java @@ -6,10 +6,11 @@ @Test public class TestWhylogsMagicUtility { - @Test - public void testWhylogsMagicUtility() { - Assert.assertEquals(WhylogsMagicUtility.WHYLOGS_MAGIC_HEADER, "WHY1"); - Assert.assertEquals(WhylogsMagicUtility.WHYLOGS_MAGIC_HEADER_LENGTH, 4); - Assert.assertEquals(WhylogsMagicUtility.WHYLOGS_MAGIC_HEADER_BYTES, new byte[] {87, 72, 89, 49}); - } + @Test + public void testWhylogsMagicUtility() { + Assert.assertEquals(WhylogsMagicUtility.WHYLOGS_MAGIC_HEADER, "WHY1"); + Assert.assertEquals(WhylogsMagicUtility.WHYLOGS_MAGIC_HEADER_LENGTH, 4); + Assert.assertEquals( + WhylogsMagicUtility.WHYLOGS_MAGIC_HEADER_BYTES, new byte[] {87, 72, 89, 49}); + } } From 5989892b7d9f4423c59dd323fae2515280d166c1 Mon Sep 17 00:00:00 2001 From: TheMellyBee Date: Fri, 23 Sep 2022 12:39:06 -0700 Subject: [PATCH 31/71] Testing result set and cleaning up --- .../logger/resultSets/ProfileResultSet.java | 2 +- .../java/com/whylogs/core/DatasetProfile.java | 4 +- .../whylogs/core/schemas/DatasetSchema.java | 17 +++--- .../api/resultsets/TestProfileResultSet.java | 60 +++++++++++++++++++ .../core/schemas/TestDatasetSchema.java | 2 +- 5 files changed, 74 insertions(+), 11 deletions(-) create mode 100644 java/core/src/test/java/com/whylogs/api/resultsets/TestProfileResultSet.java diff --git a/java/core/src/main/java/com/whylogs/api/logger/resultSets/ProfileResultSet.java 
b/java/core/src/main/java/com/whylogs/api/logger/resultSets/ProfileResultSet.java index ff5ab53a5a..e720ff9f06 100644 --- a/java/core/src/main/java/com/whylogs/api/logger/resultSets/ProfileResultSet.java +++ b/java/core/src/main/java/com/whylogs/api/logger/resultSets/ProfileResultSet.java @@ -10,7 +10,7 @@ @EqualsAndHashCode(callSuper = true) @Data public class ProfileResultSet extends ResultSet { - @NonNull private DatasetProfile profile; + @NonNull private final DatasetProfile profile; public ProfileResultSet(DatasetProfile profile) { super(); diff --git a/java/core/src/main/java/com/whylogs/core/DatasetProfile.java b/java/core/src/main/java/com/whylogs/core/DatasetProfile.java index 830cc04348..4a297e2d6d 100644 --- a/java/core/src/main/java/com/whylogs/core/DatasetProfile.java +++ b/java/core/src/main/java/com/whylogs/core/DatasetProfile.java @@ -105,9 +105,9 @@ public void setDatasetTimestamp(ZonedDateTime datasetTimestamp) { private void initializeNewColumns(Set colNames) { for (String column : colNames) { - ColumnSchema columnSchema = this.schema.columns.get(column); + ColumnSchema columnSchema = this.schema.getColumns().get(column); if (columnSchema != null) { - this.columns.put(column, new ColumnProfile(column, columnSchema, this.schema.cache_size)); + this.columns.put(column, new ColumnProfile(column, columnSchema, this.schema.getCacheSize())); } // TODO: log warning 'Encountered a column without schema: %s", col' in an else } diff --git a/java/core/src/main/java/com/whylogs/core/schemas/DatasetSchema.java b/java/core/src/main/java/com/whylogs/core/schemas/DatasetSchema.java index 06a514762b..1d6ea0d0e5 100644 --- a/java/core/src/main/java/com/whylogs/core/schemas/DatasetSchema.java +++ b/java/core/src/main/java/com/whylogs/core/schemas/DatasetSchema.java @@ -6,17 +6,20 @@ import java.util.HashMap; import java.util.Optional; import java.util.Set; + +import lombok.AllArgsConstructor; import lombok.Data; @Data +@AllArgsConstructor public class DatasetSchema 
{ private HashMap types = new HashMap<>(); private final int LARGE_CACHE_SIZE_LIMIT = 1024 * 100; - public HashMap columns; - public MetricConfig defaultConfig; - public Resolver resolver; - public int cache_size = 1024; - public boolean schema_based_automerge = false; + private HashMap columns; + private MetricConfig defaultConfig; + private Resolver resolver; + private int cacheSize = 1024; + private boolean schema_based_automerge = false; public DatasetSchema() { this.columns = new HashMap<>(); @@ -26,12 +29,12 @@ public DatasetSchema() { public DatasetSchema(int cache_size, boolean schema_based_automerge) { this.columns = new HashMap<>(); this.defaultConfig = new MetricConfig(); - this.cache_size = cache_size; + this.cacheSize = cache_size; this.schema_based_automerge = schema_based_automerge; if (cache_size < 0) { // TODO: log warning - this.cache_size = 0; + this.cacheSize = 0; } if (cache_size > LARGE_CACHE_SIZE_LIMIT) { diff --git a/java/core/src/test/java/com/whylogs/api/resultsets/TestProfileResultSet.java b/java/core/src/test/java/com/whylogs/api/resultsets/TestProfileResultSet.java new file mode 100644 index 0000000000..07747e1fdb --- /dev/null +++ b/java/core/src/test/java/com/whylogs/api/resultsets/TestProfileResultSet.java @@ -0,0 +1,60 @@ +package com.whylogs.api.resultsets; + +import com.whylogs.api.logger.resultSets.ProfileResultSet; +import com.whylogs.core.DatasetProfile; + +import com.whylogs.core.errors.Error; +import com.whylogs.core.metrics.IntegralMetric; +import com.whylogs.core.metrics.MetricConfig; +import com.whylogs.core.schemas.DatasetSchema; +import org.testng.Assert; +import org.testng.annotations.Test; + +import java.util.HashMap; +import java.util.InputMismatchException; +import java.util.Optional; + + +@Test +public class TestProfileResultSet { + + private ProfileResultSet defaultResultSet(){ + HashMap data = new HashMap<>(); + data.put("test", 1); + data.put("test2", "2"); + DatasetSchema datasetSchema = new DatasetSchema(); + 
datasetSchema.resolve(data); + + DatasetProfile datasetProfile = new DatasetProfile(Optional.of(datasetSchema), Optional.empty(), Optional.empty()); + return new ProfileResultSet(datasetProfile); + } + + @Test + public void testProfileResultSet() { + ProfileResultSet profileResultSet = defaultResultSet(); + Assert.assertNotNull(profileResultSet); + Assert.assertEquals(profileResultSet.profile().get().getSchema().getColNames().size(), 2); + Assert.assertEquals(profileResultSet.view().get().getColumns().size(), 2); + // TODO: BUG HERE IN DATASET SCHEMA Assert.assertEquals(profileResultSet.view().get().getColumns().get("test").getComponents(), 1); + + + // Test expected error on unknown column name + try { + profileResultSet.addMetric("newTest", IntegralMetric.zero(new MetricConfig())); + } catch (Error error) { + Assert.fail("Error adding metric: " + error.getMessage()); + } catch (InputMismatchException e){ + // expected + } + + // + try { + profileResultSet.addMetric("test", IntegralMetric.zero(new MetricConfig())); + } catch (Error error) { + Assert.fail("Error adding metric: " + error.getMessage()); + } + + Assert.assertEquals(profileResultSet.view().get().getColumns().size(), 2); + Assert.assertEquals(profileResultSet.view().get().getColumns().get("test").getComponents().get("MaxIntegralComponent").getValue(), Integer.MIN_VALUE); + } +} diff --git a/java/core/src/test/java/com/whylogs/core/schemas/TestDatasetSchema.java b/java/core/src/test/java/com/whylogs/core/schemas/TestDatasetSchema.java index 3f9970a6ae..3cd2c9831d 100644 --- a/java/core/src/test/java/com/whylogs/core/schemas/TestDatasetSchema.java +++ b/java/core/src/test/java/com/whylogs/core/schemas/TestDatasetSchema.java @@ -10,7 +10,7 @@ public class TestDatasetSchema { @Test public void test_dataset_schema() { DatasetSchema datasetSchema = new DatasetSchema(); - Assert.assertEquals(datasetSchema.getCache_size(), 1024); + Assert.assertEquals(datasetSchema.getCacheSize(), 1024); HashMap data = new 
HashMap<>(); data.put("test", 1); From fbdc45f0a735e95958e8608cfb0eccd6f489332d Mon Sep 17 00:00:00 2001 From: TheMellyBee Date: Fri, 23 Sep 2022 13:08:35 -0700 Subject: [PATCH 32/71] Removes un needed method --- .../main/java/com/whylogs/core/DatasetProfile.java | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/java/core/src/main/java/com/whylogs/core/DatasetProfile.java b/java/core/src/main/java/com/whylogs/core/DatasetProfile.java index e3701c0c8c..c8df49cae3 100644 --- a/java/core/src/main/java/com/whylogs/core/DatasetProfile.java +++ b/java/core/src/main/java/com/whylogs/core/DatasetProfile.java @@ -44,18 +44,6 @@ public void addMetric(String colName, Metric metric) { this.columns.get(colName).addMetric(metric); } - /* - TODO: I don't beleive we need this in Java? (with the T Object) - public void track(T obj){ - try{ - this.isActive = true; - this.trackCount += 1; - this.doTrack(obj); - } finally { - this.isActive = false; - } - }*/ - public void track(HashMap row) { try { this.isActive = true; From 9c725bf4b9de41e9bc1e1084c75dc4344992111c Mon Sep 17 00:00:00 2001 From: TheMellyBee Date: Fri, 23 Sep 2022 13:14:13 -0700 Subject: [PATCH 33/71] rename variables timestampe to timestamp --- .../src/main/java/com/whylogs/core/DatasetProfile.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/java/core/src/main/java/com/whylogs/core/DatasetProfile.java b/java/core/src/main/java/com/whylogs/core/DatasetProfile.java index c8df49cae3..2f1d13f73f 100644 --- a/java/core/src/main/java/com/whylogs/core/DatasetProfile.java +++ b/java/core/src/main/java/com/whylogs/core/DatasetProfile.java @@ -27,11 +27,11 @@ public class DatasetProfile { public DatasetProfile( Optional datasetSchema, - Optional datasetaTimestampe, - Optional creationTimestampe) { + Optional datasetTimestamp, + Optional creationTimestamp) { this.schema = datasetSchema.orElse(new DatasetSchema()); - this.datasetTimestamp = datasetaTimestampe.orElse(new Date()); - 
this.creationTimestamp = creationTimestampe.orElse(new Date()); + this.datasetTimestamp = datasetTimestamp.orElse(new Date()); + this.creationTimestamp = creationTimestamp.orElse(new Date()); this.columns = new HashMap<>(); this.initializeNewColumns(schema.getColNames()); From d823006d0d537c50151fe55fb4ed278ef954a9b1 Mon Sep 17 00:00:00 2001 From: TheMellyBee Date: Fri, 23 Sep 2022 13:21:44 -0700 Subject: [PATCH 34/71] refactoring name from cachedSize to cacheSize --- .../src/main/java/com/whylogs/core/ColumnProfile.java | 8 ++++---- .../src/test/java/com/whylogs/core/TestColumnProfile.java | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/java/core/src/main/java/com/whylogs/core/ColumnProfile.java b/java/core/src/main/java/com/whylogs/core/ColumnProfile.java index 9e57a5a881..52896c0e91 100644 --- a/java/core/src/main/java/com/whylogs/core/ColumnProfile.java +++ b/java/core/src/main/java/com/whylogs/core/ColumnProfile.java @@ -15,7 +15,7 @@ public class ColumnProfile { // Required private String name; private ColumnSchema schema; - private int cachedSize; + private int cacheSize; // Has Defaults private HashMap metrics; @@ -25,10 +25,10 @@ public class ColumnProfile { private ArrayList cachedValues; - public ColumnProfile(String name, ColumnSchema schema, int cachedSize) { + public ColumnProfile(String name, ColumnSchema schema, int cacheSize) { this.name = name; this.schema = schema; - this.cachedSize = cachedSize; // TODO: add logger for size of cache on column + this.cacheSize = cacheSize; // TODO: add logger for size of cache on column // Defaulted this.metrics = new HashMap<>(); @@ -51,7 +51,7 @@ public void track(HashMap row) { T value = this.projector.apply(row); this.cachedValues.add(value); - if (this.cachedValues.size() >= this.cachedSize) { + if (this.cachedValues.size() >= this.cacheSize) { this.flush(); } } diff --git a/java/core/src/test/java/com/whylogs/core/TestColumnProfile.java 
b/java/core/src/test/java/com/whylogs/core/TestColumnProfile.java index b019d615b5..bd511b47f3 100644 --- a/java/core/src/test/java/com/whylogs/core/TestColumnProfile.java +++ b/java/core/src/test/java/com/whylogs/core/TestColumnProfile.java @@ -30,7 +30,7 @@ public void testColumnProfileInit() { ColumnProfile profile = getDefaultColumnProfile(); Assert.assertEquals(profile.getName(), columnName); Assert.assertEquals(profile.getSchema().getType(), Integer.class); - Assert.assertEquals(profile.getCachedSize(), CACHE_SIZE); + Assert.assertEquals(profile.getCacheSize(), CACHE_SIZE); } @Test @@ -53,7 +53,7 @@ public void testAddMetric() { @Test public void testTrack() { ColumnProfile profile = getDefaultColumnProfile(); - Assert.assertEquals(profile.getCachedSize(), CACHE_SIZE); + Assert.assertEquals(profile.getCacheSize(), CACHE_SIZE); HashMap row = new HashMap<>(); row.put(columnName, 5); @@ -73,7 +73,7 @@ public void testTrack() { @Test public void testTrackNull() { ColumnProfile profile = getDefaultColumnProfile(); - Assert.assertEquals(profile.getCachedSize(), CACHE_SIZE); + Assert.assertEquals(profile.getCacheSize(), CACHE_SIZE); HashMap row = new HashMap<>(); row.put(columnName, 1); From e7daf7749233d4ad67c11005442cd6d189712e10 Mon Sep 17 00:00:00 2001 From: TheMellyBee Date: Fri, 23 Sep 2022 13:37:52 -0700 Subject: [PATCH 35/71] Add copy to max components --- .../java/com/whylogs/core/metrics/IntegralMetric.java | 4 ++-- .../core/metrics/components/IntegralComponent.java | 5 +++++ .../core/metrics/components/MaxIntegralComponent.java | 5 +++++ .../whylogs/core/metrics/components/MetricComponent.java | 6 +++++- .../core/metrics/components/MinIntegralComponent.java | 5 +++++ .../core/metrics/components/TestIntegralComponent.java | 9 ++++++++- .../core/metrics/components/TestMinMaxComponents.java | 8 ++++++++ 7 files changed, 38 insertions(+), 4 deletions(-) diff --git a/java/core/src/main/java/com/whylogs/core/metrics/IntegralMetric.java 
b/java/core/src/main/java/com/whylogs/core/metrics/IntegralMetric.java index edd35cbe96..9fe7c6a823 100644 --- a/java/core/src/main/java/com/whylogs/core/metrics/IntegralMetric.java +++ b/java/core/src/main/java/com/whylogs/core/metrics/IntegralMetric.java @@ -27,8 +27,8 @@ public IntegralMetric() { public IntegralMetric(MaxIntegralComponent maxComponent, MinIntegralComponent minComponent) { this(); - this.maxComponent = maxComponent; - this.minComponent = minComponent; + this.maxComponent = maxComponent.copy(); + this.minComponent = minComponent.copy(); } private void setMax(int max) { diff --git a/java/core/src/main/java/com/whylogs/core/metrics/components/IntegralComponent.java b/java/core/src/main/java/com/whylogs/core/metrics/components/IntegralComponent.java index af04cbaa39..6c85471997 100644 --- a/java/core/src/main/java/com/whylogs/core/metrics/components/IntegralComponent.java +++ b/java/core/src/main/java/com/whylogs/core/metrics/components/IntegralComponent.java @@ -18,4 +18,9 @@ public IntegralComponent(Integer value) { public String getTypeName() { return this.getClass().getSimpleName(); } + + @Override + public MetricComponent copy(){ + return super.copy(); + } } diff --git a/java/core/src/main/java/com/whylogs/core/metrics/components/MaxIntegralComponent.java b/java/core/src/main/java/com/whylogs/core/metrics/components/MaxIntegralComponent.java index ff7c79b036..fe3760a77b 100644 --- a/java/core/src/main/java/com/whylogs/core/metrics/components/MaxIntegralComponent.java +++ b/java/core/src/main/java/com/whylogs/core/metrics/components/MaxIntegralComponent.java @@ -30,6 +30,11 @@ public static MaxIntegralComponent max(Collection list) { return new MaxIntegralComponent(max); } + @Override + public MaxIntegralComponent copy(){ + return new MaxIntegralComponent(this.getValue()); + } + @Override public String getTypeName() { return this.getClass().getSimpleName(); diff --git 
a/java/core/src/main/java/com/whylogs/core/metrics/components/MetricComponent.java b/java/core/src/main/java/com/whylogs/core/metrics/components/MetricComponent.java index 8cc9c03c6c..df4de6cac9 100644 --- a/java/core/src/main/java/com/whylogs/core/metrics/components/MetricComponent.java +++ b/java/core/src/main/java/com/whylogs/core/metrics/components/MetricComponent.java @@ -44,6 +44,10 @@ public String getTypeName() { return this.getClass().getSimpleName(); } + public M copy(){ + return (M) new MetricComponent(value); + } + public MetricComponent merge(MetricComponent other) { // TODO this is where we will use the aggregators throw new NotImplementedException(); @@ -52,7 +56,7 @@ public MetricComponent merge(MetricComponent other) { // TODO to_protobuf // TODO from_protobuf // TODO: add a from_protobuf iwht registries passed in - public static T from_protobuf(MetricComponentMessage message) { + public static M from_protobuf(MetricComponentMessage message) { // TODO: check that it's a MetricComponent dataclass throw new NotImplementedException(); } diff --git a/java/core/src/main/java/com/whylogs/core/metrics/components/MinIntegralComponent.java b/java/core/src/main/java/com/whylogs/core/metrics/components/MinIntegralComponent.java index ca8f8c9799..b6db31021b 100644 --- a/java/core/src/main/java/com/whylogs/core/metrics/components/MinIntegralComponent.java +++ b/java/core/src/main/java/com/whylogs/core/metrics/components/MinIntegralComponent.java @@ -27,6 +27,11 @@ public int getTypeId() { return TYPE_ID; } + @Override + public MinIntegralComponent copy(){ + return new MinIntegralComponent(this.getValue()); + } + public static MinIntegralComponent min(Collection list) { int min = Integer.MAX_VALUE; for (Integer i : list) { diff --git a/java/core/src/test/java/com/whylogs/core/metrics/components/TestIntegralComponent.java b/java/core/src/test/java/com/whylogs/core/metrics/components/TestIntegralComponent.java index bcb77698b3..0246795b26 100644 --- 
a/java/core/src/test/java/com/whylogs/core/metrics/components/TestIntegralComponent.java +++ b/java/core/src/test/java/com/whylogs/core/metrics/components/TestIntegralComponent.java @@ -7,7 +7,7 @@ public class TestIntegralComponent { @Test - public void test_integral() { + public void testIntegral() { IntegralComponent component = new IntegralComponent(1); Assert.assertEquals((int) component.getValue(), 1); @@ -16,4 +16,11 @@ public void test_integral() { Assert.assertEquals(component.getTypeId(), 0); } + + @Test + public void testCopy(){ + IntegralComponent component = new IntegralComponent(1); + Assert.assertEquals((int) component.getValue(), 1); + Assert.assertEquals((int) component.copy().getValue(), 1); + } } diff --git a/java/core/src/test/java/com/whylogs/core/metrics/components/TestMinMaxComponents.java b/java/core/src/test/java/com/whylogs/core/metrics/components/TestMinMaxComponents.java index 68310958f8..6ed6e0afe5 100644 --- a/java/core/src/test/java/com/whylogs/core/metrics/components/TestMinMaxComponents.java +++ b/java/core/src/test/java/com/whylogs/core/metrics/components/TestMinMaxComponents.java @@ -73,6 +73,10 @@ public void testMin(ArrayList input, int expected) { Assert.assertEquals((int) min.getValue(), expected); Assert.assertEquals(min.getTypeId(), MIN_TYPE); + + MinIntegralComponent min2 = min.copy(); + Assert.assertEquals(min2.getTypeId(), MIN_TYPE); + Assert.assertEquals((int) min2.getValue(), expected); } @Test(dataProvider = "max-data-provider") @@ -94,5 +98,9 @@ public void testMax(ArrayList input, int expected) { Assert.assertEquals((int) max.getValue(), expected); Assert.assertEquals(max.getTypeId(), MAX_TYPE); + + MaxIntegralComponent max2 = max.copy(); + Assert.assertEquals(max2.getTypeId(), MAX_TYPE); + Assert.assertEquals((int) max2.getValue(), expected); } } From acf7a781e4d00a403b1f7b8e602dafb51c6c8d4f Mon Sep 17 00:00:00 2001 From: TheMellyBee Date: Fri, 23 Sep 2022 13:39:47 -0700 Subject: [PATCH 36/71] Add no metric config 
zero for IntegralMetric --- .../src/main/java/com/whylogs/core/metrics/IntegralMetric.java | 1 + 1 file changed, 1 insertion(+) diff --git a/java/core/src/main/java/com/whylogs/core/metrics/IntegralMetric.java b/java/core/src/main/java/com/whylogs/core/metrics/IntegralMetric.java index 9fe7c6a823..7f437d145e 100644 --- a/java/core/src/main/java/com/whylogs/core/metrics/IntegralMetric.java +++ b/java/core/src/main/java/com/whylogs/core/metrics/IntegralMetric.java @@ -74,6 +74,7 @@ public HashMap getComponents() { public static IntegralMetric zero(MetricConfig config) { return new IntegralMetric(); } + public static IntegralMetric zero() {return new IntegralMetric();} @Override public HashMap toSummaryDict() { From d7ee6474a12fc045563663020a8e25e1c1c18c9d Mon Sep 17 00:00:00 2001 From: TheMellyBee Date: Fri, 23 Sep 2022 13:48:49 -0700 Subject: [PATCH 37/71] change init to do a shallow copy --- .../src/main/java/com/whylogs/core/views/ColumnProfileView.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/java/core/src/main/java/com/whylogs/core/views/ColumnProfileView.java b/java/core/src/main/java/com/whylogs/core/views/ColumnProfileView.java index d383177aad..281d5b838d 100644 --- a/java/core/src/main/java/com/whylogs/core/views/ColumnProfileView.java +++ b/java/core/src/main/java/com/whylogs/core/views/ColumnProfileView.java @@ -18,7 +18,7 @@ public ColumnProfileView(HashMap metrics) { } public ColumnProfileView(HashMap metrics, int successes, int failures) { - this.metrics = metrics; + this.metrics.putAll(metrics); this.successes = successes; this.failures = failures; } From 3a02a33bae7d56dad733461c40520a6698b488ea Mon Sep 17 00:00:00 2001 From: TheMellyBee Date: Fri, 23 Sep 2022 13:53:32 -0700 Subject: [PATCH 38/71] Update tests --- java/core/src/test/java/com/whylogs/core/TestColumnProfile.java | 1 - .../core/src/test/java/com/whylogs/core/metrics/TestMetric.java | 2 ++ 2 files changed, 2 insertions(+), 1 deletion(-) diff --git 
a/java/core/src/test/java/com/whylogs/core/TestColumnProfile.java b/java/core/src/test/java/com/whylogs/core/TestColumnProfile.java index bd511b47f3..126003ce36 100644 --- a/java/core/src/test/java/com/whylogs/core/TestColumnProfile.java +++ b/java/core/src/test/java/com/whylogs/core/TestColumnProfile.java @@ -36,7 +36,6 @@ public void testColumnProfileInit() { @Test public void testAddMetric() { ColumnProfile profile = getDefaultColumnProfile(); - profile.addMetric(IntegralMetric.zero(new MetricConfig())); Assert.assertEquals(profile.getMetrics().size(), 1); Assert.assertEquals(profile.getMetrics().get("ints").getClass(), IntegralMetric.class); IntegralMetric metric = (IntegralMetric) profile.getMetrics().get("ints"); diff --git a/java/core/src/test/java/com/whylogs/core/metrics/TestMetric.java b/java/core/src/test/java/com/whylogs/core/metrics/TestMetric.java index afc72942b7..569b2c5235 100644 --- a/java/core/src/test/java/com/whylogs/core/metrics/TestMetric.java +++ b/java/core/src/test/java/com/whylogs/core/metrics/TestMetric.java @@ -17,5 +17,7 @@ public void testMetrics() { Assert.assertTrue(metric instanceof IntegralMetric); metric.merge(new IntegralMetric()); } + + Assert.assertEquals(metrics.size(), 2); } } From c0d2dc7a9ba1970d6f51422ab102133f80ed0313 Mon Sep 17 00:00:00 2001 From: TheMellyBee Date: Fri, 23 Sep 2022 14:04:06 -0700 Subject: [PATCH 39/71] Moves all views returns to be unmodifiable --- .../whylogs/core/views/ColumnProfileView.java | 20 ++++++++++--------- .../core/views/DatasetProfileView.java | 6 +++--- .../core/views/TestColumnProfileView.java | 8 ++++---- 3 files changed, 18 insertions(+), 16 deletions(-) diff --git a/java/core/src/main/java/com/whylogs/core/views/ColumnProfileView.java b/java/core/src/main/java/com/whylogs/core/views/ColumnProfileView.java index 281d5b838d..9ec64864af 100644 --- a/java/core/src/main/java/com/whylogs/core/views/ColumnProfileView.java +++ 
b/java/core/src/main/java/com/whylogs/core/views/ColumnProfileView.java @@ -18,7 +18,11 @@ public ColumnProfileView(HashMap metrics) { } public ColumnProfileView(HashMap metrics, int successes, int failures) { - this.metrics.putAll(metrics); + if(metrics == null) { + this.metrics = new HashMap<>(); + } else { + this.metrics.putAll(metrics); + } this.successes = successes; this.failures = failures; } @@ -62,7 +66,7 @@ public static ColumnProfileView zero() { } // TODO: metric needs a getComponentPath - public ArrayList getMetricComponentPaths() { + public List getMetricComponentPaths() { ArrayList paths = new ArrayList<>(); for (String metricName : this.getMetricNames()) { Optional metric = this.getMetric(metricName); @@ -72,7 +76,7 @@ public ArrayList getMetricComponentPaths() { } } } - return paths; + return Collections.unmodifiableList(paths); } public ArrayList getMetricNames() { @@ -101,7 +105,7 @@ public HashMap toSummaryDict( return summary; } - private HashMap getMetricSummaryHelper( + private Map getMetricSummaryHelper( SummaryConfig summaryConfig, Optional maybeMetric) { HashMap result = new HashMap<>(); Metric metric; @@ -113,17 +117,15 @@ private HashMap getMetricSummaryHelper( result.put(fullName, metricSummary.get(componentName)); } } - return result; + return Collections.unmodifiableMap(result); } public Map getComponents() { HashMap result = new HashMap<>(); for (String metricName : this.getMetricNames()) { Optional metric = this.getMetric(metricName); - if (metric.isPresent()) { - result.putAll(metric.get().getComponents()); - } + metric.ifPresent(value -> result.putAll(value.getComponents())); } - return result; + return Collections.unmodifiableMap(result); } } diff --git a/java/core/src/main/java/com/whylogs/core/views/DatasetProfileView.java b/java/core/src/main/java/com/whylogs/core/views/DatasetProfileView.java index 6a73fde75d..c99fda703a 100644 --- a/java/core/src/main/java/com/whylogs/core/views/DatasetProfileView.java +++ 
b/java/core/src/main/java/com/whylogs/core/views/DatasetProfileView.java @@ -45,15 +45,15 @@ public Optional getColumn(String columnName) { return Optional.ofNullable(this.columns.get(columnName)); } - public HashMap getColumns(Optional> colNames) { + public Map getColumns(Optional> colNames) { if (colNames.isPresent()) { HashMap result = new HashMap<>(); for (String colName : colNames.get()) { result.put(colName, this.columns.get(colName)); } - return result; + return Collections.unmodifiableMap(result); } else { - return this.columns; + return Collections.unmodifiableMap(this.columns); } } } diff --git a/java/core/src/test/java/com/whylogs/core/views/TestColumnProfileView.java b/java/core/src/test/java/com/whylogs/core/views/TestColumnProfileView.java index bbd9077ead..993a96c2f5 100644 --- a/java/core/src/test/java/com/whylogs/core/views/TestColumnProfileView.java +++ b/java/core/src/test/java/com/whylogs/core/views/TestColumnProfileView.java @@ -71,7 +71,7 @@ public void testMergeWithNull() { @Test public void testGetMetricComponentPaths() { ColumnProfileView columnProfileView = getDefaultColumnProfile(); - ArrayList paths = columnProfileView.getMetricComponentPaths(); + List paths = columnProfileView.getMetricComponentPaths(); Assert.assertEquals(paths.size(), 2); Assert.assertEquals(paths.get(0), "ints/MaxIntegralComponent"); Assert.assertEquals(paths.get(1), "ints/MinIntegralComponent"); @@ -80,7 +80,7 @@ public void testGetMetricComponentPaths() { @Test public void testGetMetricComponentPathsEmpty() { ColumnProfileView columnProfileView = ColumnProfileView.zero(); - ArrayList paths = columnProfileView.getMetricComponentPaths(); + List paths = columnProfileView.getMetricComponentPaths(); Assert.assertEquals(paths.size(), 0); } @@ -88,7 +88,7 @@ public void testGetMetricComponentPathsEmpty() { public void testGetMetricComponentPathsNull() { ColumnProfileView columnProfileView = getDefaultColumnProfile(); columnProfileView = columnProfileView.merge(null); - 
ArrayList paths = columnProfileView.getMetricComponentPaths(); + List paths = columnProfileView.getMetricComponentPaths(); Assert.assertEquals(paths.size(), 2); Assert.assertEquals(paths.get(0), "ints/MaxIntegralComponent"); Assert.assertEquals(paths.get(1), "ints/MinIntegralComponent"); @@ -99,7 +99,7 @@ public void testToSummaryDict() throws UnsupportedError { ColumnProfileView columnProfileView = getDefaultColumnProfile(); HashMap summary = columnProfileView.toSummaryDict( - Optional.ofNullable("ints"), Optional.ofNullable(new SummaryConfig())); + Optional.of("ints"), Optional.of(new SummaryConfig())); Assert.assertEquals(summary.size(), 2); Assert.assertEquals(summary.get("ints/min"), Integer.MAX_VALUE); Assert.assertEquals(summary.get("ints/max"), Integer.MIN_VALUE); From 62f62b20b1536847ef077b8af2726555d0461767 Mon Sep 17 00:00:00 2001 From: TheMellyBee Date: Fri, 23 Sep 2022 14:18:59 -0700 Subject: [PATCH 40/71] fix init bug in ColumnProfile and java spotless --- .../main/java/com/whylogs/core/metrics/IntegralMetric.java | 5 ++++- .../whylogs/core/metrics/components/IntegralComponent.java | 4 ++-- .../core/metrics/components/MaxIntegralComponent.java | 2 +- .../whylogs/core/metrics/components/MetricComponent.java | 4 ++-- .../core/metrics/components/MinIntegralComponent.java | 2 +- .../java/com/whylogs/core/views/ColumnProfileView.java | 7 +++---- .../core/metrics/components/TestIntegralComponent.java | 2 +- .../java/com/whylogs/core/views/TestColumnProfileView.java | 3 +-- 8 files changed, 15 insertions(+), 14 deletions(-) diff --git a/java/core/src/main/java/com/whylogs/core/metrics/IntegralMetric.java b/java/core/src/main/java/com/whylogs/core/metrics/IntegralMetric.java index 7f437d145e..2deaa12fe5 100644 --- a/java/core/src/main/java/com/whylogs/core/metrics/IntegralMetric.java +++ b/java/core/src/main/java/com/whylogs/core/metrics/IntegralMetric.java @@ -74,7 +74,10 @@ public HashMap getComponents() { public static IntegralMetric zero(MetricConfig 
config) { return new IntegralMetric(); } - public static IntegralMetric zero() {return new IntegralMetric();} + + public static IntegralMetric zero() { + return new IntegralMetric(); + } @Override public HashMap toSummaryDict() { diff --git a/java/core/src/main/java/com/whylogs/core/metrics/components/IntegralComponent.java b/java/core/src/main/java/com/whylogs/core/metrics/components/IntegralComponent.java index 6c85471997..c5e8c3b219 100644 --- a/java/core/src/main/java/com/whylogs/core/metrics/components/IntegralComponent.java +++ b/java/core/src/main/java/com/whylogs/core/metrics/components/IntegralComponent.java @@ -20,7 +20,7 @@ public String getTypeName() { } @Override - public MetricComponent copy(){ - return super.copy(); + public IntegralComponent copy() { + return new IntegralComponent(this.getValue()); } } diff --git a/java/core/src/main/java/com/whylogs/core/metrics/components/MaxIntegralComponent.java b/java/core/src/main/java/com/whylogs/core/metrics/components/MaxIntegralComponent.java index fe3760a77b..f6a03eff59 100644 --- a/java/core/src/main/java/com/whylogs/core/metrics/components/MaxIntegralComponent.java +++ b/java/core/src/main/java/com/whylogs/core/metrics/components/MaxIntegralComponent.java @@ -31,7 +31,7 @@ public static MaxIntegralComponent max(Collection list) { } @Override - public MaxIntegralComponent copy(){ + public MaxIntegralComponent copy() { return new MaxIntegralComponent(this.getValue()); } diff --git a/java/core/src/main/java/com/whylogs/core/metrics/components/MetricComponent.java b/java/core/src/main/java/com/whylogs/core/metrics/components/MetricComponent.java index df4de6cac9..232e2c0c99 100644 --- a/java/core/src/main/java/com/whylogs/core/metrics/components/MetricComponent.java +++ b/java/core/src/main/java/com/whylogs/core/metrics/components/MetricComponent.java @@ -44,8 +44,8 @@ public String getTypeName() { return this.getClass().getSimpleName(); } - public M copy(){ - return (M) new MetricComponent(value); + public 
MetricComponent copy() { + return new MetricComponent(value); } public MetricComponent merge(MetricComponent other) { diff --git a/java/core/src/main/java/com/whylogs/core/metrics/components/MinIntegralComponent.java b/java/core/src/main/java/com/whylogs/core/metrics/components/MinIntegralComponent.java index b6db31021b..805c5ac52d 100644 --- a/java/core/src/main/java/com/whylogs/core/metrics/components/MinIntegralComponent.java +++ b/java/core/src/main/java/com/whylogs/core/metrics/components/MinIntegralComponent.java @@ -28,7 +28,7 @@ public int getTypeId() { } @Override - public MinIntegralComponent copy(){ + public MinIntegralComponent copy() { return new MinIntegralComponent(this.getValue()); } diff --git a/java/core/src/main/java/com/whylogs/core/views/ColumnProfileView.java b/java/core/src/main/java/com/whylogs/core/views/ColumnProfileView.java index 9ec64864af..13f91f928e 100644 --- a/java/core/src/main/java/com/whylogs/core/views/ColumnProfileView.java +++ b/java/core/src/main/java/com/whylogs/core/views/ColumnProfileView.java @@ -18,10 +18,9 @@ public ColumnProfileView(HashMap metrics) { } public ColumnProfileView(HashMap metrics, int successes, int failures) { - if(metrics == null) { - this.metrics = new HashMap<>(); - } else { - this.metrics.putAll(metrics); + this.metrics = new HashMap<>(); + if (metrics != null) { + this.metrics = metrics; } this.successes = successes; this.failures = failures; diff --git a/java/core/src/test/java/com/whylogs/core/metrics/components/TestIntegralComponent.java b/java/core/src/test/java/com/whylogs/core/metrics/components/TestIntegralComponent.java index 0246795b26..67a68145e6 100644 --- a/java/core/src/test/java/com/whylogs/core/metrics/components/TestIntegralComponent.java +++ b/java/core/src/test/java/com/whylogs/core/metrics/components/TestIntegralComponent.java @@ -18,7 +18,7 @@ public void testIntegral() { } @Test - public void testCopy(){ + public void testCopy() { IntegralComponent component = new 
IntegralComponent(1); Assert.assertEquals((int) component.getValue(), 1); Assert.assertEquals((int) component.copy().getValue(), 1); diff --git a/java/core/src/test/java/com/whylogs/core/views/TestColumnProfileView.java b/java/core/src/test/java/com/whylogs/core/views/TestColumnProfileView.java index 993a96c2f5..78cc8ab8f0 100644 --- a/java/core/src/test/java/com/whylogs/core/views/TestColumnProfileView.java +++ b/java/core/src/test/java/com/whylogs/core/views/TestColumnProfileView.java @@ -98,8 +98,7 @@ public void testGetMetricComponentPathsNull() { public void testToSummaryDict() throws UnsupportedError { ColumnProfileView columnProfileView = getDefaultColumnProfile(); HashMap summary = - columnProfileView.toSummaryDict( - Optional.of("ints"), Optional.of(new SummaryConfig())); + columnProfileView.toSummaryDict(Optional.of("ints"), Optional.of(new SummaryConfig())); Assert.assertEquals(summary.size(), 2); Assert.assertEquals(summary.get("ints/min"), Integer.MAX_VALUE); Assert.assertEquals(summary.get("ints/max"), Integer.MIN_VALUE); From 41db21c5aca629298df1eabfe378a80c1062fa90 Mon Sep 17 00:00:00 2001 From: TheMellyBee Date: Fri, 23 Sep 2022 14:33:12 -0700 Subject: [PATCH 41/71] Moving row from Map to Map this was due to a misunderstanding on that parameter --- .../main/java/com/whylogs/core/ColumnProfile.java | 15 +++++++++------ .../java/com/whylogs/core/DatasetProfile.java | 6 +++--- .../com/whylogs/core/SingleFieldProjector.java | 10 +++++++--- .../java/com/whylogs/core/TestColumnProfile.java | 10 +++++----- 4 files changed, 24 insertions(+), 17 deletions(-) diff --git a/java/core/src/main/java/com/whylogs/core/ColumnProfile.java b/java/core/src/main/java/com/whylogs/core/ColumnProfile.java index 52896c0e91..23e3aee387 100644 --- a/java/core/src/main/java/com/whylogs/core/ColumnProfile.java +++ b/java/core/src/main/java/com/whylogs/core/ColumnProfile.java @@ -6,20 +6,23 @@ import com.whylogs.core.views.ColumnProfileView; import java.util.ArrayList; import 
java.util.HashMap; + +import lombok.EqualsAndHashCode; import lombok.Getter; import lombok.ToString; @Getter @ToString +@EqualsAndHashCode public class ColumnProfile { // Required - private String name; - private ColumnSchema schema; - private int cacheSize; + private final String name; + private final ColumnSchema schema; + private final int cacheSize; // Has Defaults private HashMap metrics; - private SingleFieldProjector projector; + private final SingleFieldProjector projector; private int successCount; private int failureCount; @@ -32,7 +35,7 @@ public ColumnProfile(String name, ColumnSchema schema, int cacheSize) { // Defaulted this.metrics = new HashMap<>(); - this.projector = new SingleFieldProjector(name); + this.projector = new SingleFieldProjector(name); this.successCount = 0; this.failureCount = 0; this.cachedValues = new ArrayList<>(); @@ -47,7 +50,7 @@ public void addMetric(Metric metric) { } // TODO: this only gets one not every part of the row. Should projector actually do it multiple? 
- public void track(HashMap row) { + public void track(HashMap row) { T value = this.projector.apply(row); this.cachedValues.add(value); diff --git a/java/core/src/main/java/com/whylogs/core/DatasetProfile.java b/java/core/src/main/java/com/whylogs/core/DatasetProfile.java index 2f1d13f73f..26717fbb27 100644 --- a/java/core/src/main/java/com/whylogs/core/DatasetProfile.java +++ b/java/core/src/main/java/com/whylogs/core/DatasetProfile.java @@ -44,7 +44,7 @@ public void addMetric(String colName, Metric metric) { this.columns.get(colName).addMetric(metric); } - public void track(HashMap row) { + public void track(HashMap row) { try { this.isActive = true; this.trackCount += 1; @@ -54,7 +54,7 @@ public void track(HashMap row) { } } - private void doTrack(HashMap row) { + private void doTrack(HashMap row) { boolean dirty = this.schema.resolve(row); if (dirty) { Set schemaColumnNames = this.schema.getColNames(); @@ -68,7 +68,7 @@ private void doTrack(HashMap row) { } for (String col : row.keySet()) { - ArrayList values = new ArrayList<>(); + ArrayList values = new ArrayList<>(); values.add(row.get(col)); this.columns.get(col).trackColumn(values); } diff --git a/java/core/src/main/java/com/whylogs/core/SingleFieldProjector.java b/java/core/src/main/java/com/whylogs/core/SingleFieldProjector.java index 34c2d0f08a..10e0f38fbf 100644 --- a/java/core/src/main/java/com/whylogs/core/SingleFieldProjector.java +++ b/java/core/src/main/java/com/whylogs/core/SingleFieldProjector.java @@ -6,10 +6,14 @@ @RequiredArgsConstructor @Getter -public class SingleFieldProjector { +public class SingleFieldProjector { private final String columnName; - public T apply(HashMap row) { - return row.get(columnName); + public T apply(HashMap row) { + if(!row.containsKey(this.columnName)) { + throw new IllegalArgumentException("Column " + this.columnName + " not found in row"); + } + + return (T) row.get(this.columnName); } } diff --git 
a/java/core/src/test/java/com/whylogs/core/TestColumnProfile.java b/java/core/src/test/java/com/whylogs/core/TestColumnProfile.java index 126003ce36..1bab1af024 100644 --- a/java/core/src/test/java/com/whylogs/core/TestColumnProfile.java +++ b/java/core/src/test/java/com/whylogs/core/TestColumnProfile.java @@ -14,8 +14,8 @@ @Test public class TestColumnProfile { - private String columnName = "testColumn"; - private int CACHE_SIZE = 2; + private final String columnName = "testColumn"; + private final int CACHE_SIZE = 2; private ColumnProfile getDefaultColumnProfile() { ColumnSchema standardSchema = @@ -54,7 +54,7 @@ public void testTrack() { ColumnProfile profile = getDefaultColumnProfile(); Assert.assertEquals(profile.getCacheSize(), CACHE_SIZE); - HashMap row = new HashMap<>(); + HashMap row = new HashMap<>(); row.put(columnName, 5); row.put("test2", 2); @@ -74,7 +74,7 @@ public void testTrackNull() { ColumnProfile profile = getDefaultColumnProfile(); Assert.assertEquals(profile.getCacheSize(), CACHE_SIZE); - HashMap row = new HashMap<>(); + HashMap row = new HashMap<>(); row.put(columnName, 1); profile.track(row); @@ -93,7 +93,7 @@ public void testTrackNull() { public void testFlush() { ColumnProfile profile = getDefaultColumnProfile(); - HashMap row = new HashMap<>(); + HashMap row = new HashMap<>(); row.put(columnName, 5); profile.track(row); From 4b48821e715a36d88136ce1ed8f16bd58345162a Mon Sep 17 00:00:00 2001 From: TheMellyBee Date: Fri, 23 Sep 2022 14:33:34 -0700 Subject: [PATCH 42/71] linter --- java/core/src/main/java/com/whylogs/core/ColumnProfile.java | 1 - .../src/main/java/com/whylogs/core/SingleFieldProjector.java | 2 +- java/core/src/test/java/com/whylogs/core/TestColumnProfile.java | 2 +- 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/java/core/src/main/java/com/whylogs/core/ColumnProfile.java b/java/core/src/main/java/com/whylogs/core/ColumnProfile.java index 23e3aee387..7d694894ac 100644 --- 
a/java/core/src/main/java/com/whylogs/core/ColumnProfile.java +++ b/java/core/src/main/java/com/whylogs/core/ColumnProfile.java @@ -6,7 +6,6 @@ import com.whylogs.core.views.ColumnProfileView; import java.util.ArrayList; import java.util.HashMap; - import lombok.EqualsAndHashCode; import lombok.Getter; import lombok.ToString; diff --git a/java/core/src/main/java/com/whylogs/core/SingleFieldProjector.java b/java/core/src/main/java/com/whylogs/core/SingleFieldProjector.java index 10e0f38fbf..38f11ae900 100644 --- a/java/core/src/main/java/com/whylogs/core/SingleFieldProjector.java +++ b/java/core/src/main/java/com/whylogs/core/SingleFieldProjector.java @@ -10,7 +10,7 @@ public class SingleFieldProjector { private final String columnName; public T apply(HashMap row) { - if(!row.containsKey(this.columnName)) { + if (!row.containsKey(this.columnName)) { throw new IllegalArgumentException("Column " + this.columnName + " not found in row"); } diff --git a/java/core/src/test/java/com/whylogs/core/TestColumnProfile.java b/java/core/src/test/java/com/whylogs/core/TestColumnProfile.java index 1bab1af024..28cdde81ad 100644 --- a/java/core/src/test/java/com/whylogs/core/TestColumnProfile.java +++ b/java/core/src/test/java/com/whylogs/core/TestColumnProfile.java @@ -15,7 +15,7 @@ @Test public class TestColumnProfile { private final String columnName = "testColumn"; - private final int CACHE_SIZE = 2; + private final int CACHE_SIZE = 2; private ColumnProfile getDefaultColumnProfile() { ColumnSchema standardSchema = From 0cc8949ecd17fa41d3d4913358d9595959cc3073 Mon Sep 17 00:00:00 2001 From: TheMellyBee Date: Mon, 26 Sep 2022 10:42:06 -0700 Subject: [PATCH 43/71] Change to a shallow copy of metrics --- .../src/main/java/com/whylogs/core/views/ColumnProfileView.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/java/core/src/main/java/com/whylogs/core/views/ColumnProfileView.java b/java/core/src/main/java/com/whylogs/core/views/ColumnProfileView.java index 
13f91f928e..59bf5f4ebb 100644 --- a/java/core/src/main/java/com/whylogs/core/views/ColumnProfileView.java +++ b/java/core/src/main/java/com/whylogs/core/views/ColumnProfileView.java @@ -20,7 +20,7 @@ public ColumnProfileView(HashMap metrics) { public ColumnProfileView(HashMap metrics, int successes, int failures) { this.metrics = new HashMap<>(); if (metrics != null) { - this.metrics = metrics; + this.metrics.putAll(metrics); } this.successes = successes; this.failures = failures; From e08bba17929b519babc33f508bf57d15068c6927 Mon Sep 17 00:00:00 2001 From: TheMellyBee Date: Mon, 26 Sep 2022 10:46:06 -0700 Subject: [PATCH 44/71] Cleaning up metric merge --- .../src/main/java/com/whylogs/core/metrics/Metric.java | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/java/core/src/main/java/com/whylogs/core/metrics/Metric.java b/java/core/src/main/java/com/whylogs/core/metrics/Metric.java index 6fc4a231cf..d5ad2b7c20 100644 --- a/java/core/src/main/java/com/whylogs/core/metrics/Metric.java +++ b/java/core/src/main/java/com/whylogs/core/metrics/Metric.java @@ -5,6 +5,7 @@ import com.whylogs.core.metrics.components.MetricComponent; import java.util.HashMap; import lombok.*; +import org.apache.commons.lang3.NotImplementedException; @EqualsAndHashCode @Getter @@ -23,15 +24,15 @@ public abstract class Metric { public abstract HashMap getComponents(); public Metric merge(Metric other) { - Metric merged = this; if (!this.namespace.equals(other.namespace)) { throw new IllegalArgumentException("Cannot merge metrics with different namespaces"); } if (this instanceof IntegralMetric) { - ((IntegralMetric) merged).merge((IntegralMetric) other); + return ((IntegralMetric) this).merge((IntegralMetric) other); } - return merged; + + throw new NotImplementedException("Cannot merge metrics of type " + this.getClass().getName()); } public @NonNull String getNamespace() { From 2b4ca16d4e1f9bb5dbb1d5867c10962c404d2d23 Mon Sep 17 00:00:00 2001 From: TheMellyBee Date: Mon, 
26 Sep 2022 13:56:56 -0700 Subject: [PATCH 45/71] Change from Date to Instance --- .../java/com/whylogs/core/ColumnProfile.java | 1 + .../java/com/whylogs/core/DatasetProfile.java | 22 +++++++++++-------- .../core/views/DatasetProfileView.java | 19 ++++++++++++---- .../core/views/TestDatasetProfileView.java | 9 +++++--- 4 files changed, 35 insertions(+), 16 deletions(-) diff --git a/java/core/src/main/java/com/whylogs/core/ColumnProfile.java b/java/core/src/main/java/com/whylogs/core/ColumnProfile.java index 7d694894ac..e1190b4417 100644 --- a/java/core/src/main/java/com/whylogs/core/ColumnProfile.java +++ b/java/core/src/main/java/com/whylogs/core/ColumnProfile.java @@ -46,6 +46,7 @@ public void addMetric(Metric metric) { } this.metrics.put(metric.getNamespace(), metric); + // TODO: Wouldn't this implement a success count here? } // TODO: this only gets one not every part of the row. Should projector actually do it multiple? diff --git a/java/core/src/main/java/com/whylogs/core/DatasetProfile.java b/java/core/src/main/java/com/whylogs/core/DatasetProfile.java index 26717fbb27..e7015a3eef 100644 --- a/java/core/src/main/java/com/whylogs/core/DatasetProfile.java +++ b/java/core/src/main/java/com/whylogs/core/DatasetProfile.java @@ -5,6 +5,8 @@ import com.whylogs.core.schemas.DatasetSchema; import com.whylogs.core.views.ColumnProfileView; import com.whylogs.core.views.DatasetProfileView; + +import java.time.Instant; import java.time.ZonedDateTime; import java.util.*; import lombok.Getter; @@ -14,24 +16,22 @@ @Getter @ToString public class DatasetProfile { - // TODO: Time zone is all mixed up. Fix public static int _LARGE_CACHE_SIZE_LIMIT = 1024 * 100; private DatasetSchema schema; - // QUESTION: Do we need zones here? Do we just use UTC? 
- private Date datasetTimestamp; - private Date creationTimestamp; + private Instant datasetTimestamp; + private Instant creationTimestamp; private HashMap> columns; private boolean isActive = false; private int trackCount = 0; public DatasetProfile( Optional datasetSchema, - Optional datasetTimestamp, - Optional creationTimestamp) { + Optional datasetTimestamp, + Optional creationTimestamp) { this.schema = datasetSchema.orElse(new DatasetSchema()); - this.datasetTimestamp = datasetTimestamp.orElse(new Date()); - this.creationTimestamp = creationTimestamp.orElse(new Date()); + this.datasetTimestamp = datasetTimestamp.orElse(Instant.now()); + this.creationTimestamp = creationTimestamp.orElse(Instant.now()); this.columns = new HashMap<>(); this.initializeNewColumns(schema.getColNames()); @@ -83,7 +83,11 @@ public void setDatasetTimestamp(ZonedDateTime datasetTimestamp) { if (datasetTimestamp.getZone() == null) { // TODO: log warning if it's not there } - this.datasetTimestamp = Date.from(datasetTimestamp.toInstant()); + this.datasetTimestamp = datasetTimestamp.toInstant(); + } + + public void setDatasetTimestamp(Instant datasetTimestamp) { + this.datasetTimestamp = datasetTimestamp; } private void initializeNewColumns(Set colNames) { diff --git a/java/core/src/main/java/com/whylogs/core/views/DatasetProfileView.java b/java/core/src/main/java/com/whylogs/core/views/DatasetProfileView.java index c99fda703a..62a98a4960 100644 --- a/java/core/src/main/java/com/whylogs/core/views/DatasetProfileView.java +++ b/java/core/src/main/java/com/whylogs/core/views/DatasetProfileView.java @@ -1,18 +1,29 @@ package com.whylogs.core.views; +import java.time.Instant; import java.util.*; -import lombok.AllArgsConstructor; + import lombok.Getter; import lombok.ToString; // TODO: extend writable when we do Protobuf -@AllArgsConstructor @Getter @ToString public class DatasetProfileView { private HashMap columns; - private Date datasetTimestamp; - private Date creationTimestamp; + private 
Instant datasetTimestamp; + private Instant creationTimestamp; + + + public DatasetProfileView(HashMap columns){ + this(columns, Instant.now(), Instant.now()); + } + + public DatasetProfileView(HashMap columns, Instant datasetTimestamp, Instant creationTimestamp) { + this.columns = columns; + this.datasetTimestamp = datasetTimestamp; + this.creationTimestamp = creationTimestamp; + } public DatasetProfileView merge(DatasetProfileView otherView) { if (otherView == null) { diff --git a/java/core/src/test/java/com/whylogs/core/views/TestDatasetProfileView.java b/java/core/src/test/java/com/whylogs/core/views/TestDatasetProfileView.java index 1d10adc0a0..39ac00bc0d 100644 --- a/java/core/src/test/java/com/whylogs/core/views/TestDatasetProfileView.java +++ b/java/core/src/test/java/com/whylogs/core/views/TestDatasetProfileView.java @@ -3,8 +3,11 @@ import com.whylogs.core.metrics.IntegralMetric; import com.whylogs.core.metrics.Metric; import com.whylogs.core.metrics.MetricConfig; + +import java.time.Instant; import java.util.Date; import java.util.HashMap; +import java.util.Optional; import org.testng.Assert; import org.testng.annotations.Test; @@ -15,13 +18,13 @@ private DatasetProfileView getDefaultDatasetProfile() { HashMap testMetrics = new HashMap<>(); testMetrics.put("ints", IntegralMetric.zero(new MetricConfig())); columnProfileViews.put("test", new ColumnProfileView(testMetrics)); - return new DatasetProfileView(columnProfileViews, new Date(), new Date()); + return new DatasetProfileView(columnProfileViews); } @Test public void testDatasetProfileViewInit() { DatasetProfileView view = - new DatasetProfileView(new HashMap(), new Date(), new Date()); + new DatasetProfileView(new HashMap(), Instant.now(), Instant.now()); Assert.assertEquals(view.getColumns().size(), 0); view = getDefaultDatasetProfile(); @@ -52,7 +55,7 @@ public void testMergeWithEmpty() { DatasetProfileView result = view.merge( new DatasetProfileView( - new HashMap(), new Date(), new Date())); + new 
HashMap())); Assert.assertEquals(result.getColumns().size(), 1); Assert.assertNotNull(result.getColumns().get("test").getMetric("ints")); } From 2df442bfcc24b0237af1d3f255429cb5f28d6aa5 Mon Sep 17 00:00:00 2001 From: TheMellyBee Date: Mon, 26 Sep 2022 15:11:35 -0700 Subject: [PATCH 46/71] Adds null count to OperationResult, adds testing. Discovered bug (todos to reference it) --- .../java/com/whylogs/core/ColumnProfile.java | 3 + .../java/com/whylogs/core/DatasetProfile.java | 5 +- .../whylogs/core/metrics/IntegralMetric.java | 2 +- .../whylogs/core/metrics/OperationResult.java | 20 ++- .../com/whylogs/core/TestDatasetProfile.java | 123 ++++++++++++++++++ 5 files changed, 146 insertions(+), 7 deletions(-) create mode 100644 java/core/src/test/java/com/whylogs/core/TestDatasetProfile.java diff --git a/java/core/src/main/java/com/whylogs/core/ColumnProfile.java b/java/core/src/main/java/com/whylogs/core/ColumnProfile.java index e1190b4417..cbd5000f06 100644 --- a/java/core/src/main/java/com/whylogs/core/ColumnProfile.java +++ b/java/core/src/main/java/com/whylogs/core/ColumnProfile.java @@ -24,6 +24,7 @@ public class ColumnProfile { private final SingleFieldProjector projector; private int successCount; private int failureCount; + private int nullCount; private ArrayList cachedValues; @@ -37,6 +38,7 @@ public ColumnProfile(String name, ColumnSchema schema, int cacheSize) { this.projector = new SingleFieldProjector(name); this.successCount = 0; this.failureCount = 0; + this.nullCount = 0; this.cachedValues = new ArrayList<>(); } @@ -73,6 +75,7 @@ public void trackColumn(ArrayList values) { OperationResult result = metric.columnarUpdate(proccessedColumn); this.successCount += result.getSuccesses(); this.failureCount += result.getFailures(); + this.nullCount += result.getNulls(); } } diff --git a/java/core/src/main/java/com/whylogs/core/DatasetProfile.java b/java/core/src/main/java/com/whylogs/core/DatasetProfile.java index e7015a3eef..702dbd5ed6 100644 --- 
a/java/core/src/main/java/com/whylogs/core/DatasetProfile.java +++ b/java/core/src/main/java/com/whylogs/core/DatasetProfile.java @@ -25,6 +25,8 @@ public class DatasetProfile { private boolean isActive = false; private int trackCount = 0; + // TODO: I don't like how this works for customers. I wouldn't want + // TODO: to have to pass the optionals around. We should just use overloading instead public DatasetProfile( Optional datasetSchema, Optional datasetTimestamp, @@ -110,6 +112,7 @@ public DatasetProfileView view() { return new DatasetProfileView(columns, this.datasetTimestamp, this.creationTimestamp); } + // TODO: This isn't working correctly because track with the cache isn't working correctly public void flush() { for (String colName : this.columns.keySet()) { this.columns.get(colName).flush(); @@ -124,7 +127,7 @@ public static String getDefaultPath(Optional path) { } if (!path.get().endsWith("bin")) { - String finalPath = path.get() + defaultPath; + String finalPath = path.get() + "_" + defaultPath; return finalPath; } diff --git a/java/core/src/main/java/com/whylogs/core/metrics/IntegralMetric.java b/java/core/src/main/java/com/whylogs/core/metrics/IntegralMetric.java index 2deaa12fe5..d2a9184f0c 100644 --- a/java/core/src/main/java/com/whylogs/core/metrics/IntegralMetric.java +++ b/java/core/src/main/java/com/whylogs/core/metrics/IntegralMetric.java @@ -60,7 +60,7 @@ public OperationResult columnarUpdate(PreprocessedColumn data) { this.setMax(max_); this.setMin(min_); - return OperationResult.ok(successes); + return OperationResult.status(successes, 0, data.getNullCount()); } @Override diff --git a/java/core/src/main/java/com/whylogs/core/metrics/OperationResult.java b/java/core/src/main/java/com/whylogs/core/metrics/OperationResult.java index 4a23b0b4c3..8b49af8cc4 100644 --- a/java/core/src/main/java/com/whylogs/core/metrics/OperationResult.java +++ b/java/core/src/main/java/com/whylogs/core/metrics/OperationResult.java @@ -8,26 +8,36 @@ public class 
OperationResult { private final int successes; private final int failures; + private final int nulls; + + public static OperationResult nulls(int numberOfNulls){ + return new OperationResult(0, 0, numberOfNulls); + } + + public static OperationResult status(int successes, int failures, int nulls){ + return new OperationResult(successes, failures, nulls); + } public static OperationResult ok(int successes) { - return new OperationResult(successes, 0); + return new OperationResult(successes, 0, 0); } public static OperationResult ok() { - return new OperationResult(1, 0); + return new OperationResult(1, 0, 0); } public static OperationResult failed(int failures) { - return new OperationResult(0, failures); + return new OperationResult(0, failures, 0); } public static OperationResult failed() { - return new OperationResult(0, 1); + return new OperationResult(0, 1, 0); } public OperationResult add(OperationResult other) { int new_successes = this.successes + other.getSuccesses(); int new_failures = this.failures + other.getFailures(); - return new OperationResult(new_successes, new_failures); + int new_nulls = this.nulls + other.getNulls(); + return new OperationResult(new_successes, new_failures, new_nulls); } } diff --git a/java/core/src/test/java/com/whylogs/core/TestDatasetProfile.java b/java/core/src/test/java/com/whylogs/core/TestDatasetProfile.java new file mode 100644 index 0000000000..d6ab294ad4 --- /dev/null +++ b/java/core/src/test/java/com/whylogs/core/TestDatasetProfile.java @@ -0,0 +1,123 @@ +package com.whylogs.core; + +import com.whylogs.core.metrics.IntegralMetric; +import com.whylogs.core.schemas.DatasetSchema; +import com.whylogs.core.views.DatasetProfileView; +import org.testng.Assert; +import org.testng.annotations.Test; + +import java.time.Instant; +import java.util.HashMap; +import java.util.Optional; + +// TODO: Tracking down bug in this. 
Something is weird with how the track happens + +@Test +public class TestDatasetProfile { + private Instant creationTime; + private Instant datasetTime; + + + private DatasetSchema defaultSchema() { + DatasetSchema datasetSchema = new DatasetSchema(); + HashMap data = new HashMap<>(); + // TODO: Double check this dataschema that it's working as expected + data.put("test", 1); + data.put("test2", "2"); + Assert.assertTrue(datasetSchema.resolve(data)); + return datasetSchema; + } + + private DatasetProfile defaultProfile(){ + return new DatasetProfile(Optional.of(defaultSchema()), Optional.empty(), Optional.empty()); + } + + private DatasetProfile customTimeZone(){ + return new DatasetProfile(Optional.of(defaultSchema()), Optional.ofNullable(creationTime), Optional.ofNullable(datasetTime)); + } + + @Test + public void testDatasetTimes(){ + DatasetProfile profile = customTimeZone(); + creationTime = Instant.now(); + datasetTime = Instant.now(); + Assert.assertEquals(profile.getCreationTimestamp().getEpochSecond(), creationTime.getEpochSecond()); + Assert.assertEquals(profile.getDatasetTimestamp().getEpochSecond(), datasetTime.getEpochSecond()); + + datasetTime = Instant.now(); + profile.setDatasetTimestamp(datasetTime); + Assert.assertEquals(profile.getDatasetTimestamp().getEpochSecond(), datasetTime.getEpochSecond()); + } + + @Test + public void testDatasetProfileInit(){ + DatasetProfile profile = defaultProfile(); + Assert.assertEquals(profile.getColumns().size(), 2); + Assert.assertEquals(profile.getColumns().get("test").getFailureCount(), 0); + + Assert.assertTrue(DatasetProfile.getDefaultPath(Optional.of("test")).contains("test_profile")); + Assert.assertEquals(profile.getSchema().getColumns().size(), 2); + Assert.assertEquals(profile.getSchema().getColumns().get("test").getMetrics().size(), 2); + } + + @Test + public void testAddMetric(){ + DatasetProfile profile = defaultProfile(); + profile.addMetric("test", IntegralMetric.zero()); + 
Assert.assertEquals(profile.getColumns().get("test").getMetrics().size(), 1); + Assert.assertEquals(profile.getTrackCount(), 0); // Because we added directly + } + + @Test + public void testTrackData(){ + DatasetProfile profile = defaultProfile(); + HashMap data = new HashMap<>(); + data.put("test", 1); + data.put("test2", "2"); + profile.track(data); + + Assert.assertEquals(profile.getColumns().get("test").getSuccessCount(), 1); + Assert.assertEquals(profile.getTrackCount(), 1); + } + + @Test + public void testTrackNullDate(){ + DatasetProfile profile = defaultProfile(); + HashMap data = new HashMap<>(); + data.put("test", null); + data.put("test2", "2"); + profile.track(data); + + Assert.assertEquals(profile.getColumns().get("test").getCachedValues(), 1); + Assert.assertEquals(profile.getTrackCount(), 1); + } + + @Test + public void testDirty(){ + DatasetProfile profile = defaultProfile(); + HashMap data = new HashMap<>(); + data.put("notSeen", 100); + profile.track(data); + + Assert.assertEquals(profile.getColumns().get("notSeen").getSuccessCount(), 1); + Assert.assertEquals(profile.getTrackCount(), 1); + + } + + public void testView(){ + DatasetProfile profile = defaultProfile(); + DatasetProfileView view = profile.view(); + Assert.assertEquals(view.getColumns().size(), 2); + Assert.assertEquals(view.getColumns().get("test").getMetrics().size(), 0); + } + + public void testFlush(){ + DatasetProfile profile = defaultProfile(); + HashMap data = new HashMap<>(); + data.put("test", 1); + data.put("test2", "2"); + profile.track(data); + profile.flush(); + + } +} From 442209769bfac2c330881519b5390efa67bb8570 Mon Sep 17 00:00:00 2001 From: TheMellyBee Date: Fri, 7 Oct 2022 12:16:46 -0700 Subject: [PATCH 47/71] Fixes issue with metric tracking, a couple sp fixes --- .../src/main/java/com/whylogs/core/ColumnProfile.java | 2 +- .../java/com/whylogs/core/schemas/ColumnSchema.java | 4 ++-- .../java/com/whylogs/core/schemas/DatasetSchema.java | 10 +++++++++- 
.../test/java/com/whylogs/core/TestDatasetProfile.java | 9 ++------- 4 files changed, 14 insertions(+), 11 deletions(-) diff --git a/java/core/src/main/java/com/whylogs/core/ColumnProfile.java b/java/core/src/main/java/com/whylogs/core/ColumnProfile.java index cbd5000f06..4a82b6b6c3 100644 --- a/java/core/src/main/java/com/whylogs/core/ColumnProfile.java +++ b/java/core/src/main/java/com/whylogs/core/ColumnProfile.java @@ -34,7 +34,7 @@ public ColumnProfile(String name, ColumnSchema schema, int cacheSize) { this.cacheSize = cacheSize; // TODO: add logger for size of cache on column // Defaulted - this.metrics = new HashMap<>(); + this.metrics = this.schema.getMetrics(); this.projector = new SingleFieldProjector(name); this.successCount = 0; this.failureCount = 0; diff --git a/java/core/src/main/java/com/whylogs/core/schemas/ColumnSchema.java b/java/core/src/main/java/com/whylogs/core/schemas/ColumnSchema.java index 9a1ef50709..b1aaf1722b 100644 --- a/java/core/src/main/java/com/whylogs/core/schemas/ColumnSchema.java +++ b/java/core/src/main/java/com/whylogs/core/schemas/ColumnSchema.java @@ -12,8 +12,8 @@ @AllArgsConstructor public class ColumnSchema { // Thoughts: we could have this ColumnSchema instead of having it as a member - // bu this might be easier to use? If we did we would need to use the CRTP again - // like in Metric to be able to see the type but also have them in a collection togehter + // by this might be easier to use? 
If we did we would need to use the CRTP again + // like in Metric to be able to see the type but also have them in a collection together private Type type; private MetricConfig config; private Resolver resolver; diff --git a/java/core/src/main/java/com/whylogs/core/schemas/DatasetSchema.java b/java/core/src/main/java/com/whylogs/core/schemas/DatasetSchema.java index 06a514762b..77a22fbcdd 100644 --- a/java/core/src/main/java/com/whylogs/core/schemas/DatasetSchema.java +++ b/java/core/src/main/java/com/whylogs/core/schemas/DatasetSchema.java @@ -6,6 +6,8 @@ import java.util.HashMap; import java.util.Optional; import java.util.Set; + +import com.whylogs.core.resolvers.StandardResolver; import lombok.Data; @Data @@ -19,13 +21,19 @@ public class DatasetSchema { public boolean schema_based_automerge = false; public DatasetSchema() { + this(Optional.empty(), Optional.empty()); + } + + public DatasetSchema(Optional defaultConfig, Optional resolver) { this.columns = new HashMap<>(); - this.defaultConfig = new MetricConfig(); + this.defaultConfig = defaultConfig.orElse(new MetricConfig()); + this.resolver = resolver.orElse(new StandardResolver()); } public DatasetSchema(int cache_size, boolean schema_based_automerge) { this.columns = new HashMap<>(); this.defaultConfig = new MetricConfig(); + this.resolver = new StandardResolver(); this.cache_size = cache_size; this.schema_based_automerge = schema_based_automerge; diff --git a/java/core/src/test/java/com/whylogs/core/TestDatasetProfile.java b/java/core/src/test/java/com/whylogs/core/TestDatasetProfile.java index d6ab294ad4..3193f80bf9 100644 --- a/java/core/src/test/java/com/whylogs/core/TestDatasetProfile.java +++ b/java/core/src/test/java/com/whylogs/core/TestDatasetProfile.java @@ -10,18 +10,15 @@ import java.util.HashMap; import java.util.Optional; -// TODO: Tracking down bug in this. 
Something is weird with how the track happens - @Test public class TestDatasetProfile { private Instant creationTime; private Instant datasetTime; - private DatasetSchema defaultSchema() { DatasetSchema datasetSchema = new DatasetSchema(); HashMap data = new HashMap<>(); - // TODO: Double check this dataschema that it's working as expected + // TODO: Double check this data schema that it's working as expected data.put("test", 1); data.put("test2", "2"); Assert.assertTrue(datasetSchema.resolve(data)); @@ -57,7 +54,7 @@ public void testDatasetProfileInit(){ Assert.assertTrue(DatasetProfile.getDefaultPath(Optional.of("test")).contains("test_profile")); Assert.assertEquals(profile.getSchema().getColumns().size(), 2); - Assert.assertEquals(profile.getSchema().getColumns().get("test").getMetrics().size(), 2); + Assert.assertEquals(profile.getSchema().getColumns().get("test").getMetrics().size(), 1); // THere should only be the IntegralMetric } @Test @@ -101,7 +98,6 @@ public void testDirty(){ Assert.assertEquals(profile.getColumns().get("notSeen").getSuccessCount(), 1); Assert.assertEquals(profile.getTrackCount(), 1); - } public void testView(){ @@ -118,6 +114,5 @@ public void testFlush(){ data.put("test2", "2"); profile.track(data); profile.flush(); - } } From 4d42803f6948c9816a65e1776b2c677d0f9e32fa Mon Sep 17 00:00:00 2001 From: TheMellyBee Date: Fri, 7 Oct 2022 12:23:44 -0700 Subject: [PATCH 48/71] Linter --- .../java/com/whylogs/core/DatasetProfile.java | 1 - .../whylogs/core/metrics/OperationResult.java | 4 +- .../whylogs/core/schemas/DatasetSchema.java | 3 +- .../core/views/DatasetProfileView.java | 9 +- .../com/whylogs/core/TestDatasetProfile.java | 219 +++++++++--------- .../core/views/TestDatasetProfileView.java | 10 +- 6 files changed, 124 insertions(+), 122 deletions(-) diff --git a/java/core/src/main/java/com/whylogs/core/DatasetProfile.java b/java/core/src/main/java/com/whylogs/core/DatasetProfile.java index 702dbd5ed6..1c5c20d1e2 100644 --- 
a/java/core/src/main/java/com/whylogs/core/DatasetProfile.java +++ b/java/core/src/main/java/com/whylogs/core/DatasetProfile.java @@ -5,7 +5,6 @@ import com.whylogs.core.schemas.DatasetSchema; import com.whylogs.core.views.ColumnProfileView; import com.whylogs.core.views.DatasetProfileView; - import java.time.Instant; import java.time.ZonedDateTime; import java.util.*; diff --git a/java/core/src/main/java/com/whylogs/core/metrics/OperationResult.java b/java/core/src/main/java/com/whylogs/core/metrics/OperationResult.java index 8b49af8cc4..14a2ef3ded 100644 --- a/java/core/src/main/java/com/whylogs/core/metrics/OperationResult.java +++ b/java/core/src/main/java/com/whylogs/core/metrics/OperationResult.java @@ -10,11 +10,11 @@ public class OperationResult { private final int failures; private final int nulls; - public static OperationResult nulls(int numberOfNulls){ + public static OperationResult nulls(int numberOfNulls) { return new OperationResult(0, 0, numberOfNulls); } - public static OperationResult status(int successes, int failures, int nulls){ + public static OperationResult status(int successes, int failures, int nulls) { return new OperationResult(successes, failures, nulls); } diff --git a/java/core/src/main/java/com/whylogs/core/schemas/DatasetSchema.java b/java/core/src/main/java/com/whylogs/core/schemas/DatasetSchema.java index 77a22fbcdd..1d438e643d 100644 --- a/java/core/src/main/java/com/whylogs/core/schemas/DatasetSchema.java +++ b/java/core/src/main/java/com/whylogs/core/schemas/DatasetSchema.java @@ -2,12 +2,11 @@ import com.whylogs.core.metrics.MetricConfig; import com.whylogs.core.resolvers.Resolver; +import com.whylogs.core.resolvers.StandardResolver; import java.lang.reflect.Type; import java.util.HashMap; import java.util.Optional; import java.util.Set; - -import com.whylogs.core.resolvers.StandardResolver; import lombok.Data; @Data diff --git a/java/core/src/main/java/com/whylogs/core/views/DatasetProfileView.java 
b/java/core/src/main/java/com/whylogs/core/views/DatasetProfileView.java index 62a98a4960..32113f4b92 100644 --- a/java/core/src/main/java/com/whylogs/core/views/DatasetProfileView.java +++ b/java/core/src/main/java/com/whylogs/core/views/DatasetProfileView.java @@ -2,7 +2,6 @@ import java.time.Instant; import java.util.*; - import lombok.Getter; import lombok.ToString; @@ -14,12 +13,14 @@ public class DatasetProfileView { private Instant datasetTimestamp; private Instant creationTimestamp; - - public DatasetProfileView(HashMap columns){ + public DatasetProfileView(HashMap columns) { this(columns, Instant.now(), Instant.now()); } - public DatasetProfileView(HashMap columns, Instant datasetTimestamp, Instant creationTimestamp) { + public DatasetProfileView( + HashMap columns, + Instant datasetTimestamp, + Instant creationTimestamp) { this.columns = columns; this.datasetTimestamp = datasetTimestamp; this.creationTimestamp = creationTimestamp; diff --git a/java/core/src/test/java/com/whylogs/core/TestDatasetProfile.java b/java/core/src/test/java/com/whylogs/core/TestDatasetProfile.java index 3193f80bf9..0d464d759b 100644 --- a/java/core/src/test/java/com/whylogs/core/TestDatasetProfile.java +++ b/java/core/src/test/java/com/whylogs/core/TestDatasetProfile.java @@ -3,116 +3,123 @@ import com.whylogs.core.metrics.IntegralMetric; import com.whylogs.core.schemas.DatasetSchema; import com.whylogs.core.views.DatasetProfileView; -import org.testng.Assert; -import org.testng.annotations.Test; - import java.time.Instant; import java.util.HashMap; import java.util.Optional; +import org.testng.Assert; +import org.testng.annotations.Test; @Test public class TestDatasetProfile { - private Instant creationTime; - private Instant datasetTime; - - private DatasetSchema defaultSchema() { - DatasetSchema datasetSchema = new DatasetSchema(); - HashMap data = new HashMap<>(); - // TODO: Double check this data schema that it's working as expected - data.put("test", 1); - data.put("test2", 
"2"); - Assert.assertTrue(datasetSchema.resolve(data)); - return datasetSchema; - } - - private DatasetProfile defaultProfile(){ - return new DatasetProfile(Optional.of(defaultSchema()), Optional.empty(), Optional.empty()); - } - - private DatasetProfile customTimeZone(){ - return new DatasetProfile(Optional.of(defaultSchema()), Optional.ofNullable(creationTime), Optional.ofNullable(datasetTime)); - } - - @Test - public void testDatasetTimes(){ - DatasetProfile profile = customTimeZone(); - creationTime = Instant.now(); - datasetTime = Instant.now(); - Assert.assertEquals(profile.getCreationTimestamp().getEpochSecond(), creationTime.getEpochSecond()); - Assert.assertEquals(profile.getDatasetTimestamp().getEpochSecond(), datasetTime.getEpochSecond()); - - datasetTime = Instant.now(); - profile.setDatasetTimestamp(datasetTime); - Assert.assertEquals(profile.getDatasetTimestamp().getEpochSecond(), datasetTime.getEpochSecond()); - } - - @Test - public void testDatasetProfileInit(){ - DatasetProfile profile = defaultProfile(); - Assert.assertEquals(profile.getColumns().size(), 2); - Assert.assertEquals(profile.getColumns().get("test").getFailureCount(), 0); - - Assert.assertTrue(DatasetProfile.getDefaultPath(Optional.of("test")).contains("test_profile")); - Assert.assertEquals(profile.getSchema().getColumns().size(), 2); - Assert.assertEquals(profile.getSchema().getColumns().get("test").getMetrics().size(), 1); // THere should only be the IntegralMetric - } - - @Test - public void testAddMetric(){ - DatasetProfile profile = defaultProfile(); - profile.addMetric("test", IntegralMetric.zero()); - Assert.assertEquals(profile.getColumns().get("test").getMetrics().size(), 1); - Assert.assertEquals(profile.getTrackCount(), 0); // Because we added directly - } - - @Test - public void testTrackData(){ - DatasetProfile profile = defaultProfile(); - HashMap data = new HashMap<>(); - data.put("test", 1); - data.put("test2", "2"); - profile.track(data); - - 
Assert.assertEquals(profile.getColumns().get("test").getSuccessCount(), 1); - Assert.assertEquals(profile.getTrackCount(), 1); - } - - @Test - public void testTrackNullDate(){ - DatasetProfile profile = defaultProfile(); - HashMap data = new HashMap<>(); - data.put("test", null); - data.put("test2", "2"); - profile.track(data); - - Assert.assertEquals(profile.getColumns().get("test").getCachedValues(), 1); - Assert.assertEquals(profile.getTrackCount(), 1); - } - - @Test - public void testDirty(){ - DatasetProfile profile = defaultProfile(); - HashMap data = new HashMap<>(); - data.put("notSeen", 100); - profile.track(data); - - Assert.assertEquals(profile.getColumns().get("notSeen").getSuccessCount(), 1); - Assert.assertEquals(profile.getTrackCount(), 1); - } - - public void testView(){ - DatasetProfile profile = defaultProfile(); - DatasetProfileView view = profile.view(); - Assert.assertEquals(view.getColumns().size(), 2); - Assert.assertEquals(view.getColumns().get("test").getMetrics().size(), 0); - } - - public void testFlush(){ - DatasetProfile profile = defaultProfile(); - HashMap data = new HashMap<>(); - data.put("test", 1); - data.put("test2", "2"); - profile.track(data); - profile.flush(); - } + private Instant creationTime; + private Instant datasetTime; + + private DatasetSchema defaultSchema() { + DatasetSchema datasetSchema = new DatasetSchema(); + HashMap data = new HashMap<>(); + // TODO: Double check this data schema that it's working as expected + data.put("test", 1); + data.put("test2", "2"); + Assert.assertTrue(datasetSchema.resolve(data)); + return datasetSchema; + } + + private DatasetProfile defaultProfile() { + return new DatasetProfile(Optional.of(defaultSchema()), Optional.empty(), Optional.empty()); + } + + private DatasetProfile customTimeZone() { + return new DatasetProfile( + Optional.of(defaultSchema()), + Optional.ofNullable(creationTime), + Optional.ofNullable(datasetTime)); + } + + @Test + public void testDatasetTimes() { + 
DatasetProfile profile = customTimeZone(); + creationTime = Instant.now(); + datasetTime = Instant.now(); + Assert.assertEquals( + profile.getCreationTimestamp().getEpochSecond(), creationTime.getEpochSecond()); + Assert.assertEquals( + profile.getDatasetTimestamp().getEpochSecond(), datasetTime.getEpochSecond()); + + datasetTime = Instant.now(); + profile.setDatasetTimestamp(datasetTime); + Assert.assertEquals( + profile.getDatasetTimestamp().getEpochSecond(), datasetTime.getEpochSecond()); + } + + @Test + public void testDatasetProfileInit() { + DatasetProfile profile = defaultProfile(); + Assert.assertEquals(profile.getColumns().size(), 2); + Assert.assertEquals(profile.getColumns().get("test").getFailureCount(), 0); + + Assert.assertTrue(DatasetProfile.getDefaultPath(Optional.of("test")).contains("test_profile")); + Assert.assertEquals(profile.getSchema().getColumns().size(), 2); + Assert.assertEquals( + profile.getSchema().getColumns().get("test").getMetrics().size(), + 1); // THere should only be the IntegralMetric + } + + @Test + public void testAddMetric() { + DatasetProfile profile = defaultProfile(); + profile.addMetric("test", IntegralMetric.zero()); + Assert.assertEquals(profile.getColumns().get("test").getMetrics().size(), 1); + Assert.assertEquals(profile.getTrackCount(), 0); // Because we added directly + } + + @Test + public void testTrackData() { + DatasetProfile profile = defaultProfile(); + HashMap data = new HashMap<>(); + data.put("test", 1); + data.put("test2", "2"); + profile.track(data); + + Assert.assertEquals(profile.getColumns().get("test").getSuccessCount(), 1); + Assert.assertEquals(profile.getTrackCount(), 1); + } + + @Test + public void testTrackNullDate() { + DatasetProfile profile = defaultProfile(); + HashMap data = new HashMap<>(); + data.put("test", null); + data.put("test2", "2"); + profile.track(data); + + Assert.assertEquals(profile.getColumns().get("test").getCachedValues().size(), 1); + 
Assert.assertEquals(profile.getTrackCount(), 1); + } + + @Test + public void testDirty() { + DatasetProfile profile = defaultProfile(); + HashMap data = new HashMap<>(); + data.put("notSeen", 100); + profile.track(data); + + Assert.assertEquals(profile.getColumns().get("notSeen").getSuccessCount(), 1); + Assert.assertEquals(profile.getTrackCount(), 1); + } + + public void testView() { + DatasetProfile profile = defaultProfile(); + DatasetProfileView view = profile.view(); + Assert.assertEquals(view.getColumns().size(), 2); + Assert.assertEquals(view.getColumns().get("test").getMetrics().size(), 0); + } + + public void testFlush() { + DatasetProfile profile = defaultProfile(); + HashMap data = new HashMap<>(); + data.put("test", 1); + data.put("test2", "2"); + profile.track(data); + profile.flush(); + } } diff --git a/java/core/src/test/java/com/whylogs/core/views/TestDatasetProfileView.java b/java/core/src/test/java/com/whylogs/core/views/TestDatasetProfileView.java index 39ac00bc0d..66353af026 100644 --- a/java/core/src/test/java/com/whylogs/core/views/TestDatasetProfileView.java +++ b/java/core/src/test/java/com/whylogs/core/views/TestDatasetProfileView.java @@ -3,11 +3,8 @@ import com.whylogs.core.metrics.IntegralMetric; import com.whylogs.core.metrics.Metric; import com.whylogs.core.metrics.MetricConfig; - import java.time.Instant; -import java.util.Date; import java.util.HashMap; -import java.util.Optional; import org.testng.Assert; import org.testng.annotations.Test; @@ -24,7 +21,8 @@ private DatasetProfileView getDefaultDatasetProfile() { @Test public void testDatasetProfileViewInit() { DatasetProfileView view = - new DatasetProfileView(new HashMap(), Instant.now(), Instant.now()); + new DatasetProfileView( + new HashMap(), Instant.now(), Instant.now()); Assert.assertEquals(view.getColumns().size(), 0); view = getDefaultDatasetProfile(); @@ -53,9 +51,7 @@ public void testMergeWithNull() { public void testMergeWithEmpty() { DatasetProfileView view = 
getDefaultDatasetProfile(); DatasetProfileView result = - view.merge( - new DatasetProfileView( - new HashMap())); + view.merge(new DatasetProfileView(new HashMap())); Assert.assertEquals(result.getColumns().size(), 1); Assert.assertNotNull(result.getColumns().get("test").getMetric("ints")); } From 0e9ede1fb710a199e79e97c7df787a289f9b59ae Mon Sep 17 00:00:00 2001 From: TheMellyBee Date: Fri, 7 Oct 2022 12:39:01 -0700 Subject: [PATCH 49/71] Adds autoclosable on columnprofile and linter --- .../core/src/main/java/com/whylogs/core/ColumnProfile.java | 7 ++++++- .../src/main/java/com/whylogs/core/DatasetProfile.java | 3 ++- .../main/java/com/whylogs/core/metrics/IntegralMetric.java | 2 +- .../src/test/java/com/whylogs/core/metrics/TestMetric.java | 2 +- 4 files changed, 10 insertions(+), 4 deletions(-) diff --git a/java/core/src/main/java/com/whylogs/core/ColumnProfile.java b/java/core/src/main/java/com/whylogs/core/ColumnProfile.java index 4a82b6b6c3..5cfaa9f989 100644 --- a/java/core/src/main/java/com/whylogs/core/ColumnProfile.java +++ b/java/core/src/main/java/com/whylogs/core/ColumnProfile.java @@ -13,7 +13,7 @@ @Getter @ToString @EqualsAndHashCode -public class ColumnProfile { +public class ColumnProfile implements AutoCloseable { // Required private final String name; private final ColumnSchema schema; @@ -83,4 +83,9 @@ public ColumnProfileView view() { this.flush(); return new ColumnProfileView(this.metrics, this.successCount, this.failureCount); } + + @Override + public void close() throws Exception { + this.flush(); + } } diff --git a/java/core/src/main/java/com/whylogs/core/DatasetProfile.java b/java/core/src/main/java/com/whylogs/core/DatasetProfile.java index 1c5c20d1e2..f6ffec8a97 100644 --- a/java/core/src/main/java/com/whylogs/core/DatasetProfile.java +++ b/java/core/src/main/java/com/whylogs/core/DatasetProfile.java @@ -68,8 +68,9 @@ private void doTrack(HashMap row) { this.initializeNewColumns(newColumnNames); } + ArrayList values = new ArrayList<>(); 
for (String col : row.keySet()) { - ArrayList values = new ArrayList<>(); + values = new ArrayList<>(); values.add(row.get(col)); this.columns.get(col).trackColumn(values); } diff --git a/java/core/src/main/java/com/whylogs/core/metrics/IntegralMetric.java b/java/core/src/main/java/com/whylogs/core/metrics/IntegralMetric.java index d2a9184f0c..75ec2e7291 100644 --- a/java/core/src/main/java/com/whylogs/core/metrics/IntegralMetric.java +++ b/java/core/src/main/java/com/whylogs/core/metrics/IntegralMetric.java @@ -76,7 +76,7 @@ public static IntegralMetric zero(MetricConfig config) { } public static IntegralMetric zero() { - return new IntegralMetric(); + return IntegralMetric.zero(new MetricConfig()); } @Override diff --git a/java/core/src/test/java/com/whylogs/core/metrics/TestMetric.java b/java/core/src/test/java/com/whylogs/core/metrics/TestMetric.java index 569b2c5235..f9aaa6ac98 100644 --- a/java/core/src/test/java/com/whylogs/core/metrics/TestMetric.java +++ b/java/core/src/test/java/com/whylogs/core/metrics/TestMetric.java @@ -14,8 +14,8 @@ public void testMetrics() { metrics.add(IntegralMetric.zero(new MetricConfig())); for (Metric metric : metrics) { - Assert.assertTrue(metric instanceof IntegralMetric); metric.merge(new IntegralMetric()); + Assert.assertTrue(metric instanceof IntegralMetric); } Assert.assertEquals(metrics.size(), 2); From 0ea402974df8c436105c37cec376cce271c5e05c Mon Sep 17 00:00:00 2001 From: TheMellyBee Date: Fri, 7 Oct 2022 13:12:11 -0700 Subject: [PATCH 50/71] Fixes tests and spotlessapply --- .../src/test/java/com/whylogs/core/TestDatasetProfile.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/java/core/src/test/java/com/whylogs/core/TestDatasetProfile.java b/java/core/src/test/java/com/whylogs/core/TestDatasetProfile.java index 0d464d759b..9cd3f2f5a5 100644 --- a/java/core/src/test/java/com/whylogs/core/TestDatasetProfile.java +++ b/java/core/src/test/java/com/whylogs/core/TestDatasetProfile.java @@ -92,7 
+92,7 @@ public void testTrackNullDate() { data.put("test2", "2"); profile.track(data); - Assert.assertEquals(profile.getColumns().get("test").getCachedValues().size(), 1); + Assert.assertEquals(profile.getColumns().get("test").getNullCount(), 1); Assert.assertEquals(profile.getTrackCount(), 1); } @@ -111,7 +111,7 @@ public void testView() { DatasetProfile profile = defaultProfile(); DatasetProfileView view = profile.view(); Assert.assertEquals(view.getColumns().size(), 2); - Assert.assertEquals(view.getColumns().get("test").getMetrics().size(), 0); + Assert.assertEquals(view.getColumns().get("test").getMetrics().size(), 1); } public void testFlush() { From 66c35bd1a2d8f82d380e549d99292d6a970a54dd Mon Sep 17 00:00:00 2001 From: TheMellyBee Date: Fri, 7 Oct 2022 14:50:23 -0700 Subject: [PATCH 51/71] Changing metric to change the Metric to be the start of the CRTP instead of AbstractMetric.java --- .../java/com/whylogs/core/ColumnProfile.java | 8 +++---- .../whylogs/core/metrics/AbstractMetric.java | 16 ------------- .../whylogs/core/metrics/IntegralMetric.java | 9 +++---- .../java/com/whylogs/core/metrics/Metric.java | 15 ++---------- .../com/whylogs/core/resolvers/Resolver.java | 2 +- .../core/resolvers/StandardResolver.java | 4 ++-- .../whylogs/core/schemas/ColumnSchema.java | 2 +- .../whylogs/core/views/ColumnProfileView.java | 24 +++++++++---------- .../core/resolvers/TestStandardResolver.java | 2 +- .../core/views/TestColumnProfileView.java | 8 +++---- .../core/views/TestDatasetProfileView.java | 2 +- 11 files changed, 33 insertions(+), 59 deletions(-) delete mode 100644 java/core/src/main/java/com/whylogs/core/metrics/AbstractMetric.java diff --git a/java/core/src/main/java/com/whylogs/core/ColumnProfile.java b/java/core/src/main/java/com/whylogs/core/ColumnProfile.java index 5cfaa9f989..709c44366f 100644 --- a/java/core/src/main/java/com/whylogs/core/ColumnProfile.java +++ b/java/core/src/main/java/com/whylogs/core/ColumnProfile.java @@ -20,7 +20,7 @@ public 
class ColumnProfile implements AutoCloseable { private final int cacheSize; // Has Defaults - private HashMap metrics; + private HashMap> metrics; private final SingleFieldProjector projector; private int successCount; private int failureCount; @@ -35,14 +35,14 @@ public ColumnProfile(String name, ColumnSchema schema, int cacheSize) { // Defaulted this.metrics = this.schema.getMetrics(); - this.projector = new SingleFieldProjector(name); + this.projector = new SingleFieldProjector<>(name); this.successCount = 0; this.failureCount = 0; this.nullCount = 0; this.cachedValues = new ArrayList<>(); } - public void addMetric(Metric metric) { + public void addMetric(Metric metric) { if (this.metrics.containsKey(metric.getNamespace())) { // TODO: Add logger with warning about replacement } @@ -71,7 +71,7 @@ public void flush() { public void trackColumn(ArrayList values) { PreprocessedColumn proccessedColumn = PreprocessedColumn.apply(values); - for (Metric metric : this.metrics.values()) { + for (Metric metric : this.metrics.values()) { OperationResult result = metric.columnarUpdate(proccessedColumn); this.successCount += result.getSuccesses(); this.failureCount += result.getFailures(); diff --git a/java/core/src/main/java/com/whylogs/core/metrics/AbstractMetric.java b/java/core/src/main/java/com/whylogs/core/metrics/AbstractMetric.java deleted file mode 100644 index 2f2bf5d4f1..0000000000 --- a/java/core/src/main/java/com/whylogs/core/metrics/AbstractMetric.java +++ /dev/null @@ -1,16 +0,0 @@ -package com.whylogs.core.metrics; - -import lombok.NonNull; - -// Wrapper allows for the use of the Metric class be in collections without losing -// the type. This allows for the merge and other methods to return the correct type. 
-public abstract class AbstractMetric extends Metric { - - public AbstractMetric(@NonNull String namespace) { - super(namespace); - } - - public abstract TSubclass merge(TSubclass other); - // public abstract TSubclass fromProtobuf(MetricMessage message); TODO: this will need to be moved - // to a factory -} diff --git a/java/core/src/main/java/com/whylogs/core/metrics/IntegralMetric.java b/java/core/src/main/java/com/whylogs/core/metrics/IntegralMetric.java index 75ec2e7291..e62f4980b2 100644 --- a/java/core/src/main/java/com/whylogs/core/metrics/IntegralMetric.java +++ b/java/core/src/main/java/com/whylogs/core/metrics/IntegralMetric.java @@ -13,7 +13,7 @@ @Getter @EqualsAndHashCode(callSuper = false) -public class IntegralMetric extends AbstractMetric { +public class IntegralMetric extends Metric { public static final String NAMESPACE = "ints"; private MaxIntegralComponent maxComponent; private MinIntegralComponent minComponent; @@ -95,7 +95,7 @@ public HashMap toSummaryDict(SummaryConfig config) { } @Override - public IntegralMetric merge(IntegralMetric other) { + public IntegralMetric merge(Metric other) { if (!this.getNamespace().equals(other.getNamespace())) { throw new IllegalArgumentException( "Cannot merge IntegralMetrics with different namespaces:" @@ -104,8 +104,9 @@ public IntegralMetric merge(IntegralMetric other) { + other.getNamespace()); } - int max = Integer.max(this.maxComponent.getValue(), other.maxComponent.getValue()); - int min = Integer.min(this.minComponent.getValue(), other.minComponent.getValue()); + IntegralMetric other_ = (IntegralMetric) other; + int max = Integer.max(this.maxComponent.getValue(), other_.maxComponent.getValue()); + int min = Integer.min(this.minComponent.getValue(), other_.minComponent.getValue()); return new IntegralMetric(new MaxIntegralComponent(max), new MinIntegralComponent(min)); } diff --git a/java/core/src/main/java/com/whylogs/core/metrics/Metric.java 
b/java/core/src/main/java/com/whylogs/core/metrics/Metric.java index d5ad2b7c20..5f45e5b489 100644 --- a/java/core/src/main/java/com/whylogs/core/metrics/Metric.java +++ b/java/core/src/main/java/com/whylogs/core/metrics/Metric.java @@ -5,13 +5,12 @@ import com.whylogs.core.metrics.components.MetricComponent; import java.util.HashMap; import lombok.*; -import org.apache.commons.lang3.NotImplementedException; @EqualsAndHashCode @Getter @Setter @RequiredArgsConstructor -public abstract class Metric { +public abstract class Metric { @NonNull private String namespace; @@ -23,17 +22,7 @@ public abstract class Metric { public abstract HashMap getComponents(); - public Metric merge(Metric other) { - if (!this.namespace.equals(other.namespace)) { - throw new IllegalArgumentException("Cannot merge metrics with different namespaces"); - } - - if (this instanceof IntegralMetric) { - return ((IntegralMetric) this).merge((IntegralMetric) other); - } - - throw new NotImplementedException("Cannot merge metrics of type " + this.getClass().getName()); - } + public abstract TSubclass merge(Metric other); public @NonNull String getNamespace() { return namespace; diff --git a/java/core/src/main/java/com/whylogs/core/resolvers/Resolver.java b/java/core/src/main/java/com/whylogs/core/resolvers/Resolver.java index 25fd280481..e2a6b79ff4 100644 --- a/java/core/src/main/java/com/whylogs/core/resolvers/Resolver.java +++ b/java/core/src/main/java/com/whylogs/core/resolvers/Resolver.java @@ -6,5 +6,5 @@ public abstract class Resolver { - public abstract HashMap resolve(ColumnSchema schema); + public abstract HashMap> resolve(ColumnSchema schema); } diff --git a/java/core/src/main/java/com/whylogs/core/resolvers/StandardResolver.java b/java/core/src/main/java/com/whylogs/core/resolvers/StandardResolver.java index 3b8ffede04..62cd7e15bb 100644 --- a/java/core/src/main/java/com/whylogs/core/resolvers/StandardResolver.java +++ 
b/java/core/src/main/java/com/whylogs/core/resolvers/StandardResolver.java @@ -14,8 +14,8 @@ public StandardResolver() { // TODO: the rest of the metrics need implmeented @Override - public HashMap resolve(ColumnSchema schema) { - HashMap resolvedMetrics = new HashMap<>(); + public HashMap> resolve(ColumnSchema schema) { + HashMap> resolvedMetrics = new HashMap<>(); if (DataTypes.Integral.includes(schema.getType())) { resolvedMetrics.put(IntegralMetric.NAMESPACE, IntegralMetric.zero(schema.getConfig())); diff --git a/java/core/src/main/java/com/whylogs/core/schemas/ColumnSchema.java b/java/core/src/main/java/com/whylogs/core/schemas/ColumnSchema.java index b1aaf1722b..a7f55cf5da 100644 --- a/java/core/src/main/java/com/whylogs/core/schemas/ColumnSchema.java +++ b/java/core/src/main/java/com/whylogs/core/schemas/ColumnSchema.java @@ -18,7 +18,7 @@ public class ColumnSchema { private MetricConfig config; private Resolver resolver; - public HashMap getMetrics() { + public HashMap> getMetrics() { return this.resolver.resolve(this); } } diff --git a/java/core/src/main/java/com/whylogs/core/views/ColumnProfileView.java b/java/core/src/main/java/com/whylogs/core/views/ColumnProfileView.java index 59bf5f4ebb..c724f90dc6 100644 --- a/java/core/src/main/java/com/whylogs/core/views/ColumnProfileView.java +++ b/java/core/src/main/java/com/whylogs/core/views/ColumnProfileView.java @@ -9,15 +9,15 @@ @Getter public class ColumnProfileView { - private HashMap metrics; + private HashMap> metrics; private int successes = 0; private int failures = 0; - public ColumnProfileView(HashMap metrics) { + public ColumnProfileView(HashMap> metrics) { this.metrics = metrics; } - public ColumnProfileView(HashMap metrics, int successes, int failures) { + public ColumnProfileView(HashMap> metrics, int successes, int failures) { this.metrics = new HashMap<>(); if (metrics != null) { this.metrics.putAll(metrics); @@ -36,12 +36,12 @@ public ColumnProfileView merge(ColumnProfileView otherView) { 
allMetricNames.addAll(this.metrics.keySet()); allMetricNames.addAll(otherView.metrics.keySet()); - HashMap mergedMetrics = new HashMap<>(); + HashMap> mergedMetrics = new HashMap<>(); for (String metricName : allMetricNames) { - Metric thisMetric = this.metrics.get(metricName); - Metric otherMetric = otherView.metrics.get(metricName); + Metric thisMetric = this.metrics.get(metricName); + Metric otherMetric = otherView.metrics.get(metricName); - Metric result = thisMetric; + Metric result = thisMetric; if (thisMetric != null && otherMetric != null) { result = thisMetric.merge(otherMetric); @@ -56,7 +56,7 @@ public ColumnProfileView merge(ColumnProfileView otherView) { mergedMetrics, this.successes + otherView.successes, this.failures + otherView.failures); } - public Optional getMetric(String metricName) { + public Optional> getMetric(String metricName) { return Optional.ofNullable(this.metrics.get(metricName)); } @@ -68,7 +68,7 @@ public static ColumnProfileView zero() { public List getMetricComponentPaths() { ArrayList paths = new ArrayList<>(); for (String metricName : this.getMetricNames()) { - Optional metric = this.getMetric(metricName); + Optional> metric = this.getMetric(metricName); if (metric.isPresent()) { for (String componentName : metric.get().getComponents().keySet()) { paths.add(metricName + "/" + componentName); @@ -105,9 +105,9 @@ public HashMap toSummaryDict( } private Map getMetricSummaryHelper( - SummaryConfig summaryConfig, Optional maybeMetric) { + SummaryConfig summaryConfig, Optional> maybeMetric) { HashMap result = new HashMap<>(); - Metric metric; + Metric metric; if (maybeMetric.isPresent()) { metric = maybeMetric.get(); HashMap metricSummary = metric.toSummaryDict(summaryConfig); @@ -122,7 +122,7 @@ private Map getMetricSummaryHelper( public Map getComponents() { HashMap result = new HashMap<>(); for (String metricName : this.getMetricNames()) { - Optional metric = this.getMetric(metricName); + Optional> metric = 
this.getMetric(metricName); metric.ifPresent(value -> result.putAll(value.getComponents())); } return Collections.unmodifiableMap(result); diff --git a/java/core/src/test/java/com/whylogs/core/resolvers/TestStandardResolver.java b/java/core/src/test/java/com/whylogs/core/resolvers/TestStandardResolver.java index 2fd891031c..511ebd854f 100644 --- a/java/core/src/test/java/com/whylogs/core/resolvers/TestStandardResolver.java +++ b/java/core/src/test/java/com/whylogs/core/resolvers/TestStandardResolver.java @@ -15,7 +15,7 @@ public class TestStandardResolver { public void test_integralInput() { StandardResolver resolver = new StandardResolver(); ColumnSchema columnSchema = new ColumnSchema(Integer.class, new MetricConfig(), resolver); - HashMap metrics = resolver.resolve(columnSchema); + HashMap> metrics = resolver.resolve(columnSchema); Assert.assertEquals(metrics.get("ints").getClass(), IntegralMetric.class); } diff --git a/java/core/src/test/java/com/whylogs/core/views/TestColumnProfileView.java b/java/core/src/test/java/com/whylogs/core/views/TestColumnProfileView.java index 78cc8ab8f0..4cc079908b 100644 --- a/java/core/src/test/java/com/whylogs/core/views/TestColumnProfileView.java +++ b/java/core/src/test/java/com/whylogs/core/views/TestColumnProfileView.java @@ -14,16 +14,16 @@ public class TestColumnProfileView { private ColumnProfileView getDefaultColumnProfile() { - Metric integralMetric = IntegralMetric.zero(new MetricConfig()); - HashMap metrics = new HashMap<>(); + Metric integralMetric = IntegralMetric.zero(new MetricConfig()); + HashMap> metrics = new HashMap<>(); metrics.put(integralMetric.getNamespace(), integralMetric); return new ColumnProfileView(metrics); } private ColumnProfileView getChangedSuccessFailProfile(int success, int fail) { - Metric integralMetric = IntegralMetric.zero(new MetricConfig()); - HashMap metrics = new HashMap<>(); + Metric integralMetric = IntegralMetric.zero(new MetricConfig()); + HashMap> metrics = new HashMap<>(); 
metrics.put(integralMetric.getNamespace(), integralMetric); return new ColumnProfileView(metrics, success, fail); diff --git a/java/core/src/test/java/com/whylogs/core/views/TestDatasetProfileView.java b/java/core/src/test/java/com/whylogs/core/views/TestDatasetProfileView.java index 66353af026..bf920d888d 100644 --- a/java/core/src/test/java/com/whylogs/core/views/TestDatasetProfileView.java +++ b/java/core/src/test/java/com/whylogs/core/views/TestDatasetProfileView.java @@ -12,7 +12,7 @@ public class TestDatasetProfileView { private DatasetProfileView getDefaultDatasetProfile() { HashMap columnProfileViews = new HashMap<>(); - HashMap testMetrics = new HashMap<>(); + HashMap> testMetrics = new HashMap<>(); testMetrics.put("ints", IntegralMetric.zero(new MetricConfig())); columnProfileViews.put("test", new ColumnProfileView(testMetrics)); return new DatasetProfileView(columnProfileViews); From 4c4ff50c4c3c146b3d2f210d98ac5616b2508379 Mon Sep 17 00:00:00 2001 From: TheMellyBee Date: Tue, 11 Oct 2022 14:46:09 -0700 Subject: [PATCH 52/71] Finishes merge, linted, and finished the tests --- .../api/logger/resultSets/ResultSet.java | 2 +- .../java/com/whylogs/core/DatasetProfile.java | 12 +-- .../core/views/DatasetProfileView.java | 2 +- .../api/resultsets/TestProfileResultSet.java | 99 +++++++++++-------- .../api/resultsets/TestViewResultSet.java | 21 ++-- 5 files changed, 82 insertions(+), 54 deletions(-) diff --git a/java/core/src/main/java/com/whylogs/api/logger/resultSets/ResultSet.java b/java/core/src/main/java/com/whylogs/api/logger/resultSets/ResultSet.java index 10cb25011c..70787d6fef 100644 --- a/java/core/src/main/java/com/whylogs/api/logger/resultSets/ResultSet.java +++ b/java/core/src/main/java/com/whylogs/api/logger/resultSets/ResultSet.java @@ -30,7 +30,7 @@ public abstract class ResultSet { // TODO: Come back for ModelPerformanceMetrics // Question: why is the python addMetrics when it only adds the one? 
- public void addMetric(String name, Metric metric) throws Error { + public void addMetric(String name, Metric metric) throws Error { if (!this.profile().isPresent()) { throw new Error( "Cannot add " + name + " metric " + metric + " to a result set without a profile"); diff --git a/java/core/src/main/java/com/whylogs/core/DatasetProfile.java b/java/core/src/main/java/com/whylogs/core/DatasetProfile.java index 65596735e1..2b5e8d1e50 100644 --- a/java/core/src/main/java/com/whylogs/core/DatasetProfile.java +++ b/java/core/src/main/java/com/whylogs/core/DatasetProfile.java @@ -39,14 +39,14 @@ public DatasetProfile( this.initializeNewColumns(schema.getColNames()); } - public void addMetric(String colName, Metric metric) { + public void addMetric(String colName, Metric metric) { if (!this.columns.containsKey(colName)) { throw new InputMismatchException("Column name not found in schema"); } this.columns.get(colName).addMetric(metric); } - public void addDatasetMetric(String name, Metric metric) { + public void addDatasetMetric(String name, Metric metric) { this.metrics.put(name, metric); } @@ -73,7 +73,7 @@ private void doTrack(HashMap row) { this.initializeNewColumns(newColumnNames); } - ArrayList values = new ArrayList<>(); + ArrayList values; for (String col : row.keySet()) { values = new ArrayList<>(); values.add(row.get(col)); @@ -101,7 +101,8 @@ private void initializeNewColumns(Set colNames) { for (String column : colNames) { ColumnSchema columnSchema = this.schema.getColumns().get(column); if (columnSchema != null) { - this.columns.put(column, new ColumnProfile(column, columnSchema, this.schema.getCacheSize())); + this.columns.put( + column, new ColumnProfile<>(column, columnSchema, this.schema.getCacheSize())); } // TODO: log warning 'Encountered a column without schema: %s", col' in an else } @@ -131,8 +132,7 @@ public static String getDefaultPath(Optional path) { } if (!path.get().endsWith("bin")) { - String finalPath = path.get() + "_" + defaultPath; - return 
finalPath; + return path.get() + "_" + defaultPath; } return path.get(); diff --git a/java/core/src/main/java/com/whylogs/core/views/DatasetProfileView.java b/java/core/src/main/java/com/whylogs/core/views/DatasetProfileView.java index db03038a0a..32113f4b92 100644 --- a/java/core/src/main/java/com/whylogs/core/views/DatasetProfileView.java +++ b/java/core/src/main/java/com/whylogs/core/views/DatasetProfileView.java @@ -27,7 +27,7 @@ public DatasetProfileView( } public DatasetProfileView merge(DatasetProfileView otherView) { - if(otherView == null) { + if (otherView == null) { return this; } diff --git a/java/core/src/test/java/com/whylogs/api/resultsets/TestProfileResultSet.java b/java/core/src/test/java/com/whylogs/api/resultsets/TestProfileResultSet.java index 07747e1fdb..fa0914ef89 100644 --- a/java/core/src/test/java/com/whylogs/api/resultsets/TestProfileResultSet.java +++ b/java/core/src/test/java/com/whylogs/api/resultsets/TestProfileResultSet.java @@ -2,59 +2,78 @@ import com.whylogs.api.logger.resultSets.ProfileResultSet; import com.whylogs.core.DatasetProfile; - import com.whylogs.core.errors.Error; import com.whylogs.core.metrics.IntegralMetric; import com.whylogs.core.metrics.MetricConfig; import com.whylogs.core.schemas.DatasetSchema; -import org.testng.Assert; -import org.testng.annotations.Test; - import java.util.HashMap; import java.util.InputMismatchException; import java.util.Optional; - +import org.testng.Assert; +import org.testng.annotations.Test; @Test public class TestProfileResultSet { - private ProfileResultSet defaultResultSet(){ - HashMap data = new HashMap<>(); - data.put("test", 1); - data.put("test2", "2"); - DatasetSchema datasetSchema = new DatasetSchema(); - datasetSchema.resolve(data); + private ProfileResultSet defaultResultSet() { + HashMap data = new HashMap<>(); + data.put("test", 1); + data.put("test2", "2"); + DatasetSchema datasetSchema = new DatasetSchema(); + datasetSchema.resolve(data); - DatasetProfile datasetProfile = 
new DatasetProfile(Optional.of(datasetSchema), Optional.empty(), Optional.empty()); - return new ProfileResultSet(datasetProfile); + DatasetProfile datasetProfile = + new DatasetProfile(Optional.of(datasetSchema), Optional.empty(), Optional.empty()); + return new ProfileResultSet(datasetProfile); + } + + @Test + public void testProfileResultSet() { + ProfileResultSet profileResultSet = defaultResultSet(); + Assert.assertNotNull(profileResultSet); + + if (profileResultSet.profile().isPresent()) { + DatasetProfile datasetProfile = profileResultSet.profile().get(); + Assert.assertNotNull(datasetProfile); + Assert.assertEquals(datasetProfile.getSchema().getColumns().size(), 2); + } else { + Assert.fail("Profile is not present"); + } + + if (profileResultSet.view().isPresent()) { + Assert.assertEquals(profileResultSet.view().get().getColumns().size(), 2); + Assert.assertEquals( + profileResultSet.view().get().getColumns().get("test").getComponents().size(), 2); + } else { + Assert.fail("View is not present"); } - @Test - public void testProfileResultSet() { - ProfileResultSet profileResultSet = defaultResultSet(); - Assert.assertNotNull(profileResultSet); - Assert.assertEquals(profileResultSet.profile().get().getSchema().getColNames().size(), 2); - Assert.assertEquals(profileResultSet.view().get().getColumns().size(), 2); - // TODO: BUG HERE IN DATASET SCHEMA Assert.assertEquals(profileResultSet.view().get().getColumns().get("test").getComponents(), 1); - - - // Test expected error on unknown column name - try { - profileResultSet.addMetric("newTest", IntegralMetric.zero(new MetricConfig())); - } catch (Error error) { - Assert.fail("Error adding metric: " + error.getMessage()); - } catch (InputMismatchException e){ - // expected - } - - // - try { - profileResultSet.addMetric("test", IntegralMetric.zero(new MetricConfig())); - } catch (Error error) { - Assert.fail("Error adding metric: " + error.getMessage()); - } - - 
Assert.assertEquals(profileResultSet.view().get().getColumns().size(), 2); - Assert.assertEquals(profileResultSet.view().get().getColumns().get("test").getComponents().get("MaxIntegralComponent").getValue(), Integer.MIN_VALUE); + // Test expected error on unknown column name + try { + profileResultSet.addMetric("newTest", IntegralMetric.zero(new MetricConfig())); + } catch (Error error) { + Assert.fail("Error adding metric: " + error.getMessage()); + } catch (InputMismatchException e) { + // expected } + + // + try { + profileResultSet.addMetric("test", IntegralMetric.zero(new MetricConfig())); + } catch (Error error) { + Assert.fail("Error adding metric: " + error.getMessage()); + } + + Assert.assertEquals(profileResultSet.view().get().getColumns().size(), 2); + Assert.assertEquals( + profileResultSet + .view() + .get() + .getColumns() + .get("test") + .getComponents() + .get("MaxIntegralComponent") + .getValue(), + Integer.MIN_VALUE); + } } diff --git a/java/core/src/test/java/com/whylogs/api/resultsets/TestViewResultSet.java b/java/core/src/test/java/com/whylogs/api/resultsets/TestViewResultSet.java index bae4a89476..75ea8024c6 100644 --- a/java/core/src/test/java/com/whylogs/api/resultsets/TestViewResultSet.java +++ b/java/core/src/test/java/com/whylogs/api/resultsets/TestViewResultSet.java @@ -6,7 +6,7 @@ import com.whylogs.core.metrics.MetricConfig; import com.whylogs.core.views.ColumnProfileView; import com.whylogs.core.views.DatasetProfileView; -import java.util.Date; +import java.time.Instant; import java.util.HashMap; import org.testng.Assert; import org.testng.annotations.Test; @@ -14,19 +14,28 @@ @Test public class TestViewResultSet { - private DatasetProfileView getDefaultDatasetProfile() { + private DatasetProfileView getDefaultDatasetProfile(Instant timestamp) { HashMap columnProfileViews = new HashMap<>(); - HashMap testMetrics = new HashMap<>(); + HashMap> testMetrics = new HashMap<>(); testMetrics.put("ints", IntegralMetric.zero(new 
MetricConfig())); columnProfileViews.put("test", new ColumnProfileView(testMetrics)); - return new DatasetProfileView(columnProfileViews, new Date(), new Date()); + + return new DatasetProfileView(columnProfileViews, timestamp, timestamp); } @Test public void testViewResultSet() { - DatasetProfileView view = getDefaultDatasetProfile(); + Instant timestamp = Instant.now(); + DatasetProfileView view = getDefaultDatasetProfile(timestamp); ViewResultSet viewResultSet = new ViewResultSet(view); Assert.assertNotNull(viewResultSet); - Assert.assertEquals(viewResultSet.view().get().getColumns().size(), 1); + + if (viewResultSet.view().isPresent()) { + Assert.assertEquals(viewResultSet.view().get().getColumns().size(), 1); + Assert.assertEquals( + viewResultSet.view().get().getColumns().get("test").getComponents().size(), 2); + } else { + Assert.fail("View is not present"); + } } } From 24aff919df99795bfd77dc8dee2e73d90186651a Mon Sep 17 00:00:00 2001 From: TheMellyBee Date: Mon, 17 Oct 2022 15:57:22 -0700 Subject: [PATCH 53/71] Getting skeletons for Writers and writable in. 
Will need to think through arguments passing in Java --- .../java/com/whylogs/api/writer/Writable.java | 22 +++++++++++++++++++ .../java/com/whylogs/api/writer/Writer.java | 16 ++++++++++++++ .../whylogs/api/writer/WritersRegistry.java | 8 +++++++ 3 files changed, 46 insertions(+) create mode 100644 java/core/src/main/java/com/whylogs/api/writer/Writable.java create mode 100644 java/core/src/main/java/com/whylogs/api/writer/Writer.java create mode 100644 java/core/src/main/java/com/whylogs/api/writer/WritersRegistry.java diff --git a/java/core/src/main/java/com/whylogs/api/writer/Writable.java b/java/core/src/main/java/com/whylogs/api/writer/Writable.java new file mode 100644 index 0000000000..ba9651ebf7 --- /dev/null +++ b/java/core/src/main/java/com/whylogs/api/writer/Writable.java @@ -0,0 +1,22 @@ +package com.whylogs.api.writer; + +import java.io.File; +import java.io.FileWriter; + +public interface Writable { + + static FileWriter safeOpenWrite(String path) { + // Open 'path' for writing, creating any parent directories as needed + File file = new File(path); + FileWriter writer = null; + try { + writer = new FileWriter(file, true); + } catch (Exception e) { + System.out.println("Error: " + e); + e.printStackTrace(); + } + + // this close happens latter on + return writer; + } +} diff --git a/java/core/src/main/java/com/whylogs/api/writer/Writer.java b/java/core/src/main/java/com/whylogs/api/writer/Writer.java new file mode 100644 index 0000000000..13ea4ead2a --- /dev/null +++ b/java/core/src/main/java/com/whylogs/api/writer/Writer.java @@ -0,0 +1,16 @@ +package com.whylogs.api.writer; + +import javax.swing.text.html.Option; +import java.util.Optional; + +// TODO: this is a temp holding class for logger that will be implmented next +public abstract class Writer { + /*Validate an interval configuration for a given writer. + Some writer only accepts certain interval configuration. 
By default, this should return True for a valid + non-negative interval.*/ + public void check_interval(int interval_seconds) { + // TODO: implement (not implemented in java either + } + public abstract void write(Writable file, Optional dest); + public abstract T option(T writer); +} diff --git a/java/core/src/main/java/com/whylogs/api/writer/WritersRegistry.java b/java/core/src/main/java/com/whylogs/api/writer/WritersRegistry.java new file mode 100644 index 0000000000..0f9d23f06a --- /dev/null +++ b/java/core/src/main/java/com/whylogs/api/writer/WritersRegistry.java @@ -0,0 +1,8 @@ +package com.whylogs.api.writer; + +public class WritersRegistry { + public static T get(String name) { + // TODO: Not implemented yet + return null; + } +} From 86e816e49ae2fd40e7b5e5ab14e6893720914b73 Mon Sep 17 00:00:00 2001 From: TheMellyBee Date: Mon, 17 Oct 2022 15:57:54 -0700 Subject: [PATCH 54/71] Adding skeleton writers to logger --- .../java/com/whylogs/api/logger/Logger.java | 50 ++++++++++++++++++- 1 file changed, 49 insertions(+), 1 deletion(-) diff --git a/java/core/src/main/java/com/whylogs/api/logger/Logger.java b/java/core/src/main/java/com/whylogs/api/logger/Logger.java index b8ebb9e2c7..996c6575b4 100644 --- a/java/core/src/main/java/com/whylogs/api/logger/Logger.java +++ b/java/core/src/main/java/com/whylogs/api/logger/Logger.java @@ -1,3 +1,51 @@ package com.whylogs.api.logger; -public class Logger {} +import com.whylogs.api.writer.Writer; +import com.whylogs.api.writer.WritersRegistry; +import com.whylogs.core.schemas.DatasetSchema; +import lombok.EqualsAndHashCode; +import lombok.Getter; +import lombok.NoArgsConstructor; + +import java.util.ArrayList; + +@NoArgsConstructor +@Getter +@EqualsAndHashCode +public abstract class Logger implements AutoCloseable { + private boolean isClosed = false; + private DatasetSchema schema; + private ArrayList writers = new ArrayList<>(); + + public Logger(DatasetSchema schema) { + this.schema = schema; + } + + public void 
checkWriter(T Writer) { + // Checks if a writer is configured correctly for this class + // Question: why is this empty but not an abstract? + } + + public void appendWriter(String name){ + if(name == null || name.isEmpty()){ + throw new IllegalArgumentException("Writer name cannot be empty"); + } + + Writer writer = WritersRegistry.get(name); + if(writer == null){ + throw new IllegalArgumentException("Writer " + name + " is not registered"); + } + + appendWriter(writer); + } + + public void appendWriter(Writer writer){ + if(writer == null){ + throw new IllegalArgumentException("Writer cannot be null"); + } + + checkWriter(writer); + writers.add(writer); + } + +} From 7d0423f727523af0984f3811f2bbd830cbd2a820 Mon Sep 17 00:00:00 2001 From: TheMellyBee Date: Mon, 17 Oct 2022 16:11:12 -0700 Subject: [PATCH 55/71] Logs a hashmap. Python has a generic object, we need to implement this as well --- .../java/com/whylogs/api/logger/Logger.java | 34 +++++++++++++++++++ .../java/com/whylogs/core/DatasetProfile.java | 4 +-- 2 files changed, 36 insertions(+), 2 deletions(-) diff --git a/java/core/src/main/java/com/whylogs/api/logger/Logger.java b/java/core/src/main/java/com/whylogs/api/logger/Logger.java index 996c6575b4..1f9d0ff8d4 100644 --- a/java/core/src/main/java/com/whylogs/api/logger/Logger.java +++ b/java/core/src/main/java/com/whylogs/api/logger/Logger.java @@ -1,17 +1,24 @@ package com.whylogs.api.logger; +import com.whylogs.api.logger.resultSets.ProfileResultSet; +import com.whylogs.api.logger.resultSets.ResultSet; import com.whylogs.api.writer.Writer; import com.whylogs.api.writer.WritersRegistry; +import com.whylogs.core.DatasetProfile; import com.whylogs.core.schemas.DatasetSchema; import lombok.EqualsAndHashCode; import lombok.Getter; import lombok.NoArgsConstructor; +import lombok.ToString; import java.util.ArrayList; +import java.util.HashMap; +import java.util.Map; @NoArgsConstructor @Getter @EqualsAndHashCode +@ToString public abstract class Logger 
implements AutoCloseable { private boolean isClosed = false; private DatasetSchema schema; @@ -48,4 +55,31 @@ public void appendWriter(Writer writer){ writers.add(writer); } + protected abstract ArrayList getMatchingProfiles(Object data); + protected abstract ArrayList getMatchingProfiles(Map data); + + @Override + public void close(){ + isClosed = true; + } + + public ResultSet log(HashMap data){ + // What type of data is the object? Right now we don't process that in track. + if(isClosed){ + throw new IllegalStateException("Logger is closed"); + } else if(data == null){ + throw new IllegalArgumentException("Data cannot be null"); + } + + // TODO: implement segment processing here + + ArrayList profiles = getMatchingProfiles(data); + for(DatasetProfile profile : profiles){ + profile.track(data); + } + + // Question: Why does this only return the first profile? IS this + // getting ready for multiple profiles later on? + return new ProfileResultSet(profiles.get(0)); + } } diff --git a/java/core/src/main/java/com/whylogs/core/DatasetProfile.java b/java/core/src/main/java/com/whylogs/core/DatasetProfile.java index 2b5e8d1e50..2940f24198 100644 --- a/java/core/src/main/java/com/whylogs/core/DatasetProfile.java +++ b/java/core/src/main/java/com/whylogs/core/DatasetProfile.java @@ -50,7 +50,7 @@ public void addDatasetMetric(String name, Metric metric) { this.metrics.put(name, metric); } - public void track(HashMap row) { + public void track(HashMap row) { try { this.isActive = true; this.trackCount += 1; @@ -60,7 +60,7 @@ public void track(HashMap row) { } } - private void doTrack(HashMap row) { + private void doTrack(HashMap row) { boolean dirty = this.schema.resolve(row); if (dirty) { Set schemaColumnNames = this.schema.getColNames(); From d5c2d40d6420b6827fd6daff97ee5340905db017 Mon Sep 17 00:00:00 2001 From: TheMellyBee Date: Mon, 17 Oct 2022 16:20:20 -0700 Subject: [PATCH 56/71] Adds TransientLogger --- .../whylogs/api/logger/TransientLogger.java | 44 
++++++++++++++++++- 1 file changed, 43 insertions(+), 1 deletion(-) diff --git a/java/core/src/main/java/com/whylogs/api/logger/TransientLogger.java b/java/core/src/main/java/com/whylogs/api/logger/TransientLogger.java index a5986a5e19..da84a52946 100644 --- a/java/core/src/main/java/com/whylogs/api/logger/TransientLogger.java +++ b/java/core/src/main/java/com/whylogs/api/logger/TransientLogger.java @@ -1,3 +1,45 @@ package com.whylogs.api.logger; -public class TransientLogger {} +import com.whylogs.core.DatasetProfile; +import com.whylogs.core.schemas.DatasetSchema; +import lombok.*; +import org.apache.commons.lang3.NotImplementedException; + +import java.lang.reflect.Array; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Map; +import java.util.Optional; + +@NoArgsConstructor +@Getter +@EqualsAndHashCode(callSuper = false) +@ToString +public class TransientLogger extends Logger{ + public TransientLogger(DatasetSchema schema) { + super(schema); + } + + @Override + protected ArrayList getMatchingProfiles(Object data) { + // In this case, we don't have any profiles to match against + ArrayList profiles = new ArrayList<>(); + DatasetProfile profile = new DatasetProfile(Optional.ofNullable(getSchema()), Optional.empty(), Optional.empty()); + profiles.add(profile); + return profiles; + } + + @Override + protected ArrayList getMatchingProfiles(Map data) { + // In this case, we don't have any profiles to match against + return getMatchingProfiles((Object) data); + } + + public void flush(){ + throw new NotImplementedException(); + } + + public void close(){ + throw new NotImplementedException(); + } +} From d3e48e4af7c10d6712213b97dc33f3a1a5bfbaac Mon Sep 17 00:00:00 2001 From: TheMellyBee Date: Tue, 18 Oct 2022 10:09:13 -0700 Subject: [PATCH 57/71] TimedRolledLogger started and Scheduler worked on. 
Still needs the timer start and schedule fixed with TimerTask --- .../api/logger/rollingLogger/Scheduler.java | 50 +++++++++++++++++++ .../rollingLogger/TimedRollingLogger.java | 4 ++ 2 files changed, 54 insertions(+) create mode 100644 java/core/src/main/java/com/whylogs/api/logger/rollingLogger/Scheduler.java create mode 100644 java/core/src/main/java/com/whylogs/api/logger/rollingLogger/TimedRollingLogger.java diff --git a/java/core/src/main/java/com/whylogs/api/logger/rollingLogger/Scheduler.java b/java/core/src/main/java/com/whylogs/api/logger/rollingLogger/Scheduler.java new file mode 100644 index 0000000000..d0c101a8be --- /dev/null +++ b/java/core/src/main/java/com/whylogs/api/logger/rollingLogger/Scheduler.java @@ -0,0 +1,50 @@ +package com.whylogs.api.logger.rollingLogger; + +import com.sun.org.apache.xpath.internal.functions.FuncFalse; +import lombok.EqualsAndHashCode; +import lombok.Getter; +import lombok.ToString; + +import java.util.Timer; + +@Getter +@EqualsAndHashCode +@ToString +public class Scheduler { + // Multithreading schedule. 
+ // Schedule a function to be called repeatedly based on a schedule + + private Timer timer; + private float initial; + private boolean ranInitial = false; + private float interval; + private Runnable func; + private boolean isRunning = false; + // TODO: figure out args an dkwards + + public Scheduler() { + this.start(); + } + + private void run(){ + this.isRunning = false; + this.start(); + this.func.run(); // TODO: figure out args and kwargs + } + + public void start(){ + if (this.isRunning){ + return; + } + + float interval = this.getInterval(); + if(!this.ranInitial){ + interval = this.getInitial(); + this.ranInitial = true; + } + this.isRunning = true; + + this.timer = new Timer(interval, this::run); + this.timer.schedule(this::run, (long) this.initial, (long) this.interval); + } +} diff --git a/java/core/src/main/java/com/whylogs/api/logger/rollingLogger/TimedRollingLogger.java b/java/core/src/main/java/com/whylogs/api/logger/rollingLogger/TimedRollingLogger.java new file mode 100644 index 0000000000..44d4bf693d --- /dev/null +++ b/java/core/src/main/java/com/whylogs/api/logger/rollingLogger/TimedRollingLogger.java @@ -0,0 +1,4 @@ +package com.whylogs.api.logger.rollingLogger; + +public class TimedRollingLogger { +} From e0cd6bd87ac26bc9e43b961a3dd545cae0a5e059 Mon Sep 17 00:00:00 2001 From: TheMellyBee Date: Tue, 18 Oct 2022 21:25:41 -0700 Subject: [PATCH 58/71] iteration of scheduler - adds in scheduledExecurtor --- .../api/logger/rollingLogger/Scheduler.java | 31 +++++++++++++------ 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/java/core/src/main/java/com/whylogs/api/logger/rollingLogger/Scheduler.java b/java/core/src/main/java/com/whylogs/api/logger/rollingLogger/Scheduler.java index d0c101a8be..b00b8ba09a 100644 --- a/java/core/src/main/java/com/whylogs/api/logger/rollingLogger/Scheduler.java +++ b/java/core/src/main/java/com/whylogs/api/logger/rollingLogger/Scheduler.java @@ -1,11 +1,12 @@ package com.whylogs.api.logger.rollingLogger; 
-import com.sun.org.apache.xpath.internal.functions.FuncFalse; import lombok.EqualsAndHashCode; import lombok.Getter; import lombok.ToString; -import java.util.Timer; +import java.util.concurrent.Executors; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.TimeUnit; @Getter @EqualsAndHashCode @@ -13,22 +14,27 @@ public class Scheduler { // Multithreading schedule. // Schedule a function to be called repeatedly based on a schedule - - private Timer timer; + private ScheduledExecutorService scheduledService; private float initial; private boolean ranInitial = false; private float interval; private Runnable func; private boolean isRunning = false; + private String[] args; // TODO: figure out args an dkwards - public Scheduler() { + public Scheduler(float initial, float interval, Runnable func, String[] args) { + this.initial = initial; + this.interval = interval; + this.func = func; + this.args = args; this.start(); } private void run(){ + // TODO: Looking at this I think this is wrong to have lines 35 & 36 this.isRunning = false; - this.start(); + this.start(); // Question: why do we need to start again? 
this.func.run(); // TODO: figure out args and kwargs } @@ -37,14 +43,19 @@ public void start(){ return; } - float interval = this.getInterval(); + float initial = 0; if(!this.ranInitial){ - interval = this.getInitial(); + initial = this.getInitial(); this.ranInitial = true; } + + this.scheduledService = Executors.newSingleThreadScheduledExecutor(); + this.scheduledService.scheduleAtFixedRate(this::run, (long) initial, (long) this.interval, TimeUnit.SECONDS); this.isRunning = true; + } - this.timer = new Timer(interval, this::run); - this.timer.schedule(this::run, (long) this.initial, (long) this.interval); + public void stop(){ + this.scheduledService.shutdown(); + this.isRunning = false; } } From 10bb8b841bb138c8e2f938556524f9bfa49ca708 Mon Sep 17 00:00:00 2001 From: TheMellyBee Date: Tue, 18 Oct 2022 21:53:54 -0700 Subject: [PATCH 59/71] basis for the TimedRollingLogger, need to think though the flush and how we want to do it in java --- .../rollingLogger/TimedRollingLogger.java | 175 +++++++++++++++++- 1 file changed, 174 insertions(+), 1 deletion(-) diff --git a/java/core/src/main/java/com/whylogs/api/logger/rollingLogger/TimedRollingLogger.java b/java/core/src/main/java/com/whylogs/api/logger/rollingLogger/TimedRollingLogger.java index 44d4bf693d..3887156801 100644 --- a/java/core/src/main/java/com/whylogs/api/logger/rollingLogger/TimedRollingLogger.java +++ b/java/core/src/main/java/com/whylogs/api/logger/rollingLogger/TimedRollingLogger.java @@ -1,4 +1,177 @@ package com.whylogs.api.logger.rollingLogger; -public class TimedRollingLogger { +import com.whylogs.api.logger.Logger; +import com.whylogs.api.writer.Writer; +import com.whylogs.core.DatasetProfile; +import com.whylogs.core.schemas.DatasetSchema; + +import java.time.Instant; +import java.util.ArrayList; +import java.util.Map; +import java.util.Optional; +import java.util.concurrent.Callable; + +public class TimedRollingLogger extends Logger implements AutoCloseable{ + // A rolling logger that 
continously rotates files based on time + + private DatasetSchema schema; + private String baseName; + private String fileExtension; + private int interval; + private Character when = 'H'; // TODO: Make the Literals of S M H D + private boolean utc = false; + private boolean align = true; + private boolean fork = false; + private boolean skipEmpty = false; + private String suffix; + + private DatasetProfile currentProfile; + private Callable callback; // TODO: this isn't the write signatture + private Scheduler scheduler; + private int currentBatchTimestamp; + + + + // TODO: callback: Optional[Callable[[Writer, DatasetProfileView, str], None]] + + public TimedRollingLogger(DatasetSchema schema, String baseName, String fileExtension, int interval) { + this(schema, baseName, fileExtension, interval, 'H', false, true, false, false); + } + + public TimedRollingLogger(DatasetSchema schema, String baseName, String fileExtension, int interval, Character when, boolean utc, boolean align, boolean fork, boolean skipEmpty) { + super(schema); + + this.schema = schema; + this.baseName = baseName; + this.fileExtension = fileExtension; + this.interval = interval; + this.when = Character.toUpperCase(when); + this.utc = utc; + this.align = align; + this.fork = fork; + this.skipEmpty = skipEmpty; + + if(this.baseName == null || this.baseName.isEmpty()) { + this.baseName = "profile"; + } + if(this.fileExtension == null || this.fileExtension.isEmpty()) { + this.fileExtension = ".bin"; // TODO: should we make this .whylogs? 
+ } + + switch(this.when) { + case 'S': + this.interval = 1; // one second + this.suffix = "%Y-%m-%d_%H-%M-%S"; + break; + case 'M': + this.interval = 60; // one minute + this.suffix = "%Y-%m-%d_%H-%M"; + break; + case 'H': + this.interval = 60 * 60; // one hour + this.suffix = "%Y-%m-%d_%H"; + break; + case 'D': + this.interval = 60 * 60 * 24; // one day + this.suffix = "%Y-%m-%d"; + break; + default: + throw new IllegalArgumentException("Invalid value for when: " + this.when); + } + + this.interval = this.interval * interval; /// multiply by units requested + this.utc = utc; + + Instant currentTime = Instant.now(); + this.currentBatchTimestamp = this.computeCurrentBatchTimestamp(currentTime.getEpochSecond()); + this.currentProfile = new DatasetProfile(Optional.ofNullable(schema), Optional.of(currentTime), Optional.of(currentTime)); + int initialRunAfter = (this.currentBatchTimestamp + this.interval) - (int) currentTime.getEpochSecond(); + if(initialRunAfter < 0) { + // TODO: Add logging error as this shouldn't happen + initialRunAfter = this.interval; + } + + this.scheduler = new Scheduler(initialRunAfter, this.interval, this::doRollover, null); + this.scheduler.start(); + + // autoclosable closes at end + } + + private int computeCurrentBatchTimestamp(long nowEpoch) { + int roundedNow = (int) nowEpoch; + if(this.align){ + return ((int) Math.floorDiv((roundedNow - 1), this.interval)) * this.interval + this.interval; + } + return roundedNow; + } + + public void checkWriter(Writer writer){ + writer.check_interval(this.interval); + } + + @Override + protected ArrayList getMatchingProfiles(Object data) { + return null; + } + + @Override + protected ArrayList getMatchingProfiles(Map data) { + return null; + } + + private void doRollover() { + if(this.isClosed()) { + return; + } + + DatasetProfile oldProfile = this.currentProfile; + Instant currentTime = Instant.now(); + this.currentBatchTimestamp = this.computeCurrentBatchTimestamp(currentTime.getEpochSecond()); + 
this.currentProfile = new DatasetProfile(Optional.ofNullable(schema), Optional.of(currentTime), Optional.of(currentTime)); + + this.flush(oldProfile); + } + + private void flush(DatasetProfile profile) { + if (profile == null) { + return; + } else if (this.skipEmpty && profile.isEmpty()) { + // set logger logger.debug("skip_empty is set. Skipping empty profiles") + return; + } + + + // TODO: let's go ahead and rethink this whole section + int pid = 0; + if(this.fork) { + pid = 0; // TODO: get pid + } + + if(pid > 0) { + // TODO: document with logger + } else { + if(this.fork) { + // TODO: document with logger + } else { + // TODO: document with logger + } + + if(this.utc){ + // TODO: figure out timeTuple of python time.getTime + } else { + // + } + // writers + } + + } + + public void close() { + // TODO log that we are closing the writer + if(!this.isClosed()) { + // Autoclose handles the isCLosed() + this.scheduler.stop(); + this.flush(this.currentProfile); + } + } } From ec273019594bab237cbb2356b32b11706da68e67 Mon Sep 17 00:00:00 2001 From: TheMellyBee Date: Thu, 27 Oct 2022 11:31:51 -0700 Subject: [PATCH 60/71] Finishes TransientLogger after testing and debugging. TimedRollingLogger will need to be tested after Writers are done. 
--- .../java/com/whylogs/api/logger/Logger.java | 8 +-- .../whylogs/api/logger/TransientLogger.java | 11 --- .../rollingLogger/TimedRollingLogger.java | 67 ++++++++----------- .../whylogs/core/schemas/DatasetSchema.java | 1 + .../com/whylogs/api/logger/TestLogger.java | 38 +++++++++++ .../whylogs/api/logger/TestRollingLogger.java | 31 +++++++++ 6 files changed, 102 insertions(+), 54 deletions(-) create mode 100644 java/core/src/test/java/com/whylogs/api/logger/TestLogger.java create mode 100644 java/core/src/test/java/com/whylogs/api/logger/TestRollingLogger.java diff --git a/java/core/src/main/java/com/whylogs/api/logger/Logger.java b/java/core/src/main/java/com/whylogs/api/logger/Logger.java index 1f9d0ff8d4..f65712d721 100644 --- a/java/core/src/main/java/com/whylogs/api/logger/Logger.java +++ b/java/core/src/main/java/com/whylogs/api/logger/Logger.java @@ -1,7 +1,7 @@ package com.whylogs.api.logger; -import com.whylogs.api.logger.resultSets.ProfileResultSet; -import com.whylogs.api.logger.resultSets.ResultSet; +import com.whylogs.api.logger.resultsets.ProfileResultSet; +import com.whylogs.api.logger.resultsets.ResultSet; import com.whylogs.api.writer.Writer; import com.whylogs.api.writer.WritersRegistry; import com.whylogs.core.DatasetProfile; @@ -21,7 +21,7 @@ @ToString public abstract class Logger implements AutoCloseable { private boolean isClosed = false; - private DatasetSchema schema; + private DatasetSchema schema = new DatasetSchema(); private ArrayList writers = new ArrayList<>(); public Logger(DatasetSchema schema) { @@ -63,7 +63,7 @@ public void close(){ isClosed = true; } - public ResultSet log(HashMap data){ + public ResultSet log(HashMap data){ // What type of data is the object? Right now we don't process that in track. 
if(isClosed){ throw new IllegalStateException("Logger is closed"); diff --git a/java/core/src/main/java/com/whylogs/api/logger/TransientLogger.java b/java/core/src/main/java/com/whylogs/api/logger/TransientLogger.java index 86319e038d..d00044cb3a 100644 --- a/java/core/src/main/java/com/whylogs/api/logger/TransientLogger.java +++ b/java/core/src/main/java/com/whylogs/api/logger/TransientLogger.java @@ -5,11 +5,8 @@ import lombok.*; import org.apache.commons.lang3.NotImplementedException; -import java.lang.reflect.Array; import java.util.ArrayList; -import java.util.Collection; import java.util.Map; -import java.util.Optional; @NoArgsConstructor @Getter @@ -34,12 +31,4 @@ protected ArrayList getMatchingProfiles(Map data) // In this case, we don't have any profiles to match against return getMatchingProfiles((Object) data); } - - public void flush(){ - throw new NotImplementedException(); - } - - public void close(){ - throw new NotImplementedException(); - } } diff --git a/java/core/src/main/java/com/whylogs/api/logger/rollingLogger/TimedRollingLogger.java b/java/core/src/main/java/com/whylogs/api/logger/rollingLogger/TimedRollingLogger.java index 3887156801..d9ab23b822 100644 --- a/java/core/src/main/java/com/whylogs/api/logger/rollingLogger/TimedRollingLogger.java +++ b/java/core/src/main/java/com/whylogs/api/logger/rollingLogger/TimedRollingLogger.java @@ -4,7 +4,10 @@ import com.whylogs.api.writer.Writer; import com.whylogs.core.DatasetProfile; import com.whylogs.core.schemas.DatasetSchema; +import com.whylogs.core.views.DatasetProfileView; +import org.apache.commons.lang3.NotImplementedException; +import java.lang.reflect.Array; import java.time.Instant; import java.util.ArrayList; import java.util.Map; @@ -12,8 +15,7 @@ import java.util.concurrent.Callable; public class TimedRollingLogger extends Logger implements AutoCloseable{ - // A rolling logger that continously rotates files based on time - + // A rolling logger that continuously rotates files based on 
time private DatasetSchema schema; private String baseName; private String fileExtension; @@ -21,24 +23,24 @@ public class TimedRollingLogger extends Logger implements AutoCloseable{ private Character when = 'H'; // TODO: Make the Literals of S M H D private boolean utc = false; private boolean align = true; - private boolean fork = false; private boolean skipEmpty = false; private String suffix; private DatasetProfile currentProfile; - private Callable callback; // TODO: this isn't the write signatture + private Callable callback; // TODO: this isn't the write signature private Scheduler scheduler; private int currentBatchTimestamp; - - // TODO: callback: Optional[Callable[[Writer, DatasetProfileView, str], None]] - public TimedRollingLogger(DatasetSchema schema, String baseName, String fileExtension, int interval) { - this(schema, baseName, fileExtension, interval, 'H', false, true, false, false); + this(schema, baseName, fileExtension, interval, 'H', false, true, false); + } + + public TimedRollingLogger(DatasetSchema schema, String baseName, String fileExtension, int interval, Character when) { + this(schema, baseName, fileExtension, interval, when, false, true, false); } - public TimedRollingLogger(DatasetSchema schema, String baseName, String fileExtension, int interval, Character when, boolean utc, boolean align, boolean fork, boolean skipEmpty) { + public TimedRollingLogger(DatasetSchema schema, String baseName, String fileExtension, int interval, Character when, boolean utc, boolean align, boolean skipEmpty) { super(schema); this.schema = schema; @@ -48,7 +50,6 @@ public TimedRollingLogger(DatasetSchema schema, String baseName, String fileExte this.when = Character.toUpperCase(when); this.utc = utc; this.align = align; - this.fork = fork; this.skipEmpty = skipEmpty; if(this.baseName == null || this.baseName.isEmpty()) { @@ -84,7 +85,7 @@ public TimedRollingLogger(DatasetSchema schema, String baseName, String fileExte Instant currentTime = Instant.now(); 
this.currentBatchTimestamp = this.computeCurrentBatchTimestamp(currentTime.getEpochSecond()); - this.currentProfile = new DatasetProfile(Optional.ofNullable(schema), Optional.of(currentTime), Optional.of(currentTime)); + this.currentProfile = new DatasetProfile(schema, currentTime, currentTime); int initialRunAfter = (this.currentBatchTimestamp + this.interval) - (int) currentTime.getEpochSecond(); if(initialRunAfter < 0) { // TODO: Add logging error as this shouldn't happen @@ -94,13 +95,13 @@ public TimedRollingLogger(DatasetSchema schema, String baseName, String fileExte this.scheduler = new Scheduler(initialRunAfter, this.interval, this::doRollover, null); this.scheduler.start(); - // autoclosable closes at end + // autocloseable closes at end } private int computeCurrentBatchTimestamp(long nowEpoch) { int roundedNow = (int) nowEpoch; if(this.align){ - return ((int) Math.floorDiv((roundedNow - 1), this.interval)) * this.interval + this.interval; + return (Math.floorDiv((roundedNow - 1), this.interval)) * this.interval + this.interval; } return roundedNow; } @@ -109,14 +110,20 @@ public void checkWriter(Writer writer){ writer.check_interval(this.interval); } + private ArrayList getMatchingProfiles(){ + ArrayList matchingProfiles = new ArrayList<>(); + matchingProfiles.add(this.currentProfile); + return matchingProfiles; + } + @Override protected ArrayList getMatchingProfiles(Object data) { - return null; + return this.getMatchingProfiles(); } @Override protected ArrayList getMatchingProfiles(Map data) { - return null; + return this.getMatchingProfiles(); } private void doRollover() { @@ -127,7 +134,7 @@ private void doRollover() { DatasetProfile oldProfile = this.currentProfile; Instant currentTime = Instant.now(); this.currentBatchTimestamp = this.computeCurrentBatchTimestamp(currentTime.getEpochSecond()); - this.currentProfile = new DatasetProfile(Optional.ofNullable(schema), Optional.of(currentTime), Optional.of(currentTime)); + this.currentProfile = new 
DatasetProfile(schema, currentTime, currentTime); this.flush(oldProfile); } @@ -140,30 +147,12 @@ private void flush(DatasetProfile profile) { return; } + // get time to get name + String timedFileName = this.baseName + "_" + this.currentBatchTimestamp + this.fileExtension; - // TODO: let's go ahead and rethink this whole section - int pid = 0; - if(this.fork) { - pid = 0; // TODO: get pid - } - - if(pid > 0) { - // TODO: document with logger - } else { - if(this.fork) { - // TODO: document with logger - } else { - // TODO: document with logger - } - - if(this.utc){ - // TODO: figure out timeTuple of python time.getTime - } else { - // - } - // writers - } - + // Sleep while the profile is active? + // TODO: this is where we call the store list.write + // TODO: go through through the writers } public void close() { diff --git a/java/core/src/main/java/com/whylogs/core/schemas/DatasetSchema.java b/java/core/src/main/java/com/whylogs/core/schemas/DatasetSchema.java index 7da0165b2c..37774e2cc5 100644 --- a/java/core/src/main/java/com/whylogs/core/schemas/DatasetSchema.java +++ b/java/core/src/main/java/com/whylogs/core/schemas/DatasetSchema.java @@ -23,6 +23,7 @@ public DatasetSchema() { this(Optional.empty(), Optional.empty()); } + // TODO: Use overloading instead of optionals public DatasetSchema(Optional defaultConfig, Optional resolver) { this.columns = new HashMap<>(); this.defaultConfig = defaultConfig.orElse(new MetricConfig()); diff --git a/java/core/src/test/java/com/whylogs/api/logger/TestLogger.java b/java/core/src/test/java/com/whylogs/api/logger/TestLogger.java new file mode 100644 index 0000000000..2ee98fce3b --- /dev/null +++ b/java/core/src/test/java/com/whylogs/api/logger/TestLogger.java @@ -0,0 +1,38 @@ +package com.whylogs.api.logger; + +import com.whylogs.api.logger.resultsets.ResultSet; +import com.whylogs.core.DatasetProfile; +import org.testng.Assert; +import org.testng.annotations.Test; + +import java.util.HashMap; + +@Test +public class 
TestLogger { + + @Test + public void testBasicLog(){ + HashMap data = createBasicLog(); + + ResultSet results = new TransientLogger().log(data); + Assert.assertNotNull(results); + + DatasetProfile profile = results.profile().get(); + Assert.assertNotNull(profile); + + //profile.getColumns().get("col1"); + Assert.assertEquals(profile.getColumns().get("col1").getMetrics().size(), 1); + Assert.assertEquals(profile.getColumns().get("col1").getSuccessCount(), 1); + Assert.assertEquals(profile.getColumns().get("col1").getSchema().getType(), Integer.class); + + } + + private HashMap createBasicLog() { + HashMap data = new HashMap<>(); + data.put("col1", 2); + data.put("col2", 3); + data.put("col3", 100); + + return data; + } +} diff --git a/java/core/src/test/java/com/whylogs/api/logger/TestRollingLogger.java b/java/core/src/test/java/com/whylogs/api/logger/TestRollingLogger.java new file mode 100644 index 0000000000..aec3eda1fc --- /dev/null +++ b/java/core/src/test/java/com/whylogs/api/logger/TestRollingLogger.java @@ -0,0 +1,31 @@ +package com.whylogs.api.logger; + +import com.whylogs.api.logger.rollingLogger.TimedRollingLogger; +import com.whylogs.core.DatasetProfile; +import com.whylogs.core.schemas.DatasetSchema; +import com.whylogs.core.views.DatasetProfileView; +import org.testng.Assert; +import org.testng.annotations.Test; + +import java.util.HashMap; + +@Test +public class TestRollingLogger { + + @Test + public void testClosing(){ + HashMap data = createBasicLog(); + TimedRollingLogger logger = new TimedRollingLogger(new DatasetSchema(), "test", ".bin", 1, 'M'); + logger.log(data); + // TODO: testing needs the writer + } + + private HashMap createBasicLog() { + HashMap data = new HashMap<>(); + data.put("col1", 2); + data.put("col2", 3); + data.put("col3", 100); + + return data; + } +} From f36a1d3cbd1ffa97d7a4a38747d0c634aff63c38 Mon Sep 17 00:00:00 2001 From: TheMellyBee Date: Thu, 27 Oct 2022 11:33:04 -0700 Subject: [PATCH 61/71] Renames folder to match 
google style guide and linter --- .../java/com/whylogs/api/logger/Logger.java | 104 +++---- .../whylogs/api/logger/TransientLogger.java | 38 ++- .../logger/resultsets/ProfileResultSet.java | 2 +- .../api/logger/rollingLogger/Scheduler.java | 90 +++--- .../rollingLogger/TimedRollingLogger.java | 276 +++++++++--------- .../java/com/whylogs/api/writer/Writable.java | 26 +- .../java/com/whylogs/api/writer/Writer.java | 19 +- .../whylogs/api/writer/WritersRegistry.java | 8 +- .../com/whylogs/api/logger/TestLogger.java | 42 ++- .../whylogs/api/logger/TestRollingLogger.java | 34 +-- 10 files changed, 319 insertions(+), 320 deletions(-) diff --git a/java/core/src/main/java/com/whylogs/api/logger/Logger.java b/java/core/src/main/java/com/whylogs/api/logger/Logger.java index f65712d721..44263748f3 100644 --- a/java/core/src/main/java/com/whylogs/api/logger/Logger.java +++ b/java/core/src/main/java/com/whylogs/api/logger/Logger.java @@ -6,80 +6,80 @@ import com.whylogs.api.writer.WritersRegistry; import com.whylogs.core.DatasetProfile; import com.whylogs.core.schemas.DatasetSchema; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.Map; import lombok.EqualsAndHashCode; import lombok.Getter; import lombok.NoArgsConstructor; import lombok.ToString; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.Map; - @NoArgsConstructor @Getter @EqualsAndHashCode @ToString public abstract class Logger implements AutoCloseable { - private boolean isClosed = false; - private DatasetSchema schema = new DatasetSchema(); - private ArrayList writers = new ArrayList<>(); - - public Logger(DatasetSchema schema) { - this.schema = schema; + private boolean isClosed = false; + private DatasetSchema schema = new DatasetSchema(); + private ArrayList writers = new ArrayList<>(); + + public Logger(DatasetSchema schema) { + this.schema = schema; + } + + public void checkWriter(T Writer) { + // Checks if a writer is configured correctly for this class + // 
Question: why is this empty but not an abstract? + } + + public void appendWriter(String name) { + if (name == null || name.isEmpty()) { + throw new IllegalArgumentException("Writer name cannot be empty"); } - public void checkWriter(T Writer) { - // Checks if a writer is configured correctly for this class - // Question: why is this empty but not an abstract? + Writer writer = WritersRegistry.get(name); + if (writer == null) { + throw new IllegalArgumentException("Writer " + name + " is not registered"); } - public void appendWriter(String name){ - if(name == null || name.isEmpty()){ - throw new IllegalArgumentException("Writer name cannot be empty"); - } - - Writer writer = WritersRegistry.get(name); - if(writer == null){ - throw new IllegalArgumentException("Writer " + name + " is not registered"); - } + appendWriter(writer); + } - appendWriter(writer); + public void appendWriter(Writer writer) { + if (writer == null) { + throw new IllegalArgumentException("Writer cannot be null"); } - public void appendWriter(Writer writer){ - if(writer == null){ - throw new IllegalArgumentException("Writer cannot be null"); - } + checkWriter(writer); + writers.add(writer); + } - checkWriter(writer); - writers.add(writer); - } + protected abstract ArrayList getMatchingProfiles(Object data); - protected abstract ArrayList getMatchingProfiles(Object data); - protected abstract ArrayList getMatchingProfiles(Map data); + protected abstract ArrayList getMatchingProfiles(Map data); - @Override - public void close(){ - isClosed = true; - } + @Override + public void close() { + isClosed = true; + } - public ResultSet log(HashMap data){ - // What type of data is the object? Right now we don't process that in track. 
- if(isClosed){ - throw new IllegalStateException("Logger is closed"); - } else if(data == null){ - throw new IllegalArgumentException("Data cannot be null"); - } - - // TODO: implement segment processing here + public ResultSet log(HashMap data) { + // What type of data is the object? Right now we don't process that in track. + if (isClosed) { + throw new IllegalStateException("Logger is closed"); + } else if (data == null) { + throw new IllegalArgumentException("Data cannot be null"); + } - ArrayList profiles = getMatchingProfiles(data); - for(DatasetProfile profile : profiles){ - profile.track(data); - } + // TODO: implement segment processing here - // Question: Why does this only return the first profile? IS this - // getting ready for multiple profiles later on? - return new ProfileResultSet(profiles.get(0)); + ArrayList profiles = getMatchingProfiles(data); + for (DatasetProfile profile : profiles) { + profile.track(data); } + + // Question: Why does this only return the first profile? IS this + // getting ready for multiple profiles later on? 
+ return new ProfileResultSet(profiles.get(0)); + } } diff --git a/java/core/src/main/java/com/whylogs/api/logger/TransientLogger.java b/java/core/src/main/java/com/whylogs/api/logger/TransientLogger.java index d00044cb3a..77c9645110 100644 --- a/java/core/src/main/java/com/whylogs/api/logger/TransientLogger.java +++ b/java/core/src/main/java/com/whylogs/api/logger/TransientLogger.java @@ -2,33 +2,31 @@ import com.whylogs.core.DatasetProfile; import com.whylogs.core.schemas.DatasetSchema; -import lombok.*; -import org.apache.commons.lang3.NotImplementedException; - import java.util.ArrayList; import java.util.Map; +import lombok.*; @NoArgsConstructor @Getter @EqualsAndHashCode(callSuper = false) @ToString -public class TransientLogger extends Logger{ - public TransientLogger(DatasetSchema schema) { - super(schema); - } +public class TransientLogger extends Logger { + public TransientLogger(DatasetSchema schema) { + super(schema); + } - @Override - protected ArrayList getMatchingProfiles(Object data) { - // In this case, we don't have any profiles to match against - ArrayList profiles = new ArrayList<>(); - DatasetProfile profile = new DatasetProfile(getSchema()); - profiles.add(profile); - return profiles; - } + @Override + protected ArrayList getMatchingProfiles(Object data) { + // In this case, we don't have any profiles to match against + ArrayList profiles = new ArrayList<>(); + DatasetProfile profile = new DatasetProfile(getSchema()); + profiles.add(profile); + return profiles; + } - @Override - protected ArrayList getMatchingProfiles(Map data) { - // In this case, we don't have any profiles to match against - return getMatchingProfiles((Object) data); - } + @Override + protected ArrayList getMatchingProfiles(Map data) { + // In this case, we don't have any profiles to match against + return getMatchingProfiles((Object) data); + } } diff --git a/java/core/src/main/java/com/whylogs/api/logger/resultsets/ProfileResultSet.java 
b/java/core/src/main/java/com/whylogs/api/logger/resultsets/ProfileResultSet.java index ead1df320e..fe9649aaf2 100644 --- a/java/core/src/main/java/com/whylogs/api/logger/resultsets/ProfileResultSet.java +++ b/java/core/src/main/java/com/whylogs/api/logger/resultsets/ProfileResultSet.java @@ -12,7 +12,7 @@ public class ProfileResultSet extends ResultSet { @NonNull private final DatasetProfile profile; - public ProfileResultSet(DatasetProfile profile) { + public ProfileResultSet(@NonNull DatasetProfile profile) { super(); this.profile = profile; } diff --git a/java/core/src/main/java/com/whylogs/api/logger/rollingLogger/Scheduler.java b/java/core/src/main/java/com/whylogs/api/logger/rollingLogger/Scheduler.java index b00b8ba09a..6f3878340a 100644 --- a/java/core/src/main/java/com/whylogs/api/logger/rollingLogger/Scheduler.java +++ b/java/core/src/main/java/com/whylogs/api/logger/rollingLogger/Scheduler.java @@ -1,61 +1,61 @@ package com.whylogs.api.logger.rollingLogger; -import lombok.EqualsAndHashCode; -import lombok.Getter; -import lombok.ToString; - import java.util.concurrent.Executors; import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.TimeUnit; +import lombok.EqualsAndHashCode; +import lombok.Getter; +import lombok.ToString; @Getter @EqualsAndHashCode @ToString public class Scheduler { - // Multithreading schedule. - // Schedule a function to be called repeatedly based on a schedule - private ScheduledExecutorService scheduledService; - private float initial; - private boolean ranInitial = false; - private float interval; - private Runnable func; - private boolean isRunning = false; - private String[] args; - // TODO: figure out args an dkwards - - public Scheduler(float initial, float interval, Runnable func, String[] args) { - this.initial = initial; - this.interval = interval; - this.func = func; - this.args = args; - this.start(); + // Multithreading schedule. 
+ // Schedule a function to be called repeatedly based on a schedule + private ScheduledExecutorService scheduledService; + private float initial; + private boolean ranInitial = false; + private float interval; + private Runnable func; + private boolean isRunning = false; + private String[] args; + // TODO: figure out args an dkwards + + public Scheduler(float initial, float interval, Runnable func, String[] args) { + this.initial = initial; + this.interval = interval; + this.func = func; + this.args = args; + this.start(); + } + + private void run() { + // TODO: Looking at this I think this is wrong to have lines 35 & 36 + this.isRunning = false; + this.start(); // Question: why do we need to start again? + this.func.run(); // TODO: figure out args and kwargs + } + + public void start() { + if (this.isRunning) { + return; } - private void run(){ - // TODO: Looking at this I think this is wrong to have lines 35 & 36 - this.isRunning = false; - this.start(); // Question: why do we need to start again? 
- this.func.run(); // TODO: figure out args and kwargs + float initial = 0; + if (!this.ranInitial) { + initial = this.getInitial(); + this.ranInitial = true; } - public void start(){ - if (this.isRunning){ - return; - } - - float initial = 0; - if(!this.ranInitial){ - initial = this.getInitial(); - this.ranInitial = true; - } + this.scheduledService = Executors.newSingleThreadScheduledExecutor(); + this.scheduledService.scheduleAtFixedRate( + this::run, (long) initial, (long) this.interval, TimeUnit.SECONDS); + this.isRunning = true; + } - this.scheduledService = Executors.newSingleThreadScheduledExecutor(); - this.scheduledService.scheduleAtFixedRate(this::run, (long) initial, (long) this.interval, TimeUnit.SECONDS); - this.isRunning = true; - } - - public void stop(){ - this.scheduledService.shutdown(); - this.isRunning = false; - } + public void stop() { + this.scheduledService.shutdown(); + this.isRunning = false; + } } diff --git a/java/core/src/main/java/com/whylogs/api/logger/rollingLogger/TimedRollingLogger.java b/java/core/src/main/java/com/whylogs/api/logger/rollingLogger/TimedRollingLogger.java index d9ab23b822..5b2618b32e 100644 --- a/java/core/src/main/java/com/whylogs/api/logger/rollingLogger/TimedRollingLogger.java +++ b/java/core/src/main/java/com/whylogs/api/logger/rollingLogger/TimedRollingLogger.java @@ -4,163 +4,169 @@ import com.whylogs.api.writer.Writer; import com.whylogs.core.DatasetProfile; import com.whylogs.core.schemas.DatasetSchema; -import com.whylogs.core.views.DatasetProfileView; -import org.apache.commons.lang3.NotImplementedException; - -import java.lang.reflect.Array; import java.time.Instant; import java.util.ArrayList; import java.util.Map; -import java.util.Optional; import java.util.concurrent.Callable; -public class TimedRollingLogger extends Logger implements AutoCloseable{ - // A rolling logger that continuously rotates files based on time - private DatasetSchema schema; - private String baseName; - private String 
fileExtension; - private int interval; - private Character when = 'H'; // TODO: Make the Literals of S M H D - private boolean utc = false; - private boolean align = true; - private boolean skipEmpty = false; - private String suffix; - - private DatasetProfile currentProfile; - private Callable callback; // TODO: this isn't the write signature - private Scheduler scheduler; - private int currentBatchTimestamp; - - // TODO: callback: Optional[Callable[[Writer, DatasetProfileView, str], None]] - public TimedRollingLogger(DatasetSchema schema, String baseName, String fileExtension, int interval) { - this(schema, baseName, fileExtension, interval, 'H', false, true, false); +public class TimedRollingLogger extends Logger implements AutoCloseable { + // A rolling logger that continuously rotates files based on time + private DatasetSchema schema; + private String baseName; + private String fileExtension; + private int interval; + private Character when = 'H'; // TODO: Make the Literals of S M H D + private boolean utc = false; + private boolean align = true; + private boolean skipEmpty = false; + private String suffix; + + private DatasetProfile currentProfile; + private Callable callback; // TODO: this isn't the write signature + private Scheduler scheduler; + private int currentBatchTimestamp; + + // TODO: callback: Optional[Callable[[Writer, DatasetProfileView, str], None]] + public TimedRollingLogger( + DatasetSchema schema, String baseName, String fileExtension, int interval) { + this(schema, baseName, fileExtension, interval, 'H', false, true, false); + } + + public TimedRollingLogger( + DatasetSchema schema, String baseName, String fileExtension, int interval, Character when) { + this(schema, baseName, fileExtension, interval, when, false, true, false); + } + + public TimedRollingLogger( + DatasetSchema schema, + String baseName, + String fileExtension, + int interval, + Character when, + boolean utc, + boolean align, + boolean skipEmpty) { + super(schema); + + 
this.schema = schema; + this.baseName = baseName; + this.fileExtension = fileExtension; + this.interval = interval; + this.when = Character.toUpperCase(when); + this.utc = utc; + this.align = align; + this.skipEmpty = skipEmpty; + + if (this.baseName == null || this.baseName.isEmpty()) { + this.baseName = "profile"; } - - public TimedRollingLogger(DatasetSchema schema, String baseName, String fileExtension, int interval, Character when) { - this(schema, baseName, fileExtension, interval, when, false, true, false); + if (this.fileExtension == null || this.fileExtension.isEmpty()) { + this.fileExtension = ".bin"; // TODO: should we make this .whylogs? } - public TimedRollingLogger(DatasetSchema schema, String baseName, String fileExtension, int interval, Character when, boolean utc, boolean align, boolean skipEmpty) { - super(schema); - - this.schema = schema; - this.baseName = baseName; - this.fileExtension = fileExtension; - this.interval = interval; - this.when = Character.toUpperCase(when); - this.utc = utc; - this.align = align; - this.skipEmpty = skipEmpty; - - if(this.baseName == null || this.baseName.isEmpty()) { - this.baseName = "profile"; - } - if(this.fileExtension == null || this.fileExtension.isEmpty()) { - this.fileExtension = ".bin"; // TODO: should we make this .whylogs? 
- } - - switch(this.when) { - case 'S': - this.interval = 1; // one second - this.suffix = "%Y-%m-%d_%H-%M-%S"; - break; - case 'M': - this.interval = 60; // one minute - this.suffix = "%Y-%m-%d_%H-%M"; - break; - case 'H': - this.interval = 60 * 60; // one hour - this.suffix = "%Y-%m-%d_%H"; - break; - case 'D': - this.interval = 60 * 60 * 24; // one day - this.suffix = "%Y-%m-%d"; - break; - default: - throw new IllegalArgumentException("Invalid value for when: " + this.when); - } - - this.interval = this.interval * interval; /// multiply by units requested - this.utc = utc; - - Instant currentTime = Instant.now(); - this.currentBatchTimestamp = this.computeCurrentBatchTimestamp(currentTime.getEpochSecond()); - this.currentProfile = new DatasetProfile(schema, currentTime, currentTime); - int initialRunAfter = (this.currentBatchTimestamp + this.interval) - (int) currentTime.getEpochSecond(); - if(initialRunAfter < 0) { - // TODO: Add logging error as this shouldn't happen - initialRunAfter = this.interval; - } - - this.scheduler = new Scheduler(initialRunAfter, this.interval, this::doRollover, null); - this.scheduler.start(); - - // autocloseable closes at end + switch (this.when) { + case 'S': + this.interval = 1; // one second + this.suffix = "%Y-%m-%d_%H-%M-%S"; + break; + case 'M': + this.interval = 60; // one minute + this.suffix = "%Y-%m-%d_%H-%M"; + break; + case 'H': + this.interval = 60 * 60; // one hour + this.suffix = "%Y-%m-%d_%H"; + break; + case 'D': + this.interval = 60 * 60 * 24; // one day + this.suffix = "%Y-%m-%d"; + break; + default: + throw new IllegalArgumentException("Invalid value for when: " + this.when); } - private int computeCurrentBatchTimestamp(long nowEpoch) { - int roundedNow = (int) nowEpoch; - if(this.align){ - return (Math.floorDiv((roundedNow - 1), this.interval)) * this.interval + this.interval; - } - return roundedNow; + this.interval = this.interval * interval; // / multiply by units requested + this.utc = utc; + + Instant 
currentTime = Instant.now(); + this.currentBatchTimestamp = this.computeCurrentBatchTimestamp(currentTime.getEpochSecond()); + this.currentProfile = new DatasetProfile(schema, currentTime, currentTime); + int initialRunAfter = + (this.currentBatchTimestamp + this.interval) - (int) currentTime.getEpochSecond(); + if (initialRunAfter < 0) { + // TODO: Add logging error as this shouldn't happen + initialRunAfter = this.interval; } - public void checkWriter(Writer writer){ - writer.check_interval(this.interval); - } + this.scheduler = new Scheduler(initialRunAfter, this.interval, this::doRollover, null); + this.scheduler.start(); - private ArrayList getMatchingProfiles(){ - ArrayList matchingProfiles = new ArrayList<>(); - matchingProfiles.add(this.currentProfile); - return matchingProfiles; - } + // autocloseable closes at end + } - @Override - protected ArrayList getMatchingProfiles(Object data) { - return this.getMatchingProfiles(); + private int computeCurrentBatchTimestamp(long nowEpoch) { + int roundedNow = (int) nowEpoch; + if (this.align) { + return (Math.floorDiv((roundedNow - 1), this.interval)) * this.interval + this.interval; } - - @Override - protected ArrayList getMatchingProfiles(Map data) { - return this.getMatchingProfiles(); + return roundedNow; + } + + public void checkWriter(Writer writer) { + writer.check_interval(this.interval); + } + + private ArrayList getMatchingProfiles() { + ArrayList matchingProfiles = new ArrayList<>(); + matchingProfiles.add(this.currentProfile); + return matchingProfiles; + } + + @Override + protected ArrayList getMatchingProfiles(Object data) { + return this.getMatchingProfiles(); + } + + @Override + protected ArrayList getMatchingProfiles(Map data) { + return this.getMatchingProfiles(); + } + + private void doRollover() { + if (this.isClosed()) { + return; } - private void doRollover() { - if(this.isClosed()) { - return; - } + DatasetProfile oldProfile = this.currentProfile; + Instant currentTime = Instant.now(); + 
this.currentBatchTimestamp = this.computeCurrentBatchTimestamp(currentTime.getEpochSecond()); + this.currentProfile = new DatasetProfile(schema, currentTime, currentTime); - DatasetProfile oldProfile = this.currentProfile; - Instant currentTime = Instant.now(); - this.currentBatchTimestamp = this.computeCurrentBatchTimestamp(currentTime.getEpochSecond()); - this.currentProfile = new DatasetProfile(schema, currentTime, currentTime); + this.flush(oldProfile); + } - this.flush(oldProfile); + private void flush(DatasetProfile profile) { + if (profile == null) { + return; + } else if (this.skipEmpty && profile.isEmpty()) { + // set logger logger.debug("skip_empty is set. Skipping empty profiles") + return; } - private void flush(DatasetProfile profile) { - if (profile == null) { - return; - } else if (this.skipEmpty && profile.isEmpty()) { - // set logger logger.debug("skip_empty is set. Skipping empty profiles") - return; - } - - // get time to get name - String timedFileName = this.baseName + "_" + this.currentBatchTimestamp + this.fileExtension; + // get time to get name + String timedFileName = this.baseName + "_" + this.currentBatchTimestamp + this.fileExtension; - // Sleep while the profile is active? - // TODO: this is where we call the store list.write - // TODO: go through through the writers - } + // Sleep while the profile is active? 
+ // TODO: this is where we call the store list.write + // TODO: go through through the writers + } - public void close() { - // TODO log that we are closing the writer - if(!this.isClosed()) { - // Autoclose handles the isCLosed() - this.scheduler.stop(); - this.flush(this.currentProfile); - } + public void close() { + // TODO log that we are closing the writer + if (!this.isClosed()) { + // Autoclose handles the isCLosed() + this.scheduler.stop(); + this.flush(this.currentProfile); } + } } diff --git a/java/core/src/main/java/com/whylogs/api/writer/Writable.java b/java/core/src/main/java/com/whylogs/api/writer/Writable.java index ba9651ebf7..744d609f82 100644 --- a/java/core/src/main/java/com/whylogs/api/writer/Writable.java +++ b/java/core/src/main/java/com/whylogs/api/writer/Writable.java @@ -5,18 +5,18 @@ public interface Writable { - static FileWriter safeOpenWrite(String path) { - // Open 'path' for writing, creating any parent directories as needed - File file = new File(path); - FileWriter writer = null; - try { - writer = new FileWriter(file, true); - } catch (Exception e) { - System.out.println("Error: " + e); - e.printStackTrace(); - } - - // this close happens latter on - return writer; + static FileWriter safeOpenWrite(String path) { + // Open 'path' for writing, creating any parent directories as needed + File file = new File(path); + FileWriter writer = null; + try { + writer = new FileWriter(file, true); + } catch (Exception e) { + System.out.println("Error: " + e); + e.printStackTrace(); } + + // this close happens latter on + return writer; + } } diff --git a/java/core/src/main/java/com/whylogs/api/writer/Writer.java b/java/core/src/main/java/com/whylogs/api/writer/Writer.java index 13ea4ead2a..0af5504dd5 100644 --- a/java/core/src/main/java/com/whylogs/api/writer/Writer.java +++ b/java/core/src/main/java/com/whylogs/api/writer/Writer.java @@ -1,16 +1,17 @@ package com.whylogs.api.writer; -import javax.swing.text.html.Option; import 
java.util.Optional; // TODO: this is a temp holding class for logger that will be implmented next public abstract class Writer { - /*Validate an interval configuration for a given writer. - Some writer only accepts certain interval configuration. By default, this should return True for a valid - non-negative interval.*/ - public void check_interval(int interval_seconds) { - // TODO: implement (not implemented in java either - } - public abstract void write(Writable file, Optional dest); - public abstract T option(T writer); + /*Validate an interval configuration for a given writer. + Some writer only accepts certain interval configuration. By default, this should return True for a valid + non-negative interval.*/ + public void check_interval(int interval_seconds) { + // TODO: implement (not implemented in java either + } + + public abstract void write(Writable file, Optional dest); + + public abstract T option(T writer); } diff --git a/java/core/src/main/java/com/whylogs/api/writer/WritersRegistry.java b/java/core/src/main/java/com/whylogs/api/writer/WritersRegistry.java index 0f9d23f06a..9f3a0630a4 100644 --- a/java/core/src/main/java/com/whylogs/api/writer/WritersRegistry.java +++ b/java/core/src/main/java/com/whylogs/api/writer/WritersRegistry.java @@ -1,8 +1,8 @@ package com.whylogs.api.writer; public class WritersRegistry { - public static T get(String name) { - // TODO: Not implemented yet - return null; - } + public static T get(String name) { + // TODO: Not implemented yet + return null; + } } diff --git a/java/core/src/test/java/com/whylogs/api/logger/TestLogger.java b/java/core/src/test/java/com/whylogs/api/logger/TestLogger.java index 2ee98fce3b..e553eaa83a 100644 --- a/java/core/src/test/java/com/whylogs/api/logger/TestLogger.java +++ b/java/core/src/test/java/com/whylogs/api/logger/TestLogger.java @@ -2,37 +2,35 @@ import com.whylogs.api.logger.resultsets.ResultSet; import com.whylogs.core.DatasetProfile; +import java.util.HashMap; import 
org.testng.Assert; import org.testng.annotations.Test; -import java.util.HashMap; - @Test public class TestLogger { - @Test - public void testBasicLog(){ - HashMap data = createBasicLog(); - - ResultSet results = new TransientLogger().log(data); - Assert.assertNotNull(results); + @Test + public void testBasicLog() { + HashMap data = createBasicLog(); - DatasetProfile profile = results.profile().get(); - Assert.assertNotNull(profile); + ResultSet results = new TransientLogger().log(data); + Assert.assertNotNull(results); - //profile.getColumns().get("col1"); - Assert.assertEquals(profile.getColumns().get("col1").getMetrics().size(), 1); - Assert.assertEquals(profile.getColumns().get("col1").getSuccessCount(), 1); - Assert.assertEquals(profile.getColumns().get("col1").getSchema().getType(), Integer.class); + DatasetProfile profile = results.profile().get(); + Assert.assertNotNull(profile); - } + // profile.getColumns().get("col1"); + Assert.assertEquals(profile.getColumns().get("col1").getMetrics().size(), 1); + Assert.assertEquals(profile.getColumns().get("col1").getSuccessCount(), 1); + Assert.assertEquals(profile.getColumns().get("col1").getSchema().getType(), Integer.class); + } - private HashMap createBasicLog() { - HashMap data = new HashMap<>(); - data.put("col1", 2); - data.put("col2", 3); - data.put("col3", 100); + private HashMap createBasicLog() { + HashMap data = new HashMap<>(); + data.put("col1", 2); + data.put("col2", 3); + data.put("col3", 100); - return data; - } + return data; + } } diff --git a/java/core/src/test/java/com/whylogs/api/logger/TestRollingLogger.java b/java/core/src/test/java/com/whylogs/api/logger/TestRollingLogger.java index aec3eda1fc..bda0a07fec 100644 --- a/java/core/src/test/java/com/whylogs/api/logger/TestRollingLogger.java +++ b/java/core/src/test/java/com/whylogs/api/logger/TestRollingLogger.java @@ -1,31 +1,27 @@ package com.whylogs.api.logger; import com.whylogs.api.logger.rollingLogger.TimedRollingLogger; -import 
com.whylogs.core.DatasetProfile; import com.whylogs.core.schemas.DatasetSchema; -import com.whylogs.core.views.DatasetProfileView; -import org.testng.Assert; -import org.testng.annotations.Test; - import java.util.HashMap; +import org.testng.annotations.Test; @Test public class TestRollingLogger { - @Test - public void testClosing(){ - HashMap data = createBasicLog(); - TimedRollingLogger logger = new TimedRollingLogger(new DatasetSchema(), "test", ".bin", 1, 'M'); - logger.log(data); - // TODO: testing needs the writer - } + @Test + public void testClosing() { + HashMap data = createBasicLog(); + TimedRollingLogger logger = new TimedRollingLogger(new DatasetSchema(), "test", ".bin", 1, 'M'); + logger.log(data); + // TODO: testing needs the writer + } - private HashMap createBasicLog() { - HashMap data = new HashMap<>(); - data.put("col1", 2); - data.put("col2", 3); - data.put("col3", 100); + private HashMap createBasicLog() { + HashMap data = new HashMap<>(); + data.put("col1", 2); + data.put("col2", 3); + data.put("col3", 100); - return data; - } + return data; + } } From ad59752cf7f831b581f58ebe54306c3cf9222571 Mon Sep 17 00:00:00 2001 From: TheMellyBee Date: Thu, 27 Oct 2022 11:38:16 -0700 Subject: [PATCH 62/71] Updates resultset --- .../logger/resultsets/ProfileResultSet.java | 4 ++-- .../api/logger/resultsets/ResultSet.java | 20 ++++++++----------- .../api/logger/resultsets/ViewResultSet.java | 4 +++- 3 files changed, 13 insertions(+), 15 deletions(-) diff --git a/java/core/src/main/java/com/whylogs/api/logger/resultsets/ProfileResultSet.java b/java/core/src/main/java/com/whylogs/api/logger/resultsets/ProfileResultSet.java index fe9649aaf2..e720ff9f06 100644 --- a/java/core/src/main/java/com/whylogs/api/logger/resultsets/ProfileResultSet.java +++ b/java/core/src/main/java/com/whylogs/api/logger/resultsets/ProfileResultSet.java @@ -1,4 +1,4 @@ -package com.whylogs.api.logger.resultsets; +package com.whylogs.api.logger.resultSets; import 
com.whylogs.core.DatasetProfile; import com.whylogs.core.views.DatasetProfileView; @@ -12,7 +12,7 @@ public class ProfileResultSet extends ResultSet { @NonNull private final DatasetProfile profile; - public ProfileResultSet(@NonNull DatasetProfile profile) { + public ProfileResultSet(DatasetProfile profile) { super(); this.profile = profile; } diff --git a/java/core/src/main/java/com/whylogs/api/logger/resultsets/ResultSet.java b/java/core/src/main/java/com/whylogs/api/logger/resultsets/ResultSet.java index b31df25d22..70787d6fef 100644 --- a/java/core/src/main/java/com/whylogs/api/logger/resultsets/ResultSet.java +++ b/java/core/src/main/java/com/whylogs/api/logger/resultsets/ResultSet.java @@ -1,4 +1,4 @@ -package com.whylogs.api.logger.resultsets; +package com.whylogs.api.logger.resultSets; import com.whylogs.core.DatasetProfile; import com.whylogs.core.errors.Error; @@ -29,17 +29,13 @@ public abstract class ResultSet { // TODO: Come back for ModelPerformanceMetrics + // Question: why is the python addMetrics when it only adds the one? 
public void addMetric(String name, Metric metric) throws Error { - DatasetProfile profile = - this.profile() - .orElseThrow( - () -> - new Error( - "Cannot add " - + name - + " metric " - + metric - + " to a result set without a profile")); - profile.addMetric(name, metric); + if (!this.profile().isPresent()) { + throw new Error( + "Cannot add " + name + " metric " + metric + " to a result set without a profile"); + } + + this.profile().get().addMetric(name, metric); } } diff --git a/java/core/src/main/java/com/whylogs/api/logger/resultsets/ViewResultSet.java b/java/core/src/main/java/com/whylogs/api/logger/resultsets/ViewResultSet.java index f2a4c940e8..17d4aef7ce 100644 --- a/java/core/src/main/java/com/whylogs/api/logger/resultsets/ViewResultSet.java +++ b/java/core/src/main/java/com/whylogs/api/logger/resultsets/ViewResultSet.java @@ -1,4 +1,4 @@ -package com.whylogs.api.logger.resultsets; +package com.whylogs.api.logger.resultSets; import com.whylogs.core.DatasetProfile; import com.whylogs.core.views.DatasetProfileView; @@ -11,10 +11,12 @@ public class ViewResultSet extends ResultSet { @NonNull private final DatasetProfileView view; public ViewResultSet(@NonNull DatasetProfileView view) { + super(); this.view = view; } public ViewResultSet(DatasetProfile profile) { + super(); this.view = profile.view(); } From 3f413cd1fbb014511b7fbcfcbc1b78fb8cbf85c8 Mon Sep 17 00:00:00 2001 From: TheMellyBee Date: Thu, 27 Oct 2022 11:39:36 -0700 Subject: [PATCH 63/71] fixes package error on result set --- .../com/whylogs/api/logger/resultsets/ProfileResultSet.java | 4 ++-- .../java/com/whylogs/api/logger/resultsets/ResultSet.java | 2 +- .../java/com/whylogs/api/logger/resultsets/ViewResultSet.java | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/java/core/src/main/java/com/whylogs/api/logger/resultsets/ProfileResultSet.java b/java/core/src/main/java/com/whylogs/api/logger/resultsets/ProfileResultSet.java index e720ff9f06..fe9649aaf2 100644 --- 
a/java/core/src/main/java/com/whylogs/api/logger/resultsets/ProfileResultSet.java +++ b/java/core/src/main/java/com/whylogs/api/logger/resultsets/ProfileResultSet.java @@ -1,4 +1,4 @@ -package com.whylogs.api.logger.resultSets; +package com.whylogs.api.logger.resultsets; import com.whylogs.core.DatasetProfile; import com.whylogs.core.views.DatasetProfileView; @@ -12,7 +12,7 @@ public class ProfileResultSet extends ResultSet { @NonNull private final DatasetProfile profile; - public ProfileResultSet(DatasetProfile profile) { + public ProfileResultSet(@NonNull DatasetProfile profile) { super(); this.profile = profile; } diff --git a/java/core/src/main/java/com/whylogs/api/logger/resultsets/ResultSet.java b/java/core/src/main/java/com/whylogs/api/logger/resultsets/ResultSet.java index 70787d6fef..f9a4af7c0a 100644 --- a/java/core/src/main/java/com/whylogs/api/logger/resultsets/ResultSet.java +++ b/java/core/src/main/java/com/whylogs/api/logger/resultsets/ResultSet.java @@ -1,4 +1,4 @@ -package com.whylogs.api.logger.resultSets; +package com.whylogs.api.logger.resultsets; import com.whylogs.core.DatasetProfile; import com.whylogs.core.errors.Error; diff --git a/java/core/src/main/java/com/whylogs/api/logger/resultsets/ViewResultSet.java b/java/core/src/main/java/com/whylogs/api/logger/resultsets/ViewResultSet.java index 17d4aef7ce..476a94beb8 100644 --- a/java/core/src/main/java/com/whylogs/api/logger/resultsets/ViewResultSet.java +++ b/java/core/src/main/java/com/whylogs/api/logger/resultsets/ViewResultSet.java @@ -1,4 +1,4 @@ -package com.whylogs.api.logger.resultSets; +package com.whylogs.api.logger.resultsets; import com.whylogs.core.DatasetProfile; import com.whylogs.core.views.DatasetProfileView; From 865fa41b765034ec15fdc15dce3ca0d8a1f98f41 Mon Sep 17 00:00:00 2001 From: TheMellyBee Date: Thu, 27 Oct 2022 11:56:00 -0700 Subject: [PATCH 64/71] removing duplicate foler --- .../logger/resultSets/ProfileResultSet.java | 27 ------------ 
.../api/logger/resultSets/ResultSet.java | 41 ------------------- .../api/logger/resultSets/ViewResultSet.java | 32 --------------- 3 files changed, 100 deletions(-) delete mode 100644 java/core/src/main/java/com/whylogs/api/logger/resultSets/ProfileResultSet.java delete mode 100644 java/core/src/main/java/com/whylogs/api/logger/resultSets/ResultSet.java delete mode 100644 java/core/src/main/java/com/whylogs/api/logger/resultSets/ViewResultSet.java diff --git a/java/core/src/main/java/com/whylogs/api/logger/resultSets/ProfileResultSet.java b/java/core/src/main/java/com/whylogs/api/logger/resultSets/ProfileResultSet.java deleted file mode 100644 index e720ff9f06..0000000000 --- a/java/core/src/main/java/com/whylogs/api/logger/resultSets/ProfileResultSet.java +++ /dev/null @@ -1,27 +0,0 @@ -package com.whylogs.api.logger.resultSets; - -import com.whylogs.core.DatasetProfile; -import com.whylogs.core.views.DatasetProfileView; -import java.util.Optional; -import lombok.Data; -import lombok.EqualsAndHashCode; -import lombok.NonNull; - -@EqualsAndHashCode(callSuper = true) -@Data -public class ProfileResultSet extends ResultSet { - @NonNull private final DatasetProfile profile; - - public ProfileResultSet(DatasetProfile profile) { - super(); - this.profile = profile; - } - - public Optional profile() { - return Optional.of(this.profile); - } - - public Optional view() { - return Optional.of(this.profile.view()); - } -} diff --git a/java/core/src/main/java/com/whylogs/api/logger/resultSets/ResultSet.java b/java/core/src/main/java/com/whylogs/api/logger/resultSets/ResultSet.java deleted file mode 100644 index 70787d6fef..0000000000 --- a/java/core/src/main/java/com/whylogs/api/logger/resultSets/ResultSet.java +++ /dev/null @@ -1,41 +0,0 @@ -package com.whylogs.api.logger.resultSets; - -import com.whylogs.core.DatasetProfile; -import com.whylogs.core.errors.Error; -import com.whylogs.core.metrics.Metric; -import com.whylogs.core.views.DatasetProfileView; -import 
java.util.Optional; -import lombok.Data; -import lombok.NoArgsConstructor; - -/** - * A holder object for profiling results. - * - *

A whylogs.log call can result in more than one profile. This wrapper class simplifies the - * navigation among these profiles. - * - *

Note that currently we only hold one profile but we're planning to add other kinds of profiles - * such as segmented profiles here. - */ -@Data -@NoArgsConstructor -public abstract class ResultSet { - - // TODO: implement read and write when I make the reader and writer - - public abstract Optional view(); - - public abstract Optional profile(); - - // TODO: Come back for ModelPerformanceMetrics - - // Question: why is the python addMetrics when it only adds the one? - public void addMetric(String name, Metric metric) throws Error { - if (!this.profile().isPresent()) { - throw new Error( - "Cannot add " + name + " metric " + metric + " to a result set without a profile"); - } - - this.profile().get().addMetric(name, metric); - } -} diff --git a/java/core/src/main/java/com/whylogs/api/logger/resultSets/ViewResultSet.java b/java/core/src/main/java/com/whylogs/api/logger/resultSets/ViewResultSet.java deleted file mode 100644 index 17d4aef7ce..0000000000 --- a/java/core/src/main/java/com/whylogs/api/logger/resultSets/ViewResultSet.java +++ /dev/null @@ -1,32 +0,0 @@ -package com.whylogs.api.logger.resultSets; - -import com.whylogs.core.DatasetProfile; -import com.whylogs.core.views.DatasetProfileView; -import java.util.Optional; -import lombok.*; - -@EqualsAndHashCode(callSuper = true) -@Data -public class ViewResultSet extends ResultSet { - @NonNull private final DatasetProfileView view; - - public ViewResultSet(@NonNull DatasetProfileView view) { - super(); - this.view = view; - } - - public ViewResultSet(DatasetProfile profile) { - super(); - this.view = profile.view(); - } - - @Override - public Optional view() { - return Optional.of(this.view); - } - - @Override - public Optional profile() { - throw new Error("No profile available for a view result set"); - } -} From e2057e2f11efff94260fa61e1961ee443f440344 Mon Sep 17 00:00:00 2001 From: TheMellyBee Date: Thu, 27 Oct 2022 12:32:39 -0700 Subject: [PATCH 65/71] Accidentally overroad ResultSet changes. Fixes that. 
--- .../com/whylogs/api/logger/BasicCache.java | 3 --- .../api/logger/resultsets/ResultSet.java | 18 +++++++++++------- .../api/logger/resultsets/ViewResultSet.java | 2 -- 3 files changed, 11 insertions(+), 12 deletions(-) delete mode 100644 java/core/src/main/java/com/whylogs/api/logger/BasicCache.java diff --git a/java/core/src/main/java/com/whylogs/api/logger/BasicCache.java b/java/core/src/main/java/com/whylogs/api/logger/BasicCache.java deleted file mode 100644 index c3949032d1..0000000000 --- a/java/core/src/main/java/com/whylogs/api/logger/BasicCache.java +++ /dev/null @@ -1,3 +0,0 @@ -package com.whylogs.api.logger; - -public class BasicCache {} diff --git a/java/core/src/main/java/com/whylogs/api/logger/resultsets/ResultSet.java b/java/core/src/main/java/com/whylogs/api/logger/resultsets/ResultSet.java index f9a4af7c0a..ddf3854be2 100644 --- a/java/core/src/main/java/com/whylogs/api/logger/resultsets/ResultSet.java +++ b/java/core/src/main/java/com/whylogs/api/logger/resultsets/ResultSet.java @@ -29,13 +29,17 @@ public abstract class ResultSet { // TODO: Come back for ModelPerformanceMetrics - // Question: why is the python addMetrics when it only adds the one? 
public void addMetric(String name, Metric metric) throws Error { - if (!this.profile().isPresent()) { - throw new Error( - "Cannot add " + name + " metric " + metric + " to a result set without a profile"); - } - - this.profile().get().addMetric(name, metric); + DatasetProfile profile = + this.profile() + .orElseThrow( + () -> + new Error( + "Cannot add " + + name + + " metric " + + metric + + " to a result set without a profile")); + profile.addMetric(name, metric); } } diff --git a/java/core/src/main/java/com/whylogs/api/logger/resultsets/ViewResultSet.java b/java/core/src/main/java/com/whylogs/api/logger/resultsets/ViewResultSet.java index 476a94beb8..f2a4c940e8 100644 --- a/java/core/src/main/java/com/whylogs/api/logger/resultsets/ViewResultSet.java +++ b/java/core/src/main/java/com/whylogs/api/logger/resultsets/ViewResultSet.java @@ -11,12 +11,10 @@ public class ViewResultSet extends ResultSet { @NonNull private final DatasetProfileView view; public ViewResultSet(@NonNull DatasetProfileView view) { - super(); this.view = view; } public ViewResultSet(DatasetProfile profile) { - super(); this.view = profile.view(); } From 62713083ba9425c38a328f10eb882af4eadd2f35 Mon Sep 17 00:00:00 2001 From: TheMellyBee Date: Thu, 27 Oct 2022 12:37:33 -0700 Subject: [PATCH 66/71] linter --- .../api/logger/resultsets/ResultSet.java | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/java/core/src/main/java/com/whylogs/api/logger/resultsets/ResultSet.java b/java/core/src/main/java/com/whylogs/api/logger/resultsets/ResultSet.java index ddf3854be2..b31df25d22 100644 --- a/java/core/src/main/java/com/whylogs/api/logger/resultsets/ResultSet.java +++ b/java/core/src/main/java/com/whylogs/api/logger/resultsets/ResultSet.java @@ -31,15 +31,15 @@ public abstract class ResultSet { public void addMetric(String name, Metric metric) throws Error { DatasetProfile profile = - this.profile() - .orElseThrow( - () -> - new Error( - "Cannot add " - + name - + " 
metric " - + metric - + " to a result set without a profile")); + this.profile() + .orElseThrow( + () -> + new Error( + "Cannot add " + + name + + " metric " + + metric + + " to a result set without a profile")); profile.addMetric(name, metric); } } From 83cc077d7e32c2bcb1d3e3855e40b87a5d4828d0 Mon Sep 17 00:00:00 2001 From: TheMellyBee Date: Thu, 27 Oct 2022 12:37:33 -0700 Subject: [PATCH 67/71] linter --- .../com/whylogs/api/logger/resultsets/ResultSet.java | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/java/core/src/main/java/com/whylogs/api/logger/resultsets/ResultSet.java b/java/core/src/main/java/com/whylogs/api/logger/resultsets/ResultSet.java index ddf3854be2..8784f28b1d 100644 --- a/java/core/src/main/java/com/whylogs/api/logger/resultsets/ResultSet.java +++ b/java/core/src/main/java/com/whylogs/api/logger/resultsets/ResultSet.java @@ -30,16 +30,7 @@ public abstract class ResultSet { // TODO: Come back for ModelPerformanceMetrics public void addMetric(String name, Metric metric) throws Error { - DatasetProfile profile = - this.profile() - .orElseThrow( - () -> - new Error( - "Cannot add " - + name - + " metric " - + metric - + " to a result set without a profile")); + DatasetProfile profile = this.profile().orElseThrow(() -> new Error( "Cannot add " + name + " metric " + metric + " to a result set without a profile")); profile.addMetric(name, metric); } } From ac00bb0ac7d97103dc094c9273541e0859020a59 Mon Sep 17 00:00:00 2001 From: TheMellyBee Date: Fri, 28 Oct 2022 12:08:42 -0700 Subject: [PATCH 68/71] Error and comments adds --- .../whylogs/api/logger/rollingLogger/TimedRollingLogger.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/java/core/src/main/java/com/whylogs/api/logger/rollingLogger/TimedRollingLogger.java b/java/core/src/main/java/com/whylogs/api/logger/rollingLogger/TimedRollingLogger.java index 5b2618b32e..4df691135d 100644 --- 
a/java/core/src/main/java/com/whylogs/api/logger/rollingLogger/TimedRollingLogger.java +++ b/java/core/src/main/java/com/whylogs/api/logger/rollingLogger/TimedRollingLogger.java @@ -82,7 +82,7 @@ public TimedRollingLogger( this.suffix = "%Y-%m-%d"; break; default: - throw new IllegalArgumentException("Invalid value for when: " + this.when); + throw new IllegalArgumentException("Invalid value for when: " + this.when + ". Must be S, M, H, or D"); } this.interval = this.interval * interval; // / multiply by units requested @@ -105,7 +105,7 @@ public TimedRollingLogger( } private int computeCurrentBatchTimestamp(long nowEpoch) { - int roundedNow = (int) nowEpoch; + int roundedNow = (int) nowEpoch; // rounds by going from an long to a int (truncates) if (this.align) { return (Math.floorDiv((roundedNow - 1), this.interval)) * this.interval + this.interval; } From 2370f394b2f270fb7618766d4bdd691b2186429d Mon Sep 17 00:00:00 2001 From: TheMellyBee Date: Fri, 28 Oct 2022 12:09:10 -0700 Subject: [PATCH 69/71] linter --- .../whylogs/api/logger/rollingLogger/TimedRollingLogger.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/java/core/src/main/java/com/whylogs/api/logger/rollingLogger/TimedRollingLogger.java b/java/core/src/main/java/com/whylogs/api/logger/rollingLogger/TimedRollingLogger.java index 4df691135d..3bf403b760 100644 --- a/java/core/src/main/java/com/whylogs/api/logger/rollingLogger/TimedRollingLogger.java +++ b/java/core/src/main/java/com/whylogs/api/logger/rollingLogger/TimedRollingLogger.java @@ -82,7 +82,8 @@ public TimedRollingLogger( this.suffix = "%Y-%m-%d"; break; default: - throw new IllegalArgumentException("Invalid value for when: " + this.when + ". Must be S, M, H, or D"); + throw new IllegalArgumentException( + "Invalid value for when: " + this.when + ". 
Must be S, M, H, or D"); } this.interval = this.interval * interval; // / multiply by units requested From 15c56e0250f6db01eb881be46f33aacce8b187ce Mon Sep 17 00:00:00 2001 From: TheMellyBee Date: Fri, 28 Oct 2022 12:09:10 -0700 Subject: [PATCH 70/71] linter --- .../main/java/com/whylogs/api/logger/resultsets/ResultSet.java | 1 - .../whylogs/api/logger/rollingLogger/TimedRollingLogger.java | 3 ++- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/java/core/src/main/java/com/whylogs/api/logger/resultsets/ResultSet.java b/java/core/src/main/java/com/whylogs/api/logger/resultsets/ResultSet.java index 8784f28b1d..868cdc88f5 100644 --- a/java/core/src/main/java/com/whylogs/api/logger/resultsets/ResultSet.java +++ b/java/core/src/main/java/com/whylogs/api/logger/resultsets/ResultSet.java @@ -28,7 +28,6 @@ public abstract class ResultSet { public abstract Optional profile(); // TODO: Come back for ModelPerformanceMetrics - public void addMetric(String name, Metric metric) throws Error { DatasetProfile profile = this.profile().orElseThrow(() -> new Error( "Cannot add " + name + " metric " + metric + " to a result set without a profile")); profile.addMetric(name, metric); diff --git a/java/core/src/main/java/com/whylogs/api/logger/rollingLogger/TimedRollingLogger.java b/java/core/src/main/java/com/whylogs/api/logger/rollingLogger/TimedRollingLogger.java index 4df691135d..3bf403b760 100644 --- a/java/core/src/main/java/com/whylogs/api/logger/rollingLogger/TimedRollingLogger.java +++ b/java/core/src/main/java/com/whylogs/api/logger/rollingLogger/TimedRollingLogger.java @@ -82,7 +82,8 @@ public TimedRollingLogger( this.suffix = "%Y-%m-%d"; break; default: - throw new IllegalArgumentException("Invalid value for when: " + this.when + ". Must be S, M, H, or D"); + throw new IllegalArgumentException( + "Invalid value for when: " + this.when + ". 
Must be S, M, H, or D"); } this.interval = this.interval * interval; // / multiply by units requested From 9d0316ee25c6a6f1de7f1046d478b9bf4276a132 Mon Sep 17 00:00:00 2001 From: TheMellyBee Date: Fri, 28 Oct 2022 12:27:06 -0700 Subject: [PATCH 71/71] linter --- .../com/whylogs/api/logger/resultsets/ResultSet.java | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/java/core/src/main/java/com/whylogs/api/logger/resultsets/ResultSet.java b/java/core/src/main/java/com/whylogs/api/logger/resultsets/ResultSet.java index 868cdc88f5..668d6b0cdf 100644 --- a/java/core/src/main/java/com/whylogs/api/logger/resultsets/ResultSet.java +++ b/java/core/src/main/java/com/whylogs/api/logger/resultsets/ResultSet.java @@ -29,7 +29,16 @@ public abstract class ResultSet { // TODO: Come back for ModelPerformanceMetrics public void addMetric(String name, Metric metric) throws Error { - DatasetProfile profile = this.profile().orElseThrow(() -> new Error( "Cannot add " + name + " metric " + metric + " to a result set without a profile")); + DatasetProfile profile = + this.profile() + .orElseThrow( + () -> + new Error( + "Cannot add " + + name + + " metric " + + metric + + " to a result set without a profile")); profile.addMetric(name, metric); } }