diff --git a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergMetadata.java b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergMetadata.java index 23b4bb931cf3..0364a4f04216 100644 --- a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergMetadata.java +++ b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergMetadata.java @@ -383,6 +383,7 @@ import static java.util.Objects.requireNonNull; import static java.util.function.Function.identity; import static java.util.stream.Collectors.joining; +import static java.util.stream.Collectors.toMap; import static org.apache.iceberg.MetadataTableType.ALL_ENTRIES; import static org.apache.iceberg.MetadataTableType.ENTRIES; import static org.apache.iceberg.ReachableFileUtil.metadataFileLocations; @@ -800,40 +801,44 @@ public ConnectorTableProperties getTableProperties(ConnectorSession session, Con Map columns = getProjectedColumns(icebergTable.schema(), typeManager, partitionSourceIds).stream() .collect(toImmutableMap(IcebergColumnHandle::getId, identity())); - Supplier> lazyFiles = Suppliers.memoize(() -> { + Supplier> uniquePartitions = Suppliers.memoize(() -> { TableScan tableScan = icebergTable.newScan() .useSnapshot(table.getSnapshotId().get()) .filter(toIcebergExpression(enforcedPredicate)); try (CloseableIterable iterator = tableScan.planFiles()) { - return ImmutableList.copyOf(iterator); + return Streams.stream(iterator) + .collect(toMap( + StructLikeWrapperWithFieldIdToIndex::createStructLikeWrapper, + FileScanTask::spec, + (p, _) -> p)); } catch (IOException e) { throw new UncheckedIOException(e); } }); - Iterable files = () -> lazyFiles.get().iterator(); - - Iterable> discreteTupleDomain = Iterables.transform(files, fileScan -> { - // Extract partition values in the data file - Map> partitionColumnValueStrings = getPartitionKeys(fileScan); - Map partitionValues = partitionSourceIds.stream() - .filter(partitionColumnValueStrings::containsKey) - .collect(toImmutableMap( - columns::get, - columnId -> { - IcebergColumnHandle column = columns.get(columnId); - Object prestoValue = deserializePartitionValue( - column.getType(), - partitionColumnValueStrings.get(columnId).orElse(null), - column.getName()); - - return new NullableValue(column.getType(), prestoValue); - })); - - return TupleDomain.fromFixedValues(partitionValues); - }); + Iterable> discreteTupleDomain = Iterables.transform( + () -> uniquePartitions.get().entrySet().iterator(), + entry -> { + // Extract partition values in the data file + Map> partitionColumnValueStrings = getPartitionKeys(entry.getKey().getStructLikeWrapper().get(), entry.getValue()); + Map partitionValues = partitionSourceIds.stream() + .filter(partitionColumnValueStrings::containsKey) + .collect(toImmutableMap( + columns::get, + columnId -> { + IcebergColumnHandle column = columns.get(columnId); + Object prestoValue = deserializePartitionValue( + column.getType(), + partitionColumnValueStrings.get(columnId).orElse(null), + column.getName()); + + return new NullableValue(column.getType(), prestoValue); + })); + + return TupleDomain.fromFixedValues(partitionValues); + }); discretePredicates = new DiscretePredicates( columns.values().stream() diff --git a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestMetadataQueryOptimization.java b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestMetadataQueryOptimization.java index 446b0d5b4c70..8757bc4d454f 100644 --- a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestMetadataQueryOptimization.java +++ b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestMetadataQueryOptimization.java @@ -101,8 +101,8 @@ public void testOptimization() anyTree(values( ImmutableList.of("b", "c"), ImmutableList.of( - ImmutableList.of(new Constant(INTEGER, 6L), new Constant(INTEGER, 7L)), - ImmutableList.of(new Constant(INTEGER, 9L), new Constant(INTEGER, 10L)))))); + ImmutableList.of(new Constant(INTEGER, 9L), new Constant(INTEGER, 10L)), + ImmutableList.of(new Constant(INTEGER, 6L), new Constant(INTEGER, 7L)))))); assertPlan( format("SELECT DISTINCT b, c FROM %s WHERE b > 7", testTable), @@ -118,6 +118,33 @@ public void testOptimization() values(ImmutableList.of("b", "c"), ImmutableList.of()))); } + @Test + public void testOptimizationOnPartitionWithMultipleFiles() + { + String testTable = "test_metadata_optimization_on_partition_with_multiple_files"; + + getPlanTester().executeStatement(format( + "CREATE TABLE %s (a, b, c) WITH (PARTITIONING = ARRAY['b', 'c']) AS VALUES (1, 8, 9), (2, 8, 9)", + testTable)); + + Session session = Session.builder(getPlanTester().getDefaultSession()) + .setSystemProperty("optimize_metadata_queries", "true") + .build(); + + // Insert again to generate another file in same partition + getPlanTester().executeStatement(format( + "INSERT INTO %s VALUES (3, 8, 9)", + testTable)); + + assertPlan( + format("SELECT DISTINCT b, c FROM %s ORDER BY b", testTable), + session, + anyTree(values( + ImmutableList.of("b", "c"), + ImmutableList.of( + ImmutableList.of(new Constant(INTEGER, 8L), new Constant(INTEGER, 9L)))))); + } + @Test public void testOptimizationWithNullPartitions() {