From 6b830791daaff98a9234cc2a32309cfac15398d3 Mon Sep 17 00:00:00 2001
From: TieweiFang
Date: Fri, 16 Aug 2024 17:29:25 +0800
Subject: [PATCH 1/2] fix

---
 .../hive_config_test/create_table.hql         |  31 +++++
 .../multi_catalog/hive_config_test/run.sh     |  14 ++
 .../datasource/hive/HiveMetaStoreCache.java   |   8 +-
 .../hive/hive_config_test.out                 |  33 +++++
 .../hive/hive_config_test.groovy              | 124 ++++++++++++++++++
 5 files changed, 209 insertions(+), 1 deletion(-)
 create mode 100644 docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/hive_config_test/create_table.hql
 create mode 100755 docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/hive_config_test/run.sh
 create mode 100644 regression-test/data/external_table_p0/hive/hive_config_test.out
 create mode 100644 regression-test/suites/external_table_p0/hive/hive_config_test.groovy

diff --git a/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/hive_config_test/create_table.hql b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/hive_config_test/create_table.hql
new file mode 100644
index 00000000000000..2f193a2e3c1987
--- /dev/null
+++ b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/hive_config_test/create_table.hql
@@ -0,0 +1,31 @@
+create database if not exists default;
+use default;
+
+CREATE TABLE `hive_recursive_directories_table`(
+  `id` int,
+  `name` string)
+ROW FORMAT SERDE
+  'org.apache.hadoop.hive.ql.io.orc.OrcSerde'
+STORED AS INPUTFORMAT
+  'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat'
+OUTPUTFORMAT
+  'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'
+LOCATION
+  '/user/doris/suites/default/hive_recursive_directories_table';
+
+
+CREATE TABLE `hive_ignore_absent_partitions_table`(
+  `id` int,
+  `name` string)
+PARTITIONED BY (country STRING, city STRING)
+ROW FORMAT SERDE
+  'org.apache.hadoop.hive.ql.io.orc.OrcSerde'
+STORED AS INPUTFORMAT
+  'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat'
+OUTPUTFORMAT
+  'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'
+LOCATION
+  '/user/doris/suites/default/hive_ignore_absent_partitions_table';
+
+ALTER TABLE hive_ignore_absent_partitions_table ADD PARTITION (country='USA', city='NewYork');
+ALTER TABLE hive_ignore_absent_partitions_table ADD PARTITION (country='India', city='Delhi');
diff --git a/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/hive_config_test/run.sh b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/hive_config_test/run.sh
new file mode 100755
index 00000000000000..7fc3d0555da2b5
--- /dev/null
+++ b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/hive_config_test/run.sh
@@ -0,0 +1,14 @@
+#!/bin/bash
+set -x
+
+CUR_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" &>/dev/null && pwd)"
+
+## mkdir and put data to hdfs
+hadoop fs -mkdir -p /user/doris/suites/default/hive_recursive_directories_table
+hadoop fs -mkdir -p /user/doris/suites/default/hive_ignore_absent_partitions_table
+
+# create table
+hive -f "${CUR_DIR}"/create_table.hql
+
+hadoop fs -rm -r /user/doris/suites/default/hive_ignore_absent_partitions_table/country=India
+
diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java
index 006ed83413a233..9630225eaca551 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java
@@ -364,7 +364,9 @@ private FileCacheValue getFileCache(String location, String inputFormat,
         // So we need to recursively list data location.
         // https://blog.actorsfit.com/a?ID=00550-ce56ec63-1bff-4b0c-a6f7-447b93efaa31
         List<RemoteFile> remoteFiles = new ArrayList<>();
-        Status status = fs.listFiles(location, true, remoteFiles);
+        boolean isRecursiveDirectories = Boolean.valueOf(
+                catalog.getProperties().getOrDefault("hive.recursive-directories", "false"));
+        Status status = fs.listFiles(location, isRecursiveDirectories, remoteFiles);
         if (status.ok()) {
             for (RemoteFile remoteFile : remoteFiles) {
                 String srcPath = remoteFile.getPath().toString();
@@ -376,6 +378,10 @@ private FileCacheValue getFileCache(String location, String inputFormat,
             // Hive doesn't aware that the removed partition is missing.
             // Here is to support this case without throw an exception.
             LOG.warn(String.format("File %s not exist.", location));
+            if (!Boolean.valueOf(catalog.getProperties()
+                    .getOrDefault("hive.ignore-absent-partitions", "true"))) {
+                throw new UserException("Partition location does not exist: " + location);
+            }
         } else {
             throw new RuntimeException(status.getErrMsg());
         }
diff --git a/regression-test/data/external_table_p0/hive/hive_config_test.out b/regression-test/data/external_table_p0/hive/hive_config_test.out
new file mode 100644
index 00000000000000..1a000281dfcdd7
--- /dev/null
+++ b/regression-test/data/external_table_p0/hive/hive_config_test.out
@@ -0,0 +1,33 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !check_outfile --
+1	doris
+2	nereids
+
+-- !check_outfile --
+1	doris
+2	nereids
+
+-- !check_outfile --
+1	doris
+2	nereids
+
+-- !1 --
+1	doris
+2	nereids
+
+-- !2 --
+1	doris
+1	doris
+1	doris
+2	nereids
+2	nereids
+2	nereids
+
+-- !check_outfile --
+1	doris
+2	nereids
+
+-- !3 --
+1	doris	USA	NewYork
+2	nereids	USA	NewYork
+
diff --git a/regression-test/suites/external_table_p0/hive/hive_config_test.groovy b/regression-test/suites/external_table_p0/hive/hive_config_test.groovy
new file mode 100644
index 00000000000000..551890848b4a76
--- /dev/null
+++ b/regression-test/suites/external_table_p0/hive/hive_config_test.groovy
@@ -0,0 +1,124 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("hive_config_test", "p0,external,hive,external_docker,external_docker_hive") {
+    String db_name = "regression_test_external_table_p0_hive"
+    String internal_table = "hive_config_test"
+    String catalog_name = "docker_hive"
+
+    // create table and insert
+    sql """ DROP TABLE IF EXISTS ${internal_table} """
+    sql """
+        CREATE TABLE IF NOT EXISTS ${internal_table} (
+            `id` INT NOT NULL,
+            `name` STRING NOT NULL
+        )
+        DISTRIBUTED BY HASH(id) PROPERTIES("replication_num" = "1");
+    """
+    // insert data into internal table
+    sql """ INSERT INTO ${internal_table} VALUES (1, 'doris'), (2, 'nereids'); """
+
+    String enabled = context.config.otherConfigs.get("enableHiveTest")
+    if (enabled == null || !enabled.equalsIgnoreCase("true")) {
+        logger.info("disable Hive test.")
+        return;
+    }
+
+    for (String hivePrefix : ["hive2"]) {
+        String hdfs_port = context.config.otherConfigs.get(hivePrefix + "HdfsPort")
+        String hms_port = context.config.otherConfigs.get(hivePrefix + "HmsPort")
+        String externalEnvIp = context.config.otherConfigs.get("externalEnvIp")
+        def defaultFS = "hdfs://${externalEnvIp}:${hdfs_port}"
+        // It's okay to use a random `hdfsUser`, but it can not be empty.
+        def hdfsUserName = "doris"
+
+
+        def test_outfile = {format, uri ->
+            def res = sql """
+                SELECT * FROM internal.${db_name}.${internal_table} t ORDER BY id
+                INTO OUTFILE "${defaultFS}${uri}"
+                FORMAT AS ${format}
+                PROPERTIES (
+                    "fs.defaultFS"="${defaultFS}",
+                    "hadoop.username" = "${hdfsUserName}"
+                );
+            """
+
+            def outfile_url = res[0][3]
+            // check data correctness
+            order_qt_check_outfile """ select * from hdfs(
+                "uri" = "${outfile_url}.${format}",
+                "hadoop.username" = "${hdfsUserName}",
+                "format" = "${format}");
+            """
+        }
+
+
+        // 1. test hive.recursive-directories-table config
+        test_outfile("orc", "/user/doris/suites/default/hive_recursive_directories_table/exp_")
+        test_outfile("orc", "/user/doris/suites/default/hive_recursive_directories_table/1/exp_")
+        test_outfile("orc", "/user/doris/suites/default/hive_recursive_directories_table/2/exp_")
+
+        // test 'hive.recursive-directories' = 'false'
+        sql """drop catalog if exists ${catalog_name}"""
+        sql """create catalog if not exists ${catalog_name} properties (
+            "type"="hms",
+            'hive.metastore.uris' = 'thrift://${externalEnvIp}:${hms_port}',
+            'hive.recursive-directories' = 'false'
+        );"""
+        sql """use `${catalog_name}`.`default`"""
+        order_qt_1 """ select * from hive_recursive_directories_table order by id;"""
+
+        // test 'hive.recursive-directories' = 'true'
+        sql """drop catalog if exists ${catalog_name}"""
+        sql """create catalog if not exists ${catalog_name} properties (
+            "type"="hms",
+            'hive.metastore.uris' = 'thrift://${externalEnvIp}:${hms_port}',
+            'hive.recursive-directories' = 'true'
+        );"""
+        sql """ use `${catalog_name}`.`default` """
+        order_qt_2 """ select * from hive_recursive_directories_table order by id; """
+
+        // 2. test hive.ignore-absent-partitions-table
+        test_outfile("orc", "/user/doris/suites/default/hive_ignore_absent_partitions_table/country=USA/city=NewYork/exp_")
+
+        // test 'hive.ignore-absent-partitions' = 'true'
+        sql """drop catalog if exists ${catalog_name}"""
+        sql """create catalog if not exists ${catalog_name} properties (
+            "type"="hms",
+            'hive.metastore.uris' = 'thrift://${externalEnvIp}:${hms_port}',
+            'hive.ignore-absent-partitions' = 'true'
+        );"""
+        sql """use `${catalog_name}`.`default`"""
+        order_qt_3 """ select * from hive_ignore_absent_partitions_table order by id;"""
+
+
+        // 'hive.ignore-absent-partitions' = 'false'
+        sql """drop catalog if exists ${catalog_name}"""
+        sql """create catalog if not exists ${catalog_name} properties (
+            "type"="hms",
+            'hive.metastore.uris' = 'thrift://${externalEnvIp}:${hms_port}',
+            'hive.ignore-absent-partitions' = 'false'
+        );"""
+        sql """use `${catalog_name}`.`default`"""
+        test {
+            sql """ select * from hive_ignore_absent_partitions_table order by id;"""
+
+            exception "Partition location does not exist"
+        }
+    }
+}
\ No newline at end of file

From 1cf2633f20b66326c45c500397abf0e5b6e85b2e Mon Sep 17 00:00:00 2001
From: TieweiFang
Date: Wed, 21 Aug 2024 11:46:50 +0800
Subject: [PATCH 2/2] fix 2

---
 .../datasource/hive/HiveMetaStoreCache.java   |  4 ++--
 .../hive/hive_config_test.groovy              | 16 ++++++++--------
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java
index 9630225eaca551..b87c14afbc8a42 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java
@@ -365,7 +365,7 @@ private FileCacheValue getFileCache(String location, String inputFormat,
         // https://blog.actorsfit.com/a?ID=00550-ce56ec63-1bff-4b0c-a6f7-447b93efaa31
         List<RemoteFile> remoteFiles = new ArrayList<>();
         boolean isRecursiveDirectories = Boolean.valueOf(
-                catalog.getProperties().getOrDefault("hive.recursive-directories", "false"));
+                catalog.getProperties().getOrDefault("hive.recursive_directories", "false"));
         Status status = fs.listFiles(location, isRecursiveDirectories, remoteFiles);
         if (status.ok()) {
             for (RemoteFile remoteFile : remoteFiles) {
@@ -379,7 +379,7 @@ private FileCacheValue getFileCache(String location, String inputFormat,
             // Here is to support this case without throw an exception.
             LOG.warn(String.format("File %s not exist.", location));
             if (!Boolean.valueOf(catalog.getProperties()
-                    .getOrDefault("hive.ignore-absent-partitions", "true"))) {
+                    .getOrDefault("hive.ignore_absent_partitions", "true"))) {
                 throw new UserException("Partition location does not exist: " + location);
             }
         } else {
diff --git a/regression-test/suites/external_table_p0/hive/hive_config_test.groovy b/regression-test/suites/external_table_p0/hive/hive_config_test.groovy
index 551890848b4a76..e75f3f5fbba93b 100644
--- a/regression-test/suites/external_table_p0/hive/hive_config_test.groovy
+++ b/regression-test/suites/external_table_p0/hive/hive_config_test.groovy
@@ -68,7 +68,7 @@ suite("hive_config_test", "p0,external,hive,external_docker,external_docker_hive
         }
 
 
-        // 1. test hive.recursive-directories-table config
+        // 1. test the hive.recursive_directories config
         test_outfile("orc", "/user/doris/suites/default/hive_recursive_directories_table/exp_")
         test_outfile("orc", "/user/doris/suites/default/hive_recursive_directories_table/1/exp_")
         test_outfile("orc", "/user/doris/suites/default/hive_recursive_directories_table/2/exp_")
@@ -78,7 +78,7 @@ suite("hive_config_test", "p0,external,hive,external_docker,external_docker_hive
         sql """create catalog if not exists ${catalog_name} properties (
             "type"="hms",
             'hive.metastore.uris' = 'thrift://${externalEnvIp}:${hms_port}',
-            'hive.recursive-directories' = 'false'
+            'hive.recursive_directories' = 'false'
         );"""
         sql """use `${catalog_name}`.`default`"""
         order_qt_1 """ select * from hive_recursive_directories_table order by id;"""
@@ -88,31 +88,31 @@ suite("hive_config_test", "p0,external,hive,external_docker,external_docker_hive
         sql """create catalog if not exists ${catalog_name} properties (
             "type"="hms",
             'hive.metastore.uris' = 'thrift://${externalEnvIp}:${hms_port}',
-            'hive.recursive-directories' = 'true'
+            'hive.recursive_directories' = 'true'
         );"""
         sql """ use `${catalog_name}`.`default` """
         order_qt_2 """ select * from hive_recursive_directories_table order by id; """
 
-        // 2. test hive.ignore-absent-partitions-table
+        // 2. test the hive.ignore_absent_partitions config
         test_outfile("orc", "/user/doris/suites/default/hive_ignore_absent_partitions_table/country=USA/city=NewYork/exp_")
 
-        // test 'hive.ignore-absent-partitions' = 'true'
+        // test 'hive.ignore_absent_partitions' = 'true'
         sql """drop catalog if exists ${catalog_name}"""
         sql """create catalog if not exists ${catalog_name} properties (
             "type"="hms",
             'hive.metastore.uris' = 'thrift://${externalEnvIp}:${hms_port}',
-            'hive.ignore-absent-partitions' = 'true'
+            'hive.ignore_absent_partitions' = 'true'
         );"""
         sql """use `${catalog_name}`.`default`"""
         order_qt_3 """ select * from hive_ignore_absent_partitions_table order by id;"""
 
 
-        // 'hive.ignore-absent-partitions' = 'false'
+        // 'hive.ignore_absent_partitions' = 'false'
         sql """drop catalog if exists ${catalog_name}"""
         sql """create catalog if not exists ${catalog_name} properties (
             "type"="hms",
             'hive.metastore.uris' = 'thrift://${externalEnvIp}:${hms_port}',
-            'hive.ignore-absent-partitions' = 'false'
+            'hive.ignore_absent_partitions' = 'false'
         );"""
         sql """use `${catalog_name}`.`default`"""
         test {