Skip to content

Commit 9ecc5ae

Browse files
authored
[Fix](delete) Support delete when column name is Unicode (#39381)
## Proposed changes

Issue Number: close #xxx

Before:

```sql
mysql> delete from table_7298276 where 中文列名1 > '2023-08-17' and 中文列名2 > '-68' and 中文列名3 in ("77", "0", "-35", "-8", "93", "-87", "42", "24", "57", "74");
ERROR 1105 (HY000): errCode = 2, detailMessage = delete job failed, errmsg:10019: [(10.16.10.8)[INVALID_ARGUMENT]failed to parse condition_str, condtion=TCondition { 01: column_name (string) = "\xe4\xb8\xad\xe6\x96\x87\xe5\x88\x97\xe5\x90\x8d1", 02: condition_op (string) = ">", 03: condition_values (list) = list<string>[1] { [0] = "2023-08-17", }, 04: column_unique_id (i32) = 0, 05: marked_by_runtime_filter (bool) = false, 1000: compound_type (i32) = 0, }]
```

Now:

```sql
mysql> delete from table_7298276 where 中文列名1 > '2012-08-17' and 中文列名2 > -68 and 中文列名3 in (1,2,3);
Query OK, 0 rows affected (0.14 sec)
```
1 parent 21e6bdd commit 9ecc5ae

File tree

4 files changed

+70
-28
lines changed

4 files changed

+70
-28
lines changed

be/src/olap/delete_handler.cpp

+21-28
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,6 @@
2121
#include <gen_cpp/olap_file.pb.h>
2222
#include <thrift/protocol/TDebugProtocol.h>
2323

24-
#include <boost/regex.hpp>
25-
#include <sstream>
2624
#include <string>
2725
#include <vector>
2826

@@ -40,12 +38,10 @@
4038
using apache::thrift::ThriftDebugString;
4139
using std::vector;
4240
using std::string;
43-
using std::stringstream;
4441

4542
using ::google::protobuf::RepeatedPtrField;
4643

4744
namespace doris {
48-
using namespace ErrorCode;
4945

5046
// construct sub condition from TCondition
5147
std::string construct_sub_predicate(const TCondition& condition) {
@@ -314,38 +310,35 @@ Status DeleteHandler::parse_condition(const DeleteSubPredicatePB& sub_cond, TCon
314310
// value: matches "1597751948193618247 and length(source)<1;\n;\n"
315311
//
316312
// For more info, see DeleteHandler::construct_sub_predicates
317-
// FIXME(gavin): support unicode. And this is a tricky implementation, it should
318-
// not be the final resolution, refactor it.
313+
// FIXME(gavin): This is a tricky implementation; it should not be the final solution. Refactor it.
319314
const char* const CONDITION_STR_PATTERN =
320-
// .----------------- column-name ----------------. .----------------------- operator ------------------------. .------------ value ----------.
321-
R"(([_a-zA-Z@0-9\s/][.a-zA-Z0-9_+-/?@#$%^&*"\s,:]*)\s*((?:=)|(?:!=)|(?:>>)|(?:<<)|(?:>=)|(?:<=)|(?:\*=)|(?: IS ))\s*('((?:[\s\S]+)?)'|(?:[\s\S]+)?))";
322-
// '----------------- group 1 --------------------' '--------------------- group 2 ---------------------------' | '-- group 4--' |
323-
// match any of: = != >> << >= <= *= " IS " '----------- group 3 ---------'
324-
// match **ANY THING** without(4)
325-
// or with(3) single quote
326-
boost::regex DELETE_HANDLER_REGEX(CONDITION_STR_PATTERN);
315+
// .----------------- column-name --------------------------. .----------------------- operator ------------------------. .------------ value ----------.
316+
R"(([_a-zA-Z@0-9\s/\p{L}][.a-zA-Z0-9_+-/?@#$%^&*"\s,:\p{L}]*)\s*((?:=)|(?:!=)|(?:>>)|(?:<<)|(?:>=)|(?:<=)|(?:\*=)|(?: IS ))\s*('((?:[\s\S]+)?)'|(?:[\s\S]+)?))";
317+
// '----------------- group 1 ------------------------------' '--------------------- group 2 ---------------------------' | '-- group 4--' |
318+
// match any of: = != >> << >= <= *= " IS " '----------- group 3 ---------'
319+
// match **ANY THING** without(4)
320+
// or with(3) single quote
327321
// clang-format on
322+
RE2 DELETE_HANDLER_REGEX(CONDITION_STR_PATTERN);
328323

329324
Status DeleteHandler::parse_condition(const std::string& condition_str, TCondition* condition) {
330-
bool matched = false;
331-
boost::smatch what;
332-
try {
333-
VLOG_NOTICE << "condition_str: " << condition_str;
334-
matched = boost::regex_match(condition_str, what, DELETE_HANDLER_REGEX) &&
335-
condition_str.size() == what[0].str().size(); // exact match
336-
} catch (boost::regex_error& e) {
337-
VLOG_NOTICE << "fail to parse expr. [expr=" << condition_str << "; error=" << e.what()
338-
<< "]";
339-
}
325+
std::string col_name, op, value, g4;
326+
327+
bool matched = RE2::FullMatch(condition_str, DELETE_HANDLER_REGEX, &col_name, &op, &value,
328+
&g4); // exact match
329+
340330
if (!matched) {
341-
return Status::Error<ErrorCode::INVALID_ARGUMENT>("fail to sub condition. condition={}",
342-
condition_str);
331+
return Status::InvalidArgument("fail to sub condition. condition={}", condition_str);
343332
}
344333

345-
condition->column_name = what[1].str();
346-
condition->condition_op = what[2].str() == " IS " ? "IS" : what[2].str();
334+
condition->column_name = col_name;
335+
condition->condition_op = op == " IS " ? "IS" : op;
347336
// match string with single quotes, a = b or a = 'b'
348-
condition->condition_values.push_back(what[3 + !!what[4].matched].str());
337+
if (!g4.empty()) {
338+
condition->condition_values.push_back(g4);
339+
} else {
340+
condition->condition_values.push_back(value);
341+
}
349342
VLOG_NOTICE << "parsed condition_str: col_name={" << condition->column_name << "} op={"
350343
<< condition->condition_op << "} val={" << condition->condition_values.back()
351344
<< "}";

be/test/olap/delete_handler_test.cpp

+4
Original file line numberDiff line numberDiff line change
@@ -1225,6 +1225,10 @@ TEST_F(TestDeleteHandler, TestParseDeleteCondition) {
12251225
{R"(a IS b IS NOT NULL)", true, gen_cond(R"(a IS b)", "IS", R"(NOT NULL)" )}, // test " IS " in column name
12261226
{R"(_a-zA-Z@0-9 /.a-zA-Z0-9_+-/?@#$%^&*" ,:=hell)", true, gen_cond(R"(_a-zA-Z@0-9 /.a-zA-Z0-9_+-/?@#$%^&*" ,:)", "=", R"(hell)")}, // hellbound column name
12271227
{R"(this is a col very loooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooon colum name=long)", true, gen_cond(R"(this is a col very loooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooon colum name)", "=", R"(long)")}, // test " IS " in column name
1228+
{R"(中文列名1=b)" , true, gen_cond(R"(中文列名1)", "=" , R"(b)" )}, // Chinese case
1229+
{R"(错!!误!=b)" , false, gen_cond(R"(abc)" , "!=", R"(b)" )}, // illegal character
1230+
{R"(##错误<=b)" , false, gen_cond(R"(abc)" , "<=", R"(b)" )}, // illegal prefix
1231+
{R"(κάνεις지내세요>>b)" , true, gen_cond(R"(κάνεις지내세요)", ">>", R"(b)" )}, // other languages
12281232
};
12291233
for (auto& i : test_input) { test(i); }
12301234
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
-- This file is automatically generated. You should know what you did if you want to edit this
2+
-- !sql1 --
3+
2020-12-12 1 1 1
4+
5+
-- !sql2 --
6+
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
// Regression case: DELETE whose WHERE clause references columns with non-ASCII (Unicode) names.
suite("test_delete_unicode") {
19+
// Session variable switched on before the table with Unicode column names is created.
sql "set enable_unicode_name_support=true;"
20+
21+
sql """
22+
CREATE TABLE `table_7298276` (
23+
`中文列名1` date NOT NULL,
24+
`中文列名2` int NOT NULL,
25+
`中文列名3` bigint NOT NULL,
26+
`中文列名4` largeint NOT NULL,
27+
INDEX 中文列名2 (`中文列名2`) USING INVERTED,
28+
INDEX 中文列名4 (`中文列名4`) USING INVERTED
29+
) ENGINE=OLAP
30+
DUPLICATE KEY(`中文列名1`, `中文列名2`, `中文列名3`)
31+
DISTRIBUTED BY HASH(`中文列名1`, `中文列名2`, `中文列名3`) BUCKETS 4
32+
properties("replication_num" = "1");
33+
"""
34+
35+
// Seed exactly one row; the sql1 query captures the table content before the delete.
sql """ insert into table_7298276 values ('2020-12-12',1,1,1);"""
36+
qt_sql1 "select * from table_7298276;"
37+
// The seeded row satisfies all three predicates ('2020-12-12' > '2012-08-17', 1 > -68, 1 in (1,2,3)),
// so the sql2 query below is expected to return no rows.
sql "delete from table_7298276 where 中文列名1 > '2012-08-17' and 中文列名2 > -68 and 中文列名3 in (1,2,3);"
38+
qt_sql2 "select * from table_7298276;"
39+
}

0 commit comments

Comments
 (0)