Skip to content

Commit d129a0e

Browse files
committed
util: Replace use of locale dependent atoi(…) with locale-independent std::from_chars(…) (C++17)
1 parent 4559d99 commit d129a0e

File tree

6 files changed

+108
-28
lines changed

6 files changed

+108
-28
lines changed

src/test/script_tests.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ CScript ParseScript(string s)
5959
(starts_with(w, "-") && all(string(w.begin()+1, w.end()), ::IsDigit)))
6060
{
6161
// Number
62-
int64_t n = atoi64(w);
62+
int64_t n = LocaleIndependentAtoi<int64_t>(w);
6363
result << n;
6464
}
6565
else if (starts_with(w, "0x") && IsHex(string(w.begin()+2, w.end())))

src/test/util_tests.cpp

+71
Original file line numberDiff line numberDiff line change
@@ -544,6 +544,77 @@ BOOST_AUTO_TEST_CASE(test_IsDigit)
544544
BOOST_CHECK_EQUAL(IsDigit(9), false);
545545
}
546546

547+
// Checks LocaleIndependentAtoi<T> against fixed expectations, in this order:
//  - int32_t inputs that parse: plain digits, leading zeros, a single leading
//    sign, surrounding spaces, and trailing non-digit text after the number.
//  - int32_t inputs expected to yield 0: conflicting or repeated signs, empty
//    and non-numeric text, a 0x prefix, and magnitudes far outside the type.
//  - For each of int64_t, uint64_t, int32_t, uint32_t, int16_t, uint16_t,
//    int8_t and uint8_t: the extreme representable values are expected to
//    parse, while one step past either limit (or any negative input for the
//    unsigned types) is expected to yield 0.
BOOST_AUTO_TEST_CASE(test_LocaleIndependentAtoi)
548+
{
549+
BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int32_t>("1234"), 1'234);
550+
BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int32_t>("0"), 0);
551+
BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int32_t>("01234"), 1'234);
552+
BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int32_t>("-1234"), -1'234);
553+
BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int32_t>(" 1"), 1);
554+
BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int32_t>("1 "), 1);
555+
BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int32_t>("1a"), 1);
556+
BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int32_t>("1.1"), 1);
557+
BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int32_t>("1.9"), 1);
558+
BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int32_t>("+01.9"), 1);
559+
BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int32_t>("-1"), -1);
560+
BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int32_t>(" -1"), -1);
561+
BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int32_t>("-1 "), -1);
562+
BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int32_t>(" -1 "), -1);
563+
BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int32_t>("+1"), 1);
564+
BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int32_t>(" +1"), 1);
565+
BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int32_t>(" +1 "), 1);
566+
567+
BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int32_t>("+-1"), 0);
568+
BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int32_t>("-+1"), 0);
569+
BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int32_t>("++1"), 0);
570+
BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int32_t>("--1"), 0);
571+
BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int32_t>(""), 0);
572+
BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int32_t>("aap"), 0);
573+
BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int32_t>("0x1"), 0);
574+
BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int32_t>("-32482348723847471234"), 0);
575+
BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int32_t>("32482348723847471234"), 0);
576+
577+
BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int64_t>("-9223372036854775809"), 0);
578+
BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int64_t>("-9223372036854775808"), -9'223'372'036'854'775'807LL - 1LL);
579+
BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int64_t>("9223372036854775807"), 9'223'372'036'854'775'807);
580+
BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int64_t>("9223372036854775808"), 0);
581+
582+
BOOST_CHECK_EQUAL(LocaleIndependentAtoi<uint64_t>("-1"), 0U);
583+
BOOST_CHECK_EQUAL(LocaleIndependentAtoi<uint64_t>("0"), 0U);
584+
BOOST_CHECK_EQUAL(LocaleIndependentAtoi<uint64_t>("18446744073709551615"), 18'446'744'073'709'551'615ULL);
585+
BOOST_CHECK_EQUAL(LocaleIndependentAtoi<uint64_t>("18446744073709551616"), 0U);
586+
587+
BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int32_t>("-2147483649"), 0);
588+
BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int32_t>("-2147483648"), -2'147'483'648LL);
589+
BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int32_t>("2147483647"), 2'147'483'647);
590+
BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int32_t>("2147483648"), 0);
591+
592+
BOOST_CHECK_EQUAL(LocaleIndependentAtoi<uint32_t>("-1"), 0U);
593+
BOOST_CHECK_EQUAL(LocaleIndependentAtoi<uint32_t>("0"), 0U);
594+
BOOST_CHECK_EQUAL(LocaleIndependentAtoi<uint32_t>("4294967295"), 4'294'967'295U);
595+
BOOST_CHECK_EQUAL(LocaleIndependentAtoi<uint32_t>("4294967296"), 0U);
596+
597+
BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int16_t>("-32769"), 0);
598+
BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int16_t>("-32768"), -32'768);
599+
BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int16_t>("32767"), 32'767);
600+
BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int16_t>("32768"), 0);
601+
602+
BOOST_CHECK_EQUAL(LocaleIndependentAtoi<uint16_t>("-1"), 0U);
603+
BOOST_CHECK_EQUAL(LocaleIndependentAtoi<uint16_t>("0"), 0U);
604+
BOOST_CHECK_EQUAL(LocaleIndependentAtoi<uint16_t>("65535"), 65'535U);
605+
BOOST_CHECK_EQUAL(LocaleIndependentAtoi<uint16_t>("65536"), 0U);
606+
607+
BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int8_t>("-129"), 0);
608+
BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int8_t>("-128"), -128);
609+
BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int8_t>("127"), 127);
610+
BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int8_t>("128"), 0);
611+
612+
BOOST_CHECK_EQUAL(LocaleIndependentAtoi<uint8_t>("-1"), 0U);
613+
BOOST_CHECK_EQUAL(LocaleIndependentAtoi<uint8_t>("0"), 0U);
614+
BOOST_CHECK_EQUAL(LocaleIndependentAtoi<uint8_t>("255"), 255U);
615+
BOOST_CHECK_EQUAL(LocaleIndependentAtoi<uint8_t>("256"), 0U);
616+
}
617+
547618
BOOST_AUTO_TEST_CASE(test_ParseInt32)
548619
{
549620
int32_t n;

src/util/strencodings.cpp

-14
Original file line numberDiff line numberDiff line change
@@ -440,20 +440,6 @@ std::string FormatParagraph(const std::string& in, size_t width, size_t indent)
440440
return out.str();
441441
}
442442

443-
int64_t atoi64(const std::string& str)
444-
{
445-
#ifdef _MSC_VER
446-
return _atoi64(str.c_str());
447-
#else
448-
return strtoll(str.c_str(), nullptr, 10);
449-
#endif
450-
}
451-
452-
int atoi(const std::string& str)
453-
{
454-
return atoi(str.c_str());
455-
}
456-
457443
/** Upper bound for mantissa.
458444
* 10^18-1 is the largest arbitrary decimal that will fit in a signed 64-bit integer.
459445
* Larger integers cannot consist of arbitrary combinations of 0-9:

src/util/strencodings.h

+29-2
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,10 @@
1111

1212
#include <attributes.h>
1313
#include <span.h>
14+
#include <util/string.h>
1415

1516
#include <cassert>
17+
#include <charconv>
1618
#include <cstdint>
1719
#include <iterator>
1820
#include <string>
@@ -55,8 +57,33 @@ std::string EncodeBase32(const unsigned char* pch, size_t len);
5557
std::string EncodeBase32(const std::string& str);
5658

5759
void SplitHostPort(std::string in, int &portOut, std::string &hostOut);
58-
int64_t atoi64(const std::string& str);
59-
int atoi(const std::string& str);
60+
61+
/**
 * Locale-independent stand-in for atoi(...) and atoi64(...).
 *
 * LocaleIndependentAtoi is provided for backwards compatibility reasons.
 * New code should use the ParseInt64/ParseUInt64/ParseInt32/ParseUInt32
 * functions, which provide parse error feedback.
 *
 * The goal is to replicate the defined behaviour of atoi and atoi64 as they
 * behave under the "C" locale: leading white space is skipped, one optional
 * leading sign is accepted, and parsing stops at the first character that
 * cannot be part of a decimal number.
 *
 * @tparam T   Integral type to parse into.
 * @param str  Text to convert.
 * @return The parsed value, or 0 when no number can be parsed or when the
 *         number is not representable in T.
 */
template <typename T>
T LocaleIndependentAtoi(const std::string& str)
{
    static_assert(std::is_integral<T>::value);

    // White space as classified by isspace() in the "C" locale.
    const auto is_c_space = [](char c) {
        return c == ' ' || c == '\t' || c == '\n' || c == '\v' || c == '\f' || c == '\r';
    };

    // Parse in place over [first, last) instead of building trimmed copies.
    const char* first = str.data();
    const char* const last = first + str.size();

    // Emulate atoi(...) handling of leading white space. Trailing white space
    // needs no handling: std::from_chars stops at the first non-digit.
    while (first != last && is_c_space(*first)) {
        ++first;
    }

    // std::from_chars accepts a leading '-' but not a leading '+', so a single
    // '+' is consumed here.
    if (first != last && *first == '+') {
        ++first;
        // Without this check "+-1" would be parsed as -1 once the '+' is gone.
        if (first != last && *first == '-') {
            return 0;
        }
    }

    T result{};
    const auto parsed = std::from_chars(first, last, result);
    // Both "no digits found" and "out of range for T" map to 0.
    if (parsed.ec != std::errc{}) {
        return 0;
    }
    return result;
}
6087

6188
/**
6289
* Tests if the given character is a decimal digit.

src/util/system.cpp

+7-9
Original file line numberDiff line numberDiff line change
@@ -54,16 +54,14 @@ ArgsManager gArgs;
5454
/**
5555
* Interpret a string argument as a boolean.
5656
*
57-
* The definition of atoi() requires that non-numeric string values like "foo",
58-
* return 0. This means that if a user unintentionally supplies a non-integer
59-
* argument here, the return value is always false. This means that -foo=false
60-
* does what the user probably expects, but -foo=true is well defined but does
61-
* not do what they probably expected.
57+
* LocaleIndependentAtoi<int>() is defined to return 0 for non-numeric string values
58+
* like "foo". Consequently, if a user unintentionally supplies a non-integer
59+
* argument here, the return value is always false: -foo=false does what the
60+
* user probably expects, whereas -foo=true is well defined but also yields
61+
* false, which is probably not what they expected.
6262
*
63-
* The return value of atoi() is undefined when given input not representable as
64-
* an int. On most systems this means string value between "-2147483648" and
65-
* "2147483647" are well defined (this method will return true). Setting
66-
* -txindex=2147483648 on most systems, however, is probably undefined.
63+
* The return value of LocaleIndependentAtoi<int>(...) is zero when given input not
64+
* representable as an int.
6765
*
6866
* For a more extensive discussion of this topic (and a wide range of opinions
6967
* on the Right Way to change this code), see PR12713.

test/lint/lint-locale-dependence.sh

-2
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,8 @@
55

66
export LC_ALL=C
77
KNOWN_VIOLATIONS=(
8-
"src/util/strencodings.cpp:.*atoi"
98
"src/util/strencodings.cpp:.*strtol"
109
"src/util/strencodings.cpp:.*strtoul"
11-
"src/util/strencodings.h:.*atoi"
1210
"src/logging.h:.*strftime"
1311
"src/gridcoin/backup.cpp:.*strftime"
1412
"src/rpc/protocol.cpp:.*strftime"

0 commit comments

Comments
 (0)