Skip to content

Commit e9d9314

Browse files
authored
make "dec" and ryu functions faster and simpler (#51273)
We had some common code in `Ryu.append_c_digits` that can be combined with the Base logic for the same thing. But it turns out that all of this duplicated code in Ryu seems to just make it run slightly slower in most cases. The old version had many more branches to check, even though numbers are often small, so only the last check is meaningful. But the assumption that it would be faster even if all of them were used also does not seem to hold up in practice, particularly for a function like `append_nine_digits`, which unrolls completely, but where the complicated version has slightly more data dependencies because of the way it is written. Similarly, we replace `unsafe_copyto!` with `@inbounds` indexing, since this is better for the optimizer, which then doesn't need to treat this operation as an unknown reference escape. Lastly, we use the `append_nine_digits` trick from Ryu to make printing of arbitrarily big numbers much faster.
```
julia> @btime string(typemax(Int128))
  402.345 ns (2 allocations: 120 bytes) # before
  151.139 ns (2 allocations: 120 bytes) # after
```
1 parent 377f9df commit e9d9314

File tree

5 files changed

+200
-285
lines changed

5 files changed

+200
-285
lines changed

base/intfuncs.jl

+67-19
Original file line numberDiff line numberDiff line change
@@ -558,7 +558,7 @@ function bit_ndigits0z(x::Base.BitUnsigned64)
558558
end
559559
function bit_ndigits0z(x::UInt128)
560560
n = 0
561-
while x > 0x8ac7230489e80000
561+
while x > 0x8ac7230489e80000 # 10e18
562562
x = div(x,0x8ac7230489e80000)
563563
n += 19
564564
end
@@ -724,7 +724,7 @@ function bin(x::Unsigned, pad::Int, neg::Bool)
724724
x >>= 0x1
725725
i -= 1
726726
end
727-
if neg; @inbounds a[1]=0x2d; end
727+
neg && (@inbounds a[1] = 0x2d) # UInt8('-')
728728
String(a)
729729
end
730730

@@ -738,29 +738,77 @@ function oct(x::Unsigned, pad::Int, neg::Bool)
738738
x >>= 0x3
739739
i -= 1
740740
end
741-
if neg; @inbounds a[1]=0x2d; end
741+
neg && (@inbounds a[1] = 0x2d) # UInt8('-')
742742
String(a)
743743
end
744744

745745
# 2-digit decimal characters ("00":"99")
746-
const _dec_d100 = UInt16[(0x30 + i % 10) << 0x8 + (0x30 + i ÷ 10) for i = 0:99]
746+
const _dec_d100 = UInt16[
747+
# generating expression: UInt16[(0x30 + i % 10) << 0x8 + (0x30 + i ÷ 10) for i = 0:99]
748+
# 0 0, 0 1, 0 2, 0 3, and so on in little-endian
749+
0x3030, 0x3130, 0x3230, 0x3330, 0x3430, 0x3530, 0x3630, 0x3730, 0x3830, 0x3930,
750+
0x3031, 0x3131, 0x3231, 0x3331, 0x3431, 0x3531, 0x3631, 0x3731, 0x3831, 0x3931,
751+
0x3032, 0x3132, 0x3232, 0x3332, 0x3432, 0x3532, 0x3632, 0x3732, 0x3832, 0x3932,
752+
0x3033, 0x3133, 0x3233, 0x3333, 0x3433, 0x3533, 0x3633, 0x3733, 0x3833, 0x3933,
753+
0x3034, 0x3134, 0x3234, 0x3334, 0x3434, 0x3534, 0x3634, 0x3734, 0x3834, 0x3934,
754+
0x3035, 0x3135, 0x3235, 0x3335, 0x3435, 0x3535, 0x3635, 0x3735, 0x3835, 0x3935,
755+
0x3036, 0x3136, 0x3236, 0x3336, 0x3436, 0x3536, 0x3636, 0x3736, 0x3836, 0x3936,
756+
0x3037, 0x3137, 0x3237, 0x3337, 0x3437, 0x3537, 0x3637, 0x3737, 0x3837, 0x3937,
757+
0x3038, 0x3138, 0x3238, 0x3338, 0x3438, 0x3538, 0x3638, 0x3738, 0x3838, 0x3938,
758+
0x3039, 0x3139, 0x3239, 0x3339, 0x3439, 0x3539, 0x3639, 0x3739, 0x3839, 0x3939
759+
]
747760

748-
function dec(x::Unsigned, pad::Int, neg::Bool)
749-
n = neg + ndigits(x, pad=pad)
750-
a = StringVector(n)
751-
i = n
752-
@inbounds while i >= 2
753-
d, r = divrem(x, 0x64)
754-
d100 = _dec_d100[(r % Int)::Int + 1]
755-
a[i-1] = d100 % UInt8
756-
a[i] = (d100 >> 0x8) % UInt8
757-
x = oftype(x, d)
761+
function append_c_digits(olength::Int, digits::Unsigned, buf, pos::Int)
762+
i = olength
763+
while i >= 2
764+
d, c = divrem(digits, 0x64)
765+
digits = oftype(digits, d)
766+
@inbounds d100 = _dec_d100[(c % Int) + 1]
767+
@inbounds buf[pos + i - 2] = d100 % UInt8
768+
@inbounds buf[pos + i - 1] = (d100 >> 0x8) % UInt8
758769
i -= 2
759770
end
760-
if i > neg
761-
@inbounds a[i] = 0x30 + (rem(x, 0xa) % UInt8)::UInt8
771+
if i == 1
772+
@inbounds buf[pos] = UInt8('0') + rem(digits, 0xa) % UInt8
773+
i -= 1
762774
end
763-
if neg; @inbounds a[1]=0x2d; end
775+
return pos + olength
776+
end
777+
778+
# Write exactly nine decimal characters for `digits` into `buf`, starting at index `pos`,
# and return the index just past them (`pos + 9`). Bounds checks are skipped, so the
# caller must guarantee that `buf[pos:pos+8]` is valid.
function append_nine_digits(digits::Unsigned, buf, pos::Int)
    if digits != 0
        # call-site `@inline` with a literal length, intended to let the compiler
        # unroll the digit loop completely
        return @inline append_c_digits(9, digits, buf, pos)
    end
    # all-zero chunk: no division needed, just emit nine '0' characters
    zero_char = UInt8('0')
    for offset in 0:8
        @inbounds buf[pos + offset] = zero_char
    end
    return pos + 9
end
788+
789+
# Write `digits` in decimal into `olength` positions of `buf` starting at index `pos`, and
# return `pos + olength`. While the value is wider than a native `UInt`, nine-digit chunks
# are peeled off its low end so the remaining work uses native-width arithmetic.
#
# n.b. `olength` may be larger than required to print all of `digits` (the output is then
# padded with zeros), but the printed number is undefined if it is smaller, and may
# include bits of both the high and low bytes.
function append_c_digits_fast(olength::Int, digits::Unsigned, buf, pos::Int)
    chunk_base = 0x3b9aca00 # 10^9 (1_000_000_000) as UInt32
    remaining = olength
    while remaining > 9 && digits > typemax(UInt)
        # do everything in cheap math chunks, using the processor's native math size
        quotient, low9 = divrem(digits, chunk_base)
        remaining -= 9
        # the low nine digits land at the right-hand end of the still-unwritten region
        append_nine_digits(low9 % UInt32, buf, pos + remaining)
        digits = oftype(digits, quotient)
    end
    # the rest is handled at native width (`digits % UInt` truncates to `UInt`)
    append_c_digits(remaining, digits % UInt, buf, pos)
    return pos + olength
end
805+
806+
807+
# Build the decimal string for `x`, using `ndigits(x, pad=pad)` digit positions and a
# leading '-' when `neg` is true.
function dec(x::Unsigned, pad::Int, neg::Bool)
    total = ndigits(x, pad=pad) + neg # one extra byte is reserved for the sign
    bytes = StringVector(total)
    # Digits are written across all `total` positions; when `neg` is set, the first
    # position is overwritten with the sign just below.
    append_c_digits_fast(total, x, bytes, 1)
    if neg
        @inbounds bytes[1] = 0x2d # UInt8('-')
    end
    return String(bytes)
end
766814

@@ -781,7 +829,7 @@ function hex(x::Unsigned, pad::Int, neg::Bool)
781829
d = (x % UInt8)::UInt8 & 0xf
782830
@inbounds a[i] = d + ifelse(d > 0x9, 0x57, 0x30)
783831
end
784-
if neg; @inbounds a[1]=0x2d; end
832+
neg && (@inbounds a[1] = 0x2d) # UInt8('-')
785833
String(a)
786834
end
787835

@@ -806,7 +854,7 @@ function _base(base::Integer, x::Integer, pad::Int, neg::Bool)
806854
end
807855
i -= 1
808856
end
809-
if neg; @inbounds a[1]=0x2d; end
857+
neg && (@inbounds a[1] = 0x2d) # UInt8('-')
810858
String(a)
811859
end
812860

base/ryu/exp.jl

+37-33
Original file line numberDiff line numberDiff line change
@@ -8,33 +8,33 @@ function writeexp(buf, pos, v::T,
88

99
# special cases
1010
if x == 0
11-
buf[pos] = UInt8('0')
11+
@inbounds buf[pos] = UInt8('0')
1212
pos += 1
1313
if precision > 0 && !trimtrailingzeros
14-
buf[pos] = decchar
14+
@inbounds buf[pos] = decchar
1515
pos += 1
1616
for _ = 1:precision
17-
buf[pos] = UInt8('0')
17+
@inbounds buf[pos] = UInt8('0')
1818
pos += 1
1919
end
2020
elseif hash
21-
buf[pos] = decchar
21+
@inbounds buf[pos] = decchar
2222
pos += 1
2323
end
24-
buf[pos] = expchar
25-
buf[pos + 1] = UInt8('+')
26-
buf[pos + 2] = UInt8('0')
27-
buf[pos + 3] = UInt8('0')
24+
@inbounds buf[pos] = expchar
25+
@inbounds buf[pos + 1] = UInt8('+')
26+
@inbounds buf[pos + 2] = UInt8('0')
27+
@inbounds buf[pos + 3] = UInt8('0')
2828
return pos + 4
2929
elseif isnan(x)
30-
buf[pos] = UInt8('N')
31-
buf[pos + 1] = UInt8('a')
32-
buf[pos + 2] = UInt8('N')
30+
@inbounds buf[pos] = UInt8('N')
31+
@inbounds buf[pos + 1] = UInt8('a')
32+
@inbounds buf[pos + 2] = UInt8('N')
3333
return pos + 3
3434
elseif !isfinite(x)
35-
buf[pos] = UInt8('I')
36-
buf[pos + 1] = UInt8('n')
37-
buf[pos + 2] = UInt8('f')
35+
@inbounds buf[pos] = UInt8('I')
36+
@inbounds buf[pos + 1] = UInt8('n')
37+
@inbounds buf[pos + 2] = UInt8('f')
3838
return pos + 3
3939
end
4040

@@ -80,10 +80,10 @@ function writeexp(buf, pos, v::T,
8080
if precision > 1
8181
pos = append_d_digits(availableDigits, digits, buf, pos, decchar)
8282
else
83-
buf[pos] = UInt8('0') + digits
83+
@inbounds buf[pos] = UInt8('0') + digits
8484
pos += 1
8585
if hash
86-
buf[pos] = decchar
86+
@inbounds buf[pos] = decchar
8787
pos += 1
8888
end
8989
end
@@ -121,10 +121,10 @@ function writeexp(buf, pos, v::T,
121121
if precision > 1
122122
pos = append_d_digits(availableDigits, digits, buf, pos, decchar)
123123
else
124-
buf[pos] = UInt8('0') + digits
124+
@inbounds buf[pos] = UInt8('0') + digits
125125
pos += 1
126126
if hash
127-
buf[pos] = decchar
127+
@inbounds buf[pos] = decchar
128128
pos += 1
129129
end
130130
end
@@ -162,7 +162,7 @@ function writeexp(buf, pos, v::T,
162162
if printedDigits != 0
163163
if digits == 0
164164
for _ = 1:maximum
165-
buf[pos] = UInt8('0')
165+
@inbounds buf[pos] = UInt8('0')
166166
pos += 1
167167
end
168168
else
@@ -172,10 +172,10 @@ function writeexp(buf, pos, v::T,
172172
if precision > 1
173173
pos = append_d_digits(maximum, digits, buf, pos, decchar)
174174
else
175-
buf[pos] = UInt8('0') + digits
175+
@inbounds buf[pos] = UInt8('0') + digits
176176
pos += 1
177177
if hash
178-
buf[pos] = decchar
178+
@inbounds buf[pos] = decchar
179179
pos += 1
180180
end
181181
end
@@ -184,52 +184,56 @@ function writeexp(buf, pos, v::T,
184184
roundPos = pos
185185
while true
186186
roundPos -= 1
187-
if roundPos == (startpos - 1) || buf[roundPos] == UInt8('-') || (plus && buf[roundPos] == UInt8('+')) || (space && buf[roundPos] == UInt8(' '))
188-
buf[roundPos + 1] = UInt8('1')
187+
if roundPos == (startpos - 1) || (@inbounds buf[roundPos]) == UInt8('-') || (plus && (@inbounds buf[roundPos]) == UInt8('+')) || (space && (@inbounds buf[roundPos]) == UInt8(' '))
188+
@inbounds buf[roundPos + 1] = UInt8('1')
189189
e += 1
190190
break
191191
end
192-
c = roundPos > 0 ? buf[roundPos] : 0x00
192+
c = roundPos > 0 ? (@inbounds buf[roundPos]) : 0x00
193193
if c == decchar
194194
continue
195195
elseif c == UInt8('9')
196-
buf[roundPos] = UInt8('0')
196+
@inbounds buf[roundPos] = UInt8('0')
197197
roundUp = 1
198198
continue
199199
else
200200
if roundUp == 2 && UInt8(c) % 2 == 0
201201
break
202202
end
203-
buf[roundPos] = c + 1
203+
@inbounds buf[roundPos] = c + 1
204204
break
205205
end
206206
end
207207
end
208208
if trimtrailingzeros
209-
while buf[pos - 1] == UInt8('0')
209+
while @inbounds buf[pos - 1] == UInt8('0')
210210
pos -= 1
211211
end
212-
if buf[pos - 1] == decchar && !hash
212+
if @inbounds buf[pos - 1] == decchar && !hash
213213
pos -= 1
214214
end
215215
end
216216
buf[pos] = expchar
217217
pos += 1
218218
if e < 0
219-
buf[pos] = UInt8('-')
219+
@inbounds buf[pos] = UInt8('-')
220220
pos += 1
221221
e = -e
222222
else
223-
buf[pos] = UInt8('+')
223+
@inbounds buf[pos] = UInt8('+')
224224
pos += 1
225225
end
226226
if e >= 100
227227
c = e % 10
228-
unsafe_copyto!(buf, pos, DIGIT_TABLE, 2 * div(e, 10) + 1, 2)
229-
buf[pos + 2] = UInt8('0') + c
228+
@inbounds d100 = DIGIT_TABLE16[div(e, 10) + 1]
229+
@inbounds buf[pos] = d100 % UInt8
230+
@inbounds buf[pos + 1] = (d100 >> 0x8) % UInt8
231+
@inbounds buf[pos + 2] = UInt8('0') + c
230232
pos += 3
231233
else
232-
unsafe_copyto!(buf, pos, DIGIT_TABLE, 2 * e + 1, 2)
234+
@inbounds d100 = DIGIT_TABLE16[e + 1]
235+
@inbounds buf[pos] = d100 % UInt8
236+
@inbounds buf[pos + 1] = (d100 >> 0x8) % UInt8
233237
pos += 2
234238
end
235239
return pos

base/ryu/fixed.jl

+1-1
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ function writefixed(buf, pos, v::T,
5959
pos = append_nine_digits(digits, buf, pos)
6060
elseif digits != 0
6161
olength = decimallength(digits)
62-
pos = append_n_digits(olength, digits, buf, pos)
62+
pos = append_c_digits(olength, digits, buf, pos)
6363
nonzero = true
6464
end
6565
i -= 1

0 commit comments

Comments
 (0)