JuliaLang
diff --git a/‎base/intfuncs.jl
+67-19 b/‎base/intfuncs.jl
+67-19
diff --git a/‎base/ryu/exp.jl
+37-33 b/‎base/ryu/exp.jl
+37-33
diff --git a/‎base/ryu/fixed.jl
+1-1 b/‎base/ryu/fixed.jl
+1-1
@@ -558,7 +558,7 @@ function bit_ndigits0z(x::Base.BitUnsigned64)
 end
 function bit_ndigits0z(x::UInt128)
     n = 0
-    while x > 0x8ac7230489e80000
+    while x > 0x8ac7230489e80000 # 10^19
         x = div(x,0x8ac7230489e80000)
         n += 19
     end
@@ -724,7 +724,7 @@ function bin(x::Unsigned, pad::Int, neg::Bool)
         x >>= 0x1
         i -= 1
     end
-    if neg; @inbounds a[1]=0x2d; end
+    neg && (@inbounds a[1] = 0x2d) # UInt8('-')
     String(a)
 end
 
@@ -738,29 +738,77 @@ function oct(x::Unsigned, pad::Int, neg::Bool)
         x >>= 0x3
         i -= 1
     end
-    if neg; @inbounds a[1]=0x2d; end
+    neg && (@inbounds a[1] = 0x2d) # UInt8('-')
     String(a)
 end
 
 # 2-digit decimal characters ("00":"99")
-const _dec_d100 = UInt16[(0x30 + i % 10) << 0x8 + (0x30 + i ÷ 10) for i = 0:99]
+const _dec_d100 = UInt16[
+# generating expression: UInt16[(0x30 + i % 10) << 0x8 + (0x30 + i ÷ 10) for i = 0:99]
+#    0 0,    0 1,    0 2,    0 3, ... (low byte = ASCII tens digit, high byte = ASCII ones digit)
+  0x3030, 0x3130, 0x3230, 0x3330, 0x3430, 0x3530, 0x3630, 0x3730, 0x3830, 0x3930, # "00":"09"
+  0x3031, 0x3131, 0x3231, 0x3331, 0x3431, 0x3531, 0x3631, 0x3731, 0x3831, 0x3931, # "10":"19"
+  0x3032, 0x3132, 0x3232, 0x3332, 0x3432, 0x3532, 0x3632, 0x3732, 0x3832, 0x3932, # "20":"29"
+  0x3033, 0x3133, 0x3233, 0x3333, 0x3433, 0x3533, 0x3633, 0x3733, 0x3833, 0x3933, # "30":"39"
+  0x3034, 0x3134, 0x3234, 0x3334, 0x3434, 0x3534, 0x3634, 0x3734, 0x3834, 0x3934, # "40":"49"
+  0x3035, 0x3135, 0x3235, 0x3335, 0x3435, 0x3535, 0x3635, 0x3735, 0x3835, 0x3935, # "50":"59"
+  0x3036, 0x3136, 0x3236, 0x3336, 0x3436, 0x3536, 0x3636, 0x3736, 0x3836, 0x3936, # "60":"69"
+  0x3037, 0x3137, 0x3237, 0x3337, 0x3437, 0x3537, 0x3637, 0x3737, 0x3837, 0x3937, # "70":"79"
+  0x3038, 0x3138, 0x3238, 0x3338, 0x3438, 0x3538, 0x3638, 0x3738, 0x3838, 0x3938, # "80":"89"
+  0x3039, 0x3139, 0x3239, 0x3339, 0x3439, 0x3539, 0x3639, 0x3739, 0x3839, 0x3939 # "90":"99"
+]
 
-function dec(x::Unsigned, pad::Int, neg::Bool)
-    n = neg + ndigits(x, pad=pad)
-    a = StringVector(n)
-    i = n
-    @inbounds while i >= 2
-        d, r = divrem(x, 0x64)
-        d100 = _dec_d100[(r % Int)::Int + 1]
-        a[i-1] = d100 % UInt8
-        a[i] = (d100 >> 0x8) % UInt8
-        x = oftype(x, d)
+function append_c_digits(olength::Int, digits::Unsigned, buf, pos::Int) # writes buf[pos:pos+olength-1]
+    i = olength # number of positions still to fill; the buffer is filled from the right
+    while i >= 2
+        d, c = divrem(digits, 0x64) # 0x64 == 100: c is the two lowest decimal digits
+        digits = oftype(digits, d) # carry the quotient on in the original integer type
+        @inbounds d100 = _dec_d100[(c % Int) + 1] # packed ASCII pair for c (+1: 1-based table)
+        @inbounds buf[pos + i - 2] = d100 % UInt8 # low byte of the pair: tens digit
+        @inbounds buf[pos + i - 1] = (d100 >> 0x8) % UInt8 # high byte of the pair: ones digit
         i -= 2
     end
-    if i > neg
-        @inbounds a[i] = 0x30 + (rem(x, 0xa) % UInt8)::UInt8
+    if i == 1 # odd olength: one leading position is left over
+        @inbounds buf[pos] = UInt8('0') + rem(digits, 0xa) % UInt8 # single digit, no table lookup
+        i -= 1
     end
-    if neg; @inbounds a[1]=0x2d; end
+    return pos + olength # index just past the last byte written
+end
+
+function append_nine_digits(digits::Unsigned, buf, pos::Int) # always writes exactly 9 bytes
+    if digits == 0 # shortcut: emit nine ASCII zeros without any division
+        for _ = 1:9
+            @inbounds buf[pos] = UInt8('0')
+            pos += 1
+        end
+        return pos # advanced by 9, the same value the general path returns
+    end
+    return @inline append_c_digits(9, digits, buf, pos) # force loop-unrolling on the length
+end
+
+function append_c_digits_fast(olength::Int, digits::Unsigned, buf, pos::Int)
+    i = olength # number of positions still to fill; the buffer is filled from the right
+    # n.b. olength may exceed the number of decimal digits in `digits` (the output is then
+    # left-padded with zeros); if it is smaller the printed number is undefined, because the
+    # final `digits % UInt` below may truncate a value that still exceeds typemax(UInt).
+    maxpow10 = 0x3b9aca00 # 10^9 (1_000_000_000) as UInt32
+    while i > 9 && digits > typemax(UInt) # chunk only while the value is wider than a native UInt
+        # do everything in cheap math chunks, using the processor's native math size
+        d, c = divrem(digits, maxpow10) # c holds the lowest nine decimal digits
+        digits = oftype(digits, d) # carry the quotient on in the original integer type
+        append_nine_digits(c % UInt32, buf, pos + i - 9) # fill the rightmost 9 unfilled positions
+        i -= 9
+    end
+    append_c_digits(i, digits % UInt, buf, pos) # remaining i positions, in native-width arithmetic
+    return pos + olength # index just past the last byte written
+end
+
+
+function dec(x::Unsigned, pad::Int, neg::Bool) # decimal string of x, with a leading '-' when neg
+    n = neg + ndigits(x, pad=pad) # neg adds one extra leading byte for the sign
+    a = StringVector(n)
+    append_c_digits_fast(n, x, a, 1) # fills all n bytes; an extra leading slot is zero-padded
+    neg && (@inbounds a[1] = 0x2d) # UInt8('-')
     String(a)
 end
 
@@ -781,7 +829,7 @@ function hex(x::Unsigned, pad::Int, neg::Bool)
         d = (x % UInt8)::UInt8 & 0xf
         @inbounds a[i] = d + ifelse(d > 0x9, 0x57, 0x30)
     end
-    if neg; @inbounds a[1]=0x2d; end
+    neg && (@inbounds a[1] = 0x2d) # UInt8('-')
     String(a)
 end
 
@@ -806,7 +854,7 @@ function _base(base::Integer, x::Integer, pad::Int, neg::Bool)
         end
         i -= 1
     end
-    if neg; @inbounds a[1]=0x2d; end
+    neg && (@inbounds a[1] = 0x2d) # UInt8('-')
     String(a)
 end
 
 
@@ -8,33 +8,33 @@ function writeexp(buf, pos, v::T,
 
     # special cases
     if x == 0
-        buf[pos] = UInt8('0')
+        @inbounds buf[pos] = UInt8('0')
         pos += 1
         if precision > 0 && !trimtrailingzeros
-            buf[pos] = decchar
+            @inbounds buf[pos] = decchar
             pos += 1
             for _ = 1:precision
-                buf[pos] = UInt8('0')
+                @inbounds buf[pos] = UInt8('0')
                 pos += 1
             end
         elseif hash
-            buf[pos] = decchar
+            @inbounds buf[pos] = decchar
             pos += 1
         end
-        buf[pos] = expchar
-        buf[pos + 1] = UInt8('+')
-        buf[pos + 2] = UInt8('0')
-        buf[pos + 3] = UInt8('0')
+        @inbounds buf[pos] = expchar
+        @inbounds buf[pos + 1] = UInt8('+')
+        @inbounds buf[pos + 2] = UInt8('0')
+        @inbounds buf[pos + 3] = UInt8('0')
         return pos + 4
     elseif isnan(x)
-        buf[pos] = UInt8('N')
-        buf[pos + 1] = UInt8('a')
-        buf[pos + 2] = UInt8('N')
+        @inbounds buf[pos] = UInt8('N')
+        @inbounds buf[pos + 1] = UInt8('a')
+        @inbounds buf[pos + 2] = UInt8('N')
         return pos + 3
     elseif !isfinite(x)
-        buf[pos] = UInt8('I')
-        buf[pos + 1] = UInt8('n')
-        buf[pos + 2] = UInt8('f')
+        @inbounds buf[pos] = UInt8('I')
+        @inbounds buf[pos + 1] = UInt8('n')
+        @inbounds buf[pos + 2] = UInt8('f')
         return pos + 3
     end
 
@@ -80,10 +80,10 @@ function writeexp(buf, pos, v::T,
                 if precision > 1
                     pos = append_d_digits(availableDigits, digits, buf, pos, decchar)
                 else
-                    buf[pos] = UInt8('0') + digits
+                    @inbounds buf[pos] = UInt8('0') + digits
                     pos += 1
                     if hash
-                        buf[pos] = decchar
+                        @inbounds buf[pos] = decchar
                         pos += 1
                     end
                 end
@@ -121,10 +121,10 @@ function writeexp(buf, pos, v::T,
                 if precision > 1
                     pos = append_d_digits(availableDigits, digits, buf, pos, decchar)
                 else
-                    buf[pos] = UInt8('0') + digits
+                    @inbounds buf[pos] = UInt8('0') + digits
                     pos += 1
                     if hash
-                        buf[pos] = decchar
+                        @inbounds buf[pos] = decchar
                         pos += 1
                     end
                 end
@@ -162,7 +162,7 @@ function writeexp(buf, pos, v::T,
     if printedDigits != 0
         if digits == 0
             for _ = 1:maximum
-                buf[pos] = UInt8('0')
+                @inbounds buf[pos] = UInt8('0')
                 pos += 1
             end
         else
@@ -172,10 +172,10 @@ function writeexp(buf, pos, v::T,
         if precision > 1
             pos = append_d_digits(maximum, digits, buf, pos, decchar)
         else
-            buf[pos] = UInt8('0') + digits
+            @inbounds buf[pos] = UInt8('0') + digits
             pos += 1
             if hash
-                buf[pos] = decchar
+                @inbounds buf[pos] = decchar
                 pos += 1
             end
         end
@@ -184,52 +184,56 @@ function writeexp(buf, pos, v::T,
         roundPos = pos
         while true
             roundPos -= 1
-            if roundPos == (startpos - 1) || buf[roundPos] == UInt8('-') || (plus && buf[roundPos] == UInt8('+')) || (space && buf[roundPos] == UInt8(' '))
-                buf[roundPos + 1] = UInt8('1')
+            if roundPos == (startpos - 1) || (@inbounds buf[roundPos]) == UInt8('-') || (plus && (@inbounds buf[roundPos]) == UInt8('+')) || (space && (@inbounds buf[roundPos]) == UInt8(' '))
+                @inbounds buf[roundPos + 1] = UInt8('1')
                 e += 1
                 break
             end
-            c = roundPos > 0 ? buf[roundPos] : 0x00
+            c = roundPos > 0 ? (@inbounds buf[roundPos]) : 0x00
             if c == decchar
                 continue
             elseif c == UInt8('9')
-                buf[roundPos] = UInt8('0')
+                @inbounds buf[roundPos] = UInt8('0')
                 roundUp = 1
                 continue
             else
                 if roundUp == 2 && UInt8(c) % 2 == 0
                     break
                 end
-                buf[roundPos] = c + 1
+                @inbounds buf[roundPos] = c + 1
                 break
             end
         end
     end
     if trimtrailingzeros
-        while buf[pos - 1] == UInt8('0')
+        while @inbounds buf[pos - 1] == UInt8('0')
             pos -= 1
         end
-        if buf[pos - 1] == decchar && !hash
+        if @inbounds buf[pos - 1] == decchar && !hash
             pos -= 1
         end
     end
     buf[pos] = expchar
     pos += 1
     if e < 0
-        buf[pos] = UInt8('-')
+        @inbounds buf[pos] = UInt8('-')
         pos += 1
         e = -e
     else
-        buf[pos] = UInt8('+')
+        @inbounds buf[pos] = UInt8('+')
         pos += 1
     end
     if e >= 100
         c = e % 10
-        unsafe_copyto!(buf, pos, DIGIT_TABLE, 2 * div(e, 10) + 1, 2)
-        buf[pos + 2] = UInt8('0') + c
+        @inbounds d100 = DIGIT_TABLE16[div(e, 10) + 1]
+        @inbounds buf[pos] = d100 % UInt8
+        @inbounds buf[pos + 1] = (d100 >> 0x8) % UInt8
+        @inbounds buf[pos + 2] = UInt8('0') + c
         pos += 3
     else
-        unsafe_copyto!(buf, pos, DIGIT_TABLE, 2 * e + 1, 2)
+        @inbounds d100 = DIGIT_TABLE16[e + 1]
+        @inbounds buf[pos] = d100 % UInt8
+        @inbounds buf[pos + 1] = (d100 >> 0x8) % UInt8
         pos += 2
     end
     return pos
 
@@ -59,7 +59,7 @@ function writefixed(buf, pos, v::T,
                 pos = append_nine_digits(digits, buf, pos)
             elseif digits != 0
                 olength = decimallength(digits)
-                pos = append_n_digits(olength, digits, buf, pos)
+                pos = append_c_digits(olength, digits, buf, pos)
                 nonzero = true
             end
             i -= 1