Skip to content

Commit f94ab0a

Browse files
committed
titlecase: all non-letters are considered word-separators
1 parent 7032e46 commit f94ab0a

File tree

4 files changed

+34
-8
lines changed

4 files changed

+34
-8
lines changed

NEWS.md

+7-3
Original file line numberDiff line numberDiff line change
@@ -365,9 +365,13 @@ This section lists changes that do not have deprecation warnings.
365365
* `findn(x::AbstractVector)` now returns a 1-tuple with the vector of indices, to be
366366
consistent with higher order arrays ([#25365]).
367367

368-
* the default behavior of `titlecase` is changed such that characters not starting
369-
a word are converted to lowercase; a new keyword argument `strict` is added which
370-
allows to get the old behavior when it's `false`.
368+
* the default behavior of `titlecase` is changed in two ways ([#23393]):
369+
+ characters not starting a word are converted to lowercase;
370+
a new keyword argument `strict` is added which
371+
allows getting the old behavior when it is `false`.
372+
+ any non-letter character is considered as a word separator;
373+
to get the old behavior (only "space" characters are considered as
374+
word separators), use the keyword `wordsep=isspace`.
371375

372376

373377
Library improvements

base/strings/unicode.jl

+22-3
Original file line numberDiff line numberDiff line change
@@ -384,6 +384,19 @@ function isupper(c::Char)
384384
cat == UTF8PROC_CATEGORY_LU || cat == UTF8PROC_CATEGORY_LT
385385
end
386386

387+
"""
388+
iscased(c::Char) -> Bool
389+
390+
Tests whether a character is cased, i.e. is lower-, upper- or title-cased.
391+
"""
392+
function iscased(c::Char)
393+
cat = category_code(c)
394+
return cat == UTF8PROC_CATEGORY_LU ||
395+
cat == UTF8PROC_CATEGORY_LT ||
396+
cat == UTF8PROC_CATEGORY_LL
397+
end
398+
399+
387400
"""
388401
isdigit(c::Char) -> Bool
389402
@@ -649,11 +662,14 @@ julia> lowercase("STRINGS AND THINGS")
649662
lowercase(s::AbstractString) = map(lowercase, s)
650663

651664
"""
652-
titlecase(s::AbstractString; strict::Bool=true) -> String
665+
titlecase(s::AbstractString; [wordsep::Function], strict::Bool=true) -> String
653666
654667
Capitalize the first character of each word in `s`;
655668
if `strict` is true, every other character is
656669
converted to lowercase, otherwise they are left unchanged.
670+
By default, all characters that are not cased letters are considered as word separators;
671+
a predicate can be passed as the `wordsep` keyword to determine
672+
which characters should be considered as word separators.
657673
See also [`ucfirst`](@ref) to capitalize only the first
658674
character in `s`.
659675
@@ -664,13 +680,16 @@ julia> titlecase("the JULIA programming language")
664680
665681
julia> titlecase("ISS - international space station", strict=false)
666682
"ISS - International Space Station"
683+
684+
julia> titlecase("a-a b-b", wordsep = c->c==' ')
685+
"A-a B-b"
667686
```
668687
"""
669-
function titlecase(s::AbstractString; strict::Bool=true)
688+
function titlecase(s::AbstractString; wordsep::Function = !iscased, strict::Bool=true)
670689
startword = true
671690
b = IOBuffer()
672691
for c in s
673-
if isspace(c)
692+
if wordsep(c)
674693
print(b, c)
675694
startword = true
676695
else

stdlib/Unicode/src/Unicode.jl

+1-1
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ module Unicode
77
using Base.Unicode: normalize, graphemes, isassigned, textwidth, isvalid,
88
islower, isupper, isalpha, isdigit, isxdigit, isnumeric, isalnum,
99
iscntrl, ispunct, isspace, isprint, isgraph,
10-
lowercase, uppercase, titlecase, lcfirst, ucfirst
10+
lowercase, uppercase, titlecase, lcfirst, ucfirst, iscased
1111

1212
export graphemes, textwidth, isvalid,
1313
islower, isupper, isalpha, isdigit, isxdigit, isnumeric, isalnum,

stdlib/Unicode/test/runtests.jl

+4-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
using Test
44
using Unicode
5-
using Unicode: normalize, isassigned
5+
using Unicode: normalize, isassigned, iscased
66

77
@testset "string normalization" begin
88
# normalize (Unicode normalization etc.):
@@ -371,6 +371,9 @@ end
371371
@test titlecase("aBc ABC", strict=false) == "ABc ABC"
372372
@test titlecase("abcD EFG\n\thij", strict=true) == "Abcd Efg\n\tHij"
373373
@test titlecase("abcD EFG\n\thij", strict=false) == "AbcD EFG\n\tHij"
374+
@test titlecase("abc-def") == "Abc-Def"
375+
@test titlecase("abc-def", wordsep = !iscased) == "Abc-Def"
376+
@test titlecase("abc-def", wordsep = isspace) == "Abc-def"
374377
end
375378
end
376379

0 commit comments

Comments
 (0)