Skip to content

Commit 8245356

Browse files
authored
Merge pull request #23393 from JuliaLang/rf/titlecase
titlecase: chars not starting a word can be converted to lowercase
2 parents df91458 + f94ab0a commit 8245356

File tree

4 files changed

+50
-10
lines changed

4 files changed

+50
-10
lines changed

NEWS.md

+10
Original file line numberDiff line numberDiff line change
@@ -365,6 +365,15 @@ This section lists changes that do not have deprecation warnings.
365365
* `findn(x::AbstractVector)` now returns a 1-tuple with the vector of indices, to be
366366
consistent with higher order arrays ([#25365]).
367367

368+
* the default behavior of `titlecase` is changed in two ways ([#23393]):
369+
+ characters not starting a word are converted to lowercase;
370+
a new keyword argument `strict` is added which
371+
restores the old behavior when set to `false`.
372+
+ any character that is not cased (i.e. not lower-, upper- or title-case) is considered as a word separator;
373+
to get the old behavior (only "space" characters are considered as
374+
word separators), use the keyword `wordsep=isspace`.
375+
376+
368377
Library improvements
369378
--------------------
370379

@@ -918,6 +927,7 @@ Deprecated or removed
918927

919928
* `findin(a, b)` has been deprecated in favor of `find(occursin(b), a)` ([#24673]).
920929

930+
921931
Command-line option changes
922932
---------------------------
923933

base/strings/unicode.jl

+30-6
Original file line numberDiff line numberDiff line change
@@ -384,6 +384,19 @@ function isupper(c::Char)
384384
cat == UTF8PROC_CATEGORY_LU || cat == UTF8PROC_CATEGORY_LT
385385
end
386386

387+
"""
388+
iscased(c::Char) -> Bool
389+
390+
Tests whether a character is cased, i.e. is lower-, upper- or title-cased.
391+
"""
392+
function iscased(c::Char)
393+
cat = category_code(c)
394+
return cat == UTF8PROC_CATEGORY_LU ||
395+
cat == UTF8PROC_CATEGORY_LT ||
396+
cat == UTF8PROC_CATEGORY_LL
397+
end
398+
399+
387400
"""
388401
isdigit(c::Char) -> Bool
389402
@@ -649,27 +662,38 @@ julia> lowercase("STRINGS AND THINGS")
649662
lowercase(s::AbstractString) = map(lowercase, s)
650663

651664
"""
652-
titlecase(s::AbstractString) -> String
665+
titlecase(s::AbstractString; [wordsep::Function], strict::Bool=true) -> String
653666
654-
Capitalize the first character of each word in `s`.
667+
Capitalize the first character of each word in `s`;
668+
if `strict` is true, every other character is
669+
converted to lowercase, otherwise they are left unchanged.
670+
By default, all characters that are not cased (lower-, upper- or title-case) are considered as word separators;
671+
a predicate can be passed as the `wordsep` keyword to determine
672+
which characters should be considered as word separators.
655673
See also [`ucfirst`](@ref) to capitalize only the first
656674
character in `s`.
657675
658676
# Examples
659677
```jldoctest
660-
julia> titlecase("the Julia programming language")
678+
julia> titlecase("the JULIA programming language")
661679
"The Julia Programming Language"
680+
681+
julia> titlecase("ISS - international space station", strict=false)
682+
"ISS - International Space Station"
683+
684+
julia> titlecase("a-a b-b", wordsep = c->c==' ')
685+
"A-a B-b"
662686
```
663687
"""
664-
function titlecase(s::AbstractString)
688+
function titlecase(s::AbstractString; wordsep::Function = !iscased, strict::Bool=true)
665689
startword = true
666690
b = IOBuffer()
667691
for c in s
668-
if isspace(c)
692+
if wordsep(c)
669693
print(b, c)
670694
startword = true
671695
else
672-
print(b, startword ? titlecase(c) : c)
696+
print(b, startword ? titlecase(c) : strict ? lowercase(c) : c)
673697
startword = false
674698
end
675699
end

stdlib/Unicode/src/Unicode.jl

+1-1
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ module Unicode
77
using Base.Unicode: normalize, graphemes, isassigned, textwidth, isvalid,
88
islower, isupper, isalpha, isdigit, isxdigit, isnumeric, isalnum,
99
iscntrl, ispunct, isspace, isprint, isgraph,
10-
lowercase, uppercase, titlecase, lcfirst, ucfirst
10+
lowercase, uppercase, titlecase, lcfirst, ucfirst, iscased
1111

1212
export graphemes, textwidth, isvalid,
1313
islower, isupper, isalpha, isdigit, isxdigit, isnumeric, isalnum,

stdlib/Unicode/test/runtests.jl

+9-3
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
using Test
44
using Unicode
5-
using Unicode: normalize, isassigned
5+
using Unicode: normalize, isassigned, iscased
66

77
@testset "string normalization" begin
88
# normalize (Unicode normalization etc.):
@@ -366,8 +366,14 @@ end
366366
@testset "titlecase" begin
367367
@test titlecase('ǉ') == 'ǈ'
368368
@test titlecase("ǉubljana") == "ǈubljana"
369-
@test titlecase("aBc ABC") == "ABc ABC"
370-
@test titlecase("abcD EFG\n\thij") == "AbcD EFG\n\tHij"
369+
@test titlecase("aBc ABC") == "Abc Abc"
370+
@test titlecase("aBc ABC", strict=true) == "Abc Abc"
371+
@test titlecase("aBc ABC", strict=false) == "ABc ABC"
372+
@test titlecase("abcD EFG\n\thij", strict=true) == "Abcd Efg\n\tHij"
373+
@test titlecase("abcD EFG\n\thij", strict=false) == "AbcD EFG\n\tHij"
374+
@test titlecase("abc-def") == "Abc-Def"
375+
@test titlecase("abc-def", wordsep = !iscased) == "Abc-Def"
376+
@test titlecase("abc-def", wordsep = isspace) == "Abc-def"
371377
end
372378
end
373379

0 commit comments

Comments
 (0)