Skip to content

Commit 9a06078

Browse files
committed
✨ multilang support
1 parent 92d5d29 commit 9a06078

File tree

4 files changed

+195
-28
lines changed

4 files changed

+195
-28
lines changed

README.md

+3-4
Original file line numberDiff line numberDiff line change
@@ -18,17 +18,17 @@ import "stefanitsky.me/gachinator"
1818
func main() {
1919
// ...
2020
input := "some input message text"
21-
result := gachinator.Gachinate([]byte(input))
21+
result := gachinator.GachinateEN([]byte(input))
2222
fmt.Println(string(result))
2323
// ...
2424
}
2525
```
2626

2727
## Examples
2828

29-
### EN (no yet implemented)
29+
### EN
3030

31-
* `message` -> `mASSage`
31+
* `manufacturable` -> `manuFUCKturable`
3232
* `cool` -> `c♂♂l`
3333

3434
### RU
@@ -38,5 +38,4 @@ func main() {
3838

3939
## TODO
4040

41-
* Multilingual regexp (EN/RU etc.)
4241
* CLI auto ci build

cmd/gachinator/gachinator.go

+11-1
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package main
22

33
import (
44
"bufio"
5+
"flag"
56
"fmt"
67
"os"
78

@@ -14,11 +15,20 @@ var (
1415
)
1516

1617
func main() {
18+
lang := flag.String("lang", "ru", "select language config")
19+
flag.Parse()
20+
21+
lc, err := gachinator.FindLangConfig(*lang)
22+
if err != nil {
23+
fmt.Fprint(os.Stdout, err)
24+
os.Exit(1)
25+
}
26+
1727
sc := bufio.NewScanner(os.Stdin)
1828

1929
for sc.Scan() {
2030
currentLine = sc.Bytes()
21-
gachinatedLine = gachinator.Gachinate(currentLine)
31+
gachinatedLine = gachinator.Gachinate(currentLine, *lc)
2232
fmt.Fprint(os.Stdout, string(gachinatedLine))
2333
}
2434

gachinate.go

+77-20
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
package gachinator
33

44
import (
5+
"fmt"
56
"regexp"
67
)
78

@@ -12,22 +13,52 @@ type match struct {
1213
found bool
1314
}
1415

16+
type langConfig struct {
17+
re *regexp.Regexp
18+
replacers map[int][]byte
19+
}
20+
1521
var (
16-
re = regexp.MustCompile(`([эЭ]с)|(о)|([кК][ао]м)|([фФ]ак)|(т[её]мн(ый|ое|ая|о|ые|ых))|(гей)|(глубок(ий|ое|ая|о|и|ие|ого))|(доллар(ов|ы))`)
17-
replacers = map[int][]byte{
18-
0: []byte("ASS"),
19-
1: []byte("♂"),
20-
2: []byte("CUM"),
21-
3: []byte("FUCK"),
22-
4: []byte("DARK"),
23-
// 5: useless suffixes group
24-
6: []byte("GAY"),
25-
7: []byte("DEEP"),
26-
// 8: useless suffixes group
27-
9: []byte("BUCKS"),
22+
RussianConfig = langConfig{
23+
re: regexp.MustCompile(`([эЭ]с)|(о)|([кК][ао]м)|([фФ]ак)|(т[её]мн(ый|ое|ая|о|ые|ых))|(гей)|(глубок(ий|ое|ая|о|и|ие|ого))|(доллар(ов|ы))`),
24+
replacers: map[int][]byte{
25+
0: []byte("ASS"),
26+
1: []byte("♂"),
27+
2: []byte("CUM"),
28+
3: []byte("FUCK"),
29+
4: []byte("DARK"),
30+
// 5: useless suffixes group
31+
6: []byte("GAY"),
32+
7: []byte("DEEP"),
33+
// 8: useless suffixes group
34+
9: []byte("BUCKS"),
35+
},
36+
}
37+
EnglishConfig = langConfig{
38+
re: regexp.MustCompile(`([eE]ss)|(o)|([cC][ou]m(e))|([fF]ac)|([dD]ark)`),
39+
replacers: map[int][]byte{
40+
0: []byte("ASS"),
41+
1: []byte("♂"),
42+
2: []byte("CUM"),
43+
// 3: useless suffix group
44+
4: []byte("FUCK"),
45+
5: []byte("DARK"),
46+
},
47+
}
48+
langCodeToLangConfig = map[string]langConfig{
49+
"ru": RussianConfig,
50+
"en": EnglishConfig,
2851
}
2952
)
3053

54+
// LangConfigNotFoundError reports that no language config is registered for
// the requested language code.
type LangConfigNotFoundError struct {
	// lang is the language code that was requested.
	lang string
}

// Error implements the error interface. The message names the requested code
// and lists the available ones.
func (e *LangConfigNotFoundError) Error() string {
	const format = "language config for \"%v\" is not found (available configs are: ru, en)."
	return fmt.Sprintf(format, e.lang)
}
61+
3162
var (
3263
offset int
3364
m match
@@ -42,16 +73,42 @@ var (
4273
found bool
4374
)
4475

45-
// Gachinates your input text and returns gachinated variant.
46-
func Gachinate(b []byte) []byte {
47-
allSubmatchIndexes := re.FindAllSubmatchIndex(b, -1)
76+
// Gachinates your input text with specified language config and returns gachinated variant.
77+
func Gachinate(b []byte, lc langConfig) []byte {
78+
return *gachinate(&b, lc)
79+
}
80+
81+
// Gachinate russian input
82+
func GachinateRU(b []byte) []byte {
83+
return *gachinate(&b, RussianConfig)
84+
}
85+
86+
// Gachinate english input
87+
func GachinateEN(b []byte) []byte {
88+
return *gachinate(&b, EnglishConfig)
89+
}
90+
91+
// Finds language config by lang code
92+
// Example:
93+
// lc, err := FindLangConfig("ru")
94+
func FindLangConfig(lang string) (*langConfig, error) {
95+
lc, found := langCodeToLangConfig[lang]
96+
if !found {
97+
return nil, &LangConfigNotFoundError{lang}
98+
}
99+
100+
return &lc, nil
101+
}
102+
103+
func gachinate(b *[]byte, lc langConfig) *[]byte {
104+
allSubmatchIndexes := lc.re.FindAllSubmatchIndex(*b, -1)
48105

49106
offset = 0
50107
for _, loc := range allSubmatchIndexes {
51-
m = findMatch(&loc, &b)
108+
m = findMatch(&loc, b, &lc)
52109
if m.found {
53-
orig = b[m.start:m.end]
54-
b = append(b[:m.start+offset], append(m.replacer, b[m.end+offset:]...)...)
110+
orig = (*b)[m.start:m.end]
111+
*b = append((*b)[:m.start+offset], append(m.replacer, (*b)[m.end+offset:]...)...)
55112
offset += len(m.replacer) - len(orig)
56113
}
57114
}
@@ -60,13 +117,13 @@ func Gachinate(b []byte) []byte {
60117
}
61118

62119
// Finds match by found regex submatch indexes and returns match struct.
63-
func findMatch(indexes *[]int, b *[]byte) (m match) {
120+
func findMatch(indexes *[]int, b *[]byte, lc *langConfig) (m match) {
64121
subIndex = 0
65122

66123
for i := 2; i < len(*indexes); i += 2 {
67124
start, end = (*indexes)[i], (*indexes)[i+1]
68125
if start != -1 && end != -1 {
69-
repl, found = replacers[subIndex]
126+
repl, found = lc.replacers[subIndex]
70127
if found {
71128
m.start = start
72129
m.end = end

gachinate_test.go

+104-3
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package gachinator
22

33
import (
44
"bufio"
5+
"bytes"
56
"io"
67
"os"
78
"testing"
@@ -12,7 +13,13 @@ type GachinateTestCase struct {
1213
expectedOutput string
1314
}
1415

15-
func TestGachinate(t *testing.T) {
16+
type GachinateSimpleTestCase struct {
17+
input string
18+
expectedOutput string
19+
langConfig langConfig
20+
}
21+
22+
func TestGachinateRu(t *testing.T) {
1623
cases := []GachinateTestCase{
1724
{
1825
input: "фактор",
@@ -77,14 +84,108 @@ func TestGachinate(t *testing.T) {
7784
}
7885

7986
for _, c := range cases {
80-
result := string(Gachinate([]byte(c.input)))
87+
result := string(GachinateRU([]byte(c.input)))
88+
89+
if result != c.expectedOutput {
90+
t.Errorf("\nExpected: %v\nGot:%v\nOriginal:%v\n", c.expectedOutput, result, c.input)
91+
}
92+
}
93+
}
94+
95+
func TestGachinateEn(t *testing.T) {
96+
cases := []GachinateTestCase{
97+
{
98+
input: "cool",
99+
expectedOutput: "c♂♂l",
100+
},
101+
{
102+
input: "manufacturable",
103+
expectedOutput: "manuFUCKturable",
104+
},
105+
{
106+
input: "message",
107+
expectedOutput: "mASSage",
108+
},
109+
{
110+
input: "come",
111+
expectedOutput: "CUM",
112+
},
113+
{
114+
input: "become",
115+
expectedOutput: "beCUM",
116+
},
117+
{
118+
input: "semidarkness",
119+
expectedOutput: "semiDARKnASS",
120+
},
121+
}
122+
123+
for _, c := range cases {
124+
result := string(GachinateEN([]byte(c.input)))
81125

82126
if result != c.expectedOutput {
83127
t.Errorf("\nExpected: %v\nGot:%v\nOriginal:%v\n", c.expectedOutput, result, c.input)
84128
}
85129
}
86130
}
87131

132+
// Just test that it executes with different configs (no need complex test duplication)
133+
func TestGachinate(t *testing.T) {
134+
cases := []GachinateSimpleTestCase{
135+
{
136+
input: "круто",
137+
expectedOutput: "крут♂",
138+
langConfig: RussianConfig,
139+
},
140+
{
141+
input: "cool",
142+
expectedOutput: "c♂♂l",
143+
langConfig: EnglishConfig,
144+
},
145+
}
146+
147+
for _, c := range cases {
148+
result := string(Gachinate([]byte(c.input), c.langConfig))
149+
150+
if result != c.expectedOutput {
151+
t.Errorf("\nExpected: %v\nGot: %v\nOriginal: %v\nConfig: %v", c.expectedOutput, result, c.input, c.langConfig)
152+
}
153+
}
154+
}
155+
156+
func TestFindLangConfig(t *testing.T) {
157+
for lang, expectedConfig := range langCodeToLangConfig {
158+
lc, err := FindLangConfig(lang)
159+
if err != nil {
160+
t.Errorf("Expected no error, but got %v", err)
161+
}
162+
163+
// reflect.DeepEqual will not work, that's why we need to test manually
164+
if lc.re != expectedConfig.re {
165+
t.Errorf("Regex missmatch, expected %v, got %v", expectedConfig.re, lc.re)
166+
}
167+
168+
foundReplacersLen, expectedReplacersLen := len(lc.replacers), len(expectedConfig.replacers)
169+
if foundReplacersLen != expectedReplacersLen {
170+
t.Errorf("Replacers missmatch, expected %v, got %v", lc.replacers, expectedConfig.replacers)
171+
}
172+
173+
for k, v := range lc.replacers {
174+
if v2, ok := expectedConfig.replacers[k]; !ok || !bytes.Equal(v, v2) {
175+
t.Errorf("Language configs are not match, expected %v, got %v", expectedConfig, lc)
176+
}
177+
}
178+
}
179+
}
180+
181+
func TestFindLangConfigNotFound(t *testing.T) {
182+
expectedErr := `language config for "fake" is not found (available configs are: ru, en).`
183+
lc, err := FindLangConfig("fake")
184+
if lc != nil || err.Error() != expectedErr {
185+
t.Errorf(`Expected error: "%v", got language config: %v and error: %v`, expectedErr, lc, err)
186+
}
187+
}
188+
88189
func BenchmarkGachinate(b *testing.B) {
89190
b.StopTimer()
90191
f, err := os.Open("testdata/benchmark.txt")
@@ -101,7 +202,7 @@ func BenchmarkGachinate(b *testing.B) {
101202

102203
b.StartTimer()
103204
for i := 0; i < b.N; i++ {
104-
Gachinate(input)
205+
GachinateRU(input)
105206
}
106207
b.StopTimer()
107208
}

0 commit comments

Comments
 (0)