Skip to content

Commit 6af3cce

Browse files
committed
Generalize the realText algorithm
1 parent 2e48342 commit 6af3cce

File tree

2 files changed

+48
-93
lines changed

2 files changed

+48
-93
lines changed

src/Faker/Provider/Text.php

+37-8
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55
abstract class Text extends \Faker\Provider\Base
66
{
77
protected static $baseText = '';
8+
protected static $separator = ' ';
9+
protected static $separatorLen = 1;
810
protected $explodedText = null;
911
protected $consecutiveWords = array();
1012

@@ -37,6 +39,7 @@ public function realText($maxNbChars = 200, $indexSize = 2)
3739
throw new \InvalidArgumentException('indexSize must be at most 5');
3840
}
3941

42+
4043
$words = $this->getConsecutiveWords($indexSize);
4144
$result = array();
4245
$resultLength = 0;
@@ -47,28 +50,28 @@ public function realText($maxNbChars = 200, $indexSize = 2)
4750
$word = static::randomElement($words[$next]);
4851

4952
// calculate next index
50-
$currentWords = explode(' ', $next);
53+
$currentWords = static::explode($next);
5154
$currentWords[] = $word;
5255
array_shift($currentWords);
53-
$next = implode(' ', $currentWords);
56+
$next = static::implode($currentWords);
5457

5558
// ensure text starts with an uppercase letter
56-
if ($resultLength == 0 && !preg_match('/^\p{Lu}/u', $word)) {
59+
if ($resultLength == 0 && !static::validStart($word)) {
5760
continue;
5861
}
5962

6063
// append the element
6164
$result[] = $word;
62-
$resultLength += strlen($word) + 1;
65+
$resultLength += static::strlen($word) + static::$separatorLen;
6366
}
6467

6568
// remove the element that caused the text to overflow
6669
array_pop($result);
6770

6871
// build result
69-
$result = implode(' ', $result);
72+
$result = static::implode($result);
7073

71-
return $result.'.';
74+
return static::appendEnd($result);
7275
}
7376

7477
protected function getConsecutiveWords($indexSize)
@@ -82,7 +85,7 @@ protected function getConsecutiveWords($indexSize)
8285
}
8386

8487
for ($i = 0, $count = count($parts); $i < $count; $i++) {
85-
$stringIndex = implode(' ', $index);
88+
$stringIndex = static::implode($index);
8689
if (!isset($words[$stringIndex])) {
8790
$words[$stringIndex] = array();
8891
}
@@ -101,9 +104,35 @@ protected function getConsecutiveWords($indexSize)
101104
protected function getExplodedText()
102105
{
103106
if ($this->explodedText === null) {
104-
$this->explodedText = explode(' ', preg_replace('/\s+/u', ' ', static::$baseText));
107+
$this->explodedText = static::explode(preg_replace('/\s+/u', ' ', static::$baseText));
105108
}
106109

107110
return $this->explodedText;
108111
}
112+
113+
protected static function explode($text)
114+
{
115+
return explode(static::$separator, $text);
116+
}
117+
118+
protected static function implode($words)
119+
{
120+
return implode(static::$separator, $words);
121+
}
122+
123+
protected static function strlen($text)
124+
{
125+
return strlen($text);
126+
}
127+
128+
protected static function validStart($word)
129+
{
130+
return preg_match('/^\p{Lu}/u', $word);
131+
}
132+
133+
protected static function appendEnd($text)
134+
{
135+
return $text.'.';
136+
}
137+
109138
}

src/Faker/Provider/zh_TW/Text.php

+11-85
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,9 @@
44

55
class Text extends \Faker\Provider\Text
66
{
7-
protected $explodedText = null;
8-
protected $consecutiveWords = array();
7+
protected static $separator = '';
8+
protected static $separatorLen = 0;
9+
protected static $punct = array('', '', '', '', '', '', '', '', '', '');
910

1011
/**
1112
* Title: 三國演義 Romance of the Three Kingdoms
@@ -77,98 +78,23 @@ class Text extends \Faker\Provider\Text
7778
三人飛馬引軍而出。張角正殺敗董卓,乘勢趕來,忽遇三人衝殺,角軍大亂,敗走五十餘里。三人救了董卓回寨。卓問三人現居何職。玄德曰:「白身。」卓甚輕之,不為禮。玄德出,張飛大怒曰:「我等親赴血戰,救了這廝,他卻如此無禮;若不殺之,難消我氣!」便要提刀入帳來殺董卓。正是:人情勢利古猶今,誰識英雄是白身?安得快人如翼德,盡誅世上負心人!畢竟董卓性命如何,且看下文分解。
7879
EOT;
7980

80-
public function realText($maxNbChars = 200, $indexSize = 2)
81+
protected static function explode($text)
8182
{
82-
if ($maxNbChars < 10) {
83-
throw new \InvalidArgumentException('maxNbChars must be at least 10');
84-
}
85-
if ($indexSize < 1) {
86-
throw new \InvalidArgumentException('indexSize must be at least 1');
87-
}
88-
if ($indexSize > 5) {
89-
throw new \InvalidArgumentException('indexSize must be at most 5');
90-
}
91-
92-
$words = $this->getConsecutiveWords($indexSize);
93-
$result = array();
94-
$resultLength = 0;
95-
// take a random starting point
96-
$punct = array('', '', '', '', '', '', '', '', '', '');
97-
$next = static::randomKey($words);
98-
while ($resultLength < $maxNbChars && isset($words[$next])) {
99-
// fetch a random word to append
100-
$word = static::randomElement($words[$next]);
101-
102-
// calculate next index
103-
$currentWords = static::split($next);
104-
$currentWords[] = $word;
105-
array_shift($currentWords);
106-
$next = implode('', $currentWords);
107-
108-
// ensure the first word is not punctuation
109-
if ($resultLength === 0 and in_array($word, $punct)) {
110-
continue;
111-
}
112-
113-
// append the element
114-
$result[] = $word;
115-
$resultLength += static::strlen($word);
116-
}
117-
118-
// remove the element that caused the text to overflow
119-
array_pop($result);
120-
121-
// build result
122-
$result = implode('', $result);
123-
124-
return $result.static::randomElement(array('', '', '',));
125-
}
126-
127-
protected function getConsecutiveWords($indexSize)
128-
{
129-
if (!isset($this->consecutiveWords[$indexSize])) {
130-
$parts = $this->getExplodedText();
131-
$words = array();
132-
$index = array();
133-
for ($i = 0; $i < $indexSize; $i++) {
134-
$index[] = array_shift($parts);
135-
}
136-
137-
for ($i = 0, $count = count($parts); $i < $count; $i++) {
138-
$stringIndex = implode('', $index);
139-
if (!isset($words[$stringIndex])) {
140-
$words[$stringIndex] = array();
141-
}
142-
$word = $parts[$i];
143-
$words[$stringIndex][] = $word;
144-
array_shift($index);
145-
$index[] = $word;
146-
}
147-
// cache look up words for performance
148-
$this->consecutiveWords[$indexSize] = $words;
149-
}
150-
151-
return $this->consecutiveWords[$indexSize];
83+
// Split the whitespace-stripped text into individual UTF-8 characters.
// PREG_SPLIT_NO_EMPTY discards only the empty matches at the string edges;
// a callback-less array_filter() would also drop the character "0".
return preg_split('//u', preg_replace('/\s+/', '', $text), -1, PREG_SPLIT_NO_EMPTY);
15284
}
15385

154-
protected function getExplodedText()
86+
protected static function strlen($text)
15587
{
156-
if ($this->explodedText === null) {
157-
$this->explodedText = static::split(static::$baseText);
158-
}
159-
return $this->explodedText;
88+
// Count characters, not bytes. mbstring is optional, so fall back to counting
// the pieces produced by explode(); the former split() helper no longer exists
// in this class and calling it would be a fatal error.
return function_exists('mb_strlen') ? mb_strlen($text, 'UTF-8') : count(static::explode($text));
16089
}
16190

162-
public static function split($text)
91+
protected static function validStart($word)
16392
{
164-
return array_values(array_filter(preg_split('//u', preg_replace('/\s+/', '', $text))));
93+
return !in_array($word, static::$punct);
16594
}
16695

167-
public static function strlen($text)
96+
protected static function appendEnd($text)
16897
{
169-
if (function_exists('mb_get_info')) {
170-
return mb_strlen($text);
171-
}
172-
return count(static::split($text));
98+
return $text.static::randomElement(array('', '', '',));
17399
}
174100
}

0 commit comments

Comments
 (0)