You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

364 lines
11 KiB

3 years ago
  1. <?php
  2. /*
  3. * This file is part of the Symfony package.
  4. *
  5. * (c) Fabien Potencier <fabien@symfony.com>
  6. *
  7. * For the full copyright and license information, please view the LICENSE
  8. * file that was distributed with this source code.
  9. */
  10. namespace Symfony\Component\Translation;
  11. use Symfony\Contracts\Translation\TranslatorInterface;
  12. /**
  13. * This translator should only be used in a development environment.
  14. */
  15. final class PseudoLocalizationTranslator implements TranslatorInterface
  16. {
  17. private const EXPANSION_CHARACTER = '~';
  18. private $translator;
  19. private $accents;
  20. private $expansionFactor;
  21. private $brackets;
  22. private $parseHTML;
  23. private $localizableHTMLAttributes;
  /**
   * Wraps a translator and configures how its output is pseudo-localized.
   *
   * Available options:
   *  * accents:
   *      type: boolean
   *      default: true
   *      description: replace ASCII characters of the translated string with accented versions or similar characters
   *      example: if true, "foo" => "ƒöö".
   *
   *  * expansion_factor:
   *      type: float
   *      default: 1
   *      validation: it must be greater than or equal to 1
   *      description: expand the translated string by the given factor with spaces and tildes
   *      example: if 2, "foo" => "~foo ~"
   *
   *  * brackets:
   *      type: boolean
   *      default: true
   *      description: wrap the translated string with brackets
   *      example: if true, "foo" => "[foo]"
   *
   *  * parse_html:
   *      type: boolean
   *      default: false
   *      description: parse the translated string as HTML - looking for HTML tags has a performance impact but allows to preserve them from alterations - it also allows to compute the visible translated string length which is useful to correctly expand it when it contains HTML
   *      warning: unclosed tags are unsupported, they will be fixed (closed) by the parser - eg, "foo <div>bar" => "foo <div>bar</div>"
   *
   *  * localizable_html_attributes:
   *      type: string[]
   *      default: []
   *      description: the list of HTML attributes whose values can be altered - it is only useful when the "parse_html" option is set to true
   *      example: if ["title"], and with the "accents" option set to true, "<a href="#" title="Go to your profile">Profile</a>" => "<a href="#" title="Ĝö ţö ýöûŕ þŕöƒîļé">Þŕöƒîļé</a>" - if "title" was not in the "localizable_html_attributes" list, the title attribute data would be left unchanged.
   *
   * @param TranslatorInterface $translator the translator whose output gets pseudo-localized
   * @param array               $options    see the list of available options above
   *
   * @throws \InvalidArgumentException when the "expansion_factor" option is lower than 1
   */
  public function __construct(TranslatorInterface $translator, array $options = [])
  {
      $this->translator = $translator;
      $this->accents = $options['accents'] ?? true;

      $this->expansionFactor = $options['expansion_factor'] ?? 1.0;
      if (1.0 > $this->expansionFactor) {
          throw new \InvalidArgumentException('The expansion factor must be greater than or equal to 1.');
      }

      $this->brackets = $options['brackets'] ?? true;
      $this->parseHTML = $options['parse_html'] ?? false;

      // HTML parsing only matters when the text is accented or expanded. The
      // factor is compared loosely (as expand() does) so that an integer 1 is
      // recognised as "no expansion" too; it is already known to be >= 1 here.
      if ($this->parseHTML && !$this->accents && 1.0 >= $this->expansionFactor) {
          $this->parseHTML = false;
      }

      $this->localizableHTMLAttributes = $options['localizable_html_attributes'] ?? [];
  }
  /**
   * {@inheritdoc}
   *
   * Delegates the translation to the wrapped translator, then rebuilds the
   * result part by part: non-localizable parts are copied verbatim, localizable
   * ones go through addAccents(). The concatenated visible text drives the
   * expansion, and brackets are added last.
   */
  public function trans(string $id, array $parameters = [], ?string $domain = null, ?string $locale = null): string
  {
      $original = $this->translator->trans($id, $parameters, $domain, $locale);

      $trans = '';
      $visibleText = '';

      foreach ($this->getParts($original) as [$visible, $localizable, $text]) {
          if ($visible) {
              $visibleText .= $text;
          }

          if ($localizable) {
              $this->addAccents($trans, $text);
          } else {
              $trans .= $text;
          }
      }

      $this->expand($trans, $visibleText);
      $this->addBrackets($trans);

      return $trans;
  }
  /**
   * Returns the locale reported by the wrapped translator.
   */
  public function getLocale(): string
  {
      $locale = $this->translator->getLocale();

      return $locale;
  }
  /**
   * Splits a translated string into [visible, localizable, text] triples.
   *
   * When HTML parsing is disabled the whole string is returned as a single
   * visible and localizable part. Otherwise the string is wrapped in a
   * <trans> element, loaded with DOMDocument and walked by parseNode().
   *
   * @return array[] list of [bool $visible, bool $localizable, string $text]
   */
  private function getParts(string $originalTrans): array
  {
      if (!$this->parseHTML) {
          return [[true, true, $originalTrans]];
      }

      // Encode every non-ASCII character as a numeric entity before handing the
      // markup to loadHTML(). This replaces the 'HTML-ENTITIES' target of
      // mb_convert_encoding(), which is deprecated as of PHP 8.2.
      $encoding = mb_detect_encoding($originalTrans, null, true) ?: 'UTF-8';
      $html = mb_encode_numericentity($originalTrans, [0x80, 0x10FFFF, 0, 0x1FFFFF], $encoding);

      // Collect libxml errors internally while parsing, then restore the
      // caller's previous setting.
      $useInternalErrors = libxml_use_internal_errors(true);

      $dom = new \DOMDocument();
      $dom->loadHTML('<trans>'.$html.'</trans>');

      libxml_clear_errors();
      libxml_use_internal_errors($useInternalErrors);

      // Second child of the document, then its first child, then that node's
      // first child: presumably html > body > trans as built by loadHTML().
      $wrapper = $dom->childNodes->item(1)->childNodes->item(0)->childNodes->item(0);

      return $this->parseNode($wrapper);
  }
  /**
   * Recursively flattens the children of a DOM node into
   * [visible, localizable, text] triples.
   *
   * - Children that are not elements become one visible, localizable part
   *   holding their node value.
   * - Elements are re-serialized as markup: the tag name, attribute names,
   *   quotes and angle brackets are neither visible nor localizable.
   * - Attribute values are escaped with htmlspecialchars() and split around the
   *   resulting entities; the entities are never localizable, and the remaining
   *   chunks are localizable only when the attribute name is listed in the
   *   "localizable_html_attributes" option.
   *
   * @return array[] list of [bool $visible, bool $localizable, string $text]
   */
  private function parseNode(\DOMNode $node): array
  {
      $parts = [];

      foreach ($node->childNodes as $childNode) {
          if (!$childNode instanceof \DOMElement) {
              $parts[] = [true, true, $childNode->nodeValue];

              continue;
          }

          $parts[] = [false, false, '<'.$childNode->tagName];

          /** @var \DOMAttr $attribute */
          foreach ($childNode->attributes as $attribute) {
              $parts[] = [false, false, ' '.$attribute->nodeName.'="'];

              $localizableAttribute = \in_array($attribute->nodeName, $this->localizableHTMLAttributes, true);
              $escapedValue = htmlspecialchars($attribute->nodeValue, \ENT_QUOTES, 'UTF-8');

              // With PREG_SPLIT_DELIM_CAPTURE, even indexes hold plain text and
              // odd indexes hold the captured entities.
              foreach (preg_split('/(&(?:amp|quot|#039|lt|gt);+)/', $escapedValue, -1, \PREG_SPLIT_DELIM_CAPTURE) as $i => $match) {
                  if ('' === $match) {
                      continue;
                  }

                  $parts[] = [false, $localizableAttribute && 0 === $i % 2, $match];
              }

              $parts[] = [false, false, '"'];
          }

          $parts[] = [false, false, '>'];

          // Append the nested parts in place rather than re-merging the whole
          // accumulated list for every element.
          foreach ($this->parseNode($childNode) as $nestedPart) {
              $parts[] = $nestedPart;
          }

          $parts[] = [false, false, '</'.$childNode->tagName.'>'];
      }

      return $parts;
  }
  /**
   * Appends $text to $trans, substituting accented or look-alike characters
   * for printable ASCII when the "accents" option is enabled.
   *
   * When the option is disabled, $text is appended unchanged.
   */
  private function addAccents(string &$trans, string $text): void
  {
      if (!$this->accents) {
          $trans .= $text;

          return;
      }

      $replacements = [
          // Space and punctuation
          ' ' => ' ',
          '!' => '¡',
          '"' => '″',
          '#' => '♯',
          '$' => '€',
          '%' => '‰',
          '&' => '⅋',
          '\'' => '´',
          '(' => '{',
          ')' => '}',
          '*' => '⁎',
          '+' => '⁺',
          ',' => '،',
          '-' => '‐',
          '.' => '·',
          '/' => '⁄',
          // Digits
          '0' => '⓪',
          '1' => '①',
          '2' => '②',
          '3' => '③',
          '4' => '④',
          '5' => '⑤',
          '6' => '⑥',
          '7' => '⑦',
          '8' => '⑧',
          '9' => '⑨',
          // More punctuation
          ':' => '∶',
          ';' => '⁏',
          '<' => '≤',
          '=' => '≂',
          '>' => '≥',
          '?' => '¿',
          '@' => '՞',
          // Uppercase letters
          'A' => 'Å',
          'B' => 'Ɓ',
          'C' => 'Ç',
          'D' => 'Ð',
          'E' => 'É',
          'F' => 'Ƒ',
          'G' => 'Ĝ',
          'H' => 'Ĥ',
          'I' => 'Î',
          'J' => 'Ĵ',
          'K' => 'Ķ',
          'L' => 'Ļ',
          'M' => 'Ṁ',
          'N' => 'Ñ',
          'O' => 'Ö',
          'P' => 'Þ',
          'Q' => 'Ǫ',
          'R' => 'Ŕ',
          'S' => 'Š',
          'T' => 'Ţ',
          'U' => 'Û',
          'V' => 'Ṽ',
          'W' => 'Ŵ',
          'X' => 'Ẋ',
          'Y' => 'Ý',
          'Z' => 'Ž',
          // Brackets and symbols
          '[' => '⁅',
          '\\' => '∖',
          ']' => '⁆',
          '^' => '˄',
          '_' => '‿',
          '`' => '‵',
          // Lowercase letters
          'a' => 'å',
          'b' => 'ƀ',
          'c' => 'ç',
          'd' => 'ð',
          'e' => 'é',
          'f' => 'ƒ',
          'g' => 'ĝ',
          'h' => 'ĥ',
          'i' => 'î',
          'j' => 'ĵ',
          'k' => 'ķ',
          'l' => 'ļ',
          'm' => 'ɱ',
          'n' => 'ñ',
          'o' => 'ö',
          'p' => 'þ',
          'q' => 'ǫ',
          'r' => 'ŕ',
          's' => 'š',
          't' => 'ţ',
          'u' => 'û',
          'v' => 'ṽ',
          'w' => 'ŵ',
          'x' => 'ẋ',
          'y' => 'ý',
          'z' => 'ž',
          // Trailing symbols
          '{' => '(',
          '|' => '¦',
          '}' => ')',
          '~' => '˞',
      ];

      $trans .= strtr($text, $replacements);
  }
  /**
   * Pads $trans with filler "words" made of the expansion character so that
   * the visible text grows by the configured expansion factor.
   *
   * The number of characters to add is ceil(length * factor) - length, minus 2
   * when brackets are enabled. Filler word lengths are drawn at random from the
   * lengths of the words in $visibleText, weighted by how often each length
   * occurs; whatever is left once no word length fits is added as one final
   * chunk.
   */
  private function expand(string &$trans, string $visibleText): void
  {
      if ($this->expansionFactor <= 1.0) {
          return;
      }

      $textLength = $this->strlen($visibleText);
      $remaining = (int) ceil($textLength * $this->expansionFactor) - $textLength;
      if ($this->brackets) {
          $remaining -= 2;
      }

      if ($remaining <= 0) {
          return;
      }

      // Final chunk: a single character when one is missing, otherwise a
      // space followed by the rest of the missing length.
      $tail = static function (int $length): string {
          if (1 === $length) {
              return self::EXPANSION_CHARACTER;
          }

          return ' '.str_repeat(self::EXPANSION_CHARACTER, $length - 1);
      };

      // Histogram (word length => occurrences) of the words short enough to
      // leave room for their leading space.
      $histogram = [];
      $candidates = 0;
      foreach (preg_split('/ +/', $visibleText, -1, \PREG_SPLIT_NO_EMPTY) as $word) {
          $size = $this->strlen($word);
          if ($size < $remaining) {
              $histogram[$size] = ($histogram[$size] ?? 0) + 1;
              ++$candidates;
          }
      }

      if ([] === $histogram) {
          $trans .= $tail($remaining);

          return;
      }

      arsort($histogram, \SORT_NUMERIC);
      $longest = max(array_keys($histogram));

      for (;;) {
          // Weighted random pick of a word length.
          $pick = mt_rand(1, $candidates);
          foreach ($histogram as $size => $occurrences) {
              $chosen = $size;
              $pick -= $occurrences;
              if ($pick <= 0) {
                  break;
              }
          }

          $trans .= ' '.str_repeat(self::EXPANSION_CHARACTER, $chosen);
          $remaining -= $chosen + 1;

          if (0 === $remaining) {
              return;
          }

          // Drop the lengths that no longer fit in what is left to add.
          while ($longest >= $remaining) {
              $candidates -= $histogram[$longest];
              unset($histogram[$longest]);

              if ([] === $histogram) {
                  $trans .= $tail($remaining);

                  return;
              }

              $longest = max(array_keys($histogram));
          }
      }
  }
  /**
   * Wraps $trans in square brackets when the "brackets" option is enabled.
   */
  private function addBrackets(string &$trans): void
  {
      if ($this->brackets) {
          $trans = sprintf('[%s]', $trans);
      }
  }
  /**
   * Returns the length of $s in characters of its detected encoding, falling
   * back to the byte length when no encoding can be detected.
   */
  private function strlen(string $s): int
  {
      $encoding = mb_detect_encoding($s, null, true);

      if (false !== $encoding) {
          return mb_strlen($s, $encoding);
      }

      return \strlen($s);
  }
  303. }