You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

336 lines
8.9 KiB

3 years ago
  1. <?php
  2. /*
  3. * This file is part of the Symfony package.
  4. *
  5. * (c) Fabien Potencier <fabien@symfony.com>
  6. *
  7. * For the full copyright and license information, please view the LICENSE
  8. * file that was distributed with this source code.
  9. */
  10. namespace Symfony\Component\Translation\Extractor;
  11. use Symfony\Component\Finder\Finder;
  12. use Symfony\Component\Translation\MessageCatalogue;
  13. /**
  14. * PhpExtractor extracts translation messages from a PHP template.
  15. *
  16. * @author Michel Salib <michelsalib@hotmail.com>
  17. */
  18. class PhpExtractor extends AbstractFileExtractor implements ExtractorInterface
  19. {
  /** Sequence placeholder marking where the translation message is read. */
  public const MESSAGE_TOKEN = 300;

  /** Sequence placeholder marking a method argument that is skipped over. */
  public const METHOD_ARGUMENTS_TOKEN = 1000;

  /** Sequence placeholder marking where the translation domain is read. */
  public const DOMAIN_TOKEN = 1001;

  /**
   * Text prepended to an extracted message to form the value stored in the
   * catalogue for that message.
   *
   * @var string
   */
  private $prefix = '';

  /**
   * Token sequences that identify a translation call.
   *
   * Every entry is an ordered list whose string items are compared against
   * the normalized source tokens, and whose MESSAGE_TOKEN,
   * METHOD_ARGUMENTS_TOKEN and DOMAIN_TOKEN items are placeholders handled
   * by parseTokens(). Entries are tried in the order listed and the first
   * one that yields a message wins, so the longer variant of each call is
   * listed before its shorter form.
   *
   * @var array
   */
  protected $sequences = [
      // $translator->trans(...)
      ['->', 'trans', '(', self::MESSAGE_TOKEN, ',', self::METHOD_ARGUMENTS_TOKEN, ',', self::DOMAIN_TOKEN],
      ['->', 'trans', '(', self::MESSAGE_TOKEN],

      // new TranslatableMessage(...)
      ['new', 'TranslatableMessage', '(', self::MESSAGE_TOKEN, ',', self::METHOD_ARGUMENTS_TOKEN, ',', self::DOMAIN_TOKEN],
      ['new', 'TranslatableMessage', '(', self::MESSAGE_TOKEN],

      // new \Symfony\Component\Translation\TranslatableMessage(...) with
      // message, arguments and domain: once with the name split into
      // separate tokens, once as a single fully-qualified name token.
      [
          'new', '\\', 'Symfony', '\\', 'Component', '\\', 'Translation', '\\', 'TranslatableMessage', '(',
          self::MESSAGE_TOKEN, ',', self::METHOD_ARGUMENTS_TOKEN, ',', self::DOMAIN_TOKEN,
      ],
      [
          'new', '\Symfony\Component\Translation\TranslatableMessage', '(',
          self::MESSAGE_TOKEN, ',', self::METHOD_ARGUMENTS_TOKEN, ',', self::DOMAIN_TOKEN,
      ],

      // Same fully-qualified constructor, message only.
      [
          'new', '\\', 'Symfony', '\\', 'Component', '\\', 'Translation', '\\', 'TranslatableMessage', '(',
          self::MESSAGE_TOKEN,
      ],
      ['new', '\Symfony\Component\Translation\TranslatableMessage', '(', self::MESSAGE_TOKEN],

      // t(...)
      ['t', '(', self::MESSAGE_TOKEN, ',', self::METHOD_ARGUMENTS_TOKEN, ',', self::DOMAIN_TOKEN],
      ['t', '(', self::MESSAGE_TOKEN],
  ];
  /**
   * {@inheritdoc}
   *
   * Reads every file resolved from $resource, tokenizes its content and
   * hands the tokens to parseTokens() together with $catalog.
   */
  public function extract($resource, MessageCatalogue $catalog)
  {
      foreach ($this->extractFiles($resource) as $path) {
          $source = file_get_contents($path);
          $this->parseTokens(token_get_all($source), $catalog, $path);

          // Release memory-manager caches after each file.
          gc_mem_caches();
      }
  }
  /**
   * {@inheritdoc}
   *
   * Stores the string that parseTokens() prepends to each extracted message
   * when it writes the message into the catalogue.
   */
  public function setPrefix(string $prefix)
  {
      $this->prefix = $prefix;
  }
  /**
   * Reduces a tokenizer entry to the value compared against sequence items.
   *
   * Entries that have an element at offset 1 yield that element (the source
   * text of an array token). Everything else, including the two-character
   * string token 'b"', is handed back unchanged.
   *
   * @param mixed $token
   *
   * @return string|null
   */
  protected function normalizeToken($token)
  {
      // 'b"' is a plain string token that happens to have an offset 1, so it
      // must be excluded before the offset check.
      if ('b"' === $token || !isset($token[1])) {
          return $token;
      }

      return $token[1];
  }
  /**
   * Advances the iterator past any run of whitespace tokens.
   *
   * The iterator is left on the first token whose type is not T_WHITESPACE,
   * or in the invalid state when only whitespace remained.
   */
  private function seekToNextRelevantToken(\Iterator $tokenIterator)
  {
      while ($tokenIterator->valid() && \T_WHITESPACE === $tokenIterator->current()[0]) {
          $tokenIterator->next();
      }
  }
  /**
   * Moves the iterator to the end of the current call argument.
   *
   * Bracket and parenthesis nesting is tracked so that commas inside nested
   * arrays or calls are ignored. The iterator stops on the "," that ends the
   * argument at nesting level zero, or on the ")" that closes the enclosing
   * call; if neither is found it ends up invalid.
   */
  private function skipMethodArgument(\Iterator $tokenIterator)
  {
      $depth = 0;

      while ($tokenIterator->valid()) {
          // Array tokens expose an integer id here, so only single-character
          // string tokens can match the comparisons below.
          $type = $tokenIterator->current()[0];

          if ('[' === $type || '(' === $type) {
              ++$depth;
          } elseif (']' === $type || ')' === $type) {
              --$depth;
          }

          $atSeparator = ',' === $type && 0 === $depth;
          $atCallEnd = ')' === $type && -1 === $depth;
          if ($atSeparator || $atCallEnd) {
              return;
          }

          $tokenIterator->next();
      }
  }
  /**
   * Extracts the message from the iterator while the tokens
   * match allowed message tokens.
   *
   * Starting at the iterator's current position, string literals and heredoc
   * blocks are run through PhpStringTokenParser and appended to the result;
   * "." tokens and whitespace in between are skipped. Reading stops on the
   * first token that is none of those, and the iterator is left on it.
   *
   * @return string The concatenated message, or '' when no string token was read
   */
  private function getValue(\Iterator $tokenIterator)
  {
      $message = '';
      $docToken = '';
      $docPart = '';

      for (; $tokenIterator->valid(); $tokenIterator->next()) {
          $t = $tokenIterator->current();
          if ('.' === $t) {
              // Concatenate with next token
              continue;
          }

          if (!isset($t[1])) {
              // Any other single-character token ends the value.
              break;
          }

          switch ($t[0]) {
              case \T_START_HEREDOC:
                  // Remember the opening marker; the body arrives in a later token.
                  $docToken = $t[1];
                  break;
              case \T_ENCAPSED_AND_WHITESPACE:
              case \T_CONSTANT_ENCAPSED_STRING:
                  if ('' === $docToken) {
                      $message .= PhpStringTokenParser::parse($t[1]);
                  } else {
                      // Inside a heredoc: keep the raw body until the closing marker.
                      $docPart = $t[1];
                  }
                  break;
              case \T_END_HEREDOC:
                  // The closing marker may be indented with spaces or with
                  // tabs; that many leading characters are removed from
                  // every line of the body.
                  if ($indentation = strspn($t[1], " \t")) {
                      $docPartWithLineBreaks = $docPart;
                      $docPart = '';

                      foreach (preg_split('~(\r\n|\n|\r)~', $docPartWithLineBreaks, -1, \PREG_SPLIT_DELIM_CAPTURE) as $str) {
                          if (\in_array($str, ["\r\n", "\n", "\r"], true)) {
                              // Captured line break: keep as is.
                              $docPart .= $str;
                          } else {
                              $docPart .= substr($str, $indentation);
                          }
                      }
                  }

                  $message .= PhpStringTokenParser::parseDocString($docToken, $docPart);
                  $docToken = '';
                  $docPart = '';
                  break;
              case \T_WHITESPACE:
                  break;
              default:
                  // Not part of a string expression: stop reading.
                  break 2;
          }
      }

      return $message;
  }
  /**
   * Extracts trans message from PHP tokens.
   *
   * Every token position is tried as the start of each configured sequence.
   * The first sequence that yields a message at a position is recorded in
   * the catalogue (prefixed translation plus a "file:line" entry appended to
   * the message's "sources" metadata), and the remaining sequences are not
   * tried for that position.
   *
   * @param array  $tokens   Tokens as returned by token_get_all()
   * @param string $filename File the tokens were read from, used for the source metadata
   */
  protected function parseTokens(array $tokens, MessageCatalogue $catalog, string $filename)
  {
      $tokenIterator = new \ArrayIterator($tokens);
      $tokenCount = $tokenIterator->count();

      for ($key = 0; $key < $tokenCount; ++$key) {
          // Never start a match on whitespace: seekToNextRelevantToken()
          // would skip ahead, and the very same call would be matched again
          // from the following key, recording a duplicate source that also
          // carries the whitespace token's line number.
          $startToken = $tokens[$key] ?? null;
          if (\is_array($startToken) && \T_WHITESPACE === $startToken[0]) {
              continue;
          }

          foreach ($this->sequences as $sequence) {
              $message = '';
              $domain = 'messages';
              $tokenIterator->seek($key);

              foreach ($sequence as $sequenceKey => $item) {
                  $this->seekToNextRelevantToken($tokenIterator);

                  if ($this->normalizeToken($tokenIterator->current()) === $item) {
                      // Literal item matched: move on to the next one.
                      $tokenIterator->next();
                      continue;
                  } elseif (self::MESSAGE_TOKEN === $item) {
                      $message = $this->getValue($tokenIterator);

                      if (\count($sequence) === ($sequenceKey + 1)) {
                          break;
                      }
                  } elseif (self::METHOD_ARGUMENTS_TOKEN === $item) {
                      $this->skipMethodArgument($tokenIterator);
                  } elseif (self::DOMAIN_TOKEN === $item) {
                      // Keep the default domain when no string could be read.
                      $domainToken = $this->getValue($tokenIterator);
                      if ('' !== $domainToken) {
                          $domain = $domainToken;
                      }

                      break;
                  } else {
                      // Literal item did not match: give up on this sequence.
                      break;
                  }
              }

              if ($message) {
                  $catalog->set($message, $this->prefix.$message, $domain);
                  $metadata = $catalog->getMetadata($message, $domain) ?? [];
                  // Collapse any run of slashes or backslashes into a single "/".
                  $normalizedFilename = preg_replace('{[\\\\/]+}', '/', $filename);
                  $metadata['sources'][] = $normalizedFilename.':'.$tokens[$key][2];
                  $catalog->setMetadata($message, $metadata, $domain);
                  break;
              }
          }
      }
  }
  /**
   * Tells whether the given path is a file with the "php" extension.
   *
   * @return bool
   *
   * @throws \InvalidArgumentException presumably raised by isFile(); confirm against the parent class
   */
  protected function canBeExtracted(string $file)
  {
      if (!$this->isFile($file)) {
          return false;
      }

      return 'php' === pathinfo($file, \PATHINFO_EXTENSION);
  }
  /**
   * {@inheritdoc}
   *
   * Builds a Finder restricted to files named "*.php" inside $directory.
   */
  protected function extractFromDirectory($directory)
  {
      return (new Finder())
          ->files()
          ->name('*.php')
          ->in($directory);
  }
  303. }