Tokenizer.php 3.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154
  1. <?php
  2. /**
  3. * This file is part of FPDI
  4. *
  5. * @package Fpdi
  6. * @copyright Copyright (c) 2020 Setasign GmbH & Co. KG (https://www.setasign.com)
  7. * @license http://opensource.org/licenses/mit-license The MIT License
  8. */
  9. namespace Fpdi\PdfParser;
  10. /**
  11. * A tokenizer class.
  12. */
  class Tokenizer
  {
      /**
       * The reader that supplies the bytes to tokenize.
       *
       * @var StreamReader
       */
      protected $streamReader;

      /**
       * Tokens that were pushed back by a caller.
       *
       * They are handed out again by getNextToken() in last-in, first-out
       * order before any further byte is read from the stream reader.
       *
       * @var string[]
       */
      protected $stack = [];

      /**
       * Tokenizer constructor.
       *
       * @param StreamReader $streamReader The reader to pull bytes from.
       */
      public function __construct(StreamReader $streamReader)
      {
          $this->streamReader = $streamReader;
      }

      /**
       * Get the stream reader instance this tokenizer reads from.
       *
       * @return StreamReader
       */
      public function getStreamReader()
      {
          return $this->streamReader;
      }

      /**
       * Drop every token that is currently waiting on the stack.
       */
      public function clearStack()
      {
          $this->stack = [];
      }

      /**
       * Push a token onto the stack so that the next call to getNextToken()
       * returns it instead of reading from the stream reader.
       *
       * @param string $token
       */
      public function pushStack($token)
      {
          $this->stack[] = $token;
      }

      /**
       * Get the next token.
       *
       * A token from the stack takes precedence. Otherwise leading white space
       * (space, LF, CR, FF, TAB, NUL) and "%" comments are skipped and either a
       * single delimiter byte ("/", "[", "]", "(", ")", "{", "}", "<", ">") or a
       * run of bytes up to the next white-space/delimiter byte is returned.
       *
       * @return bool|string The token, or false if no further byte could be read.
       */
      public function getNextToken()
      {
          // Comments are skipped by looping rather than by recursion, so that a
          // long run of consecutive comment lines cannot grow the call stack.
          while (true) {
              $token = \array_pop($this->stack);
              if ($token !== null) {
                  return $token;
              }

              $byte = $this->streamReader->readByte();
              if ($byte === false) {
                  return false;
              }

              if (\in_array($byte, ["\x20", "\x0A", "\x0D", "\x0C", "\x09", "\x00"], true)) {
                  if ($this->leapWhiteSpaces() === false) {
                      return false;
                  }

                  $byte = $this->streamReader->readByte();
                  // Defensive: without this check a failed read would fall through
                  // to the regular-token scan below and yield a bogus token.
                  if ($byte === false) {
                      return false;
                  }
              }

              switch ($byte) {
                  case '/':
                  case '[':
                  case ']':
                  case '(':
                  case ')':
                  case '{':
                  case '}':
                  case '<':
                  case '>':
                      return $byte;
                  case '%':
                      // Discard the rest of the comment line and start over
                      // (including the stack check) with whatever follows it.
                      $this->streamReader->readLine();
                      continue 2;
              }

              /* Regular token: searching the buffer for the next white-space or
               * delimiter byte is faster than checking single bytes.
               */
              $bufferOffset = $this->streamReader->getOffset();
              do {
                  $lastBuffer = $this->streamReader->getBuffer(false);
                  $pos = \strcspn(
                      $lastBuffer,
                      "\x00\x09\x0A\x0C\x0D\x20()<>[]{}/%",
                      $bufferOffset
                  );
              } while (
                  // Stop as soon as a delimiter or white-space byte is found in the
                  // current buffer; if the scan ran to the very end of the buffer,
                  // try to increase the buffer's length and scan again.
                  $lastBuffer !== false &&
                  (
                      $bufferOffset + $pos === \strlen($lastBuffer) &&
                      $this->streamReader->increaseLength()
                  )
              );

              // Start one byte before $bufferOffset so that the byte already fetched
              // through readByte() becomes the first byte of the token.
              $result = \substr($lastBuffer, $bufferOffset - 1, $pos + 1);
              $this->streamReader->setOffset($bufferOffset + $pos);

              return $result;
          }
      }

      /**
       * Leap white spaces.
       *
       * Advances the stream reader's offset past any run of white-space bytes
       * (space, LF, FF, CR, TAB, NUL). Whenever the offset reaches the end of the
       * buffer the reader is asked for more content and the scan continues.
       *
       * @return bool False if the stream reader reports that no content is
       *              available, true otherwise.
       */
      public function leapWhiteSpaces()
      {
          do {
              if (!$this->streamReader->ensureContent()) {
                  return false;
              }

              $buffer = $this->streamReader->getBuffer(false);
              $matches = \strspn($buffer, "\x20\x0A\x0C\x0D\x09\x00", $this->streamReader->getOffset());
              if ($matches > 0) {
                  $this->streamReader->addOffset($matches);
              }
              // The whole buffer was consumed: loop to check for further content.
          } while ($this->streamReader->getOffset() >= $this->streamReader->getBufferLength());

          return true;
      }
  }