FixedReader.php 5.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199
  1. <?php
  2. /**
  3. * This file is part of FPDI
  4. *
  5. * @package Fpdi
  6. * @copyright Copyright (c) 2020 Setasign GmbH & Co. KG (https://www.setasign.com)
  7. * @license http://opensource.org/licenses/mit-license The MIT License
  8. */
  9. namespace Fpdi\PdfParser\CrossReference;
  10. use Fpdi\PdfParser\PdfParser;
  11. use Fpdi\PdfParser\StreamReader;
  /**
   * Class FixedReader
   *
   * This reader parses single cross-reference entries with very little overhead: only the subsection headers are
   * read up front, while the individual entries are read on demand instead of all in a single run.
   */
  class FixedReader extends AbstractReader implements ReaderInterface
  {
      /**
       * The stream reader used to access the cross-reference data.
       *
       * @var StreamReader
       */
      protected $reader;

      /**
       * Data of subsections.
       *
       * The key is the position of the first entry of a subsection, the value is a two-element array holding the
       * first object number and the number of entries: [position => [startObject, entryCount]].
       *
       * @var array
       */
      protected $subSections;

      /**
       * FixedReader constructor.
       *
       * The subsection headers are read before the parent constructor is invoked.
       *
       * @param PdfParser $parser
       * @throws CrossReferenceException
       */
      public function __construct(PdfParser $parser)
      {
          $this->reader = $parser->getStreamReader();
          $this->read();
          parent::__construct($parser);
      }

      /**
       * Get all subsection data.
       *
       * @return array In the form [position => [startObject, entryCount]].
       */
      public function getSubSections()
      {
          return $this->subSections;
      }

      /**
       * Get the offset stored in the cross-reference entry of an object number.
       *
       * The entry is located by simple arithmetic (entries are expected to be exactly 20 bytes long) and read directly
       * from the stream.
       *
       * @inheritdoc
       * @param int $objectNumber
       * @return int|false The offset, or false if the object number is not covered by any subsection, if its entry is
       *                   flagged as free ("f") or if the entry could not be read from the stream.
       */
      public function getOffsetFor($objectNumber)
      {
          foreach ($this->subSections as $offset => list($startObject, $objectCount)) {
              /**
               * @var int $startObject
               * @var int $objectCount
               */
              if ($objectNumber >= $startObject && $objectNumber < ($startObject + $objectCount)) {
                  // every entry occupies 20 bytes, so the entry position can be calculated directly
                  $position = $offset + 20 * ($objectNumber - $startObject);
                  $this->reader->ensure($position, 20);
                  $line = $this->reader->readBytes(20);

                  /* The read may not deliver the entry (e.g. in a truncated file). Without this guard the offset
                   * access below would be done on a non-string or a too short string and a bogus offset of 0 would
                   * be returned.
                   */
                  if (!\is_string($line) || \strlen($line) < 18) {
                      return false;
                  }

                  // byte 17 holds the entry type: "n" (in use) or "f" (free)
                  if ($line[17] === 'f') {
                      return false;
                  }

                  // the first 10 bytes hold the zero-padded offset
                  return (int) \substr($line, 0, 10);
              }
          }

          return false;
      }

      /**
       * Read the cross-reference.
       *
       * This reader will only read the subsection headers in this method. The offsets are resolved individually and
       * on demand by the use of this information.
       *
       * The first entry of the first non-empty subsection is checked to be exactly 20 bytes long. When the loop ends
       * the stream reader is reset to the position behind the last parsed subsection.
       *
       * @throws CrossReferenceException If the entries are not exactly 20 bytes long or if no subsection was found.
       */
      protected function read()
      {
          $subSections = [];

          $startObject = $entryCount = $lastLineStart = null;
          $validityChecked = false;

          while (($line = $this->reader->readLine(20)) !== false) {
              if (\strpos($line, 'trailer') !== false) {
                  $this->reader->reset($lastLineStart);
                  break;
              }

              // jump over if line content doesn't match the expected string
              if (\sscanf($line, '%d %d', $startObject, $entryCount) !== 2) {
                  continue;
              }

              // position directly behind the subsection header, which is where its first entry starts
              $oldPosition = $this->reader->getPosition();
              $position = $oldPosition + $this->reader->getOffset();

              // the entry length is only verified once, at the first subsection that has entries
              if (!$validityChecked && $entryCount > 0) {
                  $nextLine = $this->reader->readBytes(21);

                  /* Check the next line for maximum of 20 bytes and not longer
                   * By catching 21 bytes and trimming the length should be still 21.
                   */
                  if (\strlen(\trim($nextLine)) !== 21) {
                      throw new CrossReferenceException(
                          'Cross-reference entries are larger than 20 bytes.',
                          CrossReferenceException::ENTRIES_TOO_LARGE
                      );
                  }

                  /* Check for less than 20 bytes: cut the line to 20 bytes and trim; have to result in exactly
                   * 18 bytes. If it would have less bytes the substring would get the first bytes of the next line
                   * which would evaluate to a 20 bytes long string after trimming.
                   */
                  if (\strlen(\trim(\substr($nextLine, 0, 20))) !== 18) {
                      throw new CrossReferenceException(
                          'Cross-reference entries are less than 20 bytes.',
                          CrossReferenceException::ENTRIES_TOO_SHORT
                      );
                  }

                  $validityChecked = true;
              }

              $subSections[$position] = [$startObject, $entryCount];

              // skip all entries of this subsection and continue with the next subsection header
              // NOTE(review): a negative $entryCount moves the position backwards - confirm whether such malformed
              // headers need to be rejected explicitly.
              $lastLineStart = $position + $entryCount * 20;
              $this->reader->reset($lastLineStart);
          }

          // reset after the last correct parsed line
          $this->reader->reset($lastLineStart);

          if (\count($subSections) === 0) {
              throw new CrossReferenceException(
                  'No entries found in cross-reference.',
                  CrossReferenceException::NO_ENTRIES
              );
          }

          $this->subSections = $subSections;
      }

      /**
       * Fixes an invalid object number shift.
       *
       * This method can be used to repair documents with an invalid subsection header, where the header states a start
       * object number of 1 although the first entry is the free entry:
       *
       * <code>
       * xref
       * 1 7
       * 0000000000 65535 f
       * 0000000009 00000 n
       * 0000412075 00000 n
       * 0000412172 00000 n
       * 0000412359 00000 n
       * 0000412417 00000 n
       * 0000412468 00000 n
       * </code>
       *
       * The repair is only done if there is a single subsection which starts at object number 1 and for which no
       * offset can be resolved for object number 1. In that case the start object number is decreased by one.
       *
       * It shall only be called on the first table.
       *
       * @return bool True if the subsection was shifted, false otherwise.
       */
      public function fixFaultySubSectionShift()
      {
          $subSections = $this->getSubSections();
          if (\count($subSections) > 1) {
              return false;
          }

          $subSection = \current($subSections);
          if ($subSection[0] != 1) {
              return false;
          }

          // no offset for object 1 means that the entry at the first position cannot be resolved (e.g. it is free)
          if ($this->getOffsetFor(1) === false) {
              foreach ($subSections as $offset => list($startObject, $objectCount)) {
                  $this->subSections[$offset] = [$startObject - 1, $objectCount];
              }

              return true;
          }

          return false;
      }
  }