PdfReader.php 6.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234
  1. <?php
  2. /**
  3. * This file is part of FPDI
  4. *
  5. * @package Fpdi
  6. * @copyright Copyright (c) 2020 Setasign GmbH & Co. KG (https://www.setasign.com)
  7. * @license http://opensource.org/licenses/mit-license The MIT License
  8. */
  9. namespace Fpdi\PdfReader;
  10. use Fpdi\PdfParser\CrossReference\CrossReferenceException;
  11. use Fpdi\PdfParser\PdfParser;
  12. use Fpdi\PdfParser\PdfParserException;
  13. use Fpdi\PdfParser\Type\PdfArray;
  14. use Fpdi\PdfParser\Type\PdfDictionary;
  15. use Fpdi\PdfParser\Type\PdfIndirectObject;
  16. use Fpdi\PdfParser\Type\PdfIndirectObjectReference;
  17. use Fpdi\PdfParser\Type\PdfNumeric;
  18. use Fpdi\PdfParser\Type\PdfType;
  19. use Fpdi\PdfParser\Type\PdfTypeException;
  20. /**
  21. * A PDF reader class
  22. */
  23. class PdfReader
  24. {
  25. /**
  26. * @var PdfParser
  27. */
  28. protected $parser;
  29. /**
  30. * @var int
  31. */
  32. protected $pageCount;
  33. /**
  34. * Indirect objects of resolved pages.
  35. *
  36. * @var PdfIndirectObjectReference[]|PdfIndirectObject[]
  37. */
  38. protected $pages = [];
  39. /**
  40. * PdfReader constructor.
  41. *
  42. * @param PdfParser $parser
  43. */
  44. public function __construct(PdfParser $parser)
  45. {
  46. $this->parser = $parser;
  47. }
  48. /**
  49. * PdfReader destructor.
  50. */
  51. public function __destruct()
  52. {
  53. if ($this->parser !== null) {
  54. $this->parser->cleanUp();
  55. }
  56. }
  57. /**
  58. * Get the pdf parser instance.
  59. *
  60. * @return PdfParser
  61. */
  62. public function getParser()
  63. {
  64. return $this->parser;
  65. }
  66. /**
  67. * Get the PDF version.
  68. *
  69. * @return string
  70. * @throws PdfParserException
  71. */
  72. public function getPdfVersion()
  73. {
  74. return \implode('.', $this->parser->getPdfVersion());
  75. }
  76. /**
  77. * Get the page count.
  78. *
  79. * @return int
  80. * @throws PdfTypeException
  81. * @throws CrossReferenceException
  82. * @throws PdfParserException
  83. */
  84. public function getPageCount()
  85. {
  86. if ($this->pageCount === null) {
  87. $catalog = $this->parser->getCatalog();
  88. $pages = PdfType::resolve(PdfDictionary::get($catalog, 'Pages'), $this->parser);
  89. $count = PdfType::resolve(PdfDictionary::get($pages, 'Count'), $this->parser);
  90. $this->pageCount = PdfNumeric::ensure($count)->value;
  91. }
  92. return $this->pageCount;
  93. }
  94. /**
  95. * Get a page instance.
  96. *
  97. * @param int $pageNumber
  98. * @return Page
  99. * @throws PdfTypeException
  100. * @throws CrossReferenceException
  101. * @throws PdfParserException
  102. * @throws \InvalidArgumentException
  103. */
  104. public function getPage($pageNumber)
  105. {
  106. if (!\is_numeric($pageNumber)) {
  107. throw new \InvalidArgumentException(
  108. 'Page number needs to be a number.'
  109. );
  110. }
  111. if ($pageNumber < 1 || $pageNumber > $this->getPageCount()) {
  112. throw new \InvalidArgumentException(
  113. \sprintf(
  114. 'Page number "%s" out of available page range (1 - %s)',
  115. $pageNumber,
  116. $this->getPageCount()
  117. )
  118. );
  119. }
  120. $this->readPages();
  121. $page = $this->pages[$pageNumber - 1];
  122. if ($page instanceof PdfIndirectObjectReference) {
  123. $readPages = function ($kids) use (&$readPages) {
  124. $kids = PdfArray::ensure($kids);
  125. /** @noinspection LoopWhichDoesNotLoopInspection */
  126. foreach ($kids->value as $reference) {
  127. $reference = PdfIndirectObjectReference::ensure($reference);
  128. $object = $this->parser->getIndirectObject($reference->value);
  129. $type = PdfDictionary::get($object->value, 'Type');
  130. if ($type->value === 'Pages') {
  131. return $readPages(PdfDictionary::get($object->value, 'Kids'));
  132. }
  133. return $object;
  134. }
  135. throw new PdfReaderException(
  136. 'Kids array cannot be empty.',
  137. PdfReaderException::KIDS_EMPTY
  138. );
  139. };
  140. $page = $this->parser->getIndirectObject($page->value);
  141. $dict = PdfType::resolve($page, $this->parser);
  142. $type = PdfDictionary::get($dict, 'Type');
  143. if ($type->value === 'Pages') {
  144. $kids = PdfType::resolve(PdfDictionary::get($dict, 'Kids'), $this->parser);
  145. try {
  146. $page = $this->pages[$pageNumber - 1] = $readPages($kids);
  147. } catch (PdfReaderException $e) {
  148. if ($e->getCode() !== PdfReaderException::KIDS_EMPTY) {
  149. throw $e;
  150. }
  151. // let's reset the pages array and read all page objects
  152. $this->pages = [];
  153. $this->readPages(true);
  154. // @phpstan-ignore-next-line
  155. $page = $this->pages[$pageNumber - 1];
  156. }
  157. } else {
  158. $this->pages[$pageNumber - 1] = $page;
  159. }
  160. }
  161. return new Page($page, $this->parser);
  162. }
  163. /**
  164. * Walk the page tree and resolve all indirect objects of all pages.
  165. *
  166. * @param bool $readAll
  167. * @throws CrossReferenceException
  168. * @throws PdfParserException
  169. * @throws PdfTypeException
  170. */
  171. protected function readPages($readAll = false)
  172. {
  173. if (\count($this->pages) > 0) {
  174. return;
  175. }
  176. $readPages = function ($kids, $count) use (&$readPages, $readAll) {
  177. $kids = PdfArray::ensure($kids);
  178. $isLeaf = ($count->value === \count($kids->value));
  179. foreach ($kids->value as $reference) {
  180. $reference = PdfIndirectObjectReference::ensure($reference);
  181. if (!$readAll && $isLeaf) {
  182. $this->pages[] = $reference;
  183. continue;
  184. }
  185. $object = $this->parser->getIndirectObject($reference->value);
  186. $type = PdfDictionary::get($object->value, 'Type');
  187. if ($type->value === 'Pages') {
  188. $readPages(PdfDictionary::get($object->value, 'Kids'), PdfDictionary::get($object->value, 'Count'));
  189. } else {
  190. $this->pages[] = $object;
  191. }
  192. }
  193. };
  194. $catalog = $this->parser->getCatalog();
  195. $pages = PdfType::resolve(PdfDictionary::get($catalog, 'Pages'), $this->parser);
  196. $count = PdfType::resolve(PdfDictionary::get($pages, 'Count'), $this->parser);
  197. $kids = PdfType::resolve(PdfDictionary::get($pages, 'Kids'), $this->parser);
  198. $readPages($kids, $count);
  199. }
  200. }