123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381 |
- <?php
- /**
- * This file is part of FPDI
- *
- * @package Fpdi
- * @copyright Copyright (c) 2020 Setasign GmbH & Co. KG (https://www.setasign.com)
- * @license http://opensource.org/licenses/mit-license The MIT License
- */
- namespace Fpdi\PdfParser;
- use Fpdi\PdfParser\CrossReference\CrossReference;
- use Fpdi\PdfParser\CrossReference\CrossReferenceException;
- use Fpdi\PdfParser\Type\PdfArray;
- use Fpdi\PdfParser\Type\PdfBoolean;
- use Fpdi\PdfParser\Type\PdfDictionary;
- use Fpdi\PdfParser\Type\PdfHexString;
- use Fpdi\PdfParser\Type\PdfIndirectObject;
- use Fpdi\PdfParser\Type\PdfIndirectObjectReference;
- use Fpdi\PdfParser\Type\PdfName;
- use Fpdi\PdfParser\Type\PdfNull;
- use Fpdi\PdfParser\Type\PdfNumeric;
- use Fpdi\PdfParser\Type\PdfStream;
- use Fpdi\PdfParser\Type\PdfString;
- use Fpdi\PdfParser\Type\PdfToken;
- use Fpdi\PdfParser\Type\PdfType;
- /**
- * A PDF parser class
- */
class PdfParser
{
    /**
     * The reader that supplies the raw PDF data.
     *
     * @var StreamReader
     */
    protected $streamReader;

    /**
     * The tokenizer created on top of the stream reader.
     *
     * @var Tokenizer
     */
    protected $tokenizer;

    /**
     * The trimmed header line, i.e. the line that starts at the "%PDF-" marker.
     *
     * @var string
     */
    protected $fileHeader;

    /**
     * The offset at which the "%PDF-" marker was found.
     *
     * @var int
     */
    protected $fileHeaderOffset;

    /**
     * The cross reference instance; created on first access.
     *
     * @var CrossReference|null
     */
    protected $xref;

    /**
     * Indirect objects that were read with caching enabled, indexed by object number.
     *
     * @var array
     */
    protected $objects = [];

    /**
     * Creates the parser and a tokenizer that reads from the same stream reader.
     *
     * @param StreamReader $streamReader
     */
    public function __construct(StreamReader $streamReader)
    {
        $this->streamReader = $streamReader;
        $this->tokenizer = new Tokenizer($streamReader);
    }

    /**
     * Releases the cross reference instance.
     *
     * The cross reference is constructed with this parser as an argument, so dropping it here
     * breaks the parser <-> cross reference cycle.
     *
     * @internal
     */
    public function cleanUp()
    {
        $this->xref = null;
    }

    /**
     * Returns the stream reader this parser was constructed with.
     *
     * @return StreamReader
     */
    public function getStreamReader()
    {
        return $this->streamReader;
    }

    /**
     * Returns the tokenizer used by this parser.
     *
     * @return Tokenizer
     */
    public function getTokenizer()
    {
        return $this->tokenizer;
    }

    /**
     * Locates the "%PDF-" marker and remembers both the header line and its offset.
     *
     * The lookup is only done while no (non-empty) header is known. The buffer is searched from
     * position 0 and grown in steps of 100 until the marker shows up; the search gives up when the
     * buffer cannot be grown any further or after 1000 growth steps.
     *
     * @throws PdfParserException If the marker cannot be found.
     * @return int The offset of the file header.
     */
    protected function resolveFileHeader()
    {
        if (!$this->fileHeader) {
            $reader = $this->streamReader;
            $reader->reset(0);

            $attemptsLeft = 1000;
            $position = \strpos($reader->getBuffer(false), '%PDF-');
            while ($position === false) {
                // The counter is only decremented when the buffer could actually be enlarged.
                if (!$reader->increaseLength(100) || (--$attemptsLeft === 0)) {
                    throw new PdfParserException(
                        'Unable to find PDF file header.',
                        PdfParserException::FILE_HEADER_NOT_FOUND
                    );
                }

                $position = \strpos($reader->getBuffer(false), '%PDF-');
            }

            $this->fileHeaderOffset = $position;
            $reader->setOffset($position);
            $this->fileHeader = \trim($reader->readLine());
        }

        return $this->fileHeaderOffset;
    }

    /**
     * Returns the cross reference instance, creating it on the first call.
     *
     * @return CrossReference
     * @throws CrossReferenceException
     * @throws PdfParserException
     */
    public function getCrossReference()
    {
        if ($this->xref !== null) {
            return $this->xref;
        }

        $this->xref = new CrossReference($this, $this->resolveFileHeader());

        return $this->xref;
    }

    /**
     * Determines the PDF version of the document.
     *
     * The version is taken from the file header first. If the catalog has a "Version" entry whose
     * unescaped value splits at "." into exactly two parts, those parts replace the header values.
     *
     * @return int[] An array of major and minor version.
     * @throws PdfParserException If the header does not contain a version.
     */
    public function getPdfVersion()
    {
        $this->resolveFileHeader();

        $headerMatch = [];
        if (\preg_match('/%PDF-(\d)\.(\d)/', $this->fileHeader, $headerMatch) === 0) {
            throw new PdfParserException(
                'Unable to extract PDF version from file header.',
                PdfParserException::PDF_VERSION_NOT_FOUND
            );
        }

        $major = $headerMatch[1];
        $minor = $headerMatch[2];

        $catalog = $this->getCatalog();
        if (!isset($catalog->value['Version'])) {
            return [(int) $major, (int) $minor];
        }

        $versionEntry = PdfType::resolve($catalog->value['Version'], $this);
        $catalogVersion = \explode('.', PdfName::unescape($versionEntry->value));
        if (count($catalogVersion) === 2) {
            list($major, $minor) = $catalogVersion;
        }

        return [(int) $major, (int) $minor];
    }

    /**
     * Returns the catalog dictionary, i.e. the resolved "Root" entry of the trailer.
     *
     * @return PdfDictionary
     * @throws Type\PdfTypeException
     * @throws CrossReferenceException
     * @throws PdfParserException
     */
    public function getCatalog()
    {
        $root = PdfDictionary::get($this->getCrossReference()->getTrailer(), 'Root');

        return PdfDictionary::ensure(PdfType::resolve($root, $this));
    }

    /**
     * Returns the indirect object with the given object number.
     *
     * An object that is already in the local cache is returned from there. Otherwise it is fetched
     * through the cross reference and only stored in the cache if $cache is set.
     *
     * @param int $objectNumber
     * @param bool $cache
     * @return PdfIndirectObject
     * @throws CrossReferenceException
     * @throws PdfParserException
     */
    public function getIndirectObject($objectNumber, $cache = false)
    {
        $key = (int) $objectNumber;
        if (isset($this->objects[$key])) {
            return $this->objects[$key];
        }

        $indirectObject = $this->getCrossReference()->getIndirectObject($key);
        if ($cache) {
            $this->objects[$key] = $indirectObject;
        }

        return $indirectObject;
    }

    /**
     * Reads a single PDF value.
     *
     * If no token is passed, the next token is pulled from the tokenizer. The token decides which
     * type is parsed: "(" a string, "<" a hex string or (when followed by another "<") a
     * dictionary, "/" a name, "[" an array, numeric tokens a number, an indirect object or an
     * indirect object reference, "true"/"false" a boolean and "null" a null object. Everything
     * else is wrapped into a PdfToken.
     *
     * @param null|bool|string $token
     * @param null|string $expectedType Class name the result has to be an instance of, or null for any.
     * @return false|PdfArray|PdfBoolean|PdfDictionary|PdfHexString|PdfIndirectObject|PdfIndirectObjectReference|PdfName|PdfNull|PdfNumeric|PdfStream|PdfString|PdfToken
     * @throws Type\PdfTypeException If the value is not of the expected type.
     */
    public function readValue($token = null, $expectedType = null)
    {
        if ($token === null) {
            $token = $this->tokenizer->getNextToken();
        }

        if ($token === false) {
            if ($expectedType === null) {
                return false;
            }

            throw new Type\PdfTypeException('Got unexpected token type.', Type\PdfTypeException::INVALID_DATA_TYPE);
        }

        // A switch is used on purpose: it keeps PHP's loose comparison for the delimiter dispatch.
        switch ($token) {
            case '(':
                $this->ensureExpectedType($token, $expectedType);

                return PdfString::parse($this->streamReader);

            case '<':
                if ($this->streamReader->getByte() !== '<') {
                    $this->ensureExpectedType($token, $expectedType);

                    return PdfHexString::parse($this->streamReader);
                }

                $this->ensureExpectedType('<<', $expectedType);
                // Step over the second "<" before the dictionary is parsed.
                $this->streamReader->addOffset(1);

                return PdfDictionary::parse($this->tokenizer, $this->streamReader, $this);

            case '/':
                $this->ensureExpectedType($token, $expectedType);

                return PdfName::parse($this->tokenizer, $this->streamReader);

            case '[':
                $this->ensureExpectedType($token, $expectedType);

                return PdfArray::parse($this->tokenizer, $this);
        }

        if (\is_numeric($token)) {
            return $this->readNumericValue($token, $expectedType);
        }

        if ($token === 'true' || $token === 'false') {
            $this->ensureExpectedType($token, $expectedType);

            return PdfBoolean::create($token === 'true');
        }

        if ($token === 'null') {
            $this->ensureExpectedType($token, $expectedType);

            return new PdfNull();
        }

        $this->ensureExpectedClass(PdfToken::class, $expectedType);

        $pdfToken = new PdfToken();
        $pdfToken->value = $token;

        return $pdfToken;
    }

    /**
     * Reads the value that starts with a numeric token.
     *
     * Up to two further tokens are looked at: "<number> <number> obj" yields an indirect object and
     * "<number> <number> R" an indirect object reference. In every other case the tokens that were
     * read ahead are pushed back onto the tokenizer stack (last read first) and a plain numeric
     * value is returned.
     *
     * @param string $token The numeric token.
     * @param string|null $expectedType
     * @return PdfIndirectObject|PdfIndirectObjectReference|PdfNumeric
     * @throws Type\PdfTypeException If the value is not of the expected type.
     */
    private function readNumericValue($token, $expectedType)
    {
        $second = $this->tokenizer->getNextToken();
        if ($second !== false) {
            if (\is_numeric($second)) {
                $third = $this->tokenizer->getNextToken();
                if ($third !== false) {
                    switch ($third) {
                        case 'obj':
                            $this->ensureExpectedClass(PdfIndirectObject::class, $expectedType);

                            return PdfIndirectObject::parse(
                                (int) $token,
                                (int) $second,
                                $this,
                                $this->tokenizer,
                                $this->streamReader
                            );

                        case 'R':
                            $this->ensureExpectedClass(PdfIndirectObjectReference::class, $expectedType);

                            return PdfIndirectObjectReference::create((int) $token, (int) $second);
                    }

                    $this->tokenizer->pushStack($third);
                }
            }

            $this->tokenizer->pushStack($second);
        }

        $this->ensureExpectedClass(PdfNumeric::class, $expectedType);

        // Adding zero converts the numeric string into an int or a float.
        return PdfNumeric::create($token + 0);
    }

    /**
     * Throws if a specific type is expected and it is not the given class.
     *
     * @param string $actualClass Class name of the value that is about to be created.
     * @param string|null $expectedType
     * @throws Type\PdfTypeException
     */
    private function ensureExpectedClass($actualClass, $expectedType)
    {
        if ($expectedType !== null && $expectedType !== $actualClass) {
            throw new Type\PdfTypeException(
                'Got unexpected token type.',
                Type\PdfTypeException::INVALID_DATA_TYPE
            );
        }
    }

    /**
     * Checks that the type introduced by a token matches the expected type (if there is one).
     *
     * @param string $token One of the tokens known to the internal token => class map.
     * @param string|null $expectedType
     * @return bool Always true; a mismatch is reported through an exception.
     * @throws Type\PdfTypeException
     */
    private function ensureExpectedType($token, $expectedType)
    {
        static $mapping = [
            '(' => PdfString::class,
            '<' => PdfHexString::class,
            '<<' => PdfDictionary::class,
            '/' => PdfName::class,
            '[' => PdfArray::class,
            'true' => PdfBoolean::class,
            'false' => PdfBoolean::class,
            'null' => PdfNull::class
        ];

        // The map is only consulted when a specific type is requested.
        if ($expectedType !== null && $mapping[$token] !== $expectedType) {
            throw new Type\PdfTypeException('Got unexpected token type.', Type\PdfTypeException::INVALID_DATA_TYPE);
        }

        return true;
    }
}
|