123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326 |
- <?php
- /**
- * This file is part of FPDI
- *
- * @package Fpdi
- * @copyright Copyright (c) 2020 Setasign GmbH & Co. KG (https://www.setasign.com)
- * @license http://opensource.org/licenses/mit-license The MIT License
- */
- namespace Fpdi\PdfParser\Type;
- use Fpdi\PdfParser\CrossReference\CrossReferenceException;
- use Fpdi\PdfParser\Filter\Ascii85;
- use Fpdi\PdfParser\Filter\AsciiHex;
- use Fpdi\PdfParser\Filter\FilterException;
- use Fpdi\PdfParser\Filter\Flate;
- use Fpdi\PdfParser\Filter\Lzw;
- use Fpdi\PdfParser\PdfParser;
- use Fpdi\PdfParser\PdfParserException;
- use Fpdi\PdfParser\StreamReader;
- use FpdiPdfParser\PdfParser\Filter\Predictor;
- /**
- * Class representing a PDF stream object
- */
class PdfStream extends PdfType
{
    /**
     * Parses a stream from a stream reader.
     *
     * Only the byte-offset position of the stream data is recorded here; the data itself is read lazily by
     * getStream().
     *
     * @param PdfDictionary $dictionary The stream dictionary.
     * @param StreamReader $reader The reader, positioned behind the "stream" keyword.
     * @param PdfParser|null $parser Optional to keep backwards compatibility
     * @return self
     * @throws PdfTypeException If no end-of-line byte follows the "stream" keyword.
     */
    public static function parse(PdfDictionary $dictionary, StreamReader $reader, PdfParser $parser = null)
    {
        $v = new self();
        $v->value = $dictionary;
        $v->reader = $reader;
        $v->parser = $parser;

        $offset = $reader->getOffset();

        // Find the first "newline": advance until a line feed or carriage return is hit
        // (or the reader has no more bytes and getByte() yields false).
        while (($firstByte = $reader->getByte($offset)) !== false) {
            if ($firstByte === "\n" || $firstByte === "\r") {
                break;
            }

            $offset++;
        }

        if ($firstByte === false) {
            throw new PdfTypeException(
                'Unable to parse stream data. No newline after the stream keyword found.',
                PdfTypeException::NO_NEWLINE_AFTER_STREAM_KEYWORD
            );
        }

        // $firstByte is guaranteed to be "\n" or "\r" at this point, so it is always skipped.
        $sndByte = $reader->getByte($offset + 1);
        $offset++;

        // A "\r\n" pair counts as a single end-of-line marker: skip the line feed as well.
        if ($sndByte === "\n" && $firstByte !== "\n") {
            $offset++;
        }

        $reader->setOffset($offset);

        // let's only save the byte-offset and read the stream only when needed
        $v->stream = $reader->getPosition() + $reader->getOffset();

        return $v;
    }

    /**
     * Helper method to create an instance.
     *
     * Instances created this way hold the stream data directly and have neither a reader nor a parser.
     *
     * @param PdfDictionary $dictionary
     * @param string $stream
     * @return self
     */
    public static function create(PdfDictionary $dictionary, $stream)
    {
        $v = new self();
        $v->value = $dictionary;
        $v->stream = (string) $stream;

        return $v;
    }

    /**
     * Ensures that the passed value is a PdfStream instance.
     *
     * @param mixed $stream
     * @return self
     * @throws PdfTypeException
     */
    public static function ensure($stream)
    {
        return PdfType::ensureType(self::class, $stream, 'Stream value expected.');
    }

    /**
     * The stream or its byte-offset position.
     *
     * An integer means "not read yet" (see parse()), a string is the actual stream data.
     *
     * @var int|string
     */
    protected $stream;

    /**
     * The stream reader instance.
     *
     * Set to null once the stream data was cached by getStream(true).
     *
     * @var StreamReader|null
     */
    protected $reader;

    /**
     * The PDF parser instance.
     *
     * @var PdfParser|null
     */
    protected $parser;

    /**
     * Get the stream data.
     *
     * @param bool $cache Whether cache the stream data or not.
     * @return bool|string
     * @throws PdfTypeException
     * @throws CrossReferenceException
     * @throws PdfParserException
     */
    public function getStream($cache = false)
    {
        if (\is_int($this->stream)) {
            // The Length entry may be an indirect reference which can only be followed with a parser.
            $length = $this->resolveValue(PdfDictionary::get($this->value, 'Length'));

            if (!($length instanceof PdfNumeric) || $length->value === 0) {
                // No usable length: search for the "endstream" keyword instead.
                $this->reader->reset($this->stream, 100000);
                $buffer = $this->extractStream();
            } else {
                $this->reader->reset($this->stream, $length->value);
                $buffer = $this->reader->getBuffer(false);

                if ($this->parser !== null) {
                    // Verify the given length: the next token behind the data has to be "endstream".
                    $this->reader->reset($this->stream + \strlen($buffer));
                    $this->parser->getTokenizer()->clearStack();
                    $token = $this->parser->readValue();

                    if ($token === false || !($token instanceof PdfToken) || $token->value !== 'endstream') {
                        // The length is wrong: fall back to the manual extraction.
                        $this->reader->reset($this->stream, 100000);
                        $buffer = $this->extractStream();
                        $this->reader->reset($this->stream + \strlen($buffer));
                    }
                }
            }

            if ($cache === false) {
                return $buffer;
            }

            // Keep the data and release the reader, it is not needed anymore.
            $this->stream = $buffer;
            $this->reader = null;
        }

        return $this->stream;
    }

    /**
     * Extract the stream "manually".
     *
     * The reader buffer is enlarged until it contains the "endstream" keyword. Everything in front of the keyword,
     * without the trailing end-of-line marker, is the stream data.
     *
     * @return string
     * @throws PdfTypeException If the reader cannot be enlarged any further before "endstream" is found.
     */
    protected function extractStream()
    {
        while (true) {
            $buffer = $this->reader->getBuffer(false);
            $length = \strpos($buffer, 'endstream');
            if ($length !== false) {
                break;
            }

            if (!$this->reader->increaseLength(100000)) {
                throw new PdfTypeException('Cannot extract stream.');
            }
        }

        $buffer = \substr($buffer, 0, $length);
        $lastByte = \substr($buffer, -1);

        /* Check for EOL marker =
         *   CARRIAGE RETURN (\r) and a LINE FEED (\n) or just a LINE FEED (\n),
         *   and not by a CARRIAGE RETURN (\r) alone
         */
        if ($lastByte === "\n") {
            $buffer = \substr($buffer, 0, -1);

            $lastByte = \substr($buffer, -1);
            if ($lastByte === "\r") {
                $buffer = \substr($buffer, 0, -1);
            }
        }

        // There are streams in the wild, which have only white signs in them but need to be parsed manually due
        // to a problem encountered before (e.g. Length === 0). We should set them to empty streams to avoid problems
        // in further processing (e.g. applying of filters).
        if (\trim($buffer) === '') {
            $buffer = '';
        }

        return $buffer;
    }

    /**
     * Get the unfiltered stream data.
     *
     * The filters named in the Filter entry are applied in their given order. Filter and DecodeParms (and the
     * entries of their arrays) are resolved through the parser, if one is available, so that indirect references
     * are handled, too. Entries which are not a name are skipped.
     *
     * @return string
     * @throws CrossReferenceException
     * @throws FilterException If a filter is not supported.
     * @throws PdfParserException If a predictor is required but the Predictor class is not available.
     * @throws PdfTypeException
     */
    public function getUnfilteredStream()
    {
        $stream = $this->getStream();

        $filters = $this->resolveValue(PdfDictionary::get($this->value, 'Filter'));
        if ($filters instanceof PdfNull) {
            return $stream;
        }

        // A single filter is handled like an array with one entry.
        if ($filters instanceof PdfArray) {
            $filters = $filters->value;
        } else {
            $filters = [$filters];
        }

        $decodeParams = $this->resolveValue(PdfDictionary::get($this->value, 'DecodeParms'));
        if ($decodeParams instanceof PdfArray) {
            $decodeParams = $decodeParams->value;
        } else {
            $decodeParams = [$decodeParams];
        }

        foreach ($filters as $key => $filter) {
            $filter = $this->resolveValue($filter);
            if (!($filter instanceof PdfName)) {
                continue;
            }

            // The decode parameters are matched to the filters by their position.
            $decodeParam = null;
            if (isset($decodeParams[$key])) {
                $candidate = $this->resolveValue($decodeParams[$key]);
                $decodeParam = ($candidate instanceof PdfDictionary ? $candidate : null);
            }

            switch ($filter->value) {
                case 'FlateDecode':
                case 'Fl':
                case 'LZWDecode':
                case 'LZW':
                    if (\strpos($filter->value, 'LZW') === 0) {
                        $filterObject = new Lzw();
                    } else {
                        $filterObject = new Flate();
                    }

                    $stream = $filterObject->decode($stream);

                    if ($decodeParam instanceof PdfDictionary) {
                        $predictor = PdfDictionary::get($decodeParam, 'Predictor', PdfNumeric::create(1));
                        // A predictor of 1 needs no post-processing.
                        if ($predictor->value !== 1) {
                            if (!\class_exists(Predictor::class)) {
                                throw new PdfParserException(
                                    'This PDF document makes use of features which are only implemented in the ' .
                                    'commercial "FPDI PDF-Parser" add-on (see https://www.setasign.com/fpdi-pdf-' .
                                    'parser).',
                                    PdfParserException::IMPLEMENTED_IN_FPDI_PDF_PARSER
                                );
                            }

                            $colors = PdfDictionary::get($decodeParam, 'Colors', PdfNumeric::create(1));
                            $bitsPerComponent = PdfDictionary::get(
                                $decodeParam,
                                'BitsPerComponent',
                                PdfNumeric::create(8)
                            );
                            $columns = PdfDictionary::get($decodeParam, 'Columns', PdfNumeric::create(1));

                            $filterObject = new Predictor(
                                $predictor->value,
                                $colors->value,
                                $bitsPerComponent->value,
                                $columns->value
                            );

                            $stream = $filterObject->decode($stream);
                        }
                    }

                    break;

                case 'ASCII85Decode':
                case 'A85':
                    $filterObject = new Ascii85();
                    $stream = $filterObject->decode($stream);
                    break;

                case 'ASCIIHexDecode':
                case 'AHx':
                    $filterObject = new AsciiHex();
                    $stream = $filterObject->decode($stream);
                    break;

                default:
                    throw new FilterException(
                        \sprintf('Unsupported filter "%s".', $filter->value),
                        FilterException::UNSUPPORTED_FILTER
                    );
            }
        }

        return $stream;
    }

    /**
     * Resolves a value through the parser if a parser is available.
     *
     * Without a parser (e.g. for instances built with create()) or for values which are no PdfType instances the
     * value is returned unchanged.
     *
     * @param mixed $value
     * @return mixed
     * @throws CrossReferenceException
     * @throws PdfParserException
     */
    private function resolveValue($value)
    {
        if ($this->parser === null || !($value instanceof PdfType)) {
            return $value;
        }

        return PdfType::resolve($value, $this->parser);
    }
}
|