119 lines
4.8 KiB
Python
119 lines
4.8 KiB
Python
|
|
# All rights reserved.
|
||
|
|
#
|
||
|
|
# Redistribution and use in source and binary forms, with or without
|
||
|
|
# modification, are permitted provided that the following conditions are
|
||
|
|
# met:
|
||
|
|
#
|
||
|
|
# * Redistributions of source code must retain the above copyright notice,
|
||
|
|
# this list of conditions and the following disclaimer.
|
||
|
|
# * Redistributions in binary form must reproduce the above copyright notice,
|
||
|
|
# this list of conditions and the following disclaimer in the documentation
|
||
|
|
# and/or other materials provided with the distribution.
|
||
|
|
# * The name of the author may not be used to endorse or promote products
|
||
|
|
# derived from this software without specific prior written permission.
|
||
|
|
#
|
||
|
|
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||
|
|
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||
|
|
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||
|
|
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||
|
|
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||
|
|
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||
|
|
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||
|
|
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||
|
|
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||
|
|
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||
|
|
# POSSIBILITY OF SUCH DAMAGE.
|
||
|
|
|
||
|
|
|
||
|
|
# This module contains code used by _writer.py to track links in pages
|
||
|
|
# being added to the writer until the links can be resolved.
|
||
|
|
|
||
|
|
from typing import TYPE_CHECKING, Optional, Union, cast
|
||
|
|
|
||
|
|
from . import ArrayObject, DictionaryObject, IndirectObject, PdfObject, TextStringObject
|
||
|
|
|
||
|
|
if TYPE_CHECKING:
|
||
|
|
from .._page import PageObject
|
||
|
|
from .._reader import PdfReader
|
||
|
|
from .._writer import PdfWriter
|
||
|
|
|
||
|
|
|
||
|
|
class NamedReferenceLink:
    """Link to a named destination, kept aside until it can be resolved."""

    def __init__(self, reference: TextStringObject, source_pdf: "PdfReader") -> None:
        """
        Args:
            reference: TextStringObject holding the destination name.
            source_pdf: reader whose ``named_destinations`` are searched by
                :meth:`find_referenced_page`.
        """
        self._reference = reference
        self._source_pdf = source_pdf

    def find_referenced_page(self) -> Union[IndirectObject, None]:
        """Return the ``page`` of the matching named destination in the
        source PDF, or None when the lookup yields nothing (or a falsy entry).
        """
        known_destinations = self._source_pdf.named_destinations
        match = known_destinations.get(str(self._reference))
        if not match:
            return None
        return match.page

    def patch_reference(self, target_pdf: "PdfWriter", new_page: IndirectObject) -> None:
        """Register this link's name in ``target_pdf`` unless already present.

        Args:
            target_pdf: PdfWriter which the new link went into.
            new_page: the page in the target; its ``page_number`` is what gets
                passed to ``add_named_destination``.
        """
        name = str(self._reference)
        if name in target_pdf.named_destinations:
            # A destination with this name already exists; leave it untouched.
            return
        # Point the named destination in the new PDF at the new page.
        target_pdf.add_named_destination(name, new_page.page_number)
|
||
|
|
|
||
|
|
|
||
|
|
class DirectReferenceLink:
    """Link that points straight at a page, kept aside until it can be resolved."""

    def __init__(self, reference: ArrayObject) -> None:
        """
        Args:
            reference: an ArrayObject whose first element is the Page
                indirect object. The array is stored as-is (not copied), so
                :meth:`patch_reference` modifies the caller's array.
        """
        self._reference = reference

    def find_referenced_page(self) -> IndirectObject:
        """Return element 0 of the stored array, i.e. the referenced page."""
        destination = self._reference
        return destination[0]

    def patch_reference(self, target_pdf: "PdfWriter", new_page: IndirectObject) -> None:
        """Overwrite element 0 of the stored array with ``new_page``.

        Args:
            target_pdf: PdfWriter which the new link went into. Not used by
                this implementation; the parameter mirrors
                ``NamedReferenceLink.patch_reference``.
            new_page: the page the link should point to from now on.
        """
        destination = self._reference
        destination[0] = new_page
|
||
|
|
|
||
|
|
|
||
|
|
# Either kind of pending link. Both classes expose find_referenced_page()
# and patch_reference(target_pdf, new_page).
ReferenceLink = Union[NamedReferenceLink, DirectReferenceLink]
|
||
|
|
|
||
|
|
|
||
|
|
def extract_links(new_page: "PageObject", old_page: "PageObject") -> list[tuple[ReferenceLink, ReferenceLink]]:
    """Pair up the links of two pages that are assumed to be the same page.

    The "/Annots" entries of both pages are converted with ``_build_link``
    and matched purely by position. A pair is kept only when both sides
    produced a link object.

    Returns:
        One list of (new link, old link) tuples.
    """

    def _links_on(page: "PageObject") -> list[Optional[ReferenceLink]]:
        # One entry per annotation; None marks annotations that are not
        # links this module handles.
        return [_build_link(annotation, page) for annotation in page.get("/Annots", [])]

    # Build the full list for the new page first, then for the old page.
    candidates = zip(_links_on(new_page), _links_on(old_page))
    return [pair for pair in candidates if pair[0] and pair[1]]
|
||
|
|
|
||
|
|
|
||
|
|
def _build_link(indirect_object: IndirectObject, page: "PageObject") -> Optional[ReferenceLink]:
    """Turn one annotation of ``page`` into a ReferenceLink, if it is one we track.

    Args:
        indirect_object: an entry of the page's "/Annots"; it is resolved
            with ``get_object()``.
        page: the page owning the annotation; ``page.pdf`` is handed on as
            the source PDF for named references.

    Returns:
        The link built by ``_create_link`` from either the "/D" entry of a
        "/GoTo" action ("/A") or from the annotation's "/Dest" entry.
        None when the annotation is not a "/Link" dictionary, when its
        action is not a dictionary, is not "/GoTo" or has no "/D", or when
        neither "/A" nor "/Dest" is present.
    """
    src = cast("PdfReader", page.pdf)
    link = indirect_object.get_object()
    if not isinstance(link, DictionaryObject) or link.get("/Subtype") != "/Link":
        return None

    if "/A" in link:
        action = link["/A"]
        # typing.cast() performs no runtime check, so validate the action the
        # same way the annotation itself is validated: a malformed "/A"
        # (e.g. a null object) is skipped instead of raising AttributeError.
        if not isinstance(action, DictionaryObject):
            return None
        if action.get("/S") != "/GoTo" or "/D" not in action:
            return None
        return _create_link(action["/D"], src)

    if "/Dest" in link:
        return _create_link(link["/Dest"], src)

    return None  # Nothing to do here
|
||
|
|
|
||
|
|
|
||
|
|
def _create_link(reference: PdfObject, source_pdf: "PdfReader") -> Optional[ReferenceLink]:
    """Wrap a destination value in the matching ReferenceLink class.

    Args:
        reference: the destination taken from a link annotation.
        source_pdf: the PDF the link came from; only used for named
            references.

    Returns:
        A NamedReferenceLink for a TextStringObject, a DirectReferenceLink
        for a non-empty ArrayObject, and None for anything else.
    """
    if isinstance(reference, TextStringObject):
        return NamedReferenceLink(reference, source_pdf)
    if isinstance(reference, ArrayObject):
        # DirectReferenceLink reads and overwrites element 0 of the array.
        # An empty array has no page to resolve and would raise IndexError
        # later, so treat it as "no link" here.
        if len(reference) == 0:
            return None
        return DirectReferenceLink(reference)
    return None
|