tags 980426 +patch
thanks

Here's a patch that applies cleanly against upstream v1.17.3, after which the
resulting pikepdf builds and passes its tests with qpdf 10.1. Please let me know
if this is sufficient. I put some comments at the top of the patch citing the
original commits in case you want to add DEP-3 headers.

--Jay
Patch created as follows:

 cp /dev/null /tmp/a.patch
 git show 7ac9b058104219b26747f3fc9761ac6b3c037402 >> /tmp/a.patch
 git show 5394b7855eb87f637316432b2b597294adcb974e >> /tmp/a.patch
 git show fe4b568ac88ba551458578613b013ac17edf5c4e -- tests/test_filters.py >> /tmp/a.patch
 git show 7ca375cb6ff82011d716da1614ef5721a97f91b0 >> /tmp/a.patch

commit 7ac9b058104219b26747f3fc9761ac6b3c037402
Author: James R. Barlow <ja...@purplerock.ca>
Date:   Mon Jan 4 20:21:51 2021 -0800

    Fix externalize_inline_images for qpdf 10.1.0

diff --git a/src/qpdf/page.cpp b/src/qpdf/page.cpp
index 64b641d..6f70d20 100644
--- a/src/qpdf/page.cpp
+++ b/src/qpdf/page.cpp
@@ -106,7 +106,10 @@ void init_page(py::module_& m)
         .def("_get_mediabox", &QPDFPageObjectHelper::getMediaBox)
         .def("_get_cropbox", &QPDFPageObjectHelper::getCropBox)
         .def("_get_trimbox", &QPDFPageObjectHelper::getTrimBox)
-        .def("externalize_inline_images", &QPDFPageObjectHelper::externalizeInlineImages,
+        .def("externalize_inline_images",
+            [](QPDFPageObjectHelper &poh, size_t min_size = 0) {
+                return poh.externalizeInlineImages(min_size);
+            },
             py::arg("min_size") = 0,
             R"~~~(
                 Convert inlines image to normal (external) images.
commit 5394b7855eb87f637316432b2b597294adcb974e
Author: James R. Barlow <ja...@purplerock.ca>
Date:   Tue Aug 18 00:36:46 2020 -0700

    Fix potential undefined behavior in memoryview
    
    Apply patch based on pybind11 PR2223 to change how Python
    memory views on C++ buffers are declared - avoids a possible use-
    after-free in Python's code.
    
    This change is not in a released pybind11 version yet so we are
    pulling it forward.

diff --git a/src/qpdf/pipeline.cpp b/src/qpdf/pipeline.cpp
index 1856987..8a4fe79 100644
--- a/src/qpdf/pipeline.cpp
+++ b/src/qpdf/pipeline.cpp
@@ -22,6 +22,7 @@
 
 #include "pikepdf.h"
 #include "pipeline.h"
+#include "utils.h"
 
 
 void Pl_PythonOutput::write(unsigned char *buf, size_t len)
@@ -29,8 +30,7 @@ void Pl_PythonOutput::write(unsigned char *buf, size_t len)
     py::gil_scoped_acquire gil;
     ssize_t so_far = 0;
     while (len > 0) {
-        py::buffer_info buffer(buf, len);
-        py::memoryview view_buffer(buffer);
+        py::memoryview view_buffer = memoryview_from_memory(buf, len);
         py::object result = this->stream.attr("write")(view_buffer);
         try {
             so_far = result.cast<ssize_t>();
diff --git a/src/qpdf/qpdf_inputsource.h b/src/qpdf/qpdf_inputsource.h
index 4a96f6b..8ec6a55 100644
--- a/src/qpdf/qpdf_inputsource.h
+++ b/src/qpdf/qpdf_inputsource.h
@@ -82,8 +82,7 @@ public:
     {
         py::gil_scoped_acquire gil;
 
-        py::buffer_info buffer_info(buffer, length);
-        py::memoryview view_buffer_info(buffer_info);
+        py::memoryview view_buffer_info = memoryview_from_memory(buffer, length);
 
         this->last_offset = this->tell();
         py::object result = this->stream.attr("readinto")(view_buffer_info);
diff --git a/src/qpdf/utils.cpp b/src/qpdf/utils.cpp
index bf189f6..36769ca 100644
--- a/src/qpdf/utils.cpp
+++ b/src/qpdf/utils.cpp
@@ -46,3 +46,20 @@
     }
 
 #endif
+
+// Copied from pybind11 master branch (pre-2.6), can remove when we require
+// pybind11 v2.6 and replace with py::memoryview::from_memory
+py::memoryview memoryview_from_memory(void *mem, ssize_t size, bool readonly)
+{
+    PyObject* ptr = PyMemoryView_FromMemory(
+        reinterpret_cast<char*>(mem), size,
+        (readonly) ? PyBUF_READ : PyBUF_WRITE);
+    if (!ptr)
+        py::pybind11_fail("Could not allocate memoryview object!");
+    return py::reinterpret_steal<py::memoryview>(ptr);
+}
+
+py::memoryview memoryview_from_memory(const void *mem, ssize_t size)
+{
+    return memoryview_from_memory(const_cast<void*>(mem), size, true);
+}
diff --git a/src/qpdf/utils.h b/src/qpdf/utils.h
index 9f6a411..f26c5bd 100644
--- a/src/qpdf/utils.h
+++ b/src/qpdf/utils.h
@@ -17,3 +17,6 @@ inline bool str_startswith(T haystack, S needle)
 {
     return std::string(haystack).rfind(needle, 0) == 0;
 }
+
+py::memoryview memoryview_from_memory(void *mem, ssize_t size, bool readonly = false);
+py::memoryview memoryview_from_memory(const void *mem, ssize_t size);
commit fe4b568ac88ba551458578613b013ac17edf5c4e
Author: James R. Barlow <ja...@purplerock.ca>
Date:   Wed Jan 6 00:22:16 2021 -0800

    libqpdf 10.1.0 raises different exception
    
    The different errors are acceptable to us; actually they are more
    correct than the original behavior.

diff --git a/tests/test_filters.py b/tests/test_filters.py
index 90c0ca5..36d2a4c 100644
--- a/tests/test_filters.py
+++ b/tests/test_filters.py
@@ -74,19 +74,18 @@ class FilterInvalid(pikepdf.TokenFilter):
 
 def test_invalid_handle_token(pal):
     page = pikepdf.Page(pal.pages[0])
-    with pytest.raises(pikepdf.PdfError):
-        result = page.get_filtered_contents(FilterInvalid())
+    with pytest.raises((TypeError, pikepdf.PdfError)):
+        page.get_filtered_contents(FilterInvalid())
 
 
 def test_invalid_tokenfilter(pal):
     page = pikepdf.Page(pal.pages[0])
     with pytest.raises(TypeError):
-        result = page.get_filtered_contents(list())
+        page.get_filtered_contents(list())
 
 
 def test_tokenfilter_is_abstract(pal):
     page = pikepdf.Page(pal.pages[0])
-    try:
-        result = page.get_filtered_contents(pikepdf.TokenFilter())
-    except pikepdf.PdfError:
-        assert 'Tried to call pure virtual' in pal.get_warnings()[0]
+    with pytest.raises((RuntimeError, pikepdf.PdfError)):
+        page.get_filtered_contents(pikepdf.TokenFilter())
+    assert 'Tried to call pure virtual' in pal.get_warnings()[0]
commit 7ca375cb6ff82011d716da1614ef5721a97f91b0
Author: James R. Barlow <ja...@purplerock.ca>
Date:   Wed Jan 6 03:39:50 2021 -0800

    Fix test_tokenfilter_is_abstract

diff --git a/tests/test_filters.py b/tests/test_filters.py
index 36d2a4c..f6421c9 100644
--- a/tests/test_filters.py
+++ b/tests/test_filters.py
@@ -88,4 +88,3 @@ def test_tokenfilter_is_abstract(pal):
     page = pikepdf.Page(pal.pages[0])
     with pytest.raises((RuntimeError, pikepdf.PdfError)):
         page.get_filtered_contents(pikepdf.TokenFilter())
-    assert 'Tried to call pure virtual' in pal.get_warnings()[0]

Reply via email to