commit:     a52e465a71b97dd14b803d5e2ac86735aca84d99
Author:     Louis Sautier <sbraz <AT> gentoo <DOT> org>
AuthorDate: Mon Mar 11 21:23:03 2024 +0000
Commit:     Louis Sautier <sbraz <AT> gentoo <DOT> org>
CommitDate: Mon Mar 11 21:26:21 2024 +0000
URL:        https://gitweb.gentoo.org/repo/gentoo.git/commit/?id=a52e465a

www-misc/urlwatch: fix CSS selectors with dev-python/lxml >= 5.0.0

Closes: https://bugs.gentoo.org/926767
Signed-off-by: Louis Sautier <sbraz <AT> gentoo.org>

 www-misc/urlwatch/files/urlwatch-2.28-lxml-5.patch | 28 ++++++++
 www-misc/urlwatch/urlwatch-2.28-r1.ebuild          | 76 ++++++++++++++++++++++
 2 files changed, 104 insertions(+)

diff --git a/www-misc/urlwatch/files/urlwatch-2.28-lxml-5.patch b/www-misc/urlwatch/files/urlwatch-2.28-lxml-5.patch
new file mode 100644
index 000000000000..d5f80410859f
--- /dev/null
+++ b/www-misc/urlwatch/files/urlwatch-2.28-lxml-5.patch
@@ -0,0 +1,28 @@
+https://github.com/thp/urlwatch/commit/123de66d019aef7fc18fab6d56cc2a54d81fea3f
+
+From: James Hewitt <[email protected]>
+Date: Wed, 17 Jan 2024 13:50:28 +0000
+Subject: [PATCH] Update CSS Selector to use new style
+
+New style of calling the CSSSelector directly instead of using the
+evaluate function. This has been supported since lxml 1.1 [1] and the
+evaluate method has been deprecated since lxml 2.1 [2].
+
+[1] https://github.com/lxml/lxml/blob/lxml-1.1/src/lxml/xpath.pxi#L66
+[2] https://github.com/lxml/lxml/blob/lxml-2.1/src/lxml/xpath.pxi#L143
+
+Signed-off-by: James Hewitt <[email protected]>
+--- a/lib/urlwatch/filters.py
++++ b/lib/urlwatch/filters.py
+@@ -761,9 +761,9 @@ def _get_filtered_elements(self):
+         excluded_elems = None
+         if self.filter_kind == 'css':
+             selected_elems = CSSSelector(self.expression,
+-                                         namespaces=self.namespaces).evaluate(root)
++                                         namespaces=self.namespaces)(root)
+             excluded_elems = CSSSelector(self.exclude,
+-                                         namespaces=self.namespaces).evaluate(root) if self.exclude else None
++                                         namespaces=self.namespaces)(root) if self.exclude else None
+         elif self.filter_kind == 'xpath':
+             selected_elems = root.xpath(self.expression, namespaces=self.namespaces)
+             excluded_elems = root.xpath(self.exclude, namespaces=self.namespaces) if self.exclude else None

diff --git a/www-misc/urlwatch/urlwatch-2.28-r1.ebuild b/www-misc/urlwatch/urlwatch-2.28-r1.ebuild
new file mode 100644
index 000000000000..ae6e35c21106
--- /dev/null
+++ b/www-misc/urlwatch/urlwatch-2.28-r1.ebuild
@@ -0,0 +1,76 @@
+# Copyright 1999-2024 Gentoo Authors
+# Distributed under the terms of the GNU General Public License v2
+
+EAPI=8
+
+DISTUTILS_USE_PEP517=setuptools
+PYTHON_COMPAT=( python3_{10..12} )
+
+inherit distutils-r1 pypi
+
+DESCRIPTION="A tool for monitoring webpages for updates"
+HOMEPAGE="
+       https://thp.io/2008/urlwatch/
+       https://github.com/thp/urlwatch/
+       https://pypi.org/project/urlwatch/
+"
+
+LICENSE="BSD"
+SLOT="0"
+KEYWORDS="~amd64 ~x86"
+
+RDEPEND="
+       dev-python/appdirs[${PYTHON_USEDEP}]
+       dev-python/cssselect[${PYTHON_USEDEP}]
+       dev-python/keyring[${PYTHON_USEDEP}]
+       dev-python/lxml[${PYTHON_USEDEP}]
+       >=dev-python/minidb-2.0.6[${PYTHON_USEDEP}]
+       dev-python/pyyaml[${PYTHON_USEDEP}]
+       dev-python/requests[${PYTHON_USEDEP}]
+"
+BDEPEND="
+       test? (
+               app-text/tesseract[png]
+               dev-python/docutils[${PYTHON_USEDEP}]
+               dev-python/jq[${PYTHON_USEDEP}]
+               dev-python/pytesseract[${PYTHON_USEDEP}]
+       )
+"
+
+PATCHES=(
+       "${FILESDIR}/${P}-lxml-5.patch"
+)
+
+DOCS=( CHANGELOG.md README.md )
+
+distutils_enable_sphinx docs/source dev-python/alabaster
+distutils_enable_tests pytest
+
+EPYTEST_DESELECT=(
+       # Require the pdftotext module
+       "lib/urlwatch/tests/test_filter_documentation.py::test_url[https://example.net/pdf-test.pdf]"
+       "lib/urlwatch/tests/test_filter_documentation.py::test_url[https://example.net/pdf-test-password.pdf]"
+       # Skip code quality check
+       "lib/urlwatch/tests/test_handler.py::test_pep8_conformance"
+)
+
+pkg_postinst() {
+       if [[ -z "${REPLACING_VERSIONS}" ]]; then
+               if ! has_version dev-python/chump; then
+                       elog "Install 'dev-python/chump' to enable Pushover" \
+                               "notifications support"
+               fi
+               if ! has_version dev-python/jq; then
+                       elog "Install 'dev-python/jq' to enable jq filtering support"
+               fi
+               if ! has_version dev-python/pytesseract; then
+                       elog "Install 'dev-python/pytesseract' to enable OCR support"
+               fi
+               elog "HTML parsing can be improved by installing one of the following packages"
+               elog "and changing the html2text subfilter parameter:"
+               elog "dev-python/beautifulsoup4"
+               elog "app-text/html2text"
+               elog "dev-python/html2text"
+               elog "www-client/lynx"
+       fi
+}

Reply via email to