[gentoo-commits] repo/gentoo:master commit in: media-libs/zimg/files/, media-libs/zimg/

Andreas Sturmlechner Tue, 21 May 2019 12:30:07 -0700

commit:     dcc0c10f45442c42c2bdbf10564a8eb39196edee
Author:     Andreas Sturmlechner <asturm <AT> gentoo <DOT> org>
AuthorDate: Tue May 21 19:28:14 2019 +0000
Commit:     Andreas Sturmlechner <asturm <AT> gentoo <DOT> org>
CommitDate: Tue May 21 19:29:45 2019 +0000
URL:        https://gitweb.gentoo.org/repo/gentoo.git/commit/?id=dcc0c10f


media-libs/zimg: EAPI-7 bump

Package-Manager: Portage-2.3.66, Repoman-2.3.12
Signed-off-by: Andreas Sturmlechner <asturm <AT> gentoo.org>

 media-libs/zimg/files/zimg-2.7.5-sse2.patch | 231 ++++++++++++++++++++++++++++
 media-libs/zimg/zimg-2.7.5.ebuild           |  35 ++---
 2 files changed, 247 insertions(+), 19 deletions(-)

diff --git a/media-libs/zimg/files/zimg-2.7.5-sse2.patch b/media-libs/zimg/files/zimg-2.7.5-sse2.patch
new file mode 100644
index 00000000000..77483ab59e7
--- /dev/null
+++ b/media-libs/zimg/files/zimg-2.7.5-sse2.patch
@@ -0,0 +1,231 @@
+From e30112df0ca703be82ed2c852511916fc46defbd Mon Sep 17 00:00:00 2001
+From: sekrit-twc <[email protected]>
+Date: Fri, 22 Mar 2019 18:51:14 -0700
+Subject: [PATCH] colorspace: use bfloat16 for SSE2 linear-to-gamma LUT
+
+On Skylake, processing 512 pixel array:
+
+direction      cycles/sample   cycles/pxvector
+     g->l               2.35              9.43
+     l->g               2.49              9.97
+---
+ .../colorspace/x86/operation_impl_avx2.cpp    |   3 +-
+ .../colorspace/x86/operation_impl_sse2.cpp    | 104 ++++++++++++++++--
+ test/colorspace/x86/colorspace_sse2_test.cpp  |  14 +--
+ 3 files changed, 100 insertions(+), 21 deletions(-)
+
+diff --git a/src/zimg/colorspace/x86/operation_impl_avx2.cpp b/src/zimg/colorspace/x86/operation_impl_avx2.cpp
+index bbbbf896..f0e7f792 100644
+--- a/src/zimg/colorspace/x86/operation_impl_avx2.cpp
++++ b/src/zimg/colorspace/x86/operation_impl_avx2.cpp
+@@ -114,8 +114,7 @@ class ToGammaLutOperationAVX2 final : public Operation {
+       {
+               EnsureSinglePrecision x87;
+ 
+-              // Allocate an extra LUT entry so that indexing can be done by multipying by a power of 2.
+-              for (unsigned long i = 0; i <= UINT16_MAX; ++i) {
++              for (size_t i = 0; i <= UINT16_MAX; ++i) {
+                       uint16_t half = static_cast<uint16_t>(i);
+                       float x = _mm_cvtss_f32(_mm_cvtph_ps(_mm_set1_epi16(half)));
+                       m_lut[i] = func(x * prescale);
+diff --git a/src/zimg/colorspace/x86/operation_impl_sse2.cpp b/src/zimg/colorspace/x86/operation_impl_sse2.cpp
+index 48645031..da9d4dbc 100644
+--- a/src/zimg/colorspace/x86/operation_impl_sse2.cpp
++++ b/src/zimg/colorspace/x86/operation_impl_sse2.cpp
+@@ -3,11 +3,13 @@
+ #include <algorithm>
+ #include <cstddef>
+ #include <cstdint>
++#include <type_traits>
+ #include <vector>
+ #include <emmintrin.h>
+ #include "common/align.h"
+ #include "common/ccdep.h"
+ #include "common/make_unique.h"
++#include "common/x86/sse2_util.h"
+ #include "colorspace/gamma.h"
+ #include "colorspace/operation.h"
+ #include "colorspace/operation_impl.h"
+@@ -20,14 +22,25 @@ namespace {
+ 
+ constexpr unsigned LUT_DEPTH = 16;
+ 
+-void lut_filter_line(const float *RESTRICT lut, unsigned lut_depth, float prescale, const float *src, float *dst, unsigned left, unsigned right)
++template <class T, class U>
++T bit_cast(const U &x) noexcept
++{
++      static_assert(sizeof(T) == sizeof(U), "object sizes must match");
++      static_assert(std::is_pod<T>::value && std::is_pod<U>::value, "object types must be POD");
++
++      T ret;
++      std::copy_n(reinterpret_cast<const char *>(&x), sizeof(x), reinterpret_cast<char *>(&ret));
++      return ret;
++}
++
++void to_linear_lut_filter_line(const float *RESTRICT lut, unsigned lut_depth, const float *src, float *dst, unsigned left, unsigned right)
+ {
+       unsigned vec_left = ceil_n(left, 4);
+       unsigned vec_right = floor_n(right, 4);
+ 
+       const int32_t lut_limit = static_cast<int32_t>(1) << lut_depth;
+ 
+-      const __m128 scale = _mm_set_ps1(0.5f * prescale * lut_limit);
++      const __m128 scale = _mm_set_ps1(0.5f * lut_limit);
+       const __m128 offset = _mm_set_ps1(0.25f * lut_limit);
+       const __m128i limit = _mm_set1_epi16(std::min(lut_limit + INT16_MIN, static_cast<int32_t>(INT16_MAX)));
+       const __m128i bias_epi16 = _mm_set1_epi16(INT16_MIN);
+@@ -73,16 +86,61 @@ void lut_filter_line(const float *RESTRICT lut, unsigned lut_depth, float presca
+       }
+ }
+ 
++void to_gamma_lut_filter_line(const float *RESTRICT lut, const float *src, float *dst, unsigned left, unsigned right)
++{
++      unsigned vec_left = ceil_n(left, 4);
++      unsigned vec_right = floor_n(right, 4);
++
++      for (unsigned j = left; j < vec_left; ++j) {
++              __m128i x = _mm_castps_si128(_mm_load_ss(src + j));
++              __m128i msb = _mm_srli_epi32(x, 16);
++              __m128i lsb = _mm_and_si128(_mm_srli_epi32(x, 15), _mm_set1_epi32(1));
++              x = mm_packus_epi32(msb, lsb);
++              x = _mm_adds_epi16(x, _mm_shuffle_epi32(x, _MM_SHUFFLE(1, 0, 3, 2)));
++
++              dst[j] = lut[_mm_cvtsi128_si32(x)];
++      }
++      for (unsigned j = vec_left; j < vec_right; j += 4) {
++              __m128i x = _mm_castps_si128(_mm_load_ps(src + j));
++              __m128i msb = _mm_srli_epi32(x, 16);
++              __m128i lsb = _mm_and_si128(_mm_srli_epi32(x, 15), _mm_set1_epi32(1));
++              x = mm_packus_epi32(msb, lsb);
++              x = _mm_adds_epi16(x, _mm_shuffle_epi32(x, _MM_SHUFFLE(1, 0, 3, 2)));
++
++#if SIZE_MAX >= UINT64_MAX
++              uint64_t tmp = _mm_cvtsi128_si64(x);
++              dst[j + 0] = lut[tmp & 0xFFFFU];
++              dst[j + 1] = lut[(tmp >> 16) & 0xFFFFU];
++              dst[j + 2] = lut[(tmp >> 32) & 0xFFFFU];
++              dst[j + 3] = lut[tmp >> 48];
++#else
++              uint32_t tmp0 = _mm_cvtsi128_si32(x);
++              uint32_t tmp1 = _mm_cvtsi128_si32(_mm_shuffle_epi32(x, _MM_SHUFFLE(3, 2, 0, 1)));
++              dst[j + 0] = lut[tmp0 & 0xFFFFU];
++              dst[j + 1] = lut[tmp0 >> 16];
++              dst[j + 2] = lut[tmp1 & 0xFFFFU];
++              dst[j + 3] = lut[tmp1 >> 16];
++#endif
++      }
++      for (unsigned j = vec_right; j < right; ++j) {
++              __m128i x = _mm_castps_si128(_mm_load_ss(src + j));
++              __m128i msb = _mm_srli_epi32(x, 16);
++              __m128i lsb = _mm_and_si128(_mm_srli_epi32(x, 15), _mm_set1_epi32(1));
++              x = mm_packus_epi32(msb, lsb);
++              x = _mm_adds_epi16(x, _mm_shuffle_epi32(x, _MM_SHUFFLE(1, 0, 3, 2)));
++
++              dst[j] = lut[_mm_cvtsi128_si32(x)];
++      }
++}
++
+ 
+-class LutOperationSSE2 final : public Operation {
++class ToLinearLutOperationSSE2 final : public Operation {
+       std::vector<float> m_lut;
+       unsigned m_lut_depth;
+-      float m_prescale;
+ public:
+-      LutOperationSSE2(gamma_func func, unsigned lut_depth, float prescale, float postscale) :
++      ToLinearLutOperationSSE2(gamma_func func, unsigned lut_depth, float postscale) :
+               m_lut((1UL << lut_depth) + 1),
+-              m_lut_depth{ lut_depth },
+-              m_prescale{ static_cast<float>(prescale) }
++              m_lut_depth{ lut_depth }
+       {
+               EnsureSinglePrecision x87;
+ 
+@@ -95,9 +153,31 @@ class LutOperationSSE2 final : public Operation {
+ 
+       void process(const float * const *src, float * const *dst, unsigned left, unsigned right) const override
+       {
+-              lut_filter_line(m_lut.data(), m_lut_depth, m_prescale, src[0], dst[0], left, right);
+-              lut_filter_line(m_lut.data(), m_lut_depth, m_prescale, src[1], dst[1], left, right);
+-              lut_filter_line(m_lut.data(), m_lut_depth, m_prescale, src[2], dst[2], left, right);
++              to_linear_lut_filter_line(m_lut.data(), m_lut_depth, src[0], dst[0], left, right);
++              to_linear_lut_filter_line(m_lut.data(), m_lut_depth, src[1], dst[1], left, right);
++              to_linear_lut_filter_line(m_lut.data(), m_lut_depth, src[2], dst[2], left, right);
++      }
++};
++
++class ToGammaLutOperationSSE2 final : public Operation {
++      std::vector<float> m_lut;
++public:
++      ToGammaLutOperationSSE2(gamma_func func, float prescale) :
++              m_lut(static_cast<uint32_t>(UINT16_MAX) + 1)
++      {
++              EnsureSinglePrecision x87;
++
++              for (size_t i = 0; i <= UINT16_MAX; ++i) {
++                      float x = bit_cast<float>(static_cast<uint32_t>(i << 16));
++                      m_lut[i] = func(x * prescale);
++              }
++      }
++
++      void process(const float * const *src, float * const *dst, unsigned left, unsigned right) const override
++      {
++              to_gamma_lut_filter_line(m_lut.data(), src[0], dst[0], left, right);
++              to_gamma_lut_filter_line(m_lut.data(), src[1], dst[1], left, right);
++              to_gamma_lut_filter_line(m_lut.data(), src[2], dst[2], left, right);
+       }
+ };
+ 
+@@ -109,7 +189,7 @@ std::unique_ptr<Operation> create_gamma_operation_sse2(const TransferFunction &t
+       if (!params.approximate_gamma)
+               return nullptr;
+ 
+-      return ztd::make_unique<LutOperationSSE2>(transfer.to_gamma, LUT_DEPTH, transfer.to_gamma_scale, 1.0f);
++      return ztd::make_unique<ToGammaLutOperationSSE2>(transfer.to_gamma, transfer.to_gamma_scale);
+ }
+ 
+ std::unique_ptr<Operation> create_inverse_gamma_operation_sse2(const TransferFunction &transfer, const OperationParams &params)
+@@ -117,7 +197,7 @@ std::unique_ptr<Operation> create_inverse_gamma_operation_sse2(const TransferFun
+       if (!params.approximate_gamma)
+               return nullptr;
+ 
+-      return ztd::make_unique<LutOperationSSE2>(transfer.to_linear, LUT_DEPTH, 1.0f, transfer.to_linear_scale);
++      return ztd::make_unique<ToLinearLutOperationSSE2>(transfer.to_linear, LUT_DEPTH, transfer.to_linear_scale);
+ }
+ 
+ } // namespace colorspace
+diff --git a/test/colorspace/x86/colorspace_sse2_test.cpp b/test/colorspace/x86/colorspace_sse2_test.cpp
+index d5130868..ecaa05e7 100644
+--- a/test/colorspace/x86/colorspace_sse2_test.cpp
++++ b/test/colorspace/x86/colorspace_sse2_test.cpp
+@@ -53,9 +53,9 @@ TEST(ColorspaceConversionSSE2Test, test_transfer_lut)
+                       "162687e701627cdc17283a32c36ea711d28a953e"
+               },
+               {
+-                      "492587e7ed75b7e3ab868bead6ade7a4137c6ea1",
+-                      "3b0694e9fbce61466cb5a575f300d784089b6cad",
+-                      "b68f103f52ccafae867d664d7f27fe56ae9208af"
++                      "95f2715bd0d417028bebd5c5377180fcd5b01119",
++                      "76f7c88b198f1ab08167f8162c1237b54f22007a",
++                      "1099c3ae187c0a9f79acb9445761b6056218c779"
+               },
+               {
+                       "4c0b5ffe768a7812d1ef102b4d8d52614838bc8e",
+@@ -63,13 +63,13 @@ TEST(ColorspaceConversionSSE2Test, test_transfer_lut)
+                       "85a277a80dfca2e21789cedd76aaee307dbc4562"
+               },
+               {
+-                      "df546ce0ad6f859499a96d2d697d896067e60e38",
+-                      "f0041b8a008ab45f0ea1319090ac7e8be0990d92",
+-                      "06880efb598e41f96fa79e04dbdfcccd50d6dc6f"
++                      "5e35786d313e936566d9873ba7a08a8d6005b2ee",
++                      "829fa88acfbbb26801871bf3cadf5cc2eb6830c9",
++                      "f82fcad18a19b548d419a1952b6a7a423a684b62"
+               },
+       };
+       const double expected_tolinear_snr = 80.0;
+-      const double expected_togamma_snr = 40.0;
++      const double expected_togamma_snr = 60.0;
+ 
+       SCOPED_TRACE("tolinear 709");
+       test_case({ MatrixCoefficients::RGB, TransferCharacteristics::REC_709, ColorPrimaries::UNSPECIFIED },

diff --git a/media-libs/zimg/zimg-2.7.5.ebuild b/media-libs/zimg/zimg-2.7.5.ebuild
index 1775f6b60bb..7bac59befc3 100644
--- a/media-libs/zimg/zimg-2.7.5.ebuild
+++ b/media-libs/zimg/zimg-2.7.5.ebuild
@@ -1,36 +1,33 @@
 # Copyright 1999-2019 Gentoo Authors
 # Distributed under the terms of the GNU General Public License v2
 
-EAPI=5
+EAPI=7
 
-AUTOTOOLS_AUTORECONF=yes
-
-SCM=""
-
-if [ "${PV#9999}" != "${PV}" ] ; then
-       SCM="git-r3"
+if [[ ${PV} = *9999* ]] ; then
        EGIT_REPO_URI="https://github.com/sekrit-twc/zimg"
-fi
-
-inherit autotools-multilib ${SCM}
-
-DESCRIPTION="Scaling, colorspace conversion, and dithering library"
-HOMEPAGE="https://github.com/sekrit-twc/zimg"
-
-if [ "${PV#9999}" = "${PV}" ] ; then
+       inherit git-r3
+else
        
        SRC_URI="https://github.com/sekrit-twc/zimg/archive/release-${PV}.tar.gz -> ${P}.tar.gz"
        KEYWORDS="~alpha amd64 ~arm arm64 ~hppa ~ia64 ~ppc ~ppc64 ~sparc ~x86"
        S="${WORKDIR}/${PN}-release-${PV}/"
 fi
+inherit autotools multilib-minimal
+
+DESCRIPTION="Scaling, colorspace conversion, and dithering library"
+HOMEPAGE="https://github.com/sekrit-twc/zimg"
 
 LICENSE="WTFPL-2"
 SLOT="0"
 IUSE="static-libs cpu_flags_x86_sse"
 
-DEPEND=""
-RDEPEND="${DEPEND}"
+PATCHES=( "${FILESDIR}/${P}-sse2.patch" )
+
+src_prepare() {
+       default
+       eautoreconf
+}
 
-src_configure() {
-       autotools-multilib_src_configure \
+multilib_src_configure() {
+       ECONF_SOURCE="${S}" econf \
                $(use_enable cpu_flags_x86_sse x86simd)
 }

[gentoo-commits] repo/gentoo:master commit in: media-libs/zimg/files/, media-libs/zimg/

Reply via email to