vcl/Library_vcl.mk | 16 ++++++ vcl/inc/ScanlineTools.hxx | 28 +++++++++++ vcl/source/bitmap/ScanlineTools.cxx | 30 ++++++++++++ vcl/source/bitmap/ScanlineToolsAVX2.cxx | 64 ++++++++++++++++++++++++++ vcl/source/bitmap/ScanlineToolsSSE2.cxx | 76 +++++++++++++++++++++++++++++++ vcl/source/bitmap/ScanlineToolsSSSE3.cxx | 61 ++++++++++++++++++++++++ 6 files changed, 275 insertions(+)
New commits: commit 81910410d22c060f9901b129697ea43a25cfbd99 Author: Tomaž Vajngerl <[email protected]> Date: Thu Jul 13 22:02:19 2017 +0200 scanline tools: convert RGBA <-> BGRA + vectorized fast paths This adds a tool to convert a scanline from RGBA color channel order to BGRA color channel order and back. It also includes the vectorized fast path to accelerate it with SSE2 (~1.7x faster), SSSE3 (~4x faster), AVX2 (~8x faster). Change-Id: Ic427eed15d3cef40f9ad87220fb6b71770673c92 diff --git a/vcl/Library_vcl.mk b/vcl/Library_vcl.mk index a0113a2e85d4..93981505a600 100644 --- a/vcl/Library_vcl.mk +++ b/vcl/Library_vcl.mk @@ -411,6 +411,22 @@ $(eval $(call gb_Library_add_exception_objects,vcl,\ vcl/backendtest/outputdevice/rectangle \ )) +$(eval $(call gb_Library_add_cxxobjects,vcl,\ + vcl/source/bitmap/ScanlineTools, $(gb_LinkTarget_EXCEPTIONFLAGS) \ +)) + +$(eval $(call gb_Library_add_cxxobjects,vcl,\ + vcl/source/bitmap/ScanlineToolsSSE2, $(gb_LinkTarget_EXCEPTIONFLAGS) $(INTRINSICS_CXXFLAGS)\ +)) + +$(eval $(call gb_Library_add_cxxobjects,vcl,\ + vcl/source/bitmap/ScanlineToolsSSSE3, $(gb_LinkTarget_EXCEPTIONFLAGS) $(INTRINSICS_CXXFLAGS)\ +)) + +$(eval $(call gb_Library_add_cxxobjects,vcl,\ + vcl/source/bitmap/ScanlineToolsAVX2, $(gb_LinkTarget_EXCEPTIONFLAGS) $(INTRINSICS_CXXFLAGS)\ +)) + $(eval $(call gb_Library_add_cobjects,vcl,\ vcl/source/filter/jpeg/transupp \ )) diff --git a/vcl/inc/ScanlineTools.hxx b/vcl/inc/ScanlineTools.hxx new file mode 100644 index 000000000000..898019903a15 --- /dev/null +++ b/vcl/inc/ScanlineTools.hxx @@ -0,0 +1,28 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ * + */ + +#ifndef INCLUDED_VCL_INC_SCANLINETOOLS_HXX +#define INCLUDED_VCL_INC_SCANLINETOOLS_HXX + +#include <vcl/dllapi.h> + +namespace vcl { +namespace scanline { + +bool VCL_DLLPUBLIC swapABCDtoCBAD(sal_uInt8* pSource, sal_uInt8* pDestination, sal_Int32 nScanlineSize); +bool VCL_DLLPUBLIC swapABCDtoCBAD_SSE2(sal_uInt8* pSource, sal_uInt8* pDestination, sal_Int32 nScanlineSize); +bool VCL_DLLPUBLIC swapABCDtoCBAD_SSSE3(sal_uInt8* pSource, sal_uInt8* pDestination, sal_Int32 nScanlineSize); +bool VCL_DLLPUBLIC swapABCDtoCBAD_AVX2(sal_uInt8* pSource, sal_uInt8* pDestination, sal_Int32 nScanlineSize); + +}} // end vcl::scanline + +#endif // INCLUDED_VCL_INC_SCANLINETOOLS_HXX + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/vcl/source/bitmap/ScanlineTools.cxx b/vcl/source/bitmap/ScanlineTools.cxx new file mode 100644 index 000000000000..ceefd0f2c30f --- /dev/null +++ b/vcl/source/bitmap/ScanlineTools.cxx @@ -0,0 +1,30 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ * + */ + +#include "ScanlineTools.hxx" + +namespace vcl { +namespace scanline { + +bool swapABCDtoCBAD(sal_uInt8* pSource, sal_uInt8* pDestination, sal_Int32 nScanlineSize) +{ + for (sal_Int32 i = 0; i < nScanlineSize; i += 4, pSource += 4, pDestination += 4) + { + pDestination[0] = pSource[2]; + pDestination[1] = pSource[1]; + pDestination[2] = pSource[0]; + pDestination[3] = pSource[3]; + } + return true; +} + +}} // end vcl::scanline + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/vcl/source/bitmap/ScanlineToolsAVX2.cxx b/vcl/source/bitmap/ScanlineToolsAVX2.cxx new file mode 100644 index 000000000000..1ef386f8256b --- /dev/null +++ b/vcl/source/bitmap/ScanlineToolsAVX2.cxx @@ -0,0 +1,64 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ * + */ + +#include "ScanlineTools.hxx" + +#include <tools/simdsupport.hxx> + +#if defined(LO_AVX2_AVAILABLE) +#include <immintrin.h> +#endif + +namespace vcl { +namespace scanline { + +#if defined(LO_AVX2_AVAILABLE) +bool swapABCDtoCBAD_AVX2(sal_uInt8* pSource, sal_uInt8* pDestination, sal_Int32 nScanlineSize) +{ + __m256i aShuffleMask = _mm256_set_epi8(31, 28, 29, 30, 27, 24, 25, 26, + 23, 20, 21, 22, 19, 16, 17, 18, + 15, 12, 13, 14, 11, 8, 9, 10, + 7, 4, 5, 6, 3, 0, 1, 2); + + sal_Int32 nBlocks = nScanlineSize / 32; + + if (nBlocks > 0) + { + __m256i* pSource256 = reinterpret_cast<__m256i*>(pSource); + __m256i* pDestination256 = reinterpret_cast<__m256i*>(pDestination); + + for (sal_Int32 x = 0; x < nBlocks; ++x, ++pDestination256, ++pSource256) + { + _mm256_storeu_si256(pDestination256, _mm256_shuffle_epi8(_mm256_loadu_si256(pSource256), aShuffleMask)); + } + } + + pSource += nBlocks * 32; + pDestination += nBlocks * 32; + + for (sal_Int32 i = nBlocks * 32; i < nScanlineSize; i += 4, pSource += 4, pDestination += 4) + { + pDestination[0] = pSource[2]; + pDestination[1] = pSource[1]; + pDestination[2] = pSource[0]; + pDestination[3] = pSource[3]; + } + return true; +} +#else +bool swapABCDtoCBAD_AVX2(sal_uInt8* pSource, sal_uInt8* pDestination, sal_Int32 nScanlineSize) +{ + return false; +} +#endif + +}} // end vcl::scanline + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/vcl/source/bitmap/ScanlineToolsSSE2.cxx b/vcl/source/bitmap/ScanlineToolsSSE2.cxx new file mode 100644 index 000000000000..1f52fb6c5ec7 --- /dev/null +++ b/vcl/source/bitmap/ScanlineToolsSSE2.cxx @@ -0,0 +1,76 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ * + */ + +#include "ScanlineTools.hxx" + +#include <tools/simdsupport.hxx> + +#if defined(LO_SSE2_AVAILABLE) +#include <emmintrin.h> +#endif + +namespace vcl { +namespace scanline { + +#if defined(LO_SSE2_AVAILABLE) +bool swapABCDtoCBAD_SSE2(sal_uInt8* pSource, sal_uInt8* pDestination, sal_Int32 nScanlineSize) +{ + + sal_Int32 nBlocks = nScanlineSize / 16; + + if (nBlocks > 0) + { + __m128i* pSource128 = reinterpret_cast<__m128i*>(pSource); + __m128i* pDestination128 = reinterpret_cast<__m128i*>(pDestination); + + __m128i agmask = _mm_set1_epi32(0xFF00FF00); + + for (sal_Int32 x = 0; x < nBlocks; ++x, ++pDestination128, ++pSource128) + { + // RGBA RGBA RGBA RGBA + __m128i rgba = _mm_loadu_si128(pSource128); + + // 0G0A 0G0A 0G0A 0G0A + __m128i ag = _mm_and_si128(agmask, rgba); + // R0B0 R0B0 R0B0 R0B0 + __m128i rb = _mm_andnot_si128(agmask, rgba); + + // Swap R and B + // B0R0 B0R0 B0R0 B0R0 + __m128i br = _mm_shufflehi_epi16(_mm_shufflelo_epi16(rb, _MM_SHUFFLE(2, 3, 0, 1)), _MM_SHUFFLE(2, 3, 0, 1)); + + // B0R0 B0R0 B0R0 B0R0 + // or 0G0A 0G0A 0G0A 0G0A + _mm_storeu_si128(pDestination128, _mm_or_si128(ag, br)); + } + } + + pSource += nBlocks * 16; + pDestination += nBlocks * 16; + + for (sal_Int32 i = nBlocks * 16; i < nScanlineSize; i += 4, pSource += 4, pDestination += 4) + { + pDestination[0] = pSource[2]; + pDestination[1] = pSource[1]; + pDestination[2] = pSource[0]; + pDestination[3] = pSource[3]; + } + return true; +} +#else +bool swapABCDtoCBAD_SSE2(sal_uInt8* pSource, sal_uInt8* pDestination, sal_Int32 nScanlineSize) +{ + return false; +} +#endif + +}} // end vcl::scanline + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/vcl/source/bitmap/ScanlineToolsSSSE3.cxx b/vcl/source/bitmap/ScanlineToolsSSSE3.cxx new file mode 100644 index 000000000000..bced320b4809 --- /dev/null +++ b/vcl/source/bitmap/ScanlineToolsSSSE3.cxx @@ -0,0 +1,61 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is 
part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + */ + +#include "ScanlineTools.hxx" + +#include <tools/simdsupport.hxx> + +#if defined(LO_SSSE3_AVAILABLE) +#include <tmmintrin.h> +#endif + +namespace vcl { +namespace scanline { + +#if defined(LO_SSSE3_AVAILABLE) +bool swapABCDtoCBAD_SSSE3(sal_uInt8* pSource, sal_uInt8* pDestination, sal_Int32 nScanlineSize) +{ + __m128i aShuffleMask = _mm_set_epi8(15, 12, 13, 14, 11, 8, 9, 10, 7, 4, 5, 6, 3, 0, 1, 2); + + sal_Int32 nBlocks = nScanlineSize / 16; + + if (nBlocks > 0) + { + __m128i* pSource128 = reinterpret_cast<__m128i*>(pSource); + __m128i* pDestination128 = reinterpret_cast<__m128i*>(pDestination); + + for (sal_Int32 x = 0; x < nBlocks; ++x, ++pDestination128, ++pSource128) + { + // _mm_lddqu_si128 - faster unaligned load with ssse3 + _mm_storeu_si128(pDestination128, _mm_shuffle_epi8(_mm_lddqu_si128(pSource128), aShuffleMask)); + } + } + + pSource += nBlocks * 16; + pDestination += nBlocks * 16; + + for (sal_Int32 i = nBlocks * 16; i < nScanlineSize; i += 4, pSource += 4, pDestination += 4) + { + pDestination[0] = pSource[2]; + pDestination[1] = pSource[1]; + pDestination[2] = pSource[0]; + pDestination[3] = pSource[3]; + } + return true; +} +#else +bool swapABCDtoCBAD_SSSE3(sal_uInt8* pSource, sal_uInt8* pDestination, sal_Int32 nScanlineSize) +{ + return false; +} +#endif +}} // end vcl::scanline + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
_______________________________________________ Libreoffice-commits mailing list [email protected] https://lists.freedesktop.org/mailman/listinfo/libreoffice-commits
