commit:     f42f8ed5e0eb29a8d633471921aebf9db4f9bb08
Author:     Alessandro Barbieri <lssndrbarbieri <AT> gmail <DOT> com>
AuthorDate: Thu Apr 29 03:21:10 2021 +0000
Commit:     Michał Górny <mgorny <AT> gentoo <DOT> org>
CommitDate: Thu Apr 29 03:24:54 2021 +0000
URL:        https://gitweb.gentoo.org/repo/proj/guru.git/commit/?id=f42f8ed5

dev-libs/fsst: new package

Package-Manager: Portage-3.0.18, Repoman-3.0.3
Signed-off-by: Alessandro Barbieri <lssndrbarbieri <AT> gmail.com>

 dev-libs/fsst/Manifest                  |  1 +
 dev-libs/fsst/fsst-0_pre20200830.ebuild | 29 +++++++++++++++++++++++++++++
 dev-libs/fsst/metadata.xml              | 22 ++++++++++++++++++++++
 3 files changed, 52 insertions(+)

diff --git a/dev-libs/fsst/Manifest b/dev-libs/fsst/Manifest
new file mode 100644
index 000000000..417198ca8
--- /dev/null
+++ b/dev-libs/fsst/Manifest
@@ -0,0 +1 @@
+DIST fsst-0_pre20200830.tar.gz 32289281 BLAKE2B 21184f7d80193ebcc279f38b8fdc2be563a65a7296ce226c8ae4da19cbd946b1bb412c5f4c661e3ad0405b03b57f83b4257ecf78f9642fb09a9eccd56616a8b1 SHA512 9dd416d0a711a6c38e8e0d8b445f328e5826096293dc1f1152ae3e67470d2f8f1d9df2bb88815f1178b67c8cd0ad130f9fa9b59a9547bcc272d37782c239d7b7

diff --git a/dev-libs/fsst/fsst-0_pre20200830.ebuild b/dev-libs/fsst/fsst-0_pre20200830.ebuild
new file mode 100644
index 000000000..6c49b03ba
--- /dev/null
+++ b/dev-libs/fsst/fsst-0_pre20200830.ebuild
@@ -0,0 +1,29 @@
+# Copyright 2021 Gentoo Authors
+# Distributed under the terms of the GNU General Public License v2
+
+EAPI=7
+
+inherit cmake
+
+COMMIT="fffb613071cb44319c0d6b743a8d6eafc2ed2ad7"
+DESCRIPTION="Fast Static Symbol Table: fast text compression that allows random access"
+HOMEPAGE="https://github.com/cwida/fsst"
+SRC_URI="https://github.com/cwida/fsst/archive/${COMMIT}.tar.gz -> ${P}.tar.gz"
+
+LICENSE="MIT"
+SLOT="0"
+KEYWORDS="~amd64"
+
+BDEPEND="app-admin/chrpath"
+RDEPEND="${DEPEND}"
+
+S="${WORKDIR}/${PN}-${COMMIT}"
+
+src_install() { # install phase: stages the built files by hand using the helpers below
+       chrpath -d "${BUILD_DIR}/fsst" || die # chrpath -d on the built binary (presumably strips its RPATH - confirm in chrpath(1)); abort on failure
+
+       doheader fsst.h libfsst.hpp # headers given by relative path (presumably resolved against ${S} - confirm)
+       dolib.so "${BUILD_DIR}/libfsst.so" # shared library taken from the build directory
+       dobin "${BUILD_DIR}/fsst" # executable taken from the build directory
+       dodoc -r README.md fsst-presentation* fsstcompression.pdf # README, presentation file(s) matched by the glob, and the paper; -r recurses into directories
+}

diff --git a/dev-libs/fsst/metadata.xml b/dev-libs/fsst/metadata.xml
new file mode 100644
index 000000000..2f7e6891c
--- /dev/null
+++ b/dev-libs/fsst/metadata.xml
@@ -0,0 +1,22 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE pkgmetadata SYSTEM 'http://www.gentoo.org/dtd/metadata.dtd'>
+<pkgmetadata>
+       <longdescription lang="en">
+FSST: Fast Static Symbol Table compression
+see the PVLDB paper https://github.com/cwida/fsst/raw/master/fsstcompression.pdf
+
+FSST is a compression scheme focused on string/text data: it can compress strings from distributions with many different values (i.e. where dictionary compression will not work well). It allows *random-access* to compressed data: it is not block-based, so individual strings can be decompressed without touching the surrounding data in a compressed block. When compared to e.g. LZ4 (which is block-based), FSST further achieves similar decompression speed and compression speed, and better compression ratio.
+
+FSST encodes strings using a symbol table -- but it works on pieces of the string, as it maps "symbols" (1-8 byte sequences) onto "codes" (single-bytes). FSST can also represent a byte as an exception (255 followed by the original byte). Hence, compression transforms a sequence of bytes into a (supposedly shorter) sequence of codes or escaped bytes. These shorter byte-sequences could be seen as strings again and fit in whatever your program is that manipulates strings. An optional 0-terminated mode (like, C-strings) is also supported.
+
+FSST ensures that strings that are equal, are also equal in their compressed form. This means equality comparisons can be performed without decompressing the strings.
+
+FSST compression is quite useful in database systems and data file formats. It e.g., allows fine-grained decompression of values in case of selection predicates that are pushed down into a scan operator. But, very often FSST even allows to postpone decompression of string data. This means hash tables (in joins and aggregations) become smaller, and network communication (in case of distributed query processing) is reduced. All of this without requiring much structural changes to existing systems: after all, FSST compressed strings still remain strings.
+
+The implementation of FSST is quite portable, using CMake and has been verified to work on 64-bits x86 computers running Linux, MacOS and Windows.
+       </longdescription>
+       <upstream>
+               <bugs-to>https://github.com/cwida/fsst/issues</bugs-to>
+               <remote-id type="github">cwida/fsst</remote-id>
+       </upstream>
+</pkgmetadata>

Reply via email to