This is an automated email from the ASF dual-hosted git repository. yiguolei pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new 8740049402c [third-party](faiss) Enable FAISS integration in Doris. (#49644) 8740049402c is described below commit 8740049402c77282e8950fdc8f2144452f2003bc Author: zhiqiang <hezhiqi...@selectdb.com> AuthorDate: Tue Apr 1 09:01:46 2025 +0800 [third-party](faiss) Enable FAISS integration in Doris. (#49644) ### What problem does this PR solve? Enable FAISS integration in Doris. FAISS depends on OpenMP, BLAS and LAPACK. OpenMP is distributed with gcc/llvm. OpenBLAS provides both the BLAS and LAPACK implementations, so OpenBLAS is introduced as a new third-party dependency. If you are using an ldb-toolchain release older than https://github.com/amosbird/ldb_toolchain_gen/releases/tag/v0.24, gcc should be used to compile openblas and faiss, since libomp.a is missing from those releases. Build new thirdparty: ``` sh build-thirdparty.sh openblas sh build-thirdparty.sh faiss ``` Run `export ENABLE_BUILD_FAISS=ON` before building to make Doris link with faiss. --- be/CMakeLists.txt | 3 ++ be/cmake/thirdparty.cmake | 5 +++ build.sh | 8 +++++ thirdparty/build-thirdparty.sh | 60 +++++++++++++++++++++++++++++++ thirdparty/download-thirdparty.sh | 13 +++++++ thirdparty/patches/faiss-1.10.0.patch | 66 +++++++++++++++++++++++++++++++++++ thirdparty/vars.sh | 15 ++++++++ 7 files changed, 170 insertions(+) diff --git a/be/CMakeLists.txt b/be/CMakeLists.txt index a77c796b381..e7dc2961a4b 100644 --- a/be/CMakeLists.txt +++ b/be/CMakeLists.txt @@ -181,6 +181,9 @@ endif() set(GPERFTOOLS_HOME "${THIRDPARTY_DIR}/gperftools") +option(BUILD_FAISS "Link doris with faiss for vector similarity search" OFF) +message(STATUS "build faiss: ${BUILD_FAISS}") + include (cmake/thirdparty.cmake) find_program(THRIFT_COMPILER thrift ${CMAKE_SOURCE_DIR}/bin) diff --git a/be/cmake/thirdparty.cmake b/be/cmake/thirdparty.cmake index a165c4ab203..1250e8ab1f5 100644 --- a/be/cmake/thirdparty.cmake +++ b/be/cmake/thirdparty.cmake @@ -175,3 +175,8 @@ endif() add_thirdparty(icuuc LIB64) add_thirdparty(icui18n LIB64) 
add_thirdparty(icudata LIB64) + +if (BUILD_FAISS) + add_thirdparty(openblas LIB64) + add_thirdparty(faiss LIB64) +endif() diff --git a/build.sh b/build.sh index 3774803c533..3fbdc4df6c1 100755 --- a/build.sh +++ b/build.sh @@ -70,6 +70,7 @@ Usage: $0 <options> DISABLE_BE_JAVA_EXTENSIONS If set DISABLE_BE_JAVA_EXTENSIONS=ON, we will do not build binary with java-udf,hudi-scanner,jdbc-scanner and so on Default is OFF. DISABLE_JAVA_CHECK_STYLE If set DISABLE_JAVA_CHECK_STYLE=ON, it will skip style check of java code in FE. DISABLE_BUILD_AZURE If set DISABLE_BUILD_AZURE=ON, it will not build azure into BE. + ENABLE_BUILD_FAISS If set ENABLE_BUILD_FAISS=ON, it will link BE with faiss. Eg. $0 build all @@ -173,6 +174,7 @@ PARAMETER_COUNT="$#" PARAMETER_FLAG=0 DENABLE_CLANG_COVERAGE='OFF' BUILD_AZURE='ON' +BUILD_FAISS='OFF' BUILD_UI=1 if [[ "$#" == 1 ]]; then # default @@ -472,6 +474,10 @@ if [[ -n "${DISABLE_BUILD_AZURE}" ]]; then BUILD_AZURE='OFF' fi +if [[ -n "${ENABLE_BUILD_FAISS}" ]]; then + BUILD_FAISS='ON' +fi + if [[ -z "${ENABLE_INJECTION_POINT}" ]]; then ENABLE_INJECTION_POINT='OFF' fi @@ -640,6 +646,7 @@ if [[ "${BUILD_BE}" -eq 1 ]]; then -DENABLE_CLANG_COVERAGE="${DENABLE_CLANG_COVERAGE}" \ -DDORIS_JAVA_HOME="${JAVA_HOME}" \ -DBUILD_AZURE="${BUILD_AZURE}" \ + -DBUILD_FAISS="${BUILD_FAISS}" \ "${DORIS_HOME}/be" if [[ "${OUTPUT_BE_BINARY}" -eq 1 ]]; then @@ -681,6 +688,7 @@ if [[ "${BUILD_CLOUD}" -eq 1 ]]; then -DEXTRA_CXX_FLAGS="${EXTRA_CXX_FLAGS}" \ -DBUILD_AZURE="${BUILD_AZURE}" \ -DBUILD_CHECK_META="${BUILD_CHECK_META:-OFF}" \ + -DBUILD_FAISS="${BUILD_FAISS}" \ "${DORIS_HOME}/cloud/" "${BUILD_SYSTEM}" -j "${PARALLEL}" "${BUILD_SYSTEM}" install diff --git a/thirdparty/build-thirdparty.sh b/thirdparty/build-thirdparty.sh index b409349eec3..cdd5bec050c 100755 --- a/thirdparty/build-thirdparty.sh +++ b/thirdparty/build-thirdparty.sh @@ -1885,6 +1885,66 @@ build_pugixml() { cp "${TP_SOURCE_DIR}/${PUGIXML_SOURCE}/src/pugiconfig.hpp" "${TP_INSTALL_DIR}/include/" } 
+build_openblas() { + check_if_source_exist "${OPENBLAS_SOURCE}" + cd "${TP_SOURCE_DIR}/${OPENBLAS_SOURCE}" + + rm -rf "${BUILD_DIR}" + mkdir -p "${BUILD_DIR}" + cd "${BUILD_DIR}" + OPENBLAS_CMAKE_OPTIONS=( + "-DCMAKE_PREFIX_PATH=${TP_INSTALL_DIR}" + "-DCMAKE_INSTALL_PREFIX=${TP_INSTALL_DIR}" + "-DCMAKE_BUILD_TYPE=Release" + "-DBUILD_WITHOUT_LAPACK=OFF" + "-DNO_SHARED=TRUE" + "-DNO_AVX512=TRUE" + "-DC_LAPACK=TRUE" + "-DUSE_OPENMP=TRUE" + "-DBUILD_STATIC_LIBS=ON" + "-DNOFORTRAN=TRUE" + "-DBUILD_TESTING=OFF" + "-DBUILD_RELAPACK=ON" + "-DBUILD_BENCHMARKS=OFF" + ) + + echo "Building openblas at $(pwd) with cmake parameters: ${OPENBLAS_CMAKE_OPTIONS[*]}" + + "${CMAKE_CMD}" -G "${GENERATOR}" "${OPENBLAS_CMAKE_OPTIONS[@]}" .. + "${BUILD_SYSTEM}" -j "${PARALLEL}" + "${BUILD_SYSTEM}" install +} + +build_faiss() { + check_if_source_exist "${FAISS_SOURCE}" + echo "Building faiss ${FAISS_SOURCE}" + cd "${TP_SOURCE_DIR}" + # if faiss dir not exists, create a symlink to faiss source dir + # this symlink is necessary since faiss source code must be compiled in a directory named faiss. + if [[ ! -d "${TP_SOURCE_DIR}/faiss" ]]; then + ln -s "${FAISS_SOURCE}" faiss + fi + cd "${TP_SOURCE_DIR}/faiss" + + rm -rf "${BUILD_DIR}" + mkdir -p "${BUILD_DIR}" + cd "${BUILD_DIR}" + + FAISS_CMAKE_OPTIONS=( + "-DDORIS_THIRD_LIB_INSTALL_DIR=${TP_INSTALL_DIR}" + "-DCMAKE_INSTALL_PREFIX=${TP_INSTALL_DIR}" + "-DCMAKE_BUILD_TYPE=Release" + "-DFAISS_ENABLE_GPU=OFF" + "-DFAISS_ENABLE_PYTHON=OFF" + ) + + echo "Building faiss at $(pwd) with cmake parameters: ${FAISS_CMAKE_OPTIONS[*]}" + + "${CMAKE_CMD}" -G "${GENERATOR}" "${FAISS_CMAKE_OPTIONS[@]}" .. 
+ "${BUILD_SYSTEM}" -j "${PARALLEL}" + "${BUILD_SYSTEM}" install +} + if [[ "${#packages[@]}" -eq 0 ]]; then packages=( jindofs diff --git a/thirdparty/download-thirdparty.sh b/thirdparty/download-thirdparty.sh index b80048025f7..89c04f6fea9 100755 --- a/thirdparty/download-thirdparty.sh +++ b/thirdparty/download-thirdparty.sh @@ -590,5 +590,18 @@ if [[ " ${TP_ARCHIVES[*]} " =~ " THRIFT " ]]; then echo "Finished patching ${THRIFT_SOURCE}" fi +# patch faiss cmake so that we can use openblas +if [[ " ${TP_ARCHIVES[*]} " =~ " FAISS " ]]; then + if [[ "${FAISS_SOURCE}" = "faiss-1.10.0" ]]; then + cd "${TP_SOURCE_DIR}/${FAISS_SOURCE}" + if [[ ! -f "${PATCHED_MARK}" ]]; then + patch -p2 <"${TP_PATCH_DIR}/faiss-1.10.0.patch" + touch "${PATCHED_MARK}" + fi + cd - + fi + echo "Finished patching ${FAISS_SOURCE}" +fi + # vim: ts=4 sw=4 ts=4 tw=100: diff --git a/thirdparty/patches/faiss-1.10.0.patch b/thirdparty/patches/faiss-1.10.0.patch new file mode 100644 index 00000000000..8279f4d71a1 --- /dev/null +++ b/thirdparty/patches/faiss-1.10.0.patch @@ -0,0 +1,66 @@ +--- src/faiss-1.10.0/faiss/CMakeLists.txt 2025-02-01 05:52:00.000000000 +0800 ++++ src/faiss-1.10.0/faiss/CMakeLists.txt.new 2025-03-28 19:45:37.513624103 +0800 +@@ -381,19 +381,51 @@ + target_link_libraries(faiss_avx512 PRIVATE ${MKL_LIBRARIES}) + target_link_libraries(faiss_avx512_spr PRIVATE ${MKL_LIBRARIES}) + else() +- find_package(BLAS REQUIRED) +- target_link_libraries(faiss PRIVATE ${BLAS_LIBRARIES}) +- target_link_libraries(faiss_avx2 PRIVATE ${BLAS_LIBRARIES}) +- target_link_libraries(faiss_avx512 PRIVATE ${BLAS_LIBRARIES}) +- target_link_libraries(faiss_avx512_spr PRIVATE ${BLAS_LIBRARIES}) +- target_link_libraries(faiss_sve PRIVATE ${BLAS_LIBRARIES}) ++ # Prefer the static OpenBLAS in DORIS_THIRD_LIB_INSTALL_DIR; find_package(BLAS/LAPACK) below is the fallback ++ if(DEFINED DORIS_THIRD_LIB_INSTALL_DIR) ++ set(OpenBLAS_ROOT ${DORIS_THIRD_LIB_INSTALL_DIR}) ++ ++ # Check if libopenblas exists in DORIS_THIRD_LIB_INSTALL_DIR ++ if(EXISTS 
"${DORIS_THIRD_LIB_INSTALL_DIR}/lib/libopenblas.a") ++ set(OpenBLAS_LIB "${DORIS_THIRD_LIB_INSTALL_DIR}/lib/libopenblas.a") ++ endif() ++ # Only warn if OpenBLAS_LIB is not found; the BLAS/LAPACK find_package fallback below is used instead ++ if(NOT OpenBLAS_LIB) ++ message(WARNING "OpenBLAS not found in DORIS_THIRD_LIB_INSTALL_DIR: ${DORIS_THIRD_LIB_INSTALL_DIR}") ++ endif() + +- find_package(LAPACK REQUIRED) +- target_link_libraries(faiss PRIVATE ${LAPACK_LIBRARIES}) +- target_link_libraries(faiss_avx2 PRIVATE ${LAPACK_LIBRARIES}) +- target_link_libraries(faiss_avx512 PRIVATE ${LAPACK_LIBRARIES}) +- target_link_libraries(faiss_avx512_spr PRIVATE ${LAPACK_LIBRARIES}) +- target_link_libraries(faiss_sve PRIVATE ${LAPACK_LIBRARIES}) ++ if(OpenBLAS_LIB) ++ set(OpenBLAS_LIBRARIES ${OpenBLAS_LIB}) ++ set(OpenBLAS_FOUND TRUE) ++ message(STATUS "Found OpenBLAS in DORIS_THIRD_LIB_INSTALL_DIR: ${OpenBLAS_LIB}") ++ endif() ++ else() ++ message(WARNING "DORIS_THIRD_LIB_INSTALL_DIR is not defined. Please set it to the directory where OpenBLAS is installed.") ++ endif() ++ ++ if(OpenBLAS_FOUND) ++ message(STATUS "Using OpenBLAS: ${OpenBLAS_LIBRARIES}") ++ target_link_libraries(faiss PRIVATE ${OpenBLAS_LIBRARIES}) ++ target_link_libraries(faiss_avx2 PRIVATE ${OpenBLAS_LIBRARIES}) ++ target_link_libraries(faiss_avx512 PRIVATE ${OpenBLAS_LIBRARIES}) ++ target_link_libraries(faiss_avx512_spr PRIVATE ${OpenBLAS_LIBRARIES}) ++ target_link_libraries(faiss_sve PRIVATE ${OpenBLAS_LIBRARIES}) ++ else() ++ # Fall back to separate BLAS and LAPACK if OpenBLAS is not found ++ find_package(BLAS REQUIRED) ++ target_link_libraries(faiss PRIVATE ${BLAS_LIBRARIES}) ++ target_link_libraries(faiss_avx2 PRIVATE ${BLAS_LIBRARIES}) ++ target_link_libraries(faiss_avx512 PRIVATE ${BLAS_LIBRARIES}) ++ target_link_libraries(faiss_avx512_spr PRIVATE ${BLAS_LIBRARIES}) ++ target_link_libraries(faiss_sve PRIVATE ${BLAS_LIBRARIES}) ++ ++ find_package(LAPACK REQUIRED) ++ target_link_libraries(faiss PRIVATE ${LAPACK_LIBRARIES}) ++ target_link_libraries(faiss_avx2 PRIVATE 
${LAPACK_LIBRARIES}) ++ target_link_libraries(faiss_avx512 PRIVATE ${LAPACK_LIBRARIES}) ++ target_link_libraries(faiss_avx512_spr PRIVATE ${LAPACK_LIBRARIES}) ++ target_link_libraries(faiss_sve PRIVATE ${LAPACK_LIBRARIES}) ++ endif() + endif() + + install(TARGETS faiss diff --git a/thirdparty/vars.sh b/thirdparty/vars.sh index 33d34782861..ce4d3c370fb 100644 --- a/thirdparty/vars.sh +++ b/thirdparty/vars.sh @@ -538,6 +538,19 @@ PUGIXML_NAME=pugixml-1.15.tar.gz PUGIXML_SOURCE=pugixml-1.15 PUGIXML_MD5SUM="3b894c29455eb33a40b165c6e2de5895" +# openblas +OPENBLAS_DOWNLOAD="https://github.com/OpenMathLib/OpenBLAS/releases/download/v0.3.29/OpenBLAS-0.3.29.tar.gz" +OPENBLAS_NAME="OpenBLAS-0.3.29.tar.gz" +OPENBLAS_SOURCE="OpenBLAS-0.3.29" +OPENBLAS_MD5SUM="853a0c5c0747c5943e7ef4bbb793162d" + +# faiss +FAISS_DOWNLOAD="https://github.com/facebookresearch/faiss/archive/refs/tags/v1.10.0.tar.gz" +FAISS_NAME="faiss-1.10.0.tar.gz" +FAISS_SOURCE="faiss-1.10.0" +FAISS_MD5SUM="f31edf2492808b27cc963d0ab316a205" + + # all thirdparties which need to be downloaded is set in array TP_ARCHIVES export TP_ARCHIVES=( 'LIBEVENT' @@ -618,6 +631,8 @@ export TP_ARCHIVES=( 'ICU' 'JINDOFS' 'PUGIXML' + 'OPENBLAS' + 'FAISS' ) if [[ "$(uname -s)" == 'Darwin' ]]; then --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org