This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 8740049402c [third-party](faiss) Enable FAISS integration in Doris. 
(#49644)
8740049402c is described below

commit 8740049402c77282e8950fdc8f2144452f2003bc
Author: zhiqiang <hezhiqi...@selectdb.com>
AuthorDate: Tue Apr 1 09:01:46 2025 +0800

    [third-party](faiss) Enable FAISS integration in Doris. (#49644)
    
    ### What problem does this PR solve?
    
    Enable FAISS integration in Doris.
    
    faiss depends on OpenMP, BLAS and LAPACK.
    
    OpenMP is distributed with gcc/llvm.
    OpenBLAS provides both BLAS and LAPACK implementations, so OpenBLAS is introduced as well.
    
    If you are using an ldb-toolchain release older than
    https://github.com/amosbird/ldb_toolchain_gen/releases/tag/v0.24, gcc
    should be used to compile openblas and faiss, since libomp.a is missing.
    
    Build new thirdparty:
    ```
    sh build-thirdparty.sh openblas
    sh build-thirdparty.sh faiss
    ```
    
    Run `export ENABLE_BUILD_FAISS=ON` before building to make Doris link with faiss.
---
 be/CMakeLists.txt                     |  3 ++
 be/cmake/thirdparty.cmake             |  5 +++
 build.sh                              |  8 +++++
 thirdparty/build-thirdparty.sh        | 60 +++++++++++++++++++++++++++++++
 thirdparty/download-thirdparty.sh     | 13 +++++++
 thirdparty/patches/faiss-1.10.0.patch | 66 +++++++++++++++++++++++++++++++++++
 thirdparty/vars.sh                    | 15 ++++++++
 7 files changed, 170 insertions(+)

diff --git a/be/CMakeLists.txt b/be/CMakeLists.txt
index a77c796b381..e7dc2961a4b 100644
--- a/be/CMakeLists.txt
+++ b/be/CMakeLists.txt
@@ -181,6 +181,9 @@ endif()
 
 set(GPERFTOOLS_HOME "${THIRDPARTY_DIR}/gperftools")
 
+option(BUILD_FAISS "Link doris with faiss for vector similarity search" OFF)
+message(STATUS "build faiss: ${BUILD_FAISS}")
+
 include (cmake/thirdparty.cmake)
 
 find_program(THRIFT_COMPILER thrift ${CMAKE_SOURCE_DIR}/bin)
diff --git a/be/cmake/thirdparty.cmake b/be/cmake/thirdparty.cmake
index a165c4ab203..1250e8ab1f5 100644
--- a/be/cmake/thirdparty.cmake
+++ b/be/cmake/thirdparty.cmake
@@ -175,3 +175,8 @@ endif()
 add_thirdparty(icuuc LIB64)
 add_thirdparty(icui18n LIB64)
 add_thirdparty(icudata LIB64)
+
+if (BUILD_FAISS)
+    add_thirdparty(openblas LIB64)
+    add_thirdparty(faiss LIB64)
+endif()
diff --git a/build.sh b/build.sh
index 3774803c533..3fbdc4df6c1 100755
--- a/build.sh
+++ b/build.sh
@@ -70,6 +70,7 @@ Usage: $0 <options>
     DISABLE_BE_JAVA_EXTENSIONS  If set DISABLE_BE_JAVA_EXTENSIONS=ON, we will 
do not build binary with java-udf,hudi-scanner,jdbc-scanner and so on Default 
is OFF.
     DISABLE_JAVA_CHECK_STYLE    If set DISABLE_JAVA_CHECK_STYLE=ON, it will 
skip style check of java code in FE.
     DISABLE_BUILD_AZURE         If set DISABLE_BUILD_AZURE=ON, it will not 
build azure into BE.
+    ENABLE_BUILD_FAISS          If set ENABLE_BUILD_FAISS=ON, it will link BE with 
faiss.
 
   Eg.
     $0                                      build all
@@ -173,6 +174,7 @@ PARAMETER_COUNT="$#"
 PARAMETER_FLAG=0
 DENABLE_CLANG_COVERAGE='OFF'
 BUILD_AZURE='ON'
+BUILD_FAISS='OFF'
 BUILD_UI=1
 if [[ "$#" == 1 ]]; then
     # default
@@ -472,6 +474,10 @@ if [[ -n "${DISABLE_BUILD_AZURE}" ]]; then
     BUILD_AZURE='OFF'
 fi
 
+if [[ -n "${ENABLE_BUILD_FAISS}" ]]; then
+    BUILD_FAISS='ON'
+fi
+
 if [[ -z "${ENABLE_INJECTION_POINT}" ]]; then
     ENABLE_INJECTION_POINT='OFF'
 fi
@@ -640,6 +646,7 @@ if [[ "${BUILD_BE}" -eq 1 ]]; then
         -DENABLE_CLANG_COVERAGE="${DENABLE_CLANG_COVERAGE}" \
         -DDORIS_JAVA_HOME="${JAVA_HOME}" \
         -DBUILD_AZURE="${BUILD_AZURE}" \
+        -DBUILD_FAISS="${BUILD_FAISS}" \
         "${DORIS_HOME}/be"
 
     if [[ "${OUTPUT_BE_BINARY}" -eq 1 ]]; then
@@ -681,6 +688,7 @@ if [[ "${BUILD_CLOUD}" -eq 1 ]]; then
         -DEXTRA_CXX_FLAGS="${EXTRA_CXX_FLAGS}" \
         -DBUILD_AZURE="${BUILD_AZURE}" \
         -DBUILD_CHECK_META="${BUILD_CHECK_META:-OFF}" \
+        -DBUILD_FAISS="${BUILD_FAISS}" \
         "${DORIS_HOME}/cloud/"
     "${BUILD_SYSTEM}" -j "${PARALLEL}"
     "${BUILD_SYSTEM}" install
diff --git a/thirdparty/build-thirdparty.sh b/thirdparty/build-thirdparty.sh
index b409349eec3..cdd5bec050c 100755
--- a/thirdparty/build-thirdparty.sh
+++ b/thirdparty/build-thirdparty.sh
@@ -1885,6 +1885,66 @@ build_pugixml() {
     cp "${TP_SOURCE_DIR}/${PUGIXML_SOURCE}/src/pugiconfig.hpp" 
"${TP_INSTALL_DIR}/include/"
 }
 
+build_openblas() {
+    check_if_source_exist "${OPENBLAS_SOURCE}"
+    cd "${TP_SOURCE_DIR}/${OPENBLAS_SOURCE}"
+
+    rm -rf "${BUILD_DIR}"
+    mkdir -p "${BUILD_DIR}"
+    cd "${BUILD_DIR}"
+    OPENBLAS_CMAKE_OPTIONS=(
+        "-DCMAKE_PREFIX_PATH=${TP_INSTALL_DIR}"
+        "-DCMAKE_INSTALL_PREFIX=${TP_INSTALL_DIR}"
+        "-DCMAKE_BUILD_TYPE=Release"
+        "-DBUILD_WITHOUT_LAPACK=OFF"
+        "-DNO_SHARED=TRUE"
+        "-DNO_AVX512=TRUE"
+        "-DC_LAPACK=TRUE"
+        "-DUSE_OPENMP=TRUE"
+        "-DBUILD_STATIC_LIBS=ON"
+        "-DNOFORTRAN=TRUE"
+        "-DBUILD_TESTING=OFF"
+        "-DBUILD_RELAPACK=ON"
+        "-DBUILD_BENCHMARKS=OFF"
+    )
+
+    echo "Building openblas at $(pwd) with cmake parameters: 
${OPENBLAS_CMAKE_OPTIONS[*]}"
+
+    "${CMAKE_CMD}" -G "${GENERATOR}" "${OPENBLAS_CMAKE_OPTIONS[@]}" ..
+    "${BUILD_SYSTEM}" -j "${PARALLEL}"
+    "${BUILD_SYSTEM}" install
+}
+
+build_faiss() {
+    check_if_source_exist "${FAISS_SOURCE}"
+    echo "Building faiss ${FAISS_SOURCE}"
+    cd "${TP_SOURCE_DIR}"
+    # If the faiss dir does not exist, create a symlink to the faiss source dir.
+    # This symlink is necessary since the faiss source code must be compiled in a 
directory named faiss.
+    if [[ ! -d "${TP_SOURCE_DIR}/faiss" ]]; then
+        ln -s "${FAISS_SOURCE}" faiss
+    fi
+    cd "${TP_SOURCE_DIR}/faiss"
+
+    rm -rf "${BUILD_DIR}"
+    mkdir -p "${BUILD_DIR}"
+    cd "${BUILD_DIR}"
+
+    FAISS_CMAKE_OPTIONS=(
+        "-DDORIS_THIRD_LIB_INSTALL_DIR=${TP_INSTALL_DIR}"
+        "-DCMAKE_INSTALL_PREFIX=${TP_INSTALL_DIR}"
+        "-DCMAKE_BUILD_TYPE=Release"
+        "-DFAISS_ENABLE_GPU=OFF"
+        "-DFAISS_ENABLE_PYTHON=OFF"
+    )
+
+    echo "Building faiss at $(pwd) with cmake parameters: 
${FAISS_CMAKE_OPTIONS[*]}"
+
+    "${CMAKE_CMD}" -G "${GENERATOR}" "${FAISS_CMAKE_OPTIONS[@]}" ..
+    "${BUILD_SYSTEM}" -j "${PARALLEL}"
+    "${BUILD_SYSTEM}" install
+}
+
 if [[ "${#packages[@]}" -eq 0 ]]; then
     packages=(
         jindofs
diff --git a/thirdparty/download-thirdparty.sh 
b/thirdparty/download-thirdparty.sh
index b80048025f7..89c04f6fea9 100755
--- a/thirdparty/download-thirdparty.sh
+++ b/thirdparty/download-thirdparty.sh
@@ -590,5 +590,18 @@ if [[ " ${TP_ARCHIVES[*]} " =~ " THRIFT " ]]; then
     echo "Finished patching ${THRIFT_SOURCE}"
 fi
 
+# patch faiss cmake so that we can use openblas
+if [[ " ${TP_ARCHIVES[*]} " =~ " FAISS " ]]; then
+    if [[ "${FAISS_SOURCE}" = "faiss-1.10.0" ]]; then
+        cd "${TP_SOURCE_DIR}/${FAISS_SOURCE}"
+        if [[ ! -f "${PATCHED_MARK}" ]]; then
+            patch -p2 <"${TP_PATCH_DIR}/faiss-1.10.0.patch"
+            touch "${PATCHED_MARK}"
+        fi
+        cd -
+    fi
+    echo "Finished patching ${FAISS_SOURCE}"
+fi
+
 
 # vim: ts=4 sw=4 ts=4 tw=100:
diff --git a/thirdparty/patches/faiss-1.10.0.patch 
b/thirdparty/patches/faiss-1.10.0.patch
new file mode 100644
index 00000000000..8279f4d71a1
--- /dev/null
+++ b/thirdparty/patches/faiss-1.10.0.patch
@@ -0,0 +1,66 @@
+--- src/faiss-1.10.0/faiss/CMakeLists.txt      2025-02-01 05:52:00.000000000 
+0800
++++ src/faiss-1.10.0/faiss/CMakeLists.txt.new  2025-03-28 19:45:37.513624103 
+0800
+@@ -381,19 +381,51 @@
+   target_link_libraries(faiss_avx512 PRIVATE ${MKL_LIBRARIES})
+   target_link_libraries(faiss_avx512_spr PRIVATE ${MKL_LIBRARIES})
+ else()
+-  find_package(BLAS REQUIRED)
+-  target_link_libraries(faiss PRIVATE ${BLAS_LIBRARIES})
+-  target_link_libraries(faiss_avx2 PRIVATE ${BLAS_LIBRARIES})
+-  target_link_libraries(faiss_avx512 PRIVATE ${BLAS_LIBRARIES})
+-  target_link_libraries(faiss_avx512_spr PRIVATE ${BLAS_LIBRARIES})
+-  target_link_libraries(faiss_sve PRIVATE ${BLAS_LIBRARIES})
++  # Look for OpenBLAS in DORIS_THIRD_LIB_INSTALL_DIR first; find_package is the fallback
++  if(DEFINED DORIS_THIRD_LIB_INSTALL_DIR)
++    set(OpenBLAS_ROOT ${DORIS_THIRD_LIB_INSTALL_DIR})
++    
++    # Check if libopenblas exists in DORIS_THIRD_LIB_INSTALL_DIR
++    if(EXISTS "${DORIS_THIRD_LIB_INSTALL_DIR}/lib/libopenblas.a")
++      set(OpenBLAS_LIB "${DORIS_THIRD_LIB_INSTALL_DIR}/lib/libopenblas.a")
++    endif()
++    # Warn if OpenBLAS_LIB is not found; the BLAS/LAPACK fallback below is used
++    if(NOT OpenBLAS_LIB)
++      message(WARNING "OpenBLAS not found in DORIS_THIRD_LIB_INSTALL_DIR: 
${DORIS_THIRD_LIB_INSTALL_DIR}")
++    endif() 
+ 
+-  find_package(LAPACK REQUIRED)
+-  target_link_libraries(faiss PRIVATE ${LAPACK_LIBRARIES})
+-  target_link_libraries(faiss_avx2 PRIVATE ${LAPACK_LIBRARIES})
+-  target_link_libraries(faiss_avx512 PRIVATE ${LAPACK_LIBRARIES})
+-  target_link_libraries(faiss_avx512_spr PRIVATE ${LAPACK_LIBRARIES})
+-  target_link_libraries(faiss_sve PRIVATE ${LAPACK_LIBRARIES})
++    if(OpenBLAS_LIB)
++      set(OpenBLAS_LIBRARIES ${OpenBLAS_LIB})
++      set(OpenBLAS_FOUND TRUE)
++      message(STATUS "Found OpenBLAS in DORIS_THIRD_LIB_INSTALL_DIR: 
${OpenBLAS_LIB}")
++    endif()
++  else()
++    message(WARNING "DORIS_THIRD_LIB_INSTALL_DIR is not defined. Please set 
it to the directory where OpenBLAS is installed.")
++  endif()
++  
++  if(OpenBLAS_FOUND)
++    message(STATUS "Using OpenBLAS: ${OpenBLAS_LIBRARIES}")
++    target_link_libraries(faiss PRIVATE ${OpenBLAS_LIBRARIES})
++    target_link_libraries(faiss_avx2 PRIVATE ${OpenBLAS_LIBRARIES})
++    target_link_libraries(faiss_avx512 PRIVATE ${OpenBLAS_LIBRARIES})
++    target_link_libraries(faiss_avx512_spr PRIVATE ${OpenBLAS_LIBRARIES})
++    target_link_libraries(faiss_sve PRIVATE ${OpenBLAS_LIBRARIES})
++  else()
++    # Fall back to separate BLAS and LAPACK if OpenBLAS is not found
++    find_package(BLAS REQUIRED)
++    target_link_libraries(faiss PRIVATE ${BLAS_LIBRARIES})
++    target_link_libraries(faiss_avx2 PRIVATE ${BLAS_LIBRARIES})
++    target_link_libraries(faiss_avx512 PRIVATE ${BLAS_LIBRARIES})
++    target_link_libraries(faiss_avx512_spr PRIVATE ${BLAS_LIBRARIES})
++    target_link_libraries(faiss_sve PRIVATE ${BLAS_LIBRARIES})
++
++    find_package(LAPACK REQUIRED)
++    target_link_libraries(faiss PRIVATE ${LAPACK_LIBRARIES})
++    target_link_libraries(faiss_avx2 PRIVATE ${LAPACK_LIBRARIES})
++    target_link_libraries(faiss_avx512 PRIVATE ${LAPACK_LIBRARIES})
++    target_link_libraries(faiss_avx512_spr PRIVATE ${LAPACK_LIBRARIES})
++    target_link_libraries(faiss_sve PRIVATE ${LAPACK_LIBRARIES})
++  endif()
+ endif()
+ 
+ install(TARGETS faiss
diff --git a/thirdparty/vars.sh b/thirdparty/vars.sh
index 33d34782861..ce4d3c370fb 100644
--- a/thirdparty/vars.sh
+++ b/thirdparty/vars.sh
@@ -538,6 +538,19 @@ PUGIXML_NAME=pugixml-1.15.tar.gz
 PUGIXML_SOURCE=pugixml-1.15
 PUGIXML_MD5SUM="3b894c29455eb33a40b165c6e2de5895"
 
+# openblas
+OPENBLAS_DOWNLOAD="https://github.com/OpenMathLib/OpenBLAS/releases/download/v0.3.29/OpenBLAS-0.3.29.tar.gz";
+OPENBLAS_NAME="OpenBLAS-0.3.29.tar.gz"
+OPENBLAS_SOURCE="OpenBLAS-0.3.29"
+OPENBLAS_MD5SUM="853a0c5c0747c5943e7ef4bbb793162d"
+
+# faiss
+FAISS_DOWNLOAD="https://github.com/facebookresearch/faiss/archive/refs/tags/v1.10.0.tar.gz";
+FAISS_NAME="faiss-1.10.0.tar.gz"
+FAISS_SOURCE="faiss-1.10.0"
+FAISS_MD5SUM="f31edf2492808b27cc963d0ab316a205"
+
+
 # all thirdparties which need to be downloaded is set in array TP_ARCHIVES
 export TP_ARCHIVES=(
     'LIBEVENT'
@@ -618,6 +631,8 @@ export TP_ARCHIVES=(
     'ICU'
     'JINDOFS'
     'PUGIXML'
+    'OPENBLAS'
+    'FAISS'
 )
 
 if [[ "$(uname -s)" == 'Darwin' ]]; then


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to