This is an automated email from the ASF dual-hosted git repository. jiafengzheng pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris-website.git
The following commit(s) were added to refs/heads/master by this push: new 4ee502fd92c Revert "Revert "Change static files to CDN (#64) 4ee502fd92c is described below commit 4ee502fd92cd6d454e993dc27eeeb82caabd6c4f Author: Jeffrey <color.d...@gmail.com> AuthorDate: Wed Aug 24 14:40:40 2022 +0800 Revert "Revert "Change static files to CDN (#64) * [CDN] Use actions-gh-pages deploy website --- config/custom-docusaurus-plugin.js | 18 ++ config/ssrTemplate.js | 33 +++ docusaurus.config.js | 33 +-- package.json | 2 +- src/scss/components/search.scss | 18 +- src/theme/LoadingRing/LoadingRing.module.css | 47 ++++ src/theme/LoadingRing/LoadingRing.tsx | 19 ++ src/theme/SearchBar/EmptyTemplate.js | 12 + src/theme/SearchBar/SearchBar.jsx | 256 ++++++++++++++++++ src/theme/SearchBar/SearchBar.module.css | 256 ++++++++++++++++++ src/theme/SearchBar/SuggestionTemplate.js | 49 ++++ src/theme/SearchBar/fetchIndexes.js | 30 +++ src/theme/SearchBar/icons.js | 7 + src/theme/SearchBar/index.js | 3 + src/utils/SearchSourceFactory.spec.ts | 100 ++++++++ src/utils/SearchSourceFactory.ts | 83 ++++++ src/utils/__mocks__/proxiedGenerated.ts | 21 ++ src/utils/concatDocumentPath.ts | 3 + src/utils/cutZhWords.spec.ts | 42 +++ src/utils/cutZhWords.ts | 89 +++++++ src/utils/escapeHtml.ts | 15 ++ src/utils/getStemmedPositions.spec.ts | 41 +++ src/utils/getStemmedPositions.ts | 17 ++ src/utils/highlight.spec.ts | 31 +++ src/utils/highlight.ts | 43 ++++ src/utils/highlightStemmed.spec.ts | 165 ++++++++++++ src/utils/highlightStemmed.ts | 124 +++++++++ src/utils/looseTokenize.spec.ts | 9 + src/utils/looseTokenize.ts | 22 ++ src/utils/processTreeStatusOfSearchResults.spec.ts | 83 ++++++ src/utils/processTreeStatusOfSearchResults.ts | 19 ++ src/utils/proxiedGenerated.ts | 2 + src/utils/smartQueries.spec.ts | 285 +++++++++++++++++++++ src/utils/smartQueries.ts | 131 ++++++++++ src/utils/smartTerms.spec.ts | 35 +++ src/utils/smartTerms.ts | 42 +++ src/utils/sortSearchResults.spec.ts | 73 ++++++ 
src/utils/sortSearchResults.ts | 40 +++ src/utils/tokenize.spec.ts | 40 +++ src/utils/tokenize.ts | 32 +++ 40 files changed, 2347 insertions(+), 23 deletions(-) diff --git a/config/custom-docusaurus-plugin.js b/config/custom-docusaurus-plugin.js new file mode 100644 index 00000000000..5ae9f47440e --- /dev/null +++ b/config/custom-docusaurus-plugin.js @@ -0,0 +1,18 @@ +const path = require('path'); + +module.exports = function (context, options) { + return { + name: 'custom-docusaurus-plugin', + configureWebpack(config, isServer, utils) { + return { + output: { + ...config.output, + publicPath: + context.i18n.currentLocale === 'en' + ? 'https://cdn.selectdb.com/' + : 'https://cdn.selectdb.com/zh-CN/', + }, + }; + }, + }; +}; diff --git a/config/ssrTemplate.js b/config/ssrTemplate.js new file mode 100644 index 00000000000..e2c84109b46 --- /dev/null +++ b/config/ssrTemplate.js @@ -0,0 +1,33 @@ +module.exports = { + ssrTemplate: `<!DOCTYPE html> +<html <%~ it.htmlAttributes %>> + <head> + <meta charset="UTF-8"> + <meta name="viewport" content="width=device-width, initial-scale=1.0, minimum-scale=1.0, maximum-scale=1.0, user-scalable=no"> + <meta name="generator" content="Docusaurus v<%= it.version %>"> + <% if (it.noIndex) { %> + <meta name="robots" content="noindex, nofollow" /> + <% } %> + <%~ it.headTags %> + <% it.metaAttributes.forEach((metaAttribute) => { %> + <%~ metaAttribute %> + <% }); %> + <% it.stylesheets.forEach((stylesheet) => { %> + <link rel="stylesheet" href="<%= 'https://cdn.selectdb.com' %><%= it.baseUrl %><%= stylesheet %>" /> + <% }); %> + <% it.scripts.forEach((script) => { %> + <link rel="preload" href="<%= 'https://cdn.selectdb.com' %><%= it.baseUrl %><%= script %>" as="script"> + <% }); %> + </head> + <body <%~ it.bodyAttributes %>> + <%~ it.preBodyTags %> + <div id="__docusaurus"> + <%~ it.appHtml %> + </div> + <% it.scripts.forEach((script) => { %> + <script src="<%= 'https://cdn.selectdb.com' %><%= it.baseUrl %><%= script %>"></script> + <% 
}); %> + <%~ it.postBodyTags %> + </body> +</html>`, +}; diff --git a/docusaurus.config.js b/docusaurus.config.js index 177fa5aba35..f8c58a6d5c2 100644 --- a/docusaurus.config.js +++ b/docusaurus.config.js @@ -4,6 +4,8 @@ const versions = require('./versions.json'); const lightCodeTheme = require('prism-react-renderer/themes/github'); const showAllVersions = true; +const { ssrTemplate } = require('./config/ssrTemplate'); +const customDocusaurusPlugin = require('./config/custom-docusaurus-plugin'); /** @type {import('@docusaurus/types').Config} */ const config = { @@ -46,6 +48,7 @@ const config = { sidebarPath: require.resolve('./sidebarsCommunity.json'), }), ], + process.env.NODE_ENV === 'development' ? null : customDocusaurusPlugin, [ '@docusaurus/plugin-pwa', { @@ -156,7 +159,6 @@ const config = { highlightSearchTermsOnTargetPage: true, // indexPages: true, indexDocs: true, - docsDir: ['docs', 'community'], indexBlog: false, explicitSearchResultPath: true, }, @@ -169,7 +171,7 @@ const config = { title: '', logo: { alt: 'Doris', - src: 'images/logo.svg', + src: 'https://cdn.selectdb.com/images/logo.svg', }, items: [ { to: '/', label: 'Home', position: 'left', exact: true }, @@ -197,12 +199,12 @@ const config = { type: 'localeDropdown', position: 'right', }, - // { - // href: "https://github.com/apache/doris", - // className: "header-right-button-github", - // position: "right", - // label: "GitHub", - // }, + // { + // href: 'https://github.com/apache/doris', + // className: 'header-right-button-github', + // position: 'right', + // label: 'GitHub', + // }, { href: '/download', className: 'header-right-button-primary navbar-download-mobile', @@ -274,14 +276,15 @@ const config = { colorMode: { disableSwitch: true, }, - metadata: [ - { - name: 'viewport', - content: - 'width=device-width, initial-scale=1.0, minimum-scale=1.0, maximum-scale=1.0, user-scalable=no', - }, - ], + // metadata: [ + // { + // name: 'viewport', + // content: + // 'width=device-width, 
initial-scale=1.0, minimum-scale=1.0, maximum-scale=1.0, user-scalable=no', + // }, + // ], }), + ssrTemplate, }; module.exports = config; diff --git a/package.json b/package.json index d27458a2840..5d95b1ee807 100644 --- a/package.json +++ b/package.json @@ -20,7 +20,7 @@ "@docusaurus/core": "2.0.0-beta.21", "@docusaurus/plugin-pwa": "2.0.0-beta.21", "@docusaurus/preset-classic": "2.0.0-beta.21", - "@easyops-cn/docusaurus-search-local": "^0.28.0", + "@easyops-cn/docusaurus-search-local": "^0.30.2", "@mdx-js/react": "^1.6.22", "clsx": "^1.1.1", "docusaurus-plugin-sass": "^0.2.2", diff --git a/src/scss/components/search.scss b/src/scss/components/search.scss index 3aa94b58322..401fc36fe21 100644 --- a/src/scss/components/search.scss +++ b/src/scss/components/search.scss @@ -1,7 +1,7 @@ -.dropdownMenu_qbY6 { +.dropdownMenu_jUzS { padding: 0; - .suggestion_fB_2 { + .suggestion_HjS8 { padding: 0 1.5rem; &.cursor_eG29 { @@ -12,20 +12,20 @@ } } - .hitIcon_a7Zy { + .hitIcon_fVnR { display: none; } - .hitTitle_vyVt { + .hitTitle_LImS { font-size: var(--global-font-size-medium); } - .hitPath_ieM4 { + .hitPath_zaD7 { font-size: var(--global-font-size-small); color: rgba(35, 45, 62, 0.7); } - .hitAction_NqkB { + .hitAction__La6 { svg { display: none; @@ -42,9 +42,13 @@ } } - .hitFooter_E9YW a { + .hitFooter_QvWT a { text-decoration: none; color: rgba(35, 45, 62, 0.6); + + &:hover { + color: var(--ifm-color-primary); + } } } diff --git a/src/theme/LoadingRing/LoadingRing.module.css b/src/theme/LoadingRing/LoadingRing.module.css new file mode 100644 index 00000000000..2e569824e25 --- /dev/null +++ b/src/theme/LoadingRing/LoadingRing.module.css @@ -0,0 +1,47 @@ +/* https://loading.io/css/ */ +.loadingRing { + display: inline-block; + position: relative; + width: 20px; + height: 20px; + opacity: var(--search-local-loading-icon-opacity, 0.5); +} + +.loadingRing div { + box-sizing: border-box; + display: block; + position: absolute; + width: 16px; + height: 16px; + margin: 2px; + 
border: 2px solid + var(--search-load-loading-icon-color, var(--ifm-navbar-search-input-color)); + border-radius: 50%; + animation: loading-ring 1.2s cubic-bezier(0.5, 0, 0.5, 1) infinite; + border-color: var( + --search-load-loading-icon-color, + var(--ifm-navbar-search-input-color) + ) + transparent transparent transparent; +} + +.loadingRing div:nth-child(1) { + animation-delay: -0.45s; +} + +.loadingRing div:nth-child(2) { + animation-delay: -0.3s; +} + +.loadingRing div:nth-child(3) { + animation-delay: -0.15s; +} + +@keyframes loading-ring { + 0% { + transform: rotate(0deg); + } + 100% { + transform: rotate(360deg); + } +} diff --git a/src/theme/LoadingRing/LoadingRing.tsx b/src/theme/LoadingRing/LoadingRing.tsx new file mode 100644 index 00000000000..166a728a12e --- /dev/null +++ b/src/theme/LoadingRing/LoadingRing.tsx @@ -0,0 +1,19 @@ +// istanbul ignore file +import React from "react"; +import clsx from "clsx"; +import styles from "./LoadingRing.module.css"; + +export default function LoadingRing({ + className, +}: { + className?: string; +}): React.ReactElement { + return ( + <div className={clsx(styles.loadingRing, className)}> + <div></div> + <div></div> + <div></div> + <div></div> + </div> + ); +} diff --git a/src/theme/SearchBar/EmptyTemplate.js b/src/theme/SearchBar/EmptyTemplate.js new file mode 100644 index 00000000000..0c67ba65f42 --- /dev/null +++ b/src/theme/SearchBar/EmptyTemplate.js @@ -0,0 +1,12 @@ +import { translate } from "@docusaurus/Translate"; +import { iconNoResults } from "./icons"; +import styles from "./SearchBar.module.css"; +export function EmptyTemplate() { + if (process.env.NODE_ENV === "production") { + return `<span class="${styles.noResults}"><span class="${styles.noResultsIcon}">${iconNoResults}</span><span>${translate({ + id: "theme.SearchBar.noResultsText", + message: "No results", + })}</span></span>`; + } + return `<span class="${styles.noResults}">⚠️ The search index is only available when you run docusaurus 
build!</span>`; +} diff --git a/src/theme/SearchBar/SearchBar.jsx b/src/theme/SearchBar/SearchBar.jsx new file mode 100644 index 00000000000..dcef94e6309 --- /dev/null +++ b/src/theme/SearchBar/SearchBar.jsx @@ -0,0 +1,256 @@ +import React, { useCallback, useEffect, useRef, useState, } from "react"; +import clsx from "clsx"; +import useDocusaurusContext from "@docusaurus/useDocusaurusContext"; +import ExecutionEnvironment from "@docusaurus/ExecutionEnvironment"; +import { useHistory, useLocation } from "@docusaurus/router"; +import { translate } from "@docusaurus/Translate"; +import { ReactContextError, useDocsPreferredVersion, } from "@docusaurus/theme-common"; +import { useActivePlugin } from "@docusaurus/plugin-content-docs/client"; +import { fetchIndexes } from "./fetchIndexes"; +import { SearchSourceFactory } from "../../utils/SearchSourceFactory"; +import { SuggestionTemplate } from "./SuggestionTemplate"; +import { EmptyTemplate } from "./EmptyTemplate"; +import { searchResultLimits, Mark, searchBarShortcut, searchBarShortcutHint, docsPluginIdForPreferredVersion, indexDocs, } from "../../utils/proxiedGenerated"; +import LoadingRing from "../LoadingRing/LoadingRing"; +import styles from "./SearchBar.module.css"; +async function fetchAutoCompleteJS() { + const autoCompleteModule = await import("@easyops-cn/autocomplete.js"); + const autoComplete = autoCompleteModule.default; + if (autoComplete.noConflict) { + // For webpack v5 since docusaurus v2.0.0-alpha.75 + autoComplete.noConflict(); + } + else if (autoCompleteModule.noConflict) { + // For webpack v4 before docusaurus v2.0.0-alpha.74 + autoCompleteModule.noConflict(); + } + return autoComplete; +} +const SEARCH_PARAM_HIGHLIGHT = "_highlight"; +export default function SearchBar({ handleSearchBarToggle, }) { + const { siteConfig: { baseUrl }, } = useDocusaurusContext(); + // It returns undefined for non-docs pages + const activePlugin = useActivePlugin(); + let versionUrl = baseUrl; + // For non-docs pages 
while using plugin-content-docs with custom ids, + // this will throw an error of: + // > Docusaurus plugin global data not found for "docusaurus-plugin-content-docs" plugin with id "default". + // It seems that we can not get the correct id for non-docs pages. + try { + // The try-catch is a hack because useDocsPreferredVersion just throws an + // exception when versions are not used. + // The same hack is used in SearchPage.tsx + // eslint-disable-next-line react-hooks/rules-of-hooks + const { preferredVersion } = useDocsPreferredVersion(activePlugin?.pluginId ?? docsPluginIdForPreferredVersion); + if (preferredVersion && !preferredVersion.isLast) { + versionUrl = preferredVersion.path + "/"; + } + } + catch (e) { + if (indexDocs) { + if (e instanceof ReactContextError) { + /* ignore, happens when website doesn't use versions */ + } + else { + throw e; + } + } + } + const history = useHistory(); + const location = useLocation(); + const searchBarRef = useRef(null); + const indexState = useRef("empty"); // empty, loaded, done + // Should the input be focused after the index is loaded? + const focusAfterIndexLoaded = useRef(false); + const [loading, setLoading] = useState(false); + const [inputChanged, setInputChanged] = useState(false); + const [inputValue, setInputValue] = useState(""); + const search = useRef(null); + const loadIndex = useCallback(async () => { + if (indexState.current !== "empty") { + // Do not load the index (again) if its already loaded or in the process of being loaded. 
+ return; + } + indexState.current = "loading"; + setLoading(true); + const [{ wrappedIndexes, zhDictionary }, autoComplete] = await Promise.all([ + fetchIndexes(versionUrl), + fetchAutoCompleteJS(), + ]); + search.current = autoComplete(searchBarRef.current, { + hint: false, + autoselect: true, + openOnFocus: true, + cssClasses: { + root: styles.searchBar, + noPrefix: true, + dropdownMenu: styles.dropdownMenu, + input: styles.input, + hint: styles.hint, + suggestions: styles.suggestions, + suggestion: styles.suggestion, + cursor: styles.cursor, + dataset: styles.dataset, + empty: styles.empty, + }, + }, [ + { + source: SearchSourceFactory(wrappedIndexes, zhDictionary, searchResultLimits), + templates: { + suggestion: SuggestionTemplate, + empty: EmptyTemplate, + footer: ({ query, isEmpty }) => { + if (isEmpty) { + return; + } + const a = document.createElement("a"); + const url = `${baseUrl}search?q=${encodeURIComponent(query)}`; + a.href = url; + a.textContent = translate({ + id: "theme.SearchBar.seeAll", + message: "See all results", + }); + a.addEventListener("click", (e) => { + if (!e.ctrlKey && !e.metaKey) { + e.preventDefault(); + search.current.autocomplete.close(); + history.push(url); + } + }); + const div = document.createElement("div"); + div.className = styles.hitFooter; + div.appendChild(a); + return div; + }, + }, + }, + ]) + .on("autocomplete:selected", function (event, { document: { u, h }, tokens }) { + searchBarRef.current?.blur(); + let url = u; + if (Mark && tokens.length > 0) { + const params = new URLSearchParams(); + for (const token of tokens) { + params.append(SEARCH_PARAM_HIGHLIGHT, token); + } + url += `?${params.toString()}`; + } + if (h) { + url += h; + } + history.push(url); + }) + .on("autocomplete:closed", () => { + searchBarRef.current?.blur(); + }); + indexState.current = "done"; + setLoading(false); + if (focusAfterIndexLoaded.current) { + const input = searchBarRef.current; + if (input.value) { + 
search.current.autocomplete.open(); + } + input.focus(); + } + }, [baseUrl, versionUrl, history]); + useEffect(() => { + if (!Mark) { + return; + } + const keywords = ExecutionEnvironment.canUseDOM + ? new URLSearchParams(location.search).getAll(SEARCH_PARAM_HIGHLIGHT) + : []; + // A workaround to fix an issue of highlighting in code blocks. + // See https://github.com/easyops-cn/docusaurus-search-local/issues/92 + // Code blocks will be re-rendered after this `useEffect` ran. + // So we make the marking run after a macro task. + setTimeout(() => { + const root = document.querySelector("article"); + if (!root) { + return; + } + const mark = new Mark(root); + mark.unmark(); + if (keywords.length !== 0) { + mark.mark(keywords); + } + // Apply any keywords to the search input so that we can clear marks in case we loaded a page with a highlight in the url + setInputValue(keywords.join(" ")); + search.current?.autocomplete.setVal(keywords.join(" ")); + }); + }, [location.search, location.pathname]); + const [focused, setFocused] = useState(false); + const onInputFocus = useCallback(() => { + focusAfterIndexLoaded.current = true; + loadIndex(); + setFocused(true); + handleSearchBarToggle?.(true); + }, [handleSearchBarToggle, loadIndex]); + const onInputBlur = useCallback(() => { + setFocused(false); + handleSearchBarToggle?.(false); + }, [handleSearchBarToggle]); + const onInputMouseEnter = useCallback(() => { + loadIndex(); + }, [loadIndex]); + const onInputChange = useCallback((event) => { + setInputValue(event.target.value); + if (event.target.value) { + setInputChanged(true); + } + }, []); + // Implement hint icons for the search shortcuts on mac and the rest operating systems. + const isMac = ExecutionEnvironment.canUseDOM + ? /mac/i.test(navigator.userAgentData?.platform ?? navigator.platform) + : false; + useEffect(() => { + if (!searchBarShortcut) { + return; + } + // Add shortcuts command/ctrl + K + const handleShortcut = (event) => { + if ((isMac ? 
event.metaKey : event.ctrlKey) && event.code === "KeyK") { + event.preventDefault(); + searchBarRef.current?.focus(); + onInputFocus(); + } + }; + document.addEventListener("keydown", handleShortcut); + return () => { + document.removeEventListener("keydown", handleShortcut); + }; + }, [isMac, onInputFocus]); + const onClearSearch = useCallback(() => { + const params = new URLSearchParams(location.search); + params.delete(SEARCH_PARAM_HIGHLIGHT); + const paramsStr = params.toString(); + const searchUrl = location.pathname + + (paramsStr != "" ? `?${paramsStr}` : "") + + location.hash; + if (searchUrl != location.pathname + location.search + location.hash) { + history.push(searchUrl); + } + // We always clear these here because in case no match was selected the above history push wont happen + setInputValue(""); + search.current?.autocomplete.setVal(""); + }, [location.pathname, location.search, location.hash, history]); + return (<div className={clsx("navbar__search", styles.searchBarContainer, { + [styles.searchIndexLoading]: loading && inputChanged, + [styles.focused]: focused, + })}> + <input placeholder={translate({ + id: "theme.SearchBar.label", + message: "Search", + description: "The ARIA label and placeholder for search button", + })} aria-label="Search" className="navbar__search-input" onMouseEnter={onInputMouseEnter} onFocus={onInputFocus} onBlur={onInputBlur} onChange={onInputChange} ref={searchBarRef} value={inputValue}/> + <LoadingRing className={styles.searchBarLoadingRing}/> + {searchBarShortcut && + searchBarShortcutHint && + (inputValue !== "" ? (<button className={styles.searchClearButton} onClick={onClearSearch}> + ✕ + </button>) : (<div className={styles.searchHintContainer}> + <kbd className={styles.searchHint}>{isMac ? 
"⌘" : "ctrl"}</kbd> + <kbd className={styles.searchHint}>K</kbd> + </div>))} + </div>); +} diff --git a/src/theme/SearchBar/SearchBar.module.css b/src/theme/SearchBar/SearchBar.module.css new file mode 100644 index 00000000000..1777c864ed6 --- /dev/null +++ b/src/theme/SearchBar/SearchBar.module.css @@ -0,0 +1,256 @@ +.searchBar .dropdownMenu { + left: auto !important; + right: 0 !important; + + background: var(--search-local-modal-background, #f5f6f7); + border-radius: 6px; + box-shadow: var( + --search-local-modal-shadow, + inset 1px 1px 0 0 hsla(0, 0%, 100%, 0.5), + 0 3px 8px 0 #555a64 + ); + margin-top: 8px; + width: var(--search-local-modal-width, 560px); + position: relative; + + padding: var(--search-local-spacing, 12px); +} + +@media (max-width: 576px) { + :global(.navbar__search-input):not(:focus) { + width: 2rem; + } + + .searchBar .dropdownMenu { + width: var(--search-local-modal-width-sm, 340px); + max-width: calc(100vw - var(--ifm-navbar-padding-horizontal) * 2); + } +} + +html[data-theme="dark"] .searchBar .dropdownMenu { + background: var(--search-local-modal-background, var(--ifm-background-color)); + box-shadow: var( + --search-local-modal-shadow, + inset 1px 1px 0 0 #2c2e40, + 0 3px 8px 0 #000309 + ); +} + +.searchBar .dropdownMenu .suggestion { + cursor: pointer; + background: var(--search-local-hit-background, #fff); + border-radius: 4px; + box-shadow: var(--search-local-hit-shadow, 0 1px 3px 0 #d4d9e1); + padding: 0 var(--search-local-spacing, 12px); + width: 100%; + + align-items: center; + color: var(--search-local-hit-color, #444950); + display: flex; + flex-direction: row; + height: var(--search-local-hit-height, 56px); +} + +html[data-theme="dark"] .dropdownMenu .suggestion { + background: var(--search-local-hit-background, var(--ifm-color-emphasis-100)); + box-shadow: var(--search-local-hit-shadow, none); + color: var(--search-local-hit-color, var(--ifm-font-color-base)); +} + +.searchBar .dropdownMenu .suggestion:not(:last-child) { + 
margin-bottom: 4px; +} + +.searchBar .dropdownMenu .suggestion.cursor { + background-color: var( + --search-local-highlight-color, + var(--ifm-color-primary) + ); +} + +.hitTree, +.hitIcon, +.hitPath, +.noResultsIcon, +.hitFooter a { + color: var(--search-local-muted-color, #969faf); +} + +html[data-theme="dark"] .hitTree, +html[data-theme="dark"] .hitIcon, +html[data-theme="dark"] .hitPath, +html[data-theme="dark"] .noResultsIcon { + color: var(--search-local-muted-color, var(--ifm-color-secondary-darkest)); +} + +.hitTree { + display: flex; + align-items: center; +} + +.hitTree > svg { + height: var(--search-local-hit-height, 56px); + opacity: 0.5; + stroke-width: var(--search-local-icon-stroke-width, 1.4); + width: 24px; +} + +.hitIcon { + stroke-width: var(--search-local-icon-stroke-width, 1.4); + + height: 20px; + width: 20px; +} + +.hitWrapper { + flex: 1 1 auto; + display: flex; + flex-direction: column; + font-weight: 500; + justify-content: center; + margin: 0 8px; + overflow-x: hidden; + width: 80%; +} + +.hitWrapper mark { + background: none; + color: var(--search-local-highlight-color, var(--ifm-color-primary)); +} + +.hitTitle { + font-size: 0.9em; +} + +.hitPath { + font-size: 0.75em; +} + +.hitPath, +.hitTitle { + white-space: nowrap; + overflow-x: hidden; + text-overflow: ellipsis; +} + +.hitAction { + height: 20px; + width: 20px; +} + +.hideAction > svg { + display: none; +} + +.noResults { + display: flex; + flex-direction: column; + align-items: center; + justify-content: center; + padding: var(--search-local-spacing, 12px) 0; +} + +.noResultsIcon { + margin-bottom: var(--search-local-spacing, 12px); +} + +.hitFooter { + text-align: center; + margin-top: var(--search-local-spacing, 12px); + font-size: 0.85em; +} + +.hitFooter a { + text-decoration: underline; +} + +.cursor .hideAction > svg { + display: block; +} + +.suggestion.cursor, +.suggestion.cursor mark, +.suggestion.cursor .hitTree, +.suggestion.cursor .hitIcon, +.suggestion.cursor 
.hitPath { + color: var( + --search-local-hit-active-color, + var(--ifm-color-white) + ) !important; +} + +.suggestion.cursor mark { + text-decoration: underline; +} + +.searchBarContainer { + margin-left: 16px; +} + +.searchBarContainer .searchBarLoadingRing { + display: none; + position: absolute; + left: 10px; + top: 6px; +} + +.searchBarContainer .searchClearButton { + position: absolute; + right: 0.8rem; + top: 50%; + transform: translate(0, -50%); + padding: 0; + background: none; + border: none; + line-height: 1rem; +} + +:global(.navbar__search) { + position: relative; +} + +.searchIndexLoading :global(.navbar__search-input) { + background-image: none; +} + +.searchBarContainer.searchIndexLoading .searchBarLoadingRing { + display: inline-block; +} + +.searchHintContainer { + position: absolute; + right: 10px; + top: 0px; + display: flex; + align-items: center; + justify-content: center; + height: 100%; + pointer-events: none; + gap: 4px; +} + +.searchHint { + color: var(--ifm-navbar-search-input-placeholder-color); + background-color: var(--ifm-navbar-search-input-background-color); + border: 1px solid var(--ifm-color-emphasis-500); + box-shadow: inset 0 -1px 0 var(--ifm-color-emphasis-500); +} + +@media (max-width: 576px) { + .searchBarContainer:not(.focused) .searchClearButton, + .searchHintContainer { + display: none; + } +} + +.input { +} +.hint { +} +.suggestions { +} +.dataset { +} +.empty { +} +/**/ diff --git a/src/theme/SearchBar/SuggestionTemplate.js b/src/theme/SearchBar/SuggestionTemplate.js new file mode 100644 index 00000000000..d11f8c76357 --- /dev/null +++ b/src/theme/SearchBar/SuggestionTemplate.js @@ -0,0 +1,49 @@ +import { concatDocumentPath } from "../../utils/concatDocumentPath"; +import { getStemmedPositions } from "../../utils/getStemmedPositions"; +import { highlight } from "../../utils/highlight"; +import { highlightStemmed } from "../../utils/highlightStemmed"; +import { explicitSearchResultPath } from 
"../../utils/proxiedGenerated"; +import { iconAction, iconContent, iconHeading, iconTitle, iconTreeInter, iconTreeLast, } from "./icons"; +import styles from "./SearchBar.module.css"; +export function SuggestionTemplate({ document, type, page, metadata, tokens, isInterOfTree, isLastOfTree, }) { + const isTitle = type === 0; + const isHeading = type === 1; + const tree = []; + if (isInterOfTree) { + tree.push(iconTreeInter); + } + else if (isLastOfTree) { + tree.push(iconTreeLast); + } + const treeWrapper = tree.map((item) => `<span class="${styles.hitTree}">${item}</span>`); + const icon = `<span class="${styles.hitIcon}">${isTitle ? iconTitle : isHeading ? iconHeading : iconContent}</span>`; + const wrapped = [ + `<span class="${styles.hitTitle}">${highlightStemmed(document.t, getStemmedPositions(metadata, "t"), tokens)}</span>`, + ]; + const needsExplicitHitPath = !isInterOfTree && !isLastOfTree && explicitSearchResultPath; + if (needsExplicitHitPath) { + const pathItems = page + ? (page.b ?? []) + .concat(page.t) + .concat(!document.s || document.s === page.t ? [] : document.s) + : document.b; + wrapped.push(`<span class="${styles.hitPath}">${concatDocumentPath(pathItems ?? [])}</span>`); + } + else if (!isTitle) { + wrapped.push(`<span class="${styles.hitPath}">${highlight(page.t || + // Todo(weareoutman): This is for EasyOps only. + // istanbul ignore next + (document.u.startsWith("/docs/api-reference/") + ? 
"API Reference" + : ""), tokens)}</span>`); + } + const action = `<span class="${styles.hitAction}">${iconAction}</span>`; + return [ + ...treeWrapper, + icon, + `<span class="${styles.hitWrapper}">`, + ...wrapped, + "</span>", + action, + ].join(""); +} diff --git a/src/theme/SearchBar/fetchIndexes.js b/src/theme/SearchBar/fetchIndexes.js new file mode 100644 index 00000000000..369677190c3 --- /dev/null +++ b/src/theme/SearchBar/fetchIndexes.js @@ -0,0 +1,30 @@ +import lunr from 'lunr'; +import { searchIndexUrl } from '../../utils/proxiedGenerated'; +export async function fetchIndexes(baseUrl) { + if (process.env.NODE_ENV === 'production') { + // const json = await (await fetch(`${baseUrl}${searchIndexUrl}`)).json(); + const json = await (await fetch(`https://cdn.selectdb.com${baseUrl}${searchIndexUrl}`)).json(); + const wrappedIndexes = json.map(({ documents, index }, type) => ({ + type: type, + documents, + index: lunr.Index.load(index), + })); + const zhDictionary = json.reduce((acc, item) => { + for (const tuple of item.index.invertedIndex) { + if (/\p{Unified_Ideograph}/u.test(tuple[0][0])) { + acc.add(tuple[0]); + } + } + return acc; + }, new Set()); + return { + wrappedIndexes, + zhDictionary: Array.from(zhDictionary), + }; + } + // The index does not exist in development, therefore load a dummy index here. 
+ return { + wrappedIndexes: [], + zhDictionary: [], + }; +} diff --git a/src/theme/SearchBar/icons.js b/src/theme/SearchBar/icons.js new file mode 100644 index 00000000000..d5380213e98 --- /dev/null +++ b/src/theme/SearchBar/icons.js @@ -0,0 +1,7 @@ +export const iconTitle = '<svg width="20" height="20" viewBox="0 0 20 20"><path d="M17 6v12c0 .52-.2 1-1 1H4c-.7 0-1-.33-1-1V2c0-.55.42-1 1-1h8l5 5zM14 8h-3.13c-.51 0-.87-.34-.87-.87V4" stroke="currentColor" fill="none" fill-rule="evenodd" stroke-linejoin="round"></path></svg>'; +export const iconHeading = '<svg width="20" height="20" viewBox="0 0 20 20"><path d="M13 13h4-4V8H7v5h6v4-4H7V8H3h4V3v5h6V3v5h4-4v5zm-6 0v4-4H3h4z" stroke="currentColor" fill="none" fill-rule="evenodd" stroke-linecap="round" stroke-linejoin="round"></path></svg>'; +export const iconContent = '<svg width="20" height="20" viewBox="0 0 20 20"><path d="M17 5H3h14zm0 5H3h14zm0 5H3h14z" stroke="currentColor" fill="none" fill-rule="evenodd" stroke-linejoin="round"></path></svg>'; +export const iconAction = '<svg width="20" height="20" viewBox="0 0 20 20"><g stroke="currentColor" fill="none" fill-rule="evenodd" stroke-linecap="round" stroke-linejoin="round"><path d="M18 3v4c0 2-2 4-4 4H2"></path><path d="M8 17l-6-6 6-6"></path></g></svg>'; +export const iconNoResults = '<svg width="40" height="40" viewBox="0 0 20 20" fill="none" fill-rule="evenodd" stroke="currentColor" stroke-linecap="round" stroke-linejoin="round"><path d="M15.5 4.8c2 3 1.7 7-1 9.7h0l4.3 4.3-4.3-4.3a7.8 7.8 0 01-9.8 1m-2.2-2.2A7.8 7.8 0 0113.2 2.4M2 18L18 2"></path></svg>'; +export const iconTreeInter = '<svg viewBox="0 0 24 54"><g stroke="currentColor" fill="none" fill-rule="evenodd" stroke-linecap="round" stroke-linejoin="round"><path d="M8 6v42M20 27H8.3"></path></g></svg>'; +export const iconTreeLast = '<svg viewBox="0 0 24 54"><g stroke="currentColor" fill="none" fill-rule="evenodd" stroke-linecap="round" stroke-linejoin="round"><path d="M8 6v21M20 27H8.3"></path></g></svg>'; 
diff --git a/src/theme/SearchBar/index.js b/src/theme/SearchBar/index.js new file mode 100644 index 00000000000..369df710bfd --- /dev/null +++ b/src/theme/SearchBar/index.js @@ -0,0 +1,3 @@ +import "../../utils/proxiedGenerated"; +import SearchBar from "./SearchBar"; +export default SearchBar; diff --git a/src/utils/SearchSourceFactory.spec.ts b/src/utils/SearchSourceFactory.spec.ts new file mode 100644 index 00000000000..bdba3949076 --- /dev/null +++ b/src/utils/SearchSourceFactory.spec.ts @@ -0,0 +1,100 @@ +import lunr from "lunr"; +import { SearchDocument } from "../../shared/interfaces"; +import { SearchSourceFactory } from "./SearchSourceFactory"; + +// eslint-disable-next-line @typescript-eslint/no-var-requires +require("lunr-languages/lunr.stemmer.support")(lunr); +// eslint-disable-next-line @typescript-eslint/no-var-requires +require("../../shared/lunrLanguageZh").lunrLanguageZh(lunr); +// eslint-disable-next-line @typescript-eslint/no-var-requires +require("lunr-languages/lunr.multi")(lunr); + +jest.mock("./proxiedGenerated"); + +describe("SearchSourceFactory", () => { + const documentsOfTitles: SearchDocument[] = [ + { + i: 1, + t: "First Page Title", + u: "/1", + }, + { + i: 4, + t: "Second Page Title > peace", + u: "/2", + }, + ]; + const documentsOfHeadings: SearchDocument[] = [ + { + i: 2, + t: "First heading > peace", + u: "/1#2", + p: 1, + }, + ]; + const documentsOfContents: SearchDocument[] = [ + { + i: 3, + t: "First content. > peace", + u: "/1#2", + p: 1, + }, + ]; + + const getIndex = (documents: SearchDocument[]) => + lunr(function () { + this.ref("i"); + this.field("t"); + this.metadataWhitelist = ["position"]; + documents.forEach((doc) => { + this.add({ + ...doc, + // The ref must be a string. 
+ i: doc.i.toString(), + }); + }); + }); + + const searchSource = SearchSourceFactory( + [ + { + documents: documentsOfTitles, + index: getIndex(documentsOfTitles), + type: 0, + }, + { + documents: documentsOfHeadings, + index: getIndex(documentsOfHeadings), + type: 1, + }, + { + documents: documentsOfContents, + index: getIndex(documentsOfContents), + type: 2, + }, + ], + [], + 2 + ); + const callback = jest.fn(); + + test.each<[string, number[]]>([ + [",", []], + ["nothing", []], + ["peace", [4, 2]], + ])( + "SearchSourceFactory('%s', zhDictionary) should return %j", + (input, results) => { + searchSource(input, callback); + expect(callback).toBeCalledWith( + results.map((i) => + expect.objectContaining({ + document: expect.objectContaining({ + i, + }), + }) + ) + ); + } + ); +}); diff --git a/src/utils/SearchSourceFactory.ts b/src/utils/SearchSourceFactory.ts new file mode 100644 index 00000000000..e2dfa37bc1e --- /dev/null +++ b/src/utils/SearchSourceFactory.ts @@ -0,0 +1,83 @@ +import { tokenize } from "./tokenize"; +import { smartQueries } from "./smartQueries"; +import { + MatchMetadata, + WrappedIndex, + SearchResult, + SearchDocument, + InitialSearchResult, +} from "../../shared/interfaces"; +import { sortSearchResults } from "./sortSearchResults"; +import { processTreeStatusOfSearchResults } from "./processTreeStatusOfSearchResults"; +import { language } from "./proxiedGenerated"; + +export function SearchSourceFactory( + wrappedIndexes: WrappedIndex[], + zhDictionary: string[], + resultsLimit: number +) { + return function searchSource( + input: string, + callback: (results: SearchResult[]) => void + ): void { + const rawTokens = tokenize(input, language); + if (rawTokens.length === 0) { + callback([]); + return; + } + + const queries = smartQueries(rawTokens, zhDictionary); + const results: InitialSearchResult[] = []; + + search: for (const { term, tokens } of queries) { + for (const { documents, index, type } of wrappedIndexes) { + results.push( + 
...index + .query((query) => { + for (const item of term) { + query.term(item.value, { + wildcard: item.wildcard, + presence: item.presence, + }); + } + }) + .slice(0, resultsLimit) + // Remove duplicated results. + .filter( + (result) => + !results.some( + (item) => item.document.i.toString() === result.ref + ) + ) + .slice(0, resultsLimit - results.length) + .map((result) => { + const document = documents.find( + (doc) => doc.i.toString() === result.ref + ) as SearchDocument; + return { + document, + type, + page: + type !== 0 && + wrappedIndexes[0].documents.find( + (doc) => doc.i === document.p + ), + metadata: result.matchData.metadata as MatchMetadata, + tokens, + score: result.score, + }; + }) + ); + if (results.length >= resultsLimit) { + break search; + } + } + } + + sortSearchResults(results); + + processTreeStatusOfSearchResults(results); + + callback(results as SearchResult[]); + }; +} diff --git a/src/utils/__mocks__/proxiedGenerated.ts b/src/utils/__mocks__/proxiedGenerated.ts new file mode 100644 index 00000000000..35fc2ba2f53 --- /dev/null +++ b/src/utils/__mocks__/proxiedGenerated.ts @@ -0,0 +1,21 @@ +export let language = ["en", "zh"]; +export let removeDefaultStopWordFilter = false; +export let removeDefaultStemmer = false; +export const searchIndexUrl = "search-index.json?_=abc"; +export const searchResultLimits = 8; +export const searchResultContextMaxLength = 50; +export const explicitSearchResultPath = false; +export const docsPluginIdForPreferredVersion = undefined; +export const indexDocs = true; + +export function __setLanguage(value: string[]): void { + language = value; +} + +export function __setRemoveDefaultStopWordFilter(value: boolean): void { + removeDefaultStopWordFilter = value; +} + +export function __setRemoveDefaultStemmer(value: boolean): void { + removeDefaultStemmer = value; +} diff --git a/src/utils/concatDocumentPath.ts b/src/utils/concatDocumentPath.ts new file mode 100644 index 00000000000..77af1544bac --- /dev/null +++ 
b/src/utils/concatDocumentPath.ts @@ -0,0 +1,3 @@ +export function concatDocumentPath(pathItems: string[]): string { + return pathItems.join(" › "); +} diff --git a/src/utils/cutZhWords.spec.ts b/src/utils/cutZhWords.spec.ts new file mode 100644 index 00000000000..9dae8c9f54b --- /dev/null +++ b/src/utils/cutZhWords.spec.ts @@ -0,0 +1,42 @@ +import { cutZhWords } from "./cutZhWords"; + +const zhDictionary = ["研究生", "研究", "生命", "科学", "生命科学"]; + +describe("cutZhWords", () => { + test.each<[string, string[][]]>([ + [ + "研究生命科学", + [ + ["研究", "生命科学"], + ["研究", "生命", "科学"], + ["研究生", "科学"], + ], + ], + [ + "研究生命科", + [ + ["研究", "生命科*"], + ["研究", "生命", "科*"], + ["研究生", "科*"], + ], + ], + ["研究生", [["研究生"], ["研究", "生*"]]], + [ + "研究生科", + [ + ["研究生", "科*"], + ["研究", "生*", "科*"], + ], + ], + ["我研究生", [["研究生"], ["研究", "生*"]]], + ["研究生我", [["研究生"], ["研究", "生*"]]], + ["我", []], + ["命", []], + ])("cutZhWords('%s', zhDictionary) should work", (token, terms) => { + expect( + cutZhWords(token, zhDictionary).map((term) => + term.map((item) => `${item.value}${item.trailing ? "*" : ""}`) + ) + ).toEqual(terms); + }); +}); diff --git a/src/utils/cutZhWords.ts b/src/utils/cutZhWords.ts new file mode 100644 index 00000000000..4c7cf1f7672 --- /dev/null +++ b/src/utils/cutZhWords.ts @@ -0,0 +1,89 @@ +import { SmartTerm, WrappedTerm } from "../../shared/interfaces"; + +/** + * Get all possible terms for a string of consecutive Chinese words, + * by a words dictionary. + * + * @remarks + * + * Terms are sorted in ascending order by the count of words. + * + * @param token - A string of consecutive Chinese words. + * @param zhDictionary - A Chinese words dictionary. + * + * @returns A smart term list. 
+ */ +export function cutZhWords(token: string, zhDictionary: string[]): SmartTerm[] { + const wrappedTerms: WrappedTerm[] = []; + function cut(subToken: string, carry: WrappedTerm): void { + let matchedLastIndex = 0; + let matched = false; + for (const words of zhDictionary) { + if (subToken.substr(0, words.length) === words) { + const nextCarry = { + missed: carry.missed, + term: carry.term.concat({ + value: words, + }), + }; + if (subToken.length > words.length) { + cut(subToken.substr(words.length), nextCarry); + } else { + wrappedTerms.push(nextCarry); + } + matched = true; + } else { + for ( + let lastIndex = words.length - 1; + lastIndex > matchedLastIndex; + lastIndex -= 1 + ) { + const subWords = words.substr(0, lastIndex); + if (subToken.substr(0, lastIndex) === subWords) { + matchedLastIndex = lastIndex; + const nextCarry = { + missed: carry.missed, + term: carry.term.concat({ + value: subWords, + trailing: true, + }), + }; + if (subToken.length > lastIndex) { + cut(subToken.substr(lastIndex), nextCarry); + } else { + wrappedTerms.push(nextCarry); + } + matched = true; + break; + } + } + } + } + if (!matched) { + if (subToken.length > 0) { + cut(subToken.substr(1), { + missed: carry.missed + 1, + term: carry.term, + }); + } else if (carry.term.length > 0) { + wrappedTerms.push(carry); + } + } + } + cut(token, { + missed: 0, + term: [], + }); + return wrappedTerms + .sort((a, b) => { + const aMissed = a.missed > 0 ? 1 : 0; + const bMissed = b.missed > 0 ? 1 : 0; + if (aMissed !== bMissed) { + // Put all no-words-missed terms before words-missed terms. + return aMissed - bMissed; + } + // Put terms with less words before those with more words. + return a.term.length - b.term.length; + }) + .map((item) => item.term); +} diff --git a/src/utils/escapeHtml.ts b/src/utils/escapeHtml.ts new file mode 100644 index 00000000000..6e5008d0301 --- /dev/null +++ b/src/utils/escapeHtml.ts @@ -0,0 +1,15 @@ +/** + * Escape html special chars. 
+ * + * @param unsafe - A unsafe string. + * + * @returns A safe string can be injected as innerHTML. + */ +export function escapeHtml(unsafe: string): string { + return unsafe + .replace(/&/g, "&amp;") + .replace(/</g, "&lt;") + .replace(/>/g, "&gt;") + .replace(/"/g, "&quot;") + .replace(/'/g, "&#039;"); +} diff --git a/src/utils/getStemmedPositions.spec.ts b/src/utils/getStemmedPositions.spec.ts new file mode 100644 index 00000000000..f5070c8af39 --- /dev/null +++ b/src/utils/getStemmedPositions.spec.ts @@ -0,0 +1,41 @@ +import { getStemmedPositions } from "./getStemmedPositions"; + +describe("getStemmedPositions", () => { + test("flatten and sort positions", () => { + expect( + getStemmedPositions( + { + dr: { + body: { + position: [ + [9, 2], + [24, 2], + ], + }, + }, + dream: { + body: { + position: [ + [9, 5], + [24, 5], + ], + }, + }, + true: { + body: { + position: [[36, 4]], + }, + }, + unknown: {}, + }, + "body" + ) + ).toEqual([ + [9, 5], + [9, 2], + [24, 5], + [24, 2], + [36, 4], + ]); + }); +}); diff --git a/src/utils/getStemmedPositions.ts b/src/utils/getStemmedPositions.ts new file mode 100644 index 00000000000..52f4ec51e5e --- /dev/null +++ b/src/utils/getStemmedPositions.ts @@ -0,0 +1,17 @@ +import { MatchMetadata, MetadataPosition } from "../../shared/interfaces"; + +export function getStemmedPositions( + metadata: MatchMetadata, + field: string +): MetadataPosition[] { + const positions: MetadataPosition[] = []; + for (const match of Object.values(metadata)) { + if (match[field]) { + positions.push(...match[field].position); + } + } + + // Put positions with lower start pos before those with higher start pos. + // Put longer positions before shorter positions when they are the same in start pos. 
+ return positions.sort((a, b) => a[0] - b[0] || b[1] - a[1]); +} diff --git a/src/utils/highlight.spec.ts b/src/utils/highlight.spec.ts new file mode 100644 index 00000000000..0bd97e69f78 --- /dev/null +++ b/src/utils/highlight.spec.ts @@ -0,0 +1,31 @@ +import { highlight } from "./highlight"; + +describe("highlight", () => { + test.each<[string, string[], boolean, string]>([ + [ + "I Have A Dream. And the dream comes true", + ["dream", "have", "true", "i"], + false, + "<mark>I</mark> <mark>Have</mark> A <mark>Dream</mark>. And the <mark>dream</mark> comes <mark>true</mark>", + ], + [ + "<b>The</b> dream comes <em>true</em>", + ["dream"], + false, + "&lt;b&gt;The&lt;/b&gt; <mark>dream</mark> comes &lt;em&gt;true&lt;/em&gt;", + ], + [ + "query jQuery", + ["jquery", "query"], + false, + "<mark>query</mark> <mark>jQuery</mark>", + ], + ["dream", ["dreams"], true, "<mark>dream</mark>"], + ["<b>dream</b>", ["dreams"], true, "<mark>&lt;b&gt;dream&lt;/b&gt;</mark>"], + ])( + "highlight('%s', %j) should return '%s'", + (text, tokens, matched, result) => { + expect(highlight(text, tokens, matched)).toEqual(result); + } + ); +}); diff --git a/src/utils/highlight.ts b/src/utils/highlight.ts new file mode 100644 index 00000000000..7a193d777dd --- /dev/null +++ b/src/utils/highlight.ts @@ -0,0 +1,43 @@ +import { escapeHtml } from "./escapeHtml"; + +/** + * Highlight specified tokens in text content. + * + * @param content - Text content. + * @param tokens - Tokens to be highlighted (in lower-case and sorted by descending of length). + * @param forceMatched - Whether to force matched. + * + * @returns A html string with marked tokens. 
+ */ +export function highlight( + content: string, + tokens: string[], + forceMatched?: boolean +): string { + const html: string[] = []; + + for (const token of tokens) { + const index = content.toLowerCase().indexOf(token); + if (index >= 0) { + if (index > 0) { + html.push(highlight(content.substr(0, index), tokens)); + } + html.push( + `<mark>${escapeHtml(content.substr(index, token.length))}</mark>` + ); + const end = index + token.length; + if (end < content.length) { + html.push(highlight(content.substr(end), tokens)); + } + break; + } + } + + if (html.length === 0) { + return forceMatched + ? `<mark>${escapeHtml(content)}</mark>` + : escapeHtml(content); + } + + return html.join(""); +} diff --git a/src/utils/highlightStemmed.spec.ts b/src/utils/highlightStemmed.spec.ts new file mode 100644 index 00000000000..43997e5347e --- /dev/null +++ b/src/utils/highlightStemmed.spec.ts @@ -0,0 +1,165 @@ +import { MetadataPosition, HighlightChunk } from "../../shared/interfaces"; +import { highlightStemmed, splitIntoChunks } from "./highlightStemmed"; + +jest.mock("./proxiedGenerated"); + +describe("highlightStemmed", () => { + test.each<[string, MetadataPosition[], string[], number | undefined, string]>( + [ + [ + "I Have A Dream. And the dream comes true", + //1 5 0 5 0 5 0 5 0 + [ + [9, 5], // dream + [24, 5], // dream + [36, 4], // true + ], + ["dream", "true"], + undefined, + "I Have A <mark>Dream</mark>. And the <mark>dream</mark> comes <mark>true</mark>", + ], + [ + "I Have A Dream. And the dream comes true", + //1 5 0 5 0 5 0 5 0 + [ + [9, 5], // dream + [24, 5], // dream + [36, 4], // true + ], + ["dream", "true"], + 16, + "… A <mark>Dream</mark>. 
And …", + ], + ] + )( + "highlightStemmed('%s', %j, %j, %j) should return '%s'", + (text, positions, tokens, maxLength, result) => { + expect(highlightStemmed(text, positions, tokens, maxLength)).toEqual( + result + ); + } + ); +}); + +describe("splitIntoChunks", () => { + test.each<[string, MetadataPosition[], string[], HighlightChunk[], number]>([ + [ + "I Have A Dream. And the dream comes true.<br />", + //1 5 10 15 20 25 30 35 40 + [ + [9, 5], // dream + [12, 2], // am + [24, 5], // dream + [27, 2], // am + [36, 4], // true + ], + ["dream", "true", "am"], + [ + { + html: "I", + textLength: 1, + }, + { + html: " ", + textLength: 1, + }, + { + html: "Have", + textLength: 4, + }, + { + html: " ", + textLength: 1, + }, + { + html: "A", + textLength: 1, + }, + { + html: " ", + textLength: 1, + }, + { + html: "<mark>Dream</mark>", + textLength: 5, + }, + { + html: ". ", + textLength: 2, + }, + { + html: "And", + textLength: 3, + }, + { + html: " ", + textLength: 1, + }, + { + html: "the", + textLength: 3, + }, + { + html: " ", + textLength: 1, + }, + { + html: "<mark>dream</mark>", + textLength: 5, + }, + { + html: " ", + textLength: 1, + }, + { + html: "comes", + textLength: 5, + }, + { + html: " ", + textLength: 1, + }, + { + html: "<mark>true</mark>", + textLength: 4, + }, + { + html: ".&lt;", + textLength: 2, + }, + { + html: "br", + textLength: 2, + }, + { + html: " /&gt;", + textLength: 3, + }, + ], + 6, + ], + [ + "研究生", + [ + [0, 3], + [0, 2], + ], + ["研究生", "研究"], + [ + { + html: "<mark>研究生</mark>", + textLength: 3, + }, + ], + 0, + ], + ])( + "splitIntoChunks('%s', %j, %j, 0, 0) should return %j", + (text, positions, tokens, chunks, chunkIndex) => { + expect(splitIntoChunks(text, positions, tokens)).toEqual({ + chunkIndex, + chunks, + }); + } + ); +}); diff --git a/src/utils/highlightStemmed.ts b/src/utils/highlightStemmed.ts new file mode 100644 index 00000000000..b0158058147 --- /dev/null +++ b/src/utils/highlightStemmed.ts @@ -0,0 +1,124 @@ +import { 
HighlightChunk, MetadataPosition } from "../../shared/interfaces"; +import { escapeHtml } from "./escapeHtml"; +import { highlight } from "./highlight"; +import { looseTokenize } from "./looseTokenize"; +import { searchResultContextMaxLength } from "./proxiedGenerated"; + +export function highlightStemmed( + content: string, + positions: MetadataPosition[], + tokens: string[], + maxLength = searchResultContextMaxLength +): string { + const { chunkIndex, chunks } = splitIntoChunks(content, positions, tokens); + + const leadingChunks = chunks.slice(0, chunkIndex); + const firstChunk = chunks[chunkIndex]; + const html: string[] = [firstChunk.html]; + const trailingChunks = chunks.slice(chunkIndex + 1); + + let currentLength = firstChunk.textLength; + let leftPadding = 0; + let rightPadding = 0; + let leftOverflowed = false; + let rightOverflowed = false; + + while (currentLength < maxLength) { + if ( + (leftPadding <= rightPadding || trailingChunks.length === 0) && + leadingChunks.length > 0 + ) { + const chunk = leadingChunks.pop() as HighlightChunk; + if (currentLength + chunk.textLength <= maxLength) { + html.unshift(chunk.html); + leftPadding += chunk.textLength; + currentLength += chunk.textLength; + } else { + leftOverflowed = true; + leadingChunks.length = 0; + } + } else if (trailingChunks.length > 0) { + const chunk = trailingChunks.shift() as HighlightChunk; + if (currentLength + chunk.textLength <= maxLength) { + html.push(chunk.html); + rightPadding += chunk.textLength; + currentLength += chunk.textLength; + } else { + rightOverflowed = true; + trailingChunks.length = 0; + } + } else { + break; + } + } + + if (leftOverflowed || leadingChunks.length > 0) { + html.unshift("…"); + } + + if (rightOverflowed || trailingChunks.length > 0) { + html.push("…"); + } + + return html.join(""); +} + +export function splitIntoChunks( + content: string, + positions: MetadataPosition[], + tokens: string[] +): { + chunkIndex: number; + chunks: HighlightChunk[]; +} { + 
const chunks: HighlightChunk[] = []; + let positionIndex = 0; + let cursor = 0; + let chunkIndex = -1; + while (positionIndex < positions.length) { + const [start, length] = positions[positionIndex]; + positionIndex += 1; + if (start < cursor) { + continue; + } + + if (start > cursor) { + const leadingChunks = looseTokenize(content.substring(cursor, start)).map( + (token) => ({ + html: escapeHtml(token), + textLength: token.length, + }) + ); + for (const item of leadingChunks) { + chunks.push(item); + } + } + + if (chunkIndex === -1) { + chunkIndex = chunks.length; + } + + cursor = start + length; + chunks.push({ + html: highlight(content.substring(start, cursor), tokens, true), + textLength: length, + }); + } + + if (cursor < content.length) { + const trailingChunks = looseTokenize(content.substring(cursor)).map( + (token) => ({ + html: escapeHtml(token), + textLength: token.length, + }) + ); + for (const item of trailingChunks) { + chunks.push(item); + } + } + + return { + chunkIndex, + chunks, + }; +} diff --git a/src/utils/looseTokenize.spec.ts b/src/utils/looseTokenize.spec.ts new file mode 100644 index 00000000000..33f39bc35bf --- /dev/null +++ b/src/utils/looseTokenize.spec.ts @@ -0,0 +1,9 @@ +import { looseTokenize } from "./looseTokenize"; + +describe("looseTokenize", () => { + test.each<[string, string[]]>([ + ["I have a 梦想。", ["I", " ", "have", " ", "a", " ", "梦", "想", "。"]], + ])("looseTokenize('%s') should return %j", (content, tokens) => { + expect(looseTokenize(content)).toEqual(tokens); + }); +}); diff --git a/src/utils/looseTokenize.ts b/src/utils/looseTokenize.ts new file mode 100644 index 00000000000..48b95357bdf --- /dev/null +++ b/src/utils/looseTokenize.ts @@ -0,0 +1,22 @@ +// https://zhuanlan.zhihu.com/p/33335629 +const singleMatchOfWord = /\w+|\p{Unified_Ideograph}/u; + +export function looseTokenize(content: string): string[] { + const tokens: string[] = []; + let start = 0; + let text = content; + while (text.length > 0) { + const match = 
text.match(singleMatchOfWord); + if (!match) { + tokens.push(text); + break; + } + if ((match.index as number) > 0) { + tokens.push(text.substring(0, match.index)); + } + tokens.push(match[0]); + start += (match.index as number) + match[0].length; + text = content.substring(start); + } + return tokens; +} diff --git a/src/utils/processTreeStatusOfSearchResults.spec.ts b/src/utils/processTreeStatusOfSearchResults.spec.ts new file mode 100644 index 00000000000..019aa844cf1 --- /dev/null +++ b/src/utils/processTreeStatusOfSearchResults.spec.ts @@ -0,0 +1,83 @@ +import { InitialSearchResult } from "../../shared/interfaces"; +import { processTreeStatusOfSearchResults } from "./processTreeStatusOfSearchResults"; + +describe("processTreeStatusOfSearchResults", () => { + test("should work", () => { + const pageTitles = [ + { + document: { + i: 100, + }, + type: 0, + page: undefined, + }, + { + document: { + i: 200, + }, + type: 0, + page: undefined, + }, + ] as InitialSearchResult[]; + const results = [ + { + document: { + i: 1, + }, + type: 2, + page: {}, + }, + { + document: { + i: 2, + }, + type: 1, + page: {}, + }, + pageTitles[0], + { + document: { + i: 101, + }, + type: 2, + page: pageTitles[0].document, + }, + { + document: { + i: 3, + }, + type: 1, + page: {}, + }, + pageTitles[1], + { + document: { + i: 201, + }, + type: 1, + page: pageTitles[1].document, + }, + { + document: { + i: 202, + }, + type: 2, + page: pageTitles[1].document, + }, + ] as InitialSearchResult[]; + processTreeStatusOfSearchResults(results); + const statuses: [boolean, boolean][] = [ + [undefined, undefined], + [undefined, undefined], + [undefined, undefined], + [undefined, true], + [undefined, undefined], + [undefined, undefined], + [true, undefined], + [undefined, true], + ]; + results.forEach((item, i) => { + expect([item.isInterOfTree, item.isLastOfTree]).toEqual(statuses[i]); + }); + }); +}); diff --git a/src/utils/processTreeStatusOfSearchResults.ts 
b/src/utils/processTreeStatusOfSearchResults.ts new file mode 100644 index 00000000000..9cd29603b8f --- /dev/null +++ b/src/utils/processTreeStatusOfSearchResults.ts @@ -0,0 +1,19 @@ +import { InitialSearchResult } from "../../shared/interfaces"; + +export function processTreeStatusOfSearchResults( + results: InitialSearchResult[] +): void { + results.forEach((item, i) => { + if ( + i > 0 && + item.page && + results.some((prev) => prev.document === item.page) + ) { + if (i < results.length - 1 && results[i + 1].page === item.page) { + item.isInterOfTree = true; + } else { + item.isLastOfTree = true; + } + } + }); +} diff --git a/src/utils/proxiedGenerated.ts b/src/utils/proxiedGenerated.ts new file mode 100644 index 00000000000..8fd1adacb11 --- /dev/null +++ b/src/utils/proxiedGenerated.ts @@ -0,0 +1,2 @@ +// This file is auto generated while building. +export * from "@generated/@easyops-cn/docusaurus-search-local/default/generated.js"; diff --git a/src/utils/smartQueries.spec.ts b/src/utils/smartQueries.spec.ts new file mode 100644 index 00000000000..c0ac72889a8 --- /dev/null +++ b/src/utils/smartQueries.spec.ts @@ -0,0 +1,285 @@ +import lunr from "lunr"; +import { smartQueries } from "./smartQueries"; +import { + __setLanguage, + __setRemoveDefaultStopWordFilter, + __setRemoveDefaultStemmer, +} from "./proxiedGenerated"; +import { SmartQuery } from "../../shared/interfaces"; + +// eslint-disable-next-line @typescript-eslint/no-var-requires +require("lunr-languages/lunr.stemmer.support")(lunr); +// eslint-disable-next-line @typescript-eslint/no-var-requires +require("../../shared/lunrLanguageZh").lunrLanguageZh(lunr); +// eslint-disable-next-line @typescript-eslint/no-var-requires +require("lunr-languages/lunr.multi")(lunr); + +(lunr as any).fake = {}; + +jest.mock("./proxiedGenerated"); + +const zhDictionary = ["研究生", "研究", "生命", "科学", "生命科学"]; + +interface TestQuery { + tokens: string[]; + keyword: string; +} + +describe("smartQueries", () => { + beforeEach(() 
=> { + __setLanguage(["en", "zh"]); + __setRemoveDefaultStopWordFilter(false); + __setRemoveDefaultStemmer(false); + }); + + test.each<[string[], TestQuery[]]>([ + [ + ["hello"], + [ + { + tokens: ["hello"], + keyword: "+hello", + }, + { + tokens: ["hello"], + keyword: "+hello*", + }, + ], + ], + [ + ["hello", "world"], + [ + { + tokens: ["hello", "world"], + keyword: "+hello +world", + }, + { + tokens: ["hello", "world"], + keyword: "+hello +world*", + }, + ], + ], + [ + ["研究生命科学"], + [ + { + tokens: ["研究", "生命科学"], + keyword: "+研究 +生命科学", + }, + { + tokens: ["研究", "生命", "科学"], + keyword: "+研究 +生命 +科学", + }, + { + tokens: ["研究生", "科学"], + keyword: "+研究生 +科学", + }, + { + tokens: ["研究", "生命科学"], + keyword: "+研究 +生命科学*", + }, + { + tokens: ["研究", "生命", "科学"], + keyword: "+研究 +生命 +科学*", + }, + { + tokens: ["研究生", "科学"], + keyword: "+研究生 +科学*", + }, + { + tokens: ["研究", "生命"], + keyword: "+研究 +生命", + }, + { + tokens: ["研究", "科学"], + keyword: "+研究 +科学", + }, + { + tokens: ["生命", "科学"], + keyword: "+生命 +科学", + }, + { + tokens: ["研究", "科学"], + keyword: "+研究 +科学*", + }, + { + tokens: ["生命", "科学"], + keyword: "+生命 +科学*", + }, + ], + ], + [ + ["研究生"], + [ + { + tokens: ["研究生"], + keyword: "+研究生", + }, + { + tokens: ["研究", "生"], + keyword: "+研究 +生*", + }, + { + tokens: ["研究生"], + keyword: "+研究生*", + }, + ], + ], + /* [ + ["生命科学", "研究生"], + [ + { + tokens: ["生命科学", "研究生"], + keyword: "+生命科学 +研究生", + }, + { + tokens: ["生命科学", "研究", "生"], + keyword: "+生命科学 +研究 +生*", + }, + { + tokens: ["生命", "科学", "研究生"], + keyword: "+生命 +科学 +研究生", + }, + { + tokens: ["生命", "科学", "研究", "生"], + keyword: "+生命 +科学 +研究 +生*", + }, + { + tokens: ["生命科学", "研究生"], + keyword: "+生命科学 +研究生*", + }, + { + tokens: ["生命", "科学", "研究生"], + keyword: "+生命 +科学 +研究生*", + }, + ], + ], */ + [ + ["a", "hello", "world"], + [ + { + tokens: ["a", "hello", "world"], + keyword: "+a +hello +world", + }, + { + tokens: ["hello", "world"], + keyword: "+hello +world", + }, + { + tokens: ["a", "hello", "world"], + keyword: "+a 
+hello +world*", + }, + { + tokens: ["hello", "world"], + keyword: "+hello +world*", + }, + ], + ], + [ + ["hello", "a"], + [ + { + tokens: ["hello", "a"], + keyword: "+hello +a", + }, + { + tokens: ["hello"], + keyword: "+hello", + }, + { + tokens: ["hello", "a"], + keyword: "+hello +a*", + }, + ], + ], + [ + ["a"], + [ + { + tokens: ["a"], + keyword: "+a", + }, + { + tokens: ["a"], + keyword: "+a*", + }, + ], + ], + [ + ["hello", "world", "命"], + [ + { + tokens: ["hello", "world", "命"], + keyword: "+*hello* +*world* +*命*", + }, + ], + ], + [ + ["termos", "alfabetização"], + [ + { + tokens: ["termos", "alfabetização"], + keyword: "+termos +alfabetização", + }, + { + tokens: ["termos", "alfabetização"], + keyword: "+termos +alfabetização*", + }, + ], + ], + ])("smartQueries(%j, zhDictionary) should work", (tokens, queries) => { + expect(smartQueries(tokens, zhDictionary).map(transformQuery)).toEqual( + queries + ); + }); +}); + +describe("smartQueries with no stop words filter", () => { + beforeEach(() => { + __setLanguage(["en", "fake"]); + __setRemoveDefaultStopWordFilter(true); + __setRemoveDefaultStemmer(false); + }); + + test.each<[string[], TestQuery[]]>([ + [ + ["a", "hello"], + [ + { + tokens: ["a", "hello"], + keyword: "+a +hello", + }, + { + tokens: ["a", "hello"], + keyword: "+a +hello*", + }, + ], + ], + ])("smartQueries(%j, zhDictionary) should work", (tokens, queries) => { + expect(smartQueries(tokens, zhDictionary).map(transformQuery)).toEqual( + queries + ); + }); +}); + +function transformQuery(query: SmartQuery): TestQuery { + return { + tokens: query.tokens, + keyword: query.term + .map( + (item) => + `${item.presence === lunr.Query.presence.REQUIRED ? "+" : ""}${ + (item.wildcard & lunr.Query.wildcard.LEADING) === + lunr.Query.wildcard.LEADING + ? "*" + : "" + }${item.value}${ + (item.wildcard & lunr.Query.wildcard.TRAILING) === + lunr.Query.wildcard.TRAILING + ? 
"*" + : "" + }` + ) + .join(" "), + }; +} diff --git a/src/utils/smartQueries.ts b/src/utils/smartQueries.ts new file mode 100644 index 00000000000..29fe8d7bcd4 --- /dev/null +++ b/src/utils/smartQueries.ts @@ -0,0 +1,131 @@ +import lunr from "lunr"; +import { SmartQuery, SmartTerm } from "../../shared/interfaces"; +import { smartTerms } from "./smartTerms"; +import { language, removeDefaultStopWordFilter } from "./proxiedGenerated"; + +/** + * Get all possible queries for a list of tokens consists of words mixed English and Chinese, + * by a Chinese words dictionary. + * + * @param tokens - Tokens consists of English words or strings of consecutive Chinese words. + * @param zhDictionary - A Chinese words dictionary. + * + * @returns A smart query list. + */ +export function smartQueries( + tokens: string[], + zhDictionary: string[] +): SmartQuery[] { + const terms = smartTerms(tokens, zhDictionary); + + if (terms.length === 0) { + // There are no matched terms. + // All tokens are considered required and with wildcard. + return [ + { + tokens, + term: tokens.map((value) => ({ + value, + presence: lunr.Query.presence.REQUIRED, + wildcard: lunr.Query.wildcard.LEADING | lunr.Query.wildcard.TRAILING, + })), + }, + ]; + } + + // The last token of a term maybe incomplete while user is typing. + for (const term of terms) { + term[term.length - 1].maybeTyping = true; + } + + // Try to append terms without stop words, + // since they are removed in the index. 
+ const stopWordPipelines: lunr.PipelineFunction[] = []; + for (const lang of language) { + if (lang === "en") { + if (!removeDefaultStopWordFilter) { + stopWordPipelines.unshift(lunr.stopWordFilter); + } + } else { + const lunrLang = (lunr as any)[lang] as typeof lunr; + if (lunrLang.stopWordFilter) { + stopWordPipelines.unshift(lunrLang.stopWordFilter); + } + } + } + + let refinedTerms: SmartTerm[]; + + if (stopWordPipelines.length > 0) { + const pipe = (term: SmartTerm) => + stopWordPipelines.reduce( + (term, p) => + term.filter((item) => + (p as unknown as (str: string) => string | undefined)(item.value) + ), + term + ); + refinedTerms = []; + const newTerms: SmartTerm[] = []; + for (const term of terms) { + const filteredTerm = pipe(term); + refinedTerms.push(filteredTerm); + // Add extra terms only if some stop words are removed, + // and some non-stop-words exist too. + if (filteredTerm.length < term.length && filteredTerm.length > 0) { + newTerms.push(filteredTerm); + } + } + terms.push(...newTerms); + } else { + refinedTerms = terms.slice(); + } + + // Also try to add extra terms which miss one of the searched tokens, + // when the term contains 3 or more tokens, + // to improve the search precision. + const extraTerms: SmartTerm[] = []; + for (const term of refinedTerms) { + if (term.length > 2) { + for (let i = term.length - 1; i >= 0; i -= 1) { + extraTerms.push(term.slice(0, i).concat(term.slice(i + 1))); + } + } + } + + return getQueriesMaybeTyping(terms).concat(getQueriesMaybeTyping(extraTerms)); +} + +function getQueriesMaybeTyping(terms: SmartTerm[]): SmartQuery[] { + return termsToQueries(terms).concat( + termsToQueries( + // Ignore terms whose last token already has a trailing wildcard, + // or the last token is not `maybeTyping`. 
+ terms.filter((term) => { + const token = term[term.length - 1]; + return !token.trailing && token.maybeTyping; + }), + true + ) + ); +} + +function termsToQueries( + terms: SmartTerm[], + maybeTyping?: boolean +): SmartQuery[] { + return terms.map((term) => ({ + tokens: term.map((item) => item.value), + term: term.map((item) => ({ + value: item.value, + presence: lunr.Query.presence.REQUIRED, + // The last token of a term maybe incomplete while user is typing. + // So append more queries with trailing wildcard added. + wildcard: ( + maybeTyping ? item.trailing || item.maybeTyping : item.trailing + ) + ? lunr.Query.wildcard.TRAILING + : lunr.Query.wildcard.NONE, + })), + })); +} diff --git a/src/utils/smartTerms.spec.ts b/src/utils/smartTerms.spec.ts new file mode 100644 index 00000000000..1eadc548eb4 --- /dev/null +++ b/src/utils/smartTerms.spec.ts @@ -0,0 +1,35 @@ +import { smartTerms } from "./smartTerms"; + +const zhDictionary = ["研究生", "研究", "生命", "科学", "生命科学"]; + +describe("smartTerms", () => { + test.each<[string[], string[][]]>([ + [["hello"], [["hello"]]], + [["hello", "world"], [["hello", "world"]]], + [ + ["hello", "world", "研究生命科学"], + [ + ["hello", "world", "研究", "生命科学"], + ["hello", "world", "研究", "生命", "科学"], + ["hello", "world", "研究生", "科学"], + ], + ], + [ + ["生命科学", "研究生"], + [ + ["生命科学", "研究生"], + ["生命科学", "研究", "生*"], + ["生命", "科学", "研究生"], + ["生命", "科学", "研究", "生*"], + ], + ], + [["hello", "world", "命"], []], + [["alfabetização"], [["alfabetização"]]], + ])("smartTerms(%j, zhDictionary) should work", (tokens, terms) => { + expect( + smartTerms(tokens, zhDictionary).map((term) => + term.map((item) => `${item.value}${item.trailing ? 
"*" : ""}`) + ) + ).toEqual(terms); + }); +}); diff --git a/src/utils/smartTerms.ts b/src/utils/smartTerms.ts new file mode 100644 index 00000000000..9787cafa192 --- /dev/null +++ b/src/utils/smartTerms.ts @@ -0,0 +1,42 @@ +import { SmartTerm } from "../../shared/interfaces"; +import { cutZhWords } from "./cutZhWords"; + +/** + * Get all possible terms for a list of tokens consists of words mixed in Chinese and non-Chinese, + * by a Chinese words dictionary. + * + * @param tokens - Tokens consists of English words or strings of consecutive Chinese words. + * @param zhDictionary - A Chinese words dictionary. + * + * @returns A smart term list. + */ +export function smartTerms( + tokens: string[], + zhDictionary: string[] +): SmartTerm[] { + const terms: SmartTerm[] = []; + + function cutMixedWords(subTokens: string[], carry: SmartTerm): void { + if (subTokens.length === 0) { + terms.push(carry); + return; + } + const token = subTokens[0]; + if (/\p{Unified_Ideograph}/u.test(token)) { + const terms = cutZhWords(token, zhDictionary); + for (const term of terms) { + const nextCarry = carry.concat(...term); + cutMixedWords(subTokens.slice(1), nextCarry); + } + } else { + const nextCarry = carry.concat({ + value: token, + }); + cutMixedWords(subTokens.slice(1), nextCarry); + } + } + + cutMixedWords(tokens, []); + + return terms; +} diff --git a/src/utils/sortSearchResults.spec.ts b/src/utils/sortSearchResults.spec.ts new file mode 100644 index 00000000000..952ce2962fd --- /dev/null +++ b/src/utils/sortSearchResults.spec.ts @@ -0,0 +1,73 @@ +import { InitialSearchResult } from "../../shared/interfaces"; +import { sortSearchResults } from "./sortSearchResults"; + +describe("sortSearchResults", () => { + test("should work", () => { + const pageTitles = [ + { + document: { + i: 100, + }, + type: 0, + page: undefined, + }, + { + document: { + i: 200, + }, + type: 0, + page: undefined, + }, + ] as InitialSearchResult[]; + const results = [ + { + document: { + i: 1, + }, + 
type: 2, + page: {}, + }, + { + document: { + i: 2, + }, + type: 1, + page: {}, + }, + pageTitles[0], + { + document: { + i: 3, + }, + type: 1, + page: {}, + }, + { + document: { + i: 201, + }, + type: 1, + page: pageTitles[1].document, + }, + { + document: { + i: 202, + }, + type: 2, + page: pageTitles[1].document, + }, + pageTitles[1], + { + document: { + i: 101, + }, + type: 2, + page: pageTitles[0].document, + }, + ] as InitialSearchResult[]; + sortSearchResults(results); + expect(results.map((item) => item.document.i)).toEqual([ + 1, 2, 100, 101, 3, 200, 201, 202, + ]); + }); +}); diff --git a/src/utils/sortSearchResults.ts b/src/utils/sortSearchResults.ts new file mode 100644 index 00000000000..41509cc8f1d --- /dev/null +++ b/src/utils/sortSearchResults.ts @@ -0,0 +1,40 @@ +import { InitialSearchResult, SearchResult } from "../../shared/interfaces"; + +export function sortSearchResults(results: InitialSearchResult[]): void { + results.forEach((item, index) => { + item.index = index; + }); + + // Put search results of headings and contents just after + // their belonged page's title, if existed. + (results as SearchResult[]).sort((a, b) => { + let aPageIndex = + a.type > 0 && a.page + ? results.findIndex((item) => item.document === a.page) + : a.index; + + let bPageIndex = + b.type > 0 && b.page + ? results.findIndex((item) => item.document === b.page) + : b.index; + + if (aPageIndex === -1) { + aPageIndex = a.index; + } + + if (bPageIndex === -1) { + bPageIndex = b.index; + } + + if (aPageIndex === bPageIndex) { + if (a.type === 0) { + return -1; + } + if (b.type === 0) { + return 1; + } + return a.index - b.index; + } + return aPageIndex - bPageIndex; + }); +} diff --git a/src/utils/tokenize.spec.ts b/src/utils/tokenize.spec.ts new file mode 100644 index 00000000000..62dcb370d7a --- /dev/null +++ b/src/utils/tokenize.spec.ts @@ -0,0 +1,40 @@ +import lunr from "lunr"; + +// The `require`s below are required for testing `ja`. 
/**
 * Break a sentence into lower-cased tokens, treating a run of consecutive
 * Chinese characters as one token when `zh` is among the languages.
 *
 * @param text - Text to be tokenized.
 * @param language - Languages used.
 *
 * @returns Tokens; an empty array when nothing matches.
 */
export function tokenize(text: string, language: string[]): string[] {
  const [soleLanguage] = language;
  const hasOwnTokenizer =
    language.length === 1 && ["ja", "jp", "th"].includes(soleLanguage);

  // Some languages ship their own tokenizer on the lunr object; defer to it.
  if (hasOwnTokenizer) {
    const languageSupport = (lunr as any)[soleLanguage] as typeof lunr;
    const rawTokens = languageSupport.tokenizer(text);
    return rawTokens.map((token) => token.toString());
  }

  // With `zh`, split into runs of word characters and runs of unified
  // ideographs; this currently only works well for Latin letters and
  // Chinese. Otherwise, split on hyphens and whitespace.
  const wordPattern = language.includes("zh")
    ? /\w+|\p{Unified_Ideograph}+/gu
    : /[^-\s]+/g;

  const found = text.toLowerCase().match(wordPattern);
  return found === null ? [] : found;
}