(openoffice) 09/14: Use only the C API for ICU in i18npool, so it can use newer ICU versions.

damjan Sun, 04 May 2025 10:32:31 -0700

This is an automated email from the ASF dual-hosted git repository.

damjan pushed a commit to branch icu-c-api
in repository https://gitbox.apache.org/repos/asf/openoffice.git


commit ad25e7e6cc7202dfaa6e89605fba804f12bcacff
Author: Damjan Jovanovic <[email protected]>
AuthorDate: Thu May 1 11:02:23 2025 +0200

    Use only the C API for ICU in i18npool, so it can use newer ICU versions.
    
    Patch by: me
---
 main/i18npool/Library_i18npool.mk                  |   4 +-
 main/i18npool/inc/breakiterator_unicode.hxx        |   5 +-
 main/i18npool/inc/calendar_gregorian.hxx           |   2 +-
 main/i18npool/inc/collator_unicode.hxx             |   3 +-
 main/i18npool/inc/warnings_guard_unicode_brkiter.h |   4 +
 .../i18npool/inc/warnings_guard_unicode_calendar.h |   5 +-
 main/i18npool/inc/warnings_guard_unicode_tblcoll.h |   4 +
 .../source/breakiterator/breakiterator_unicode.cxx | 142 +++++++++++++--------
 main/i18npool/source/breakiterator/data/line.txt   |   3 +-
 main/i18npool/source/breakiterator/data/sent.txt   |  87 +++----------
 .../source/calendar/calendar_gregorian.cxx         | 104 ++++++++-------
 main/i18npool/source/collator/collator_unicode.cxx |  46 ++++---
 main/i18npool/source/collator/gencoll_rule.cxx     |  39 ++++--
 main/i18npool/source/search/textsearch.cxx         |  96 ++++++++------
 main/i18npool/source/search/textsearch.hxx         |   8 +-
 15 files changed, 308 insertions(+), 244 deletions(-)

diff --git a/main/i18npool/Library_i18npool.mk b/main/i18npool/Library_i18npool.mk
index cf030058cc..9c74e5e727 100644
--- a/main/i18npool/Library_i18npool.mk
+++ b/main/i18npool/Library_i18npool.mk
@@ -144,7 +144,9 @@ $(WORKDIR)/CustomTarget/i18npool/source/collator/lrl_include.hxx : $(wildcard $(
 
 
 # fdo#31271 ")" reclassified in more recent ICU/Unicode Standards
--include $(OUTDIR)/inc/icuversion.mk
+ifneq ($(SYSTEM_ICU),YES)
+include $(OUTDIR)/inc/icuversion.mk
+endif
 ICU_RECLASSIFIED_BRACKET := $(shell [ ${ICU_MAJOR} -ge 5 -o \( ${ICU_MAJOR} -eq 4 -a ${ICU_MINOR} -ge 4 \) ] && echo YES)
 
 
diff --git a/main/i18npool/inc/breakiterator_unicode.hxx 
b/main/i18npool/inc/breakiterator_unicode.hxx
index a6ff9b987b..c3c1fa46b2 100644
--- a/main/i18npool/inc/breakiterator_unicode.hxx
+++ b/main/i18npool/inc/breakiterator_unicode.hxx
@@ -26,6 +26,7 @@
 #include <breakiteratorImpl.hxx>
 
 #include "warnings_guard_unicode_brkiter.h"
+#include "unicode/ustring.h"
 
 namespace com { namespace sun { namespace star { namespace i18n {
 
@@ -80,8 +81,8 @@ protected:
        Boundary result; // for word break iterator
 
     struct BI_Data {
-        UnicodeString aICUText;
-        icu::BreakIterator *aBreakIterator;
+        ::rtl::OUString aICUText;
+        UBreakIterator *aBreakIterator;
         
         BI_Data() : aICUText(), aBreakIterator(NULL) {}
     } character, word, sentence, line, *icuBI; 
diff --git a/main/i18npool/inc/calendar_gregorian.hxx 
b/main/i18npool/inc/calendar_gregorian.hxx
index 90571e7cf3..f0935116a3 100644
--- a/main/i18npool/inc/calendar_gregorian.hxx
+++ b/main/i18npool/inc/calendar_gregorian.hxx
@@ -86,7 +86,7 @@ public:
 
 protected:
     Era *eraArray;
-    icu::Calendar *body;
+    UCalendar *body;
     NativeNumberSupplier aNatNum;
     const sal_Char* cCalendar;
     com::sun::star::lang::Locale aLocale;
diff --git a/main/i18npool/inc/collator_unicode.hxx 
b/main/i18npool/inc/collator_unicode.hxx
index d45b449d78..c480560980 100644
--- a/main/i18npool/inc/collator_unicode.hxx
+++ b/main/i18npool/inc/collator_unicode.hxx
@@ -72,7 +72,8 @@ public:
 protected:
        const sal_Char *implementationName;
 private:
-       RuleBasedCollator *uca_base, *collator;
+    UCollator *uca_base;
+    UCollator *collator;
     oslModule hModule;
 };
 
diff --git a/main/i18npool/inc/warnings_guard_unicode_brkiter.h 
b/main/i18npool/inc/warnings_guard_unicode_brkiter.h
index a681c8b90c..77d1e0adaf 100644
--- a/main/i18npool/inc/warnings_guard_unicode_brkiter.h
+++ b/main/i18npool/inc/warnings_guard_unicode_brkiter.h
@@ -24,6 +24,9 @@
 #ifndef INCLUDED_WARNINGS_GUARD_UNICODE_BRKITER_H
 #define INCLUDED_WARNINGS_GUARD_UNICODE_BRKITER_H
 
+#define U_SHOW_CPLUSPLUS_API 0
+#define U_SHOW_CPLUSPLUS_HEADER_API 0
+
 // Because the GCC system_header mechanism doesn't work in .c/.cxx compilation
 // units and more important affects the rest of the current include file, the
 // warnings guard is separated into this header file on its own.
@@ -37,6 +40,7 @@
 #pragma GCC system_header
 #endif
 #include <unicode/brkiter.h>
+#include <unicode/ubrk.h>
 #ifdef _MSC_VER
 #pragma warning(pop)
 #endif
diff --git a/main/i18npool/inc/warnings_guard_unicode_calendar.h 
b/main/i18npool/inc/warnings_guard_unicode_calendar.h
index 41a5ee26a8..d2db70542c 100644
--- a/main/i18npool/inc/warnings_guard_unicode_calendar.h
+++ b/main/i18npool/inc/warnings_guard_unicode_calendar.h
@@ -24,6 +24,9 @@
 #ifndef INCLUDED_WARNINGS_GUARD_UNICODE_CALENDAR_H
 #define INCLUDED_WARNINGS_GUARD_UNICODE_CALENDAR_H
 
+#define U_SHOW_CPLUSPLUS_API 0
+#define U_SHOW_CPLUSPLUS_HEADER_API 0
+
 // Because the GCC system_header mechanism doesn't work in .c/.cxx compilation
 // units and more important affects the rest of the current include file, the
 // warnings guard is separated into this header file on its own.
@@ -36,7 +39,7 @@
 #elif defined __GNUC__
 #pragma GCC system_header
 #endif
-#include <unicode/calendar.h>
+#include <unicode/ucal.h>
 #ifdef _MSC_VER
 #pragma warning(pop)
 #endif
diff --git a/main/i18npool/inc/warnings_guard_unicode_tblcoll.h 
b/main/i18npool/inc/warnings_guard_unicode_tblcoll.h
index de609c8443..bc131fbac5 100644
--- a/main/i18npool/inc/warnings_guard_unicode_tblcoll.h
+++ b/main/i18npool/inc/warnings_guard_unicode_tblcoll.h
@@ -24,6 +24,9 @@
 #ifndef INCLUDED_WARNINGS_GUARD_UNICODE_TBLCOLL_H
 #define INCLUDED_WARNINGS_GUARD_UNICODE_TBLCOLL_H
 
+#define U_SHOW_CPLUSPLUS_API 0
+#define U_SHOW_CPLUSPLUS_HEADER_API 0
+
 // Because the GCC system_header mechanism doesn't work in .c/.cxx compilation
 // units and more important affects the rest of the current include file, the
 // warnings guard is separated into this header file on its own.
@@ -37,6 +40,7 @@
 #pragma GCC system_header
 #endif
 #include <unicode/tblcoll.h>
+#include <unicode/ucol.h>
 #ifdef _MSC_VER
 #pragma warning(pop)
 #endif
diff --git a/main/i18npool/source/breakiterator/breakiterator_unicode.cxx 
b/main/i18npool/source/breakiterator/breakiterator_unicode.cxx
index c69e871d6a..fdd28c056b 100644
--- a/main/i18npool/source/breakiterator/breakiterator_unicode.cxx
+++ b/main/i18npool/source/breakiterator/breakiterator_unicode.cxx
@@ -31,6 +31,7 @@
 #include <unicode/udata.h>
 #include <rtl/strbuf.hxx>
 #include <rtl/ustring.hxx>
+#include <rtl/ustrbuf.hxx>
 
 U_CDECL_BEGIN
 extern const char OpenOffice_dat[];
@@ -67,15 +68,19 @@ BreakIterator_Unicode::BreakIterator_Unicode() :
 BreakIterator_Unicode::~BreakIterator_Unicode()
 {
         if (icuBI && icuBI->aBreakIterator) {
-            delete icuBI->aBreakIterator;
+            ubrk_close(icuBI->aBreakIterator);
             icuBI->aBreakIterator=NULL;
         }
-        if (character.aBreakIterator) delete character.aBreakIterator;
-        if (word.aBreakIterator) delete word.aBreakIterator;
-        if (sentence.aBreakIterator) delete sentence.aBreakIterator;
-        if (line.aBreakIterator) delete line.aBreakIterator;
+        if (character.aBreakIterator) ubrk_close(character.aBreakIterator);
+        if (word.aBreakIterator) ubrk_close(word.aBreakIterator);
+        if (sentence.aBreakIterator) ubrk_close(sentence.aBreakIterator);
+        if (line.aBreakIterator) ubrk_close(line.aBreakIterator);
 }
 
+// Hard to support in C:
+// 1. setBreakType() cannot be reached from C.
+// 2. UDataMemory's udata_getLength() is a private API, yet we need the length for ubrk_openBinaryRules().
+#if 0
 /*
        Wrapper class to provide public access to the RuleBasedBreakIterator's
        setbreakType method.
@@ -91,6 +96,28 @@ class OOoRuleBasedBreakIterator : public 
RuleBasedBreakIterator {
 
 };
 
+static UBreakIterator* open_udata_BreakIterator(UDataMemory *udm, UErrorCode 
*status)
+{
+    *status = U_ZERO_ERROR;
+    UDataInfo info;
+    info.size = sizeof(info);
+    udata_getInfo(udm, &info);
+    if (  !(info.isBigEndian == U_IS_BIG_ENDIAN &&
+            info.charsetFamily == U_CHARSET_FAMILY &&
+            info.dataFormat[0] == 0x42 &&  // dataFormat="Brk "
+            info.dataFormat[1] == 0x72 &&
+            info.dataFormat[2] == 0x6b &&
+            info.dataFormat[3] == 0x20)
+    ) {
+        *status = U_INVALID_FORMAT_ERROR;
+        return NULL;
+    }
+
+    uint8_t *memory = (uint8_t*) udata_getMemory(udm);
+    return ubrk_openBinaryRules(memory, udata_getLength(udm), NULL, 0, status);
+}
+#endif
+
 // loading ICU breakiterator on demand.
 void SAL_CALL BreakIterator_Unicode::loadICUBreakIterator(const 
com::sun::star::lang::Locale& rLocale,
         sal_Int16 rBreakType, sal_Int16 rWordType, const sal_Char *rule, const 
OUString& rText) throw(uno::RuntimeException)
@@ -114,9 +141,11 @@ void SAL_CALL 
BreakIterator_Unicode::loadICUBreakIterator(const com::sun::star::
             rLocale.Language != aLocale.Language || rLocale.Country != 
aLocale.Country ||
             rLocale.Variant != aLocale.Variant) {
         if (icuBI->aBreakIterator) {
-            delete icuBI->aBreakIterator;
+            ubrk_close(icuBI->aBreakIterator);
             icuBI->aBreakIterator=NULL;
         }
+// Hard to support in C:
+#if 0
         if (rule) {
             uno::Sequence< OUString > breakRules = 
LocaleData().getBreakIteratorRules(rLocale);
 
@@ -124,11 +153,11 @@ void SAL_CALL 
BreakIterator_Unicode::loadICUBreakIterator(const com::sun::star::
             udata_setAppData("OpenOffice", OpenOffice_dat, &status);
             if ( !U_SUCCESS(status) ) throw ERROR;
 
-            OOoRuleBasedBreakIterator *rbi = NULL;
+            UBreakIterator *rbi = NULL;
 
             if (breakRules.getLength() > breakType && 
breakRules[breakType].getLength() > 0) {
-                rbi = new OOoRuleBasedBreakIterator(udata_open("OpenOffice", 
"brk", 
-                    OUStringToOString(breakRules[breakType], 
RTL_TEXTENCODING_ASCII_US).getStr(), &status), status);
+                rbi = open_udata_BreakIterator(udata_open("OpenOffice", "brk", 
+                    OUStringToOString(breakRules[breakType], 
RTL_TEXTENCODING_ASCII_US).getStr(), &status), &status);
             } else {
                 status = U_ZERO_ERROR;
                 OStringBuffer aUDName(64);
@@ -137,12 +166,12 @@ void SAL_CALL 
BreakIterator_Unicode::loadICUBreakIterator(const com::sun::star::
                 aUDName.append( OUStringToOString(rLocale.Language, 
RTL_TEXTENCODING_ASCII_US));
                 UDataMemory* pUData = udata_open("OpenOffice", "brk", 
aUDName.getStr(), &status);
                 if( U_SUCCESS(status) )
-                    rbi = new OOoRuleBasedBreakIterator( pUData, status);
+                    rbi = open_udata_BreakIterator( pUData, &status);
                 if (!U_SUCCESS(status) ) {
                     status = U_ZERO_ERROR;
                     pUData = udata_open("OpenOffice", "brk", rule, &status);
                     if( U_SUCCESS(status) )
-                        rbi = new OOoRuleBasedBreakIterator( pUData, status);
+                        rbi = open_udata_BreakIterator( pUData, &status);
                     if (!U_SUCCESS(status) ) icuBI->aBreakIterator=NULL;
                 }
             }
@@ -156,26 +185,37 @@ void SAL_CALL 
BreakIterator_Unicode::loadICUBreakIterator(const com::sun::star::
                 icuBI->aBreakIterator = rbi;
             }
         }
+#endif
 
         if (!icuBI->aBreakIterator) {
-            icu::Locale icuLocale(
-                    OUStringToOString(rLocale.Language, 
RTL_TEXTENCODING_ASCII_US).getStr(),
-                    OUStringToOString(rLocale.Country, 
RTL_TEXTENCODING_ASCII_US).getStr(),
-                    OUStringToOString(rLocale.Variant, 
RTL_TEXTENCODING_ASCII_US).getStr());
+            ::rtl::OUStringBuffer locale;
+            if (!rLocale.Language.isEmpty()) {
+                locale.append(rLocale.Language);
+                locale.appendAscii("_");
+                if (!rLocale.Country.isEmpty()) {
+                    locale.append(rLocale.Country);
+                }
+                if (!rLocale.Variant.isEmpty()) {
+                    locale.appendAscii("_");
+                    locale.append(rLocale.Variant);
+                }
+            }
+            char icuLocale[1024];
+            uloc_getName(OUStringToOString(locale.makeStringAndClear(), 
RTL_TEXTENCODING_ASCII_US).getStr(), icuLocale, sizeof(icuLocale), &status);
 
             status = U_ZERO_ERROR;
             switch (rBreakType) {
                 case LOAD_CHARACTER_BREAKITERATOR:
-                    icuBI->aBreakIterator =  
icu::BreakIterator::createCharacterInstance(icuLocale, status);
+                    icuBI->aBreakIterator = ubrk_open(UBRK_CHARACTER, 
icuLocale, NULL, 0, &status);
                     break;
                 case LOAD_WORD_BREAKITERATOR:
-                    icuBI->aBreakIterator =  
icu::BreakIterator::createWordInstance(icuLocale, status);
+                    icuBI->aBreakIterator = ubrk_open(UBRK_WORD, icuLocale, 
NULL, 0, &status);
                     break;
                 case LOAD_SENTENCE_BREAKITERATOR:
-                    icuBI->aBreakIterator = 
icu::BreakIterator::createSentenceInstance(icuLocale, status);
+                    icuBI->aBreakIterator = ubrk_open(UBRK_SENTENCE, 
icuLocale, NULL, 0, &status);
                     break;
                 case LOAD_LINE_BREAKITERATOR:
-                    icuBI->aBreakIterator = 
icu::BreakIterator::createLineInstance(icuLocale, status);
+                    icuBI->aBreakIterator = ubrk_open(UBRK_LINE, icuLocale, 
NULL, 0, &status);
                     break;
             }
             if ( !U_SUCCESS(status) ) {
@@ -193,9 +233,9 @@ void SAL_CALL 
BreakIterator_Unicode::loadICUBreakIterator(const com::sun::star::
         }
     }
 
-    if (newBreak || 
icuBI->aICUText.compare(UnicodeString(reinterpret_cast<const UChar 
*>(rText.getStr()), rText.getLength()))) {      // UChar != sal_Unicode in MinGW
-        icuBI->aICUText=UnicodeString(reinterpret_cast<const UChar 
*>(rText.getStr()), rText.getLength());
-        icuBI->aBreakIterator->setText(icuBI->aICUText);
+    if (newBreak || icuBI->aICUText.compareTo(rText)) {        // UChar != sal_Unicode in MinGW
+        icuBI->aICUText = rText;
+        ubrk_setText(icuBI->aBreakIterator, (const 
UChar*)icuBI->aICUText.getStr(), icuBI->aICUText.getLength(), &status);
     }
 }
 
@@ -208,8 +248,8 @@ sal_Int32 SAL_CALL BreakIterator_Unicode::nextCharacters( 
const OUString& Text,
         if (nCharacterIteratorMode == CharacterIteratorMode::SKIPCELL ) { // 
for CELL mode
             loadICUBreakIterator(rLocale, LOAD_CHARACTER_BREAKITERATOR, 0, 
"char", Text);
             for (nDone = 0; nDone < nCount; nDone++) {
-                nStartPos = character.aBreakIterator->following(nStartPos);
-                if (nStartPos == BreakIterator::DONE)
+                nStartPos = ubrk_following(character.aBreakIterator, 
nStartPos);
+                if (nStartPos == UBRK_DONE)
                     return Text.getLength();
             }
         } else { // for CHARACTER mode
@@ -227,8 +267,8 @@ sal_Int32 SAL_CALL 
BreakIterator_Unicode::previousCharacters( const OUString& Te
         if (nCharacterIteratorMode == CharacterIteratorMode::SKIPCELL ) { // 
for CELL mode
             loadICUBreakIterator(rLocale, LOAD_CHARACTER_BREAKITERATOR, 0, 
"char", Text);
             for (nDone = 0; nDone < nCount; nDone++) {
-                nStartPos = character.aBreakIterator->preceding(nStartPos);
-                if (nStartPos == BreakIterator::DONE)
+                nStartPos = ubrk_preceding(character.aBreakIterator, 
nStartPos);
+                if (nStartPos == UBRK_DONE)
                     return 0;
             }
         } else { // for BS to delete one char and CHARACTER mode.
@@ -244,17 +284,17 @@ Boundary SAL_CALL BreakIterator_Unicode::nextWord( const 
OUString& Text, sal_Int
 {
         loadICUBreakIterator(rLocale, LOAD_WORD_BREAKITERATOR, rWordType, 
NULL, Text);
 
-        result.startPos = word.aBreakIterator->following(nStartPos);
-        if( result.startPos >= Text.getLength() || result.startPos == 
BreakIterator::DONE )
+        result.startPos = ubrk_following(word.aBreakIterator, nStartPos);
+        if( result.startPos >= Text.getLength() || result.startPos == 
UBRK_DONE )
             result.endPos = result.startPos;
         else {
             if ( (rWordType == WordType::ANYWORD_IGNOREWHITESPACES ||
                     rWordType == WordType::DICTIONARY_WORD ) &&
                         
u_isWhitespace(Text.iterateCodePoints(&result.startPos, 0)) )
-                result.startPos = 
word.aBreakIterator->following(result.startPos);
+                result.startPos = ubrk_following(word.aBreakIterator, 
result.startPos);
 
-            result.endPos = word.aBreakIterator->following(result.startPos);
-            if(result.endPos == BreakIterator::DONE)
+            result.endPos = ubrk_following(word.aBreakIterator, 
result.startPos);
+            if(result.endPos == UBRK_DONE)
                 result.endPos = result.startPos;
         }
         return result;
@@ -266,17 +306,17 @@ Boundary SAL_CALL 
BreakIterator_Unicode::previousWord(const OUString& Text, sal_
 {
         loadICUBreakIterator(rLocale, LOAD_WORD_BREAKITERATOR, rWordType, 
NULL, Text);
 
-        result.startPos = word.aBreakIterator->preceding(nStartPos);
-        if( result.startPos < 0 || result.startPos == BreakIterator::DONE)
+        result.startPos = ubrk_preceding(word.aBreakIterator, nStartPos);
+        if( result.startPos < 0 || result.startPos == UBRK_DONE)
             result.endPos = result.startPos;
         else {
             if ( (rWordType == WordType::ANYWORD_IGNOREWHITESPACES ||
                     rWordType == WordType::DICTIONARY_WORD) &&
                         
u_isWhitespace(Text.iterateCodePoints(&result.startPos, 0)) )
-                result.startPos = 
word.aBreakIterator->preceding(result.startPos);
+                result.startPos = ubrk_preceding(word.aBreakIterator, 
result.startPos);
 
-            result.endPos = word.aBreakIterator->following(result.startPos);
-            if(result.endPos == BreakIterator::DONE)
+            result.endPos = ubrk_following(word.aBreakIterator, 
result.startPos);
+            if(result.endPos == UBRK_DONE)
                 result.endPos = result.startPos;
         }
         return result;
@@ -289,27 +329,27 @@ Boundary SAL_CALL BreakIterator_Unicode::getWordBoundary( 
const OUString& Text,
         loadICUBreakIterator(rLocale, LOAD_WORD_BREAKITERATOR, rWordType, 
NULL, Text);
         sal_Int32 len = Text.getLength();
 
-        if(word.aBreakIterator->isBoundary(nPos)) {
+        if(ubrk_isBoundary(word.aBreakIterator, nPos)) {
             result.startPos = result.endPos = nPos;
             if((bDirection || nPos == 0) && nPos < len) //forward
-                result.endPos = word.aBreakIterator->following(nPos);
+                result.endPos = ubrk_following(word.aBreakIterator, nPos);
             else
-                result.startPos = word.aBreakIterator->preceding(nPos);
+                result.startPos = ubrk_preceding(word.aBreakIterator, nPos);
         } else {
             if(nPos <= 0) {
                 result.startPos = 0;
-                result.endPos = len ? 
word.aBreakIterator->following((sal_Int32)0) : 0;
+                result.endPos = len ? ubrk_following(word.aBreakIterator, 
(sal_Int32)0) : 0;
             } else if(nPos >= len) {
-                result.startPos = word.aBreakIterator->preceding(len);
+                result.startPos = ubrk_preceding(word.aBreakIterator, len);
                 result.endPos = len;
             } else {
-                result.startPos = word.aBreakIterator->preceding(nPos);
-                result.endPos = word.aBreakIterator->following(nPos);
+                result.startPos = ubrk_preceding(word.aBreakIterator, nPos);
+                result.endPos = ubrk_following(word.aBreakIterator, nPos);
             }
         }
-        if (result.startPos == BreakIterator::DONE)
+        if (result.startPos == UBRK_DONE)
             result.startPos = result.endPos;
-        else if (result.endPos == BreakIterator::DONE)
+        else if (result.endPos == UBRK_DONE)
             result.endPos = result.startPos;
 
         return result;
@@ -324,8 +364,8 @@ sal_Int32 SAL_CALL BreakIterator_Unicode::beginOfSentence( 
const OUString& Text,
         sal_Int32 len = Text.getLength();
         if (len > 0 && nStartPos == len)
             Text.iterateCodePoints(&nStartPos, -1); // issue #i27703# treat 
end position as part of last sentence
-        if (!sentence.aBreakIterator->isBoundary(nStartPos))
-            nStartPos = sentence.aBreakIterator->preceding(nStartPos);
+        if (!ubrk_isBoundary(sentence.aBreakIterator, nStartPos))
+            nStartPos = ubrk_preceding(sentence.aBreakIterator, nStartPos);
 
         // skip preceding space.
         sal_uInt32 ch = Text.iterateCodePoints(&nStartPos, 1);
@@ -343,7 +383,7 @@ sal_Int32 SAL_CALL BreakIterator_Unicode::endOfSentence( 
const OUString& Text, s
         sal_Int32 len = Text.getLength();
         if (len > 0 && nStartPos == len)
             Text.iterateCodePoints(&nStartPos, -1); // issue #i27703# treat 
end position as part of last sentence
-        nStartPos = sentence.aBreakIterator->following(nStartPos);
+        nStartPos = ubrk_following(sentence.aBreakIterator, nStartPos);
 
         sal_Int32 nPos=nStartPos;
         while (nPos > 0 && u_isWhitespace(Text.iterateCodePoints(&nPos, -1))) 
nStartPos=nPos;
@@ -369,7 +409,7 @@ LineBreakResults SAL_CALL 
BreakIterator_Unicode::getLineBreak(
 
         sal_Bool GlueSpace=sal_True;
         while (GlueSpace) {
-            if (line.aBreakIterator->preceding(nStartPos + 1) == nStartPos) { 
//Line boundary break
+            if (ubrk_preceding(line.aBreakIterator, nStartPos + 1) == 
nStartPos) { //Line boundary break
                 lbr.breakIndex = nStartPos;
                 lbr.breakType = BreakType::WORDBOUNDARY;
             } else if (hOptions.rHyphenator.is()) { //Hyphenation break
@@ -387,11 +427,11 @@ LineBreakResults SAL_CALL 
BreakIterator_Unicode::getLineBreak(
                         lbr.breakIndex = wBoundary.startPos; 
//aHyphenatedWord->getHyphenationPos();
                     lbr.breakType = BreakType::HYPHENATION;
                 } else {
-                    lbr.breakIndex = line.aBreakIterator->preceding(nStartPos);
+                    lbr.breakIndex = ubrk_preceding(line.aBreakIterator, 
nStartPos);
                     lbr.breakType = BreakType::WORDBOUNDARY;
                 }
             } else { //word boundary break
-                lbr.breakIndex = line.aBreakIterator->preceding(nStartPos);
+                lbr.breakIndex = ubrk_preceding(line.aBreakIterator, 
nStartPos);
                 lbr.breakType = BreakType::WORDBOUNDARY;
             }
 
diff --git a/main/i18npool/source/breakiterator/data/line.txt 
b/main/i18npool/source/breakiterator/data/line.txt
index ca5d7b803f..397da21883 100644
--- a/main/i18npool/source/breakiterator/data/line.txt
+++ b/main/i18npool/source/breakiterator/data/line.txt
@@ -14,7 +14,8 @@
 #
 
 !!chain;
-!!LBCMNoChain;
+# Breaks on recent ICU, see LibreOffice's bug report at 
https://bugs.documentfoundation.org/show_bug.cgi?id=158108
+#!!LBCMNoChain;
 
 
 !!lookAheadHardBreak;
diff --git a/main/i18npool/source/breakiterator/data/sent.txt 
b/main/i18npool/source/breakiterator/data/sent.txt
index 7fada89e62..eb1224ea5e 100644
--- a/main/i18npool/source/breakiterator/data/sent.txt
+++ b/main/i18npool/source/breakiterator/data/sent.txt
@@ -1,43 +1,40 @@
+# Copyright (C) 2016 and later: Unicode, Inc. and others.
+# License & terms of use: http://www.unicode.org/copyright.html
 #
-#   Copyright (C) 2002-2006, International Business Machines Corporation and 
others.
+#   Copyright (C) 2002-2015, International Business Machines Corporation and 
others.
 #       All Rights Reserved.
 #
 #   file:  sent.txt
 #
 #   ICU Sentence Break Rules
 #      See Unicode Standard Annex #29.
-#      These rules are based on SA 29 version 5.0.0
-#      Includes post 5.0 changes to treat Japanese half width voicing marks
-#        as Grapheme Extend.
+#      These rules are based on UAX #29 Revision 34 for Unicode Version 12.0
 #
 
-
-$VoiceMarks   = [\uff9e\uff9f];
-$Thai         = [:Script = Thai:];
+!!quoted_literals_only;
 
 #
 # Character categories as defined in TR 29
 #
+$CR        = [\p{Sentence_Break = CR}];
+$LF        = [\p{Sentence_Break = LF}];
+$Extend    = [\p{Sentence_Break = Extend}];
 $Sep       = [\p{Sentence_Break = Sep}];
 $Format    = [\p{Sentence_Break = Format}];
 $Sp        = [\p{Sentence_Break = Sp}];
 $Lower     = [\p{Sentence_Break = Lower}];
 $Upper     = [\p{Sentence_Break = Upper}];
-$OLetter   = [\p{Sentence_Break = OLetter}-$VoiceMarks];
+$OLetter   = [\p{Sentence_Break = OLetter}];
 $Numeric   = [\p{Sentence_Break = Numeric}];
 $ATerm     = [\p{Sentence_Break = ATerm}];
+$SContinue = [\p{Sentence_Break = SContinue}];
 $STerm     = [\p{Sentence_Break = STerm}];
 $Close     = [\p{Sentence_Break = Close}];
 
 #
 # Define extended forms of the character classes,
-#   incorporate grapheme cluster + format chars.
-#   Rules 4 and 5.  
-
-
-$CR         = \u000d;
-$LF         = \u000a;
-$Extend     = [[:Grapheme_Extend = TRUE:]$VoiceMarks];
+#   incorporate trailing Extend or Format chars.
+#   Rules 4 and 5.
 
 $SpEx       = $Sp      ($Extend | $Format)*;
 $LowerEx    = $Lower   ($Extend | $Format)*;
@@ -45,6 +42,7 @@ $UpperEx    = $Upper   ($Extend | $Format)*;
 $OLetterEx  = $OLetter ($Extend | $Format)*;
 $NumericEx  = $Numeric ($Extend | $Format)*;
 $ATermEx    = $ATerm   ($Extend | $Format)*;
+$SContinueEx= $SContinue ($Extend | $Format)*;
 $STermEx    = $STerm   ($Extend | $Format)*;
 $CloseEx    = $Close   ($Extend | $Format)*;
 
@@ -52,77 +50,34 @@ $CloseEx    = $Close   ($Extend | $Format)*;
 ## -------------------------------------------------
 
 !!chain;
-!!forward;
 
 # Rule 3 - break after separators.  Keep CR/LF together.
 #
 $CR $LF;
 
-$LettersEx = [$OLetter $Upper $Lower $Numeric $Close $STerm] ($Extend | 
$Format)*;
-$LettersEx* $Thai $LettersEx* ($ATermEx | $SpEx)*;
 
 # Rule 4 - Break after $Sep.
 # Rule 5 - Ignore $Format and $Extend
 #
-[^$Sep]? ($Extend | $Format)*;
+[^$Sep $CR $LF]? ($Extend | $Format)*;
 
 
 # Rule 6
 $ATermEx $NumericEx;
 
 # Rule 7
-$UpperEx $ATermEx $UpperEx;
+($UpperEx | $LowerEx) $ATermEx $UpperEx;
 
 #Rule 8
-#  Note:  follows errata for Unicode 5.0 boundary rules.
-$NotLettersEx = [^$OLetter $Upper $Lower $Sep $ATerm $STerm] ($Extend | 
$Format)*;
+$NotLettersEx = [^$OLetter $Upper $Lower $Sep $CR $LF $ATerm $STerm] ($Extend 
| $Format)*;
 $ATermEx $CloseEx* $SpEx* $NotLettersEx* $Lower;
 
 # Rule 8a
-($STermEx | $ATermEx) $CloseEx* $SpEx* ($STermEx | $ATermEx);
+($STermEx | $ATermEx) $CloseEx* $SpEx* ($SContinueEx | $STermEx | $ATermEx);
 
 #Rule 9, 10, 11
-($STermEx | $ATermEx) $CloseEx* $SpEx* $Sep?;
-
-#Rule 12
-[[^$STerm $ATerm $Close $Sp $Sep $Format $Extend $Thai]{bof}] ($Extend | 
$Format | $Close | $Sp)* [^$Thai];
-[[^$STerm $ATerm $Close $Sp $Sep $Format $Extend]{bof}] ($Extend | $Format | 
$Close | $Sp)* ([$Sep{eof}] | $CR $LF){100};
-
-## -------------------------------------------------
-
-!!reverse;
-
-$SpEx_R       = ($Extend | $Format)* $Sp;
-$ATermEx_R    = ($Extend | $Format)* $ATerm;
-$STermEx_R    = ($Extend | $Format)* $STerm;
-$CloseEx_R    = ($Extend | $Format)* $Close;
-
-#
-#  Reverse rules.
-#     For now, use the old style inexact reverse rules, which are easier
-#     to write, but less efficient.
-#     TODO:  exact reverse rules.  It appears that exact reverse rules
-#            may require improving support for look-ahead breaks in the
-#            builder.  Needs more investigation.
-#
-
-[{bof}] (.? | $LF $CR) [^$Sep]* [$Sep {eof}] ($SpEx_R* $CloseEx_R* ($STermEx_R 
| $ATermEx_R))*;
-#.*;
-
-# Explanation for this rule:
-#
-#    It needs to back over
-#        The $Sep at which we probably begin
-#        All of the non $Sep chars leading to the preceding $Sep
-#        The preceding $Sep, which will be the second one that the rule 
matches.
-#        Any immediately preceding STerm or ATerm sequences.  We need to see 
these
-#              to get the correct rule status when moving forwards again.
-#        
-# [{bof}]           inhibit rule chaining.  Without this, rule would loop on 
itself and match
-#                   the entire string.
-#
-# (.? | $LF $CR)    Match one $Sep instance.  Use .? rather than $Sep because 
position might be
-#                   at the beginning of the string at this point, and we don't 
want to fail.
-#                   Can only use {eof} once, and it is used later.
-#
+($STermEx | $ATermEx) $CloseEx* $SpEx* ($Sep | $CR | $LF)?;
 
+#Rule 998
+[[^$STerm $ATerm $Close $Sp $Sep $LF $CR $Format $Extend]{bof}] ($Extend | 
$Format | $Close | $Sp)* .;
+[[^$STerm $ATerm $Close $Sp $Sep $LF $CR $Format $Extend]{bof}] ($Extend | 
$Format | $Close | $Sp)* ([$Sep $LF $CR {eof}] | $CR $LF){100};
diff --git a/main/i18npool/source/calendar/calendar_gregorian.cxx 
b/main/i18npool/source/calendar/calendar_gregorian.cxx
index 9fd7349731..6eb1d50a9d 100644
--- a/main/i18npool/source/calendar/calendar_gregorian.cxx
+++ b/main/i18npool/source/calendar/calendar_gregorian.cxx
@@ -161,11 +161,10 @@ Calendar_gregorian::init(Era *_eraArray)
          * not all, language and country and variant), otherwise the current 
          * default locale would be used again and the calendar keyword 
ignored. 
          * */
-        icu::Locale aIcuLocale( "", NULL, NULL, "calendar=gregorian");
-
-        UErrorCode status;
-        body = icu::Calendar::createInstance( aIcuLocale, status = 
U_ZERO_ERROR);
-        if (!body || !U_SUCCESS(status)) throw ERROR;
+        UErrorCode status = U_ZERO_ERROR;
+        body = ucal_open(NULL, -1, "@calendar=gregorian", UCAL_GREGORIAN, 
&status);
+        if (!body || !U_SUCCESS(status))
+            throw ERROR;
 
 #if 0
         {
@@ -292,8 +291,8 @@ Calendar_gregorian::getUniqueID() throw(RuntimeException)
 void SAL_CALL
 Calendar_gregorian::setDateTime( double timeInDays ) throw(RuntimeException)
 {
-        UErrorCode status;
-        body->setTime(timeInDays * U_MILLIS_PER_DAY, status = U_ZERO_ERROR);
+        UErrorCode status = U_ZERO_ERROR;
+        ucal_setMillis(body, timeInDays * U_MILLIS_PER_DAY, &status);
         if ( !U_SUCCESS(status) ) throw ERROR;
         getValue();
 }
@@ -305,8 +304,8 @@ Calendar_gregorian::getDateTime() throw(RuntimeException)
             setValue();
             getValue();
         }
-        UErrorCode status;
-        double r = body->getTime(status = U_ZERO_ERROR);
+        UErrorCode status = U_ZERO_ERROR;
+        double r = ucal_getMillis(body, &status);
         if ( !U_SUCCESS(status) ) throw ERROR;
         return r / U_MILLIS_PER_DAY;
 }
@@ -432,7 +431,7 @@ void Calendar_gregorian::submitFields() 
throw(com::sun::star::uno::RuntimeExcept
             switch (fieldIndex)
             {
                 default:
-                    body->set(fieldNameConverter(fieldIndex), 
fieldSetValue[fieldIndex]);
+                    ucal_set(body, fieldNameConverter(fieldIndex), 
fieldSetValue[fieldIndex]);
                     break;
                 case CalendarFieldIndex::ZONE_OFFSET:
                 case CalendarFieldIndex::DST_OFFSET:
@@ -444,9 +443,9 @@ void Calendar_gregorian::submitFields() 
throw(com::sun::star::uno::RuntimeExcept
     }
     sal_Int32 nZoneOffset, nDSTOffset;
     if (getZoneOffset( nZoneOffset))
-        body->set( fieldNameConverter( CalendarFieldIndex::ZONE_OFFSET), 
nZoneOffset);
+        ucal_set(body, fieldNameConverter( CalendarFieldIndex::ZONE_OFFSET), 
nZoneOffset);
     if (getDSTOffset( nDSTOffset))
-        body->set( fieldNameConverter( CalendarFieldIndex::DST_OFFSET), 
nDSTOffset);
+        ucal_set(body, fieldNameConverter( CalendarFieldIndex::DST_OFFSET), 
nDSTOffset);
 }
 
 void Calendar_gregorian::submitValues( sal_Int32 nYear,
@@ -456,23 +455,23 @@ void Calendar_gregorian::submitValues( sal_Int32 nYear,
 {
     submitFields();
     if (nYear >= 0)
-        body->set( UCAL_YEAR, nYear);
+        ucal_set(body, UCAL_YEAR, nYear);
     if (nMonth >= 0)
-        body->set( UCAL_MONTH, nMonth);
+        ucal_set(body, UCAL_MONTH, nMonth);
     if (nDay >= 0)
-        body->set( UCAL_DATE, nDay);
+        ucal_set(body, UCAL_DATE, nDay);
     if (nHour >= 0)
-        body->set( UCAL_HOUR_OF_DAY, nHour);
+        ucal_set(body, UCAL_HOUR_OF_DAY, nHour);
     if (nMinute >= 0)
-        body->set( UCAL_MINUTE, nMinute);
+        ucal_set(body, UCAL_MINUTE, nMinute);
     if (nSecond >= 0)
-        body->set( UCAL_SECOND, nSecond);
+        ucal_set(body, UCAL_SECOND, nSecond);
     if (nMilliSecond >= 0)
-        body->set( UCAL_MILLISECOND, nMilliSecond);
+        ucal_set(body, UCAL_MILLISECOND, nMilliSecond);
     if (nZone != 0)
-        body->set( UCAL_ZONE_OFFSET, nZone);
+        ucal_set(body, UCAL_ZONE_OFFSET, nZone);
     if (nDST != 0)
-        body->set( UCAL_DST_OFFSET, nDST);
+        ucal_set(body, UCAL_DST_OFFSET, nDST);
 }
 
 static void lcl_setCombinedOffsetFieldValues( sal_Int32 nValue,
@@ -533,55 +532,64 @@ void Calendar_gregorian::setValue() 
throw(RuntimeException)
             UErrorCode status;
             if ( !(fieldSet & (1 << CalendarFieldIndex::YEAR)) )
             {
-                nYear = body->get( UCAL_YEAR, status = U_ZERO_ERROR);
+                status = U_ZERO_ERROR;
+                nYear = ucal_get(body, UCAL_YEAR, &status);
                 if ( !U_SUCCESS(status) )
                     nYear = -1;
             }
             if ( !(fieldSet & (1 << CalendarFieldIndex::MONTH)) )
             {
-                nMonth = body->get( UCAL_MONTH, status = U_ZERO_ERROR);
+                status = U_ZERO_ERROR;
+                nMonth = ucal_get(body, UCAL_MONTH, &status);
                 if ( !U_SUCCESS(status) )
                     nMonth = -1;
             }
             if ( !(fieldSet & (1 << CalendarFieldIndex::DAY_OF_MONTH)) )
             {
-                nDay = body->get( UCAL_DATE, status = U_ZERO_ERROR);
+                status = U_ZERO_ERROR;
+                nDay = ucal_get(body, UCAL_DATE, &status);
                 if ( !U_SUCCESS(status) )
                     nDay = -1;
             }
             if ( !(fieldSet & (1 << CalendarFieldIndex::HOUR)) )
             {
-                nHour = body->get( UCAL_HOUR_OF_DAY, status = U_ZERO_ERROR);
+                status = U_ZERO_ERROR;
+                nHour = ucal_get(body, UCAL_HOUR_OF_DAY, &status);
                 if ( !U_SUCCESS(status) )
                     nHour = -1;
             }
             if ( !(fieldSet & (1 << CalendarFieldIndex::MINUTE)) )
             {
-                nMinute = body->get( UCAL_MINUTE, status = U_ZERO_ERROR);
+                status = U_ZERO_ERROR;
+                nMinute = ucal_get(body, UCAL_MINUTE, &status);
                 if ( !U_SUCCESS(status) )
                     nMinute = -1;
             }
             if ( !(fieldSet & (1 << CalendarFieldIndex::SECOND)) )
             {
-                nSecond = body->get( UCAL_SECOND, status = U_ZERO_ERROR);
+                status = U_ZERO_ERROR;
+                nSecond = ucal_get(body, UCAL_SECOND, &status);
                 if ( !U_SUCCESS(status) )
                     nSecond = -1;
             }
             if ( !(fieldSet & (1 << CalendarFieldIndex::MILLISECOND)) )
             {
-                nMilliSecond = body->get( UCAL_MILLISECOND, status = 
U_ZERO_ERROR);
+                status = U_ZERO_ERROR;
+                nMilliSecond = ucal_get(body, UCAL_MILLISECOND, &status);
                 if ( !U_SUCCESS(status) )
                     nMilliSecond = -1;
             }
             if ( !(fieldSet & (1 << CalendarFieldIndex::ZONE_OFFSET)) )
             {
-                nZone0 = body->get( UCAL_ZONE_OFFSET, status = U_ZERO_ERROR);
+                status = U_ZERO_ERROR;
+                nZone0 = ucal_get(body, UCAL_ZONE_OFFSET, &status);
                 if ( !U_SUCCESS(status) )
                     nZone0 = 0;
             }
             if ( !(fieldSet & (1 << CalendarFieldIndex::DST_OFFSET)) )
             {
-                nDST0 = body->get( UCAL_DST_OFFSET, status = U_ZERO_ERROR);
+                status = U_ZERO_ERROR;
+                nDST0 = ucal_get(body, UCAL_DST_OFFSET, &status);
                 if ( !U_SUCCESS(status) )
                     nDST0 = 0;
             }
@@ -591,10 +599,12 @@ void Calendar_gregorian::setValue() 
throw(RuntimeException)
 
             DUMP_ICU_CAL_MSG(("%s\n","setValue() in bNeedZone||bNeedDST after 
submitValues()"));
             DUMP_I18N_CAL_MSG(("%s\n","setValue() in bNeedZone||bNeedDST after 
submitValues()"));
-            nZone1 = body->get( UCAL_ZONE_OFFSET, status = U_ZERO_ERROR);
+            status = U_ZERO_ERROR;
+            nZone1 = ucal_get(body, UCAL_ZONE_OFFSET, &status);
             if ( !U_SUCCESS(status) )
                 nZone1 = 0;
-            nDST1 = body->get( UCAL_DST_OFFSET, status = U_ZERO_ERROR);
+            status = U_ZERO_ERROR;
+            nDST1 = ucal_get(body, UCAL_DST_OFFSET, &status);
             if ( !U_SUCCESS(status) )
                 nDST1 = 0;
         }
@@ -607,11 +617,13 @@ void Calendar_gregorian::setValue() 
throw(RuntimeException)
 
         if ( bNeedZone || bNeedDST )
         {
-            UErrorCode status;
-            sal_Int32 nZone2 = body->get( UCAL_ZONE_OFFSET, status = 
U_ZERO_ERROR);
+            UErrorCode status = U_ZERO_ERROR;
+            sal_Int32 nZone2 = ucal_get(body, UCAL_ZONE_OFFSET, &status);
             if ( !U_SUCCESS(status) )
                 nZone2 = nZone1;
-            sal_Int32 nDST2 = body->get( UCAL_DST_OFFSET, status = 
U_ZERO_ERROR);
+            // ICU returns early when handed a failure status; clear it so a failed zone query cannot fail this one too.
+            status = U_ZERO_ERROR;
+            sal_Int32 nDST2 = ucal_get(body, UCAL_DST_OFFSET, &status);
             if ( !U_SUCCESS(status) )
                 nDST2 = nDST1;
             if ( nZone0 != nZone1 || nZone2 != nZone1 || nDST0 != nDST1 || 
nDST2 != nDST1 )
@@ -647,7 +657,8 @@ void Calendar_gregorian::setValue() throw(RuntimeException)
                 //      -3:30:52 (!) instead of -3:30
                 //      if first submission included time zone -3:30 that 
would be wrong.
                 bool bResubmit = false;
-                sal_Int32 nZone3 = body->get( UCAL_ZONE_OFFSET, status = 
U_ZERO_ERROR);
+                status = U_ZERO_ERROR;
+                sal_Int32 nZone3 = ucal_get(body, UCAL_ZONE_OFFSET, &status);
                 if ( !U_SUCCESS(status) )
                     nZone3 = nZone2;
                 if (nZone3 != nZone2)
@@ -668,7 +679,8 @@ void Calendar_gregorian::setValue() throw(RuntimeException)
                 // factored in all days by ICU and there seems to be some
                 // unknown behavior.
                 // TZ=Asia/Tehran 1999-03-22 exposes this, for example.
-                sal_Int32 nDST3 = body->get( UCAL_DST_OFFSET, status = 
U_ZERO_ERROR);
+                status = U_ZERO_ERROR;
+                sal_Int32 nDST3 = ucal_get(body, UCAL_DST_OFFSET, &status);
                 if ( !U_SUCCESS(status) )
                     nDST3 = nDST2;
                 if (nDST2 != nDST3 && !nDST3)
@@ -693,8 +705,8 @@ void Calendar_gregorian::setValue() throw(RuntimeException)
 #if erDUMP_ICU_CALENDAR || erDUMP_I18N_CALENDAR
         {
             // force icu::Calendar to recalculate
-            UErrorCode status;
-            sal_Int32 nTmp = body->get( UCAL_DATE, status = U_ZERO_ERROR);
+            UErrorCode status = U_ZERO_ERROR;
+            sal_Int32 nTmp = ucal_get(body, UCAL_DATE, &status);
             DUMP_ICU_CAL_MSG(("%s: %d\n","setValue() result day",nTmp));
             DUMP_I18N_CAL_MSG(("%s: %d\n","setValue() result day",nTmp));
         }
@@ -711,8 +723,8 @@ void Calendar_gregorian::getValue() throw(RuntimeException)
                 fieldIndex == CalendarFieldIndex::DST_OFFSET_SECOND_MILLIS)
             continue;   // not ICU fields
 
-        UErrorCode status; sal_Int32 value = body->get( fieldNameConverter(
-                    fieldIndex), status = U_ZERO_ERROR);
+        UErrorCode status = U_ZERO_ERROR;
+        sal_Int32 value = ucal_get(body, fieldNameConverter(fieldIndex), 
&status);
         if ( !U_SUCCESS(status) ) throw ERROR;
 
         // Convert millisecond to minute for ZONE and DST and set remainder in
@@ -762,8 +774,8 @@ void SAL_CALL
 Calendar_gregorian::addValue( sal_Int16 fieldIndex, sal_Int32 value ) 
throw(RuntimeException)
 {
         // since ZONE and DST could not be add, we don't need to convert value 
here
-        UErrorCode status;
-        body->add(fieldNameConverter(fieldIndex), value, status = 
U_ZERO_ERROR);
+        UErrorCode status = U_ZERO_ERROR;
+        ucal_add(body, fieldNameConverter(fieldIndex), value, &status);
         if ( !U_SUCCESS(status) ) throw ERROR;
         getValue();
 }
@@ -865,7 +877,7 @@ Calendar_gregorian::getFirstDayOfWeek() 
throw(RuntimeException)
     // Check for underflow just in case we're called "out of sync".
     return ::std::max( sal::static_int_cast<sal_Int16>(0),
             sal::static_int_cast<sal_Int16>( static_cast<sal_Int16>(
-                    body->getFirstDayOfWeek()) - 1));
+                    ucal_getAttribute(body, UCAL_FIRST_DAY_OF_WEEK)) - 1));
 }
 
 void SAL_CALL
@@ -873,14 +885,14 @@ Calendar_gregorian::setFirstDayOfWeek( sal_Int16 day )
 throw(RuntimeException)
 {
     // Weekdays::SUNDAY == 0, UCAL_SUNDAY == 1 => offset +1
-    body->setFirstDayOfWeek( static_cast<UCalendarDaysOfWeek>( day + 1));
+    ucal_setAttribute(body, UCAL_FIRST_DAY_OF_WEEK, 
static_cast<UCalendarDaysOfWeek>( day + 1));
 }
 
 void SAL_CALL
 Calendar_gregorian::setMinimumNumberOfDaysForFirstWeek( sal_Int16 days ) 
throw(RuntimeException)
 {
         aCalendar.MinimumNumberOfDaysForFirstWeek = days;
-        body->setMinimalDaysInFirstWeek( static_cast<uint8_t>( days));
+        ucal_setAttribute(body, UCAL_MINIMAL_DAYS_IN_FIRST_WEEK, 
static_cast<uint8_t>( days));
 }
 
 sal_Int16 SAL_CALL
diff --git a/main/i18npool/source/collator/collator_unicode.cxx 
b/main/i18npool/source/collator/collator_unicode.cxx
index f8b3c214bc..fe5ba42d32 100644
--- a/main/i18npool/source/collator/collator_unicode.cxx
+++ b/main/i18npool/source/collator/collator_unicode.cxx
@@ -49,8 +49,8 @@ Collator_Unicode::Collator_Unicode()
 
 Collator_Unicode::~Collator_Unicode()
 {
-       if (collator) delete collator;
-       if (uca_base) delete uca_base;
+       if (collator) ucol_close(collator);
+       if (uca_base) ucol_close(uca_base);
     if (hModule) osl_unloadModule(hModule);
 }
 
@@ -58,13 +58,13 @@ sal_Int32 SAL_CALL
 Collator_Unicode::compareSubstring( const OUString& str1, sal_Int32 off1, 
sal_Int32 len1,
        const OUString& str2, sal_Int32 off2, sal_Int32 len2) 
throw(RuntimeException)
 {
-    return collator->compare(reinterpret_cast<const UChar *>(str1.getStr()) + 
off1, len1, reinterpret_cast<const UChar *>(str2.getStr()) + off2, len2);       
 // UChar != sal_Unicode in MinGW
+    return ucol_strcoll(collator, reinterpret_cast<const UChar 
*>(str1.getStr()) + off1, len1, reinterpret_cast<const UChar *>(str2.getStr()) 
+ off2, len2);   // UChar != sal_Unicode in MinGW
 }
 
 sal_Int32 SAL_CALL
 Collator_Unicode::compareString( const OUString& str1, const OUString& str2) 
throw(RuntimeException)
 {
-    return collator->compare(reinterpret_cast<const UChar *>(str1.getStr()), 
reinterpret_cast<const UChar *>(str2.getStr()));  // UChar != sal_Unicode in 
MinGW
+    return ucol_strcoll(collator, reinterpret_cast<const UChar 
*>(str1.getStr()), -1, reinterpret_cast<const UChar *>(str2.getStr()), -1);     
// UChar != sal_Unicode in MinGW
 }
 
 extern "C" { static void SAL_CALL thisModule() {} }
@@ -75,9 +75,10 @@ Collator_Unicode::loadCollatorAlgorithm(const OUString& 
rAlgorithm, const lang::
 {
        if (!collator) {
         UErrorCode status = U_ZERO_ERROR;
+        UParseError parseError;
         OUString rule = LocaleData().getCollatorRuleByAlgorithm(rLocale, 
rAlgorithm);
         if (rule.getLength() > 0) {
-            collator = new RuleBasedCollator(reinterpret_cast<const UChar 
*>(rule.getStr()), status);  // UChar != sal_Unicode in MinGW
+            collator = ucol_openRules(reinterpret_cast<const UChar 
*>(rule.getStr()), -1, UCOL_DEFAULT, UCOL_DEFAULT_STRENGTH, &parseError, 
&status);  // UChar != sal_Unicode in MinGW
                        if (! U_SUCCESS(status)) throw RuntimeException();
                }
                if (!collator && 
OUString::createFromAscii(LOCAL_RULE_LANGS).indexOf(rLocale.Language) >= 0) {
@@ -113,9 +114,9 @@ Collator_Unicode::loadCollatorAlgorithm(const OUString& 
rAlgorithm, const lang::
                                }
                                if (func) {
                                        const sal_uInt8* ruleImage=func();
-                                       uca_base = new 
RuleBasedCollator(static_cast<UChar*>(NULL), status);
+                    uca_base = ucol_open("root", &status);
                                        if (! U_SUCCESS(status)) throw 
RuntimeException();
-                                       collator = new 
RuleBasedCollator(reinterpret_cast<const uint8_t*>(ruleImage), -1, uca_base, 
status);
+                                       collator = 
ucol_openBinary(reinterpret_cast<const uint8_t*>(ruleImage), -1, uca_base, 
&status);
                                        if (! U_SUCCESS(status)) throw 
RuntimeException();
                                }
                        }
@@ -127,22 +128,37 @@ Collator_Unicode::loadCollatorAlgorithm(const OUString& 
rAlgorithm, const lang::
                                case here. The icu::Locale constructor changes 
the algorithm name to
                                uppercase itself, so we don't have to bother 
with that.
                        */
-                       icu::Locale icuLocale(
-                                  OUStringToOString(rLocale.Language, 
RTL_TEXTENCODING_ASCII_US).getStr(),
-                                  OUStringToOString(rLocale.Country, 
RTL_TEXTENCODING_ASCII_US).getStr(),
-                                  OUStringToOString(rAlgorithm, 
RTL_TEXTENCODING_ASCII_US).getStr());
+            /** "The Locale constructor (in C++ and Java) taking multiple 
strings behaves exactly as if those strings
+                 were concatenated, with the ‘_’ separator inserted between 
two adjacent non-empty strings, and
+                 the result passed to uloc_getName." -- 
https://unicode-org.github.io/icu/userguide/locale/
+            */
+            OUStringBuffer locale;
+            if (!rLocale.Language.isEmpty()) {
+                locale.append(rLocale.Language);
+                locale.appendAscii("_");
+                if (!rLocale.Country.isEmpty()) {
+                    locale.append(rLocale.Country);
+                }
+                if (!rAlgorithm.isEmpty()) {
+                    locale.appendAscii("_");
+                    locale.append(rAlgorithm);
+                }
+            }
+            char icuLocale[1024];
+            uloc_getName(OUStringToOString(locale.makeStringAndClear(), 
RTL_TEXTENCODING_ASCII_US).getStr(), icuLocale, sizeof(icuLocale), &status);
+            if (! U_SUCCESS(status)) throw RuntimeException();
                        // load ICU collator
-                       collator = (RuleBasedCollator*) 
icu::Collator::createInstance(icuLocale, status);
+            collator = ucol_open(icuLocale, &status);
                        if (! U_SUCCESS(status)) throw RuntimeException();
                }
     }
 
        if (options & CollatorOptions::CollatorOptions_IGNORE_CASE_ACCENT)
-        collator->setStrength(Collator::PRIMARY);
+        ucol_setStrength(collator, UCOL_PRIMARY);
        else if (options & CollatorOptions::CollatorOptions_IGNORE_CASE)
-        collator->setStrength(Collator::SECONDARY);
+        ucol_setStrength(collator, UCOL_SECONDARY);
     else
-        collator->setStrength(Collator::TERTIARY);
+        ucol_setStrength(collator, UCOL_TERTIARY);
 
        return(0);
 }
diff --git a/main/i18npool/source/collator/gencoll_rule.cxx 
b/main/i18npool/source/collator/gencoll_rule.cxx
index 2295d79b35..66a1b7962a 100644
--- a/main/i18npool/source/collator/gencoll_rule.cxx
+++ b/main/i18npool/source/collator/gencoll_rule.cxx
@@ -30,8 +30,10 @@
 #include <sal/main.h>
 #include <sal/types.h>
 #include <rtl/ustrbuf.hxx>
-
+#define U_SHOW_CPLUSPLUS_API 0
+#define U_SHOW_CPLUSPLUS_HEADER_API 0
 #include "warnings_guard_unicode_tblcoll.h"
+#include "unicode/ucol.h"
 
 U_CAPI void U_EXPORT2 uprv_free(void *mem);
 
@@ -107,30 +109,41 @@ SAL_IMPLEMENT_MAIN_WITH_ARGS(argc, argv)
        fclose(fp);
 
     UErrorCode status = U_ZERO_ERROR;
-    //UParseError parseError;
-    //UCollator *coll = ucol_openRules(Obuf.getStr(), Obuf.getLength(), 
UCOL_OFF, 
-    //        UCOL_DEFAULT_STRENGTH, &parseError, &status);
+    UParseError parseError;
+    UCollator *coll = ucol_openRules(reinterpret_cast<const UChar 
*>(Obuf.getStr()), -1, UCOL_OFF, 
+            UCOL_DEFAULT_STRENGTH, &parseError, &status);
+
+    //RuleBasedCollator *coll = new RuleBasedCollator(reinterpret_cast<const 
UChar *>(Obuf.getStr()), status); // UChar != sal_Unicode in MinGW
 
-    RuleBasedCollator *coll = new RuleBasedCollator(reinterpret_cast<const 
UChar *>(Obuf.getStr()), status);   // UChar != sal_Unicode in MinGW
 
     if (U_SUCCESS(status)) {
 
         int32_t len = 0;
-        uint8_t *data = coll->cloneRuleData(len, status);
-
-        if (U_SUCCESS(status) && data != NULL)
-            data_write(argv[2], argv[3], data, len);
-        else {
+        status = U_ZERO_ERROR;
+        len = ucol_cloneBinary(coll, NULL, 0, &status);
+        if (len > 0 && status == U_BUFFER_OVERFLOW_ERROR) {
+            uint8_t* data = (uint8_t*)malloc(len);
+            if (data != NULL) {
+                status = U_ZERO_ERROR;
+                len = ucol_cloneBinary(coll, data, len, &status);
+                if (U_SUCCESS(status))
+                    data_write(argv[2], argv[3], data, len);
+                else {
+                    printf("Could not get rule data from collator\n");
+                }
+                free(data);
+            } else {
+                printf("Out of memory getting rule data from collator\n");
+            }
+        } else {
             printf("Could not get rule data from collator\n");
         }
-
-       if (data) uprv_free(data);
     } else {
         printf("\nRule parsering error\n");
     }
 
     if (coll)
-        delete coll;
+        ucol_close(coll); //delete coll;
 
     return U_SUCCESS(status) ? 0 : 1;
 }      // End of main
diff --git a/main/i18npool/source/search/textsearch.cxx 
b/main/i18npool/source/search/textsearch.cxx
index ec30c46d03..6a7f732c0f 100644
--- a/main/i18npool/source/search/textsearch.cxx
+++ b/main/i18npool/source/search/textsearch.cxx
@@ -73,7 +73,7 @@ TextSearch::TextSearch(const Reference < XMultiServiceFactory 
> & rxMSF)
         : xMSF( rxMSF )
         , pJumpTable( 0 )
         , pJumpTable2( 0 )
-        , pRegexMatcher( NULL )
+        , pRegex( NULL )
         , pWLD( 0 )
 {
     SearchOptions aOpt;
@@ -85,7 +85,7 @@ TextSearch::TextSearch(const Reference < XMultiServiceFactory 
> & rxMSF)
 
 TextSearch::~TextSearch()
 {
-    delete pRegexMatcher;
+    uregex_close(pRegex);
     delete pWLD;
     delete pJumpTable;
     delete pJumpTable2;
@@ -95,7 +95,10 @@ void TextSearch::setOptions( const SearchOptions& rOptions ) 
throw( RuntimeExcep
 {
     aSrchPara = rOptions;
 
-    delete pRegexMatcher, pRegexMatcher = NULL;
+    if (pRegex) {
+        uregex_close(pRegex);
+        pRegex = NULL;
+    }
     delete pWLD, pWLD = 0;
     delete pJumpTable, pJumpTable = 0;
     delete pJumpTable2, pJumpTable2 = 0;
@@ -739,27 +742,58 @@ void TextSearch::RESrchPrepare( const 
::com::sun::star::util::SearchOptions& rOp
                nIcuSearchFlags |= UREGEX_CASE_INSENSITIVE;
        UErrorCode nIcuErr = U_ZERO_ERROR;
        // assumption: transliteration didn't mangle regexp control chars
-       IcuUniString aIcuSearchPatStr( (const UChar*)rPatternStr.getStr(), 
rPatternStr.getLength());
+       // Working copy of the pattern. It always carries a trailing NUL, so it is never empty and can be handed to ICU with length -1.
+       const UChar* pPatternBegin = (const UChar*)rPatternStr.getStr();
+       ::std::vector<UChar> aIcuSearchPat( pPatternBegin, pPatternBegin + rPatternStr.getLength());
+       aIcuSearchPat.push_back( 0);
 #ifndef DISABLE_WORDBOUND_EMULATION
        // for conveniance specific syntax elements of the old regex engine are 
emulated
        // - by replacing \< with "word-break followed by a look-ahead 
word-char"
-       static const IcuUniString aChevronPatternB( "\\\\<", -1, 
IcuUniString::kInvariant);
-       static const IcuUniString aChevronReplaceB( "\\\\b(?=\\\\w)", -1, 
IcuUniString::kInvariant);
-       static RegexMatcher aChevronMatcherB( aChevronPatternB, 0, nIcuErr);
-       aChevronMatcherB.reset( aIcuSearchPatStr);
-       aIcuSearchPatStr = aChevronMatcherB.replaceAll( aChevronReplaceB, 
nIcuErr);
-       aChevronMatcherB.reset();
+       static const ::rtl::OUString aChevronPatternB = ::rtl::OUString::createFromAscii( "\\\\<" );
+       static const ::rtl::OUString aChevronReplaceB = ::rtl::OUString::createFromAscii( "\\\\b(?=\\\\w)" );
+       URegularExpression *aChevronMatcherB = uregex_open( (const UChar*)aChevronPatternB.getStr(), -1, 0, NULL, &nIcuErr);
+       uregex_setText( aChevronMatcherB, &aIcuSearchPat[0], -1, &nIcuErr);
+       ::std::vector<UChar> aReplacedB( aIcuSearchPat.size() * 2);
+       int32_t nReplacedLenB = uregex_replaceAll( aChevronMatcherB, (const UChar*)aChevronReplaceB.getStr(), -1, &aReplacedB[0], static_cast<int32_t>( aReplacedB.size()), &nIcuErr);
+       if( nIcuErr == U_BUFFER_OVERFLOW_ERROR) {
+               // ICU ignores calls made with a failure status, so clear the overflow before retrying with the preflighted size
+               nIcuErr = U_ZERO_ERROR;
+               aReplacedB.resize( nReplacedLenB + 1);
+               nReplacedLenB = uregex_replaceAll( aChevronMatcherB, (const UChar*)aChevronReplaceB.getStr(), -1, &aReplacedB[0], static_cast<int32_t>( aReplacedB.size()), &nIcuErr);
+       }
+       uregex_close( aChevronMatcherB);
+       if( U_SUCCESS( nIcuErr)) {
+               // trim to the real length, re-terminate, and drop benign warnings (e.g. U_STRING_NOT_TERMINATED_WARNING)
+               aReplacedB.resize( nReplacedLenB + 1);
+               aReplacedB[nReplacedLenB] = 0;
+               aIcuSearchPat.swap( aReplacedB);
+               nIcuErr = U_ZERO_ERROR;
+       }
        // - by replacing \> with "look-behind word-char followed by a 
word-break"
-       static const IcuUniString aChevronPatternE( "\\\\>", -1, 
IcuUniString::kInvariant);
-       static const IcuUniString aChevronReplaceE( "(?<=\\\\w)\\\\b", -1, 
IcuUniString::kInvariant);
-       static RegexMatcher aChevronMatcherE( aChevronPatternE, 0, nIcuErr);
-       aChevronMatcherE.reset( aIcuSearchPatStr);
-       aIcuSearchPatStr = aChevronMatcherE.replaceAll( aChevronReplaceE, 
nIcuErr);
-       aChevronMatcherE.reset();
+       static const ::rtl::OUString aChevronPatternE = ::rtl::OUString::createFromAscii( "\\\\>" );
+       static const ::rtl::OUString aChevronReplaceE = ::rtl::OUString::createFromAscii( "(?<=\\\\w)\\\\b" );
+       URegularExpression *aChevronMatcherE = uregex_open( (const UChar*)aChevronPatternE.getStr(), -1, 0, NULL, &nIcuErr);
+       uregex_setText( aChevronMatcherE, &aIcuSearchPat[0], -1, &nIcuErr);
+       ::std::vector<UChar> aReplacedE( aIcuSearchPat.size() * 2);
+       int32_t nReplacedLenE = uregex_replaceAll( aChevronMatcherE, (const UChar*)aChevronReplaceE.getStr(), -1, &aReplacedE[0], static_cast<int32_t>( aReplacedE.size()), &nIcuErr);
+       if( nIcuErr == U_BUFFER_OVERFLOW_ERROR) {
+               // same retry protocol as above: reset the status, grow to the reported length plus terminator
+               nIcuErr = U_ZERO_ERROR;
+               aReplacedE.resize( nReplacedLenE + 1);
+               nReplacedLenE = uregex_replaceAll( aChevronMatcherE, (const UChar*)aChevronReplaceE.getStr(), -1, &aReplacedE[0], static_cast<int32_t>( aReplacedE.size()), &nIcuErr);
+       }
+       uregex_close( aChevronMatcherE);
+       if( U_SUCCESS( nIcuErr)) {
+               // trim to the real length, re-terminate, and drop benign warnings before the final compile
+               aReplacedE.resize( nReplacedLenE + 1);
+               aReplacedE[nReplacedLenE] = 0;
+               aIcuSearchPat.swap( aReplacedE);
+               nIcuErr = U_ZERO_ERROR;
+       }
 #endif
-       pRegexMatcher = new RegexMatcher( aIcuSearchPatStr, nIcuSearchFlags, 
nIcuErr);
+       pRegex = uregex_open( &aIcuSearchPat[0], -1, nIcuSearchFlags, NULL, &nIcuErr);
        if( nIcuErr)
-               { delete pRegexMatcher; pRegexMatcher = NULL;}
+               { uregex_close(pRegex); pRegex = NULL;}
 }
 
 //---------------------------------------------------------------------------
@@ -770,7 +782,7 @@ SearchResult TextSearch::RESrchFrwrd( const OUString& 
searchStr,
 {
        SearchResult aRet;
        aRet.subRegExpressions = 0;
-       if( !pRegexMatcher)
+       if( !pRegex)
                return aRet;
 
        if( endPos > searchStr.getLength())
@@ -778,17 +790,17 @@ SearchResult TextSearch::RESrchFrwrd( const OUString& 
searchStr,
 
        // use the ICU RegexMatcher to find the matches
        UErrorCode nIcuErr = U_ZERO_ERROR;
-       const IcuUniString aSearchTargetStr( (const UChar*)searchStr.getStr(), 
endPos);
-       pRegexMatcher->reset( aSearchTargetStr);
+       const ::rtl::OUString aSearchTargetStr = searchStr.copy(0, endPos);
+       uregex_setText(pRegex, (const UChar*)aSearchTargetStr.getStr(), -1, 
&nIcuErr);
        // search until there is a valid match
        for(;;)
        {
-               if( !pRegexMatcher->find( startPos, nIcuErr))
+               if( !uregex_find(pRegex, startPos, &nIcuErr))
                        return aRet;
 
                // #i118887# ignore zero-length matches e.g. "a*" in "bc"
-               int nStartOfs = pRegexMatcher->start( nIcuErr);
-               int nEndOfs = pRegexMatcher->end( nIcuErr);
+               int nStartOfs = uregex_start(pRegex, 0, &nIcuErr);
+               int nEndOfs = uregex_end(pRegex, 0, &nIcuErr);
                if( nStartOfs < nEndOfs)
                        break;
                // try at next position if there was a zero-length match
@@ -797,15 +809,15 @@ SearchResult TextSearch::RESrchFrwrd( const OUString& 
searchStr,
        }
 
        // extract the result of the search
-       const int nGroupCount = pRegexMatcher->groupCount();
+       const int nGroupCount = uregex_groupCount(pRegex, &nIcuErr);
        aRet.subRegExpressions = nGroupCount + 1;
        aRet.startOffset.realloc( aRet.subRegExpressions);
        aRet.endOffset.realloc( aRet.subRegExpressions);
-       aRet.startOffset[0] = pRegexMatcher->start( nIcuErr);
-       aRet.endOffset[0]   = pRegexMatcher->end( nIcuErr);
+       aRet.startOffset[0] = uregex_start(pRegex, 0, &nIcuErr);
+       aRet.endOffset[0]   = uregex_end(pRegex, 0, &nIcuErr);
        for( int i = 1; i <= nGroupCount; ++i) {
-               aRet.startOffset[i] = pRegexMatcher->start( i, nIcuErr);
-               aRet.endOffset[i]   = pRegexMatcher->end( i, nIcuErr);
+               aRet.startOffset[i] = uregex_start(pRegex, i, &nIcuErr);
+               aRet.endOffset[i]   = uregex_end(pRegex, i, &nIcuErr);
        }
 
        return aRet;
@@ -818,7 +830,7 @@ SearchResult TextSearch::RESrchBkwrd( const OUString& 
searchStr,
        // NOTE: for backwards search callers provide startPos/endPos inverted!
        SearchResult aRet;
        aRet.subRegExpressions = 0;
-       if( !pRegexMatcher)
+       if( !pRegex)
                return aRet;
 
        if( startPos > searchStr.getLength())
@@ -828,37 +840,37 @@ SearchResult TextSearch::RESrchBkwrd( const OUString& 
searchStr,
        // TODO: use ICU's backward searching once it becomes available
        //       as its replacement using forward search is not as good as the 
real thing
        UErrorCode nIcuErr = U_ZERO_ERROR;
-       const IcuUniString aSearchTargetStr( (const UChar*)searchStr.getStr(), 
startPos);
-       pRegexMatcher->reset( aSearchTargetStr);
-       if( !pRegexMatcher->find( endPos, nIcuErr))
+       const ::rtl::OUString aSearchTargetStr = searchStr.copy(0, startPos);
+       uregex_setText(pRegex, (const UChar*)aSearchTargetStr.getStr(), -1, 
&nIcuErr);
+       if( !uregex_find(pRegex, endPos, &nIcuErr))
                return aRet;
 
        // find the last match
        int nLastPos = 0;
        int nFoundEnd = 0;
        do {
-               nLastPos = pRegexMatcher->start( nIcuErr);
-               nFoundEnd = pRegexMatcher->end( nIcuErr);
+               nLastPos = uregex_start(pRegex, 0, &nIcuErr);
+               nFoundEnd = uregex_end(pRegex, 0, &nIcuErr);
                if( nFoundEnd >= startPos)
                        break;
                if( nFoundEnd == nLastPos)
                        ++nFoundEnd;
-       } while( pRegexMatcher->find( nFoundEnd, nIcuErr));
+       } while( uregex_find(pRegex, nFoundEnd, &nIcuErr));
 
        // find last match again to get its details
-       pRegexMatcher->find( nLastPos, nIcuErr);
+       uregex_find(pRegex, nLastPos, &nIcuErr);
 
        // fill in the details of the last match
-       const int nGroupCount = pRegexMatcher->groupCount();
+       const int nGroupCount = uregex_groupCount(pRegex, &nIcuErr);
        aRet.subRegExpressions = nGroupCount + 1;
        aRet.startOffset.realloc( aRet.subRegExpressions);
        aRet.endOffset.realloc( aRet.subRegExpressions);
        // NOTE: existing users of backward search seem to expect 
startOfs/endOfs being inverted!
-       aRet.startOffset[0] = pRegexMatcher->end( nIcuErr);
-       aRet.endOffset[0]   = pRegexMatcher->start( nIcuErr);
+       aRet.startOffset[0] = uregex_end(pRegex, 0, &nIcuErr);
+       aRet.endOffset[0]   = uregex_start(pRegex, 0, &nIcuErr);
        for( int i = 1; i <= nGroupCount; ++i) {
-               aRet.startOffset[i] = pRegexMatcher->end( i, nIcuErr);
-               aRet.endOffset[i]   = pRegexMatcher->start( i, nIcuErr);
+               aRet.startOffset[i] = uregex_end(pRegex, i, &nIcuErr);
+               aRet.endOffset[i]   = uregex_start(pRegex, i, &nIcuErr);
        }
 
        return aRet;
diff --git a/main/i18npool/source/search/textsearch.hxx 
b/main/i18npool/source/search/textsearch.hxx
index 793066c9b6..f5a89e1816 100644
--- a/main/i18npool/source/search/textsearch.hxx
+++ b/main/i18npool/source/search/textsearch.hxx
@@ -34,9 +34,9 @@
 
 #include <map>
 
-#include <unicode/regex.h>
-using namespace U_ICU_NAMESPACE;
-typedef U_ICU_NAMESPACE::UnicodeString IcuUniString;
+#define U_SHOW_CPLUSPLUS_API 0
+#define U_SHOW_CPLUSPLUS_HEADER_API 0
+#include <unicode/uregex.h>
 
 class WLevDistance;
 typedef ::std::map< sal_Unicode, sal_Int32 > TextSearchJumpTable;
@@ -92,7 +92,7 @@ class TextSearch: public cppu::WeakImplHelper2
                                                        
throw(::com::sun::star::uno::RuntimeException);
 
        // Members and methods for the regular expression search
-       RegexMatcher* pRegexMatcher;
+       URegularExpression* pRegex;
        ::com::sun::star::util::SearchResult SAL_CALL
                RESrchFrwrd( const ::rtl::OUString& searchStr,
                                                                sal_Int32 
startPos, sal_Int32 endPos )

(openoffice) 09/14: Use only the C API for ICU in i18npool, so it can use newer ICU versions.

Reply via email to