sw/CppunitTest_sw_tox.mk | 50 +++++++ sw/Library_sw.mk | 1 sw/Module_sw.mk | 4 sw/inc/ToxWhitespaceStripper.hxx | 52 +++++++ sw/qa/cppunit/tox/test_ToxWhitespaceStripper.cxx | 150 +++++++++++++++++++++++ sw/source/core/tox/ToxTextGenerator.cxx | 36 ----- sw/source/core/tox/ToxWhitespaceStripper.cxx | 62 +++++++++ 7 files changed, 324 insertions(+), 31 deletions(-)
New commits: commit e5345f62bf525b6258736f1ce11a61b5e638e0ff Author: Tobias Lippert <[email protected]> Date: Sun Jun 1 14:15:35 2014 +0200 Unittest the whitespace stripping logic for tox text generation The separate class allows this functionality to be unit-tested in isolation. Change-Id: I1e5eddfb455ca85a662ea38c03302883decc5d58 Reviewed-on: https://gerrit.libreoffice.org/9608 Tested-by: Caolán McNamara <[email protected]> Reviewed-by: Caolán McNamara <[email protected]> diff --git a/sw/CppunitTest_sw_tox.mk b/sw/CppunitTest_sw_tox.mk new file mode 100644 index 0000000..f372442 --- /dev/null +++ b/sw/CppunitTest_sw_tox.mk @@ -0,0 +1,50 @@ +# -*- Mode: makefile-gmake; tab-width: 4; indent-tabs-mode: t -*- +# +# This file is part of the LibreOffice project. +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. +# + +# This file contains the unit test definition for class in the sources/core/tox subfolder +# The macro which defines the main method is contained in test_ToxWhitespaceStripper.cxx + +$(eval $(call gb_CppunitTest_CppunitTest,sw_tox_test)) + +$(eval $(call gb_CppunitTest_add_exception_objects,sw_tox_test, \ + sw/qa/cppunit/tox/test_ToxWhitespaceStripper \ +)) + +$(eval $(call gb_CppunitTest_use_libraries,sw_tox_test, \ + comphelper \ + cppu \ + cppuhelper \ + sal \ + svt \ + sw \ + test \ + unotest \ + vcl \ + tl \ + utl \ + $(gb_UWINAPI) \ +)) + +$(eval $(call gb_CppunitTest_use_externals,sw_tox_test, \ + boost_headers \ + libxml2 \ +)) + +$(eval $(call gb_CppunitTest_use_api,sw_tox_test,\ + offapi \ + udkapi \ +)) + +$(eval $(call gb_CppunitTest_set_include,sw_tox_test,\ + -I$(SRCDIR)/sw/inc \ + -I$(SRCDIR)/sw/source/core/inc \ + $$(INCLUDE) \ +)) + +# vim: set noet sw=4 ts=4: diff --git a/sw/Library_sw.mk b/sw/Library_sw.mk index c9804a0..a7500ad 100644 --- a/sw/Library_sw.mk +++ b/sw/Library_sw.mk @@ -389,6 
+389,7 @@ $(eval $(call gb_Library_add_exception_objects,sw,\ sw/source/core/tox/toxhlp \ sw/source/core/tox/txmsrt \ sw/source/core/tox/ToxTextGenerator \ + sw/source/core/tox/ToxWhitespaceStripper \ sw/source/core/txtnode/SwGrammarContact \ sw/source/core/txtnode/atrfld \ sw/source/core/txtnode/atrflyin \ diff --git a/sw/Module_sw.mk b/sw/Module_sw.mk index 53a19bb..7e71e2c 100644 --- a/sw/Module_sw.mk +++ b/sw/Module_sw.mk @@ -46,6 +46,10 @@ $(eval $(call gb_Module_add_targets,sw,\ endif +$(eval $(call gb_Module_add_check_targets,sw,\ + CppunitTest_sw_tox \ +)) + $(eval $(call gb_Module_add_slowcheck_targets,sw,\ CppunitTest_sw_uwriter \ CppunitTest_sw_htmlexport \ diff --git a/sw/inc/ToxWhitespaceStripper.hxx b/sw/inc/ToxWhitespaceStripper.hxx new file mode 100644 index 0000000..96b254b --- /dev/null +++ b/sw/inc/ToxWhitespaceStripper.hxx @@ -0,0 +1,52 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + */ + + +#ifndef TOXWHITESPACESTRIPPER_HXX_ +#define TOXWHITESPACESTRIPPER_HXX_ + +#include "rtl/ustring.hxx" + +#include <vector> + +namespace sw { + +/** This class helps to remove unwanted whitespaces from a string to use in a Tox. + * + * The new string will have + * - Newlines changed to spaces + * - Consecutive spaces merged + * - Trailing spaces removed + * + * It also allows to find the corresponding new positions of the input string in the stripped string. + * This is important for attributes which might have to be imported, e.g., it helps to answer the question: + * The 3rd character of the input string is subscript, which character in the output string is that? + * + * @note One leading whitespace is preserved. 
+ */ +class SAL_DLLPUBLIC ToxWhitespaceStripper { +public: + ToxWhitespaceStripper(const OUString&); + + sal_Int32 + GetPositionInStrippedString(sal_Int32 pos) const; + + OUString + GetStrippedString() const; + +private: + OUString mStripped; + std::vector<sal_Int32> mNewPositions; +}; + +} // end namespace sw + + + +#endif /* TOXWHITESPACESTRIPPER_HXX_ */ diff --git a/sw/qa/cppunit/tox/test_ToxWhitespaceStripper.cxx b/sw/qa/cppunit/tox/test_ToxWhitespaceStripper.cxx new file mode 100644 index 0000000..fdbd47c --- /dev/null +++ b/sw/qa/cppunit/tox/test_ToxWhitespaceStripper.cxx @@ -0,0 +1,150 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + */ + +#include <stdexcept> + +#include <sal/types.h> + +#include <rtl/ustring.hxx> + +#include <ToxWhitespaceStripper.hxx> + +#include <cppunit/TestAssert.h> +#include <cppunit/TestFixture.h> +#include <cppunit/extensions/HelperMacros.h> +#include <cppunit/plugin/TestPlugIn.h> + +using namespace sw; + +class ToxWhitespaceStripperTest : public CppUnit::TestFixture +{ + void + MappingCharactersToVariousStrippedStringsWorks(); + + void + StrippingWhitespacesFromVariousStringsWorks(); + + void + PositionAfterStringCanBeRequested(); + + CPPUNIT_TEST_SUITE(ToxWhitespaceStripperTest); + CPPUNIT_TEST(MappingCharactersToVariousStrippedStringsWorks); + CPPUNIT_TEST(StrippingWhitespacesFromVariousStringsWorks); + CPPUNIT_TEST(PositionAfterStringCanBeRequested); + + CPPUNIT_TEST_SUITE_END(); + +}; + +void +ToxWhitespaceStripperTest::MappingCharactersToVariousStrippedStringsWorks() +{ + { + OUString test("abc\n"); + ToxWhitespaceStripper sut(test); + CPPUNIT_ASSERT_EQUAL(0, sut.GetPositionInStrippedString(0)); + CPPUNIT_ASSERT_EQUAL(1, 
sut.GetPositionInStrippedString(1)); + CPPUNIT_ASSERT_EQUAL(2, sut.GetPositionInStrippedString(2)); + CPPUNIT_ASSERT_EQUAL(3, sut.GetPositionInStrippedString(3)); + } + { + OUString test("abc\n\n"); + ToxWhitespaceStripper sut(test); + CPPUNIT_ASSERT_EQUAL(0, sut.GetPositionInStrippedString(0)); + CPPUNIT_ASSERT_EQUAL(1, sut.GetPositionInStrippedString(1)); + CPPUNIT_ASSERT_EQUAL(2, sut.GetPositionInStrippedString(2)); + CPPUNIT_ASSERT_EQUAL(3, sut.GetPositionInStrippedString(3)); + CPPUNIT_ASSERT_EQUAL(3, sut.GetPositionInStrippedString(4)); + } + { + OUString test("abc\ndef"); + ToxWhitespaceStripper sut(test); + CPPUNIT_ASSERT_EQUAL(0, sut.GetPositionInStrippedString(0)); + CPPUNIT_ASSERT_EQUAL(1, sut.GetPositionInStrippedString(1)); + CPPUNIT_ASSERT_EQUAL(2, sut.GetPositionInStrippedString(2)); + CPPUNIT_ASSERT_EQUAL(3, sut.GetPositionInStrippedString(3)); + CPPUNIT_ASSERT_EQUAL(4, sut.GetPositionInStrippedString(4)); + CPPUNIT_ASSERT_EQUAL(5, sut.GetPositionInStrippedString(5)); + CPPUNIT_ASSERT_EQUAL(6, sut.GetPositionInStrippedString(6)); + } + { + // 012345 6789 + OUString test(" abc \ndef"); + // 01234567 + // " abc def" + ToxWhitespaceStripper sut(test); + CPPUNIT_ASSERT_EQUAL(0, sut.GetPositionInStrippedString(0)); + CPPUNIT_ASSERT_EQUAL(0, sut.GetPositionInStrippedString(1)); + CPPUNIT_ASSERT_EQUAL(1, sut.GetPositionInStrippedString(2)); + CPPUNIT_ASSERT_EQUAL(2, sut.GetPositionInStrippedString(3)); + CPPUNIT_ASSERT_EQUAL(3, sut.GetPositionInStrippedString(4)); + CPPUNIT_ASSERT_EQUAL(4, sut.GetPositionInStrippedString(5)); + CPPUNIT_ASSERT_EQUAL(4, sut.GetPositionInStrippedString(6)); + CPPUNIT_ASSERT_EQUAL(5, sut.GetPositionInStrippedString(7)); + CPPUNIT_ASSERT_EQUAL(6, sut.GetPositionInStrippedString(8)); + CPPUNIT_ASSERT_EQUAL(7, sut.GetPositionInStrippedString(9)); + } +} + +void +ToxWhitespaceStripperTest::StrippingWhitespacesFromVariousStringsWorks() +{ + { + OUString test("abc\n"); + OUString expected("abc"); + ToxWhitespaceStripper sut(test); + 
CPPUNIT_ASSERT_EQUAL(expected, sut.GetStrippedString()); + } + { + OUString test("abc\n\n"); + OUString expected("abc"); + ToxWhitespaceStripper sut(test); + CPPUNIT_ASSERT_EQUAL(expected, sut.GetStrippedString()); + } + { + OUString test("abc\ndef"); + OUString expected("abc def"); + ToxWhitespaceStripper sut(test); + CPPUNIT_ASSERT_EQUAL(expected, sut.GetStrippedString()); + } + { + OUString test(" abc \ndef"); + OUString expected(" abc def"); + ToxWhitespaceStripper sut(test); + CPPUNIT_ASSERT_EQUAL(expected, sut.GetStrippedString()); + } + { + OUString test(" "); + OUString expected(""); + ToxWhitespaceStripper sut(test); + CPPUNIT_ASSERT_EQUAL(expected, sut.GetStrippedString()); + } + { + OUString test("d "); + OUString expected("d"); + ToxWhitespaceStripper sut(test); + CPPUNIT_ASSERT_EQUAL(expected, sut.GetStrippedString()); + } +} + +void +ToxWhitespaceStripperTest::PositionAfterStringCanBeRequested() +{ + OUString test("abc"); + ToxWhitespaceStripper sut(test); + sal_Int32 expected = test.getLength(); + CPPUNIT_ASSERT_EQUAL(expected, sut.GetPositionInStrippedString(test.getLength())); +} + +// Put the test suite in the registry +CPPUNIT_TEST_SUITE_REGISTRATION(ToxWhitespaceStripperTest); + +CPPUNIT_PLUGIN_IMPLEMENT(); + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/sw/source/core/tox/ToxTextGenerator.cxx b/sw/source/core/tox/ToxTextGenerator.cxx index 54d7b6a..8554c88 100644 --- a/sw/source/core/tox/ToxTextGenerator.cxx +++ b/sw/source/core/tox/ToxTextGenerator.cxx @@ -33,6 +33,7 @@ #include "fmtpdsc.hxx" #include "DocumentSettingManager.hxx" #include "SwStyleNameMapper.hxx" +#include "ToxWhitespaceStripper.hxx" #include "editeng/tstpitem.hxx" #include "editeng/lrspitem.hxx" @@ -49,32 +50,6 @@ struct LinkStruct nEndTextPos(nEnd) {} }; -/// Generate String with newlines changed to spaces, consecutive spaces changed -/// to a single space, and trailing space removed. 
-OUString lcl_RemoveLineBreaks(const OUString &rRet) -{ - if (rRet.isEmpty()) - return rRet; - sal_Int32 nOffset = 0; - OUStringBuffer sRet(rRet.replace('\n', ' ')); - for (sal_Int32 i = 1; i < sRet.getLength(); ++i) - { - if ( sRet[i - 1] == ' ' && sRet[i] == ' ' ) - { - nOffset += 1; - } - else - { - sRet[i - nOffset] = sRet[i]; - } - } - if (sRet[sRet.getLength() - 1] == ' ') - { - nOffset += 1; - } - return sRet.copy(0, sRet.getLength() - nOffset).toString(); -} - /// Generate String according to the Form and remove the /// special characters 0-31 and 255. static OUString lcl_GetNumString( const SwTOXSortTabBase& rBase, bool bUsePrefix, sal_uInt8 nLevel ) @@ -144,8 +119,8 @@ void ToxTextGenerator::GenerateText(SwDoc* pDoc, const std::vector<SwTOXSortTabB case TOKEN_ENTRY_TEXT: { SwIndex aIdx( pTOXNd, std::min(pTOXNd->GetTxt().getLength(),rTxt.getLength()) ); - rBase.FillText( *pTOXNd, aIdx ); - rTxt = lcl_RemoveLineBreaks(rTxt); + ToxWhitespaceStripper stripper(rBase.GetTxt().sText); + pTOXNd->InsertText(stripper.GetStrippedString(), aIdx); } break; @@ -153,10 +128,9 @@ void ToxTextGenerator::GenerateText(SwDoc* pDoc, const std::vector<SwTOXSortTabB { // for TOC numbering rTxt += lcl_GetNumString( rBase, true, MAXLEVEL ); - SwIndex aIdx( pTOXNd, rTxt.getLength() ); - rBase.FillText( *pTOXNd, aIdx ); - rTxt = lcl_RemoveLineBreaks(rTxt); + ToxWhitespaceStripper stripper(rBase.GetTxt().sText); + pTOXNd->InsertText(stripper.GetStrippedString(), aIdx); } break; diff --git a/sw/source/core/tox/ToxWhitespaceStripper.cxx b/sw/source/core/tox/ToxWhitespaceStripper.cxx new file mode 100644 index 0000000..b01c92c --- /dev/null +++ b/sw/source/core/tox/ToxWhitespaceStripper.cxx @@ -0,0 +1,62 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. 
If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + */ + +#include "ToxWhitespaceStripper.hxx" + +#include "rtl/ustrbuf.hxx" +#include <boost/numeric/conversion/cast.hpp> + +namespace sw { + +ToxWhitespaceStripper::ToxWhitespaceStripper(const OUString& inputString) +{ + OUStringBuffer buffer; + + bool lastCharacterWasWhitespace = false; + for (sal_Int32 pos = 0; pos < inputString.getLength(); ++pos) { + sal_Unicode cur = inputString[pos]; + + if (cur == ' ' || cur == '\n') { + // merge consecutive whitespaces (and translate them to spaces) + if (!lastCharacterWasWhitespace) { + buffer.append(' '); + } + lastCharacterWasWhitespace = true; + } + else { + buffer.append(cur); + lastCharacterWasWhitespace = false; + } + mNewPositions.push_back(buffer.getLength()-1); + } + // Add one position if the position after the stripped string is requested, e.g., for attributes which + // extend beyond the string. + mNewPositions.push_back(buffer.getLength()); + // strip the last whitespace (if there was one) + if (lastCharacterWasWhitespace) { + buffer.truncate(buffer.getLength() - 1); + } + mStripped = buffer.getStr(); +} + + +sal_Int32 +ToxWhitespaceStripper::GetPositionInStrippedString(sal_Int32 pos) const +{ + size_t upos = boost::numeric_cast<size_t>(pos); + return mNewPositions.at(upos); +} + +OUString +ToxWhitespaceStripper::GetStrippedString() const +{ + return mStripped; +} + +}
_______________________________________________ Libreoffice-commits mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/libreoffice-commits
