include/svtools/htmlkywd.hxx       |    1 
 include/svtools/htmltokn.h         |    1 
 sc/CppunitTest_sc_filter_html.mk   |   82 +++++++++++++++++++++++++++++++++++++
 sc/Module_sc.mk                    |    1 
 sc/qa/filter/html/data/text.html   |    8 +++
 sc/qa/filter/html/html.cxx         |   62 +++++++++++++++++++++++++++
 sc/source/filter/html/htmlpars.cxx |   22 +++++++++
 svtools/source/svhtml/htmlkywd.cxx |    1 
 8 files changed, 178 insertions(+)

New commits:
commit 0156a797ff62172354f95cb104be2d4893c89875
Author:     Miklos Vajna <[email protected]>
AuthorDate: Thu Feb 1 13:10:26 2024 +0100
Commit:     Xisco Fauli <[email protected]>
CommitDate: Fri Feb 2 12:51:13 2024 +0100

    tdf#159483 sc HTML import: handle data-sheets-value attribute for the text case
    
    The A2 cell in the bugdoc has 01 in it, which was auto-converted to 1
    (float) value on import, even if it was text originally.
    
    This is hard to solve for HTML in general, which is not typed, but this
    input is coming from google sheets, which has an additional
    data-sheets-value attribute on <td> that does tell us about the type of
    the cell.
    
    Fix the problem by handling that attribute, and in case it explicitly
    says it's text, then apply the matching number format.
    
    Other types are not yet handled.
    
    Change-Id: I2986ef864e97d9c46d191aba25ca5740a1151a71
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/162869
    Reviewed-by: Miklos Vajna <[email protected]>
    Tested-by: Jenkins
    (cherry picked from commit e6e5660b726ecf3b0c39b277568568973b43c9f0)
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/162893
    Reviewed-by: Xisco Fauli <[email protected]>

diff --git a/include/svtools/htmlkywd.hxx b/include/svtools/htmlkywd.hxx
index 00c8260749bd..23e836ea7cea 100644
--- a/include/svtools/htmlkywd.hxx
+++ b/include/svtools/htmlkywd.hxx
@@ -445,6 +445,7 @@
 #define OOO_STRING_SVTOOLS_HTML_O_title "title"
 #define OOO_STRING_SVTOOLS_HTML_O_value "value"
 #define OOO_STRING_SVTOOLS_HTML_O_SDval "sdval"
+#define OOO_STRING_SVTOOLS_HTML_O_DSval "data-sheets-value"
 #define OOO_STRING_SVTOOLS_HTML_O_SDnum "sdnum"
 #define OOO_STRING_SVTOOLS_HTML_O_sdlibrary "sdlibrary"
 #define OOO_STRING_SVTOOLS_HTML_O_sdmodule "sdmodule"
diff --git a/include/svtools/htmltokn.h b/include/svtools/htmltokn.h
index 4a333ee2f6d9..27370e5cb869 100644
--- a/include/svtools/htmltokn.h
+++ b/include/svtools/htmltokn.h
@@ -344,6 +344,7 @@ STRING_START        = BOOL_END,
     TITLE,
     VALUE,
     SDVAL, // StarDiv NumberValue
+    DSVAL,
     SDNUM, // StarDiv NumberFormat
     SDLIBRARY,
     SDMODULE,
diff --git a/sc/CppunitTest_sc_filter_html.mk b/sc/CppunitTest_sc_filter_html.mk
new file mode 100644
index 000000000000..b78349d64703
--- /dev/null
+++ b/sc/CppunitTest_sc_filter_html.mk
@@ -0,0 +1,82 @@
+# -*- Mode: makefile-gmake; tab-width: 4; indent-tabs-mode: t -*-
+#
+# This file is part of the LibreOffice project.
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#
+
+$(eval $(call gb_CppunitTest_CppunitTest,sc_filter_html))
+
+$(eval $(call gb_CppunitTest_use_common_precompiled_header,sc_filter_html))
+
+$(eval $(call gb_CppunitTest_add_exception_objects,sc_filter_html, \
+    sc/qa/filter/html/html \
+))
+
+$(eval $(call gb_CppunitTest_use_externals,sc_filter_html, \
+       boost_headers \
+       libxml2 \
+))
+
+$(eval $(call gb_CppunitTest_use_libraries,sc_filter_html, \
+    basegfx \
+    comphelper \
+    cppu \
+    cppuhelper \
+    drawinglayer \
+    drawinglayercore \
+    editeng \
+    for \
+    forui \
+    i18nlangtag \
+    msfilter \
+    oox \
+    sal \
+    salhelper \
+    sax \
+    sc \
+    scqahelper \
+    sfx \
+    sot \
+    subsequenttest \
+    svl \
+    svt \
+    svx \
+    svxcore \
+       test \
+    tk \
+    tl \
+    ucbhelper \
+       unotest \
+    utl \
+    vcl \
+    xo \
+))
+
+$(eval $(call gb_CppunitTest_set_include,sc_filter_html,\
+    -I$(SRCDIR)/sc/source/ui/inc \
+    -I$(SRCDIR)/sc/inc \
+    $$(INCLUDE) \
+))
+
+$(eval $(call gb_CppunitTest_use_api,sc_filter_html,\
+       udkapi \
+       offapi \
+       oovbaapi \
+))
+
+$(eval $(call gb_CppunitTest_use_packages,sc_filter_html, \
+    filter_xhtml \
+    filter_xslt \
+))
+
+$(eval $(call gb_CppunitTest_use_ure,sc_filter_html))
+$(eval $(call gb_CppunitTest_use_vcl,sc_filter_html))
+
+$(eval $(call gb_CppunitTest_use_rdb,sc_filter_html,services))
+
+$(eval $(call gb_CppunitTest_use_configuration,sc_filter_html))
+
+# vim: set noet sw=4 ts=4:
diff --git a/sc/Module_sc.mk b/sc/Module_sc.mk
index 0c2178b7fea1..a01c9115f349 100644
--- a/sc/Module_sc.mk
+++ b/sc/Module_sc.mk
@@ -96,6 +96,7 @@ $(eval $(call gb_Module_add_slowcheck_targets,sc, \
        CppunitTest_sc_uicalc2 \
        CppunitTest_sc_vba_macro_test \
        CppunitTest_sc_a11y \
+       CppunitTest_sc_filter_html \
 ))
 
 ifneq ($(ENABLE_JUMBO_SHEETS),)
diff --git a/sc/qa/filter/html/data/text.html b/sc/qa/filter/html/data/text.html
new file mode 100644
index 000000000000..eadb34b5e1f8
--- /dev/null
+++ b/sc/qa/filter/html/data/text.html
@@ -0,0 +1,8 @@
+<table>
+  <tr>
+    <td data-sheets-value="{&quot;1&quot;:3,&quot;3&quot;:1}">1</td>
+  </tr>
+  <tr>
+    <td data-sheets-value="{&quot;1&quot;:2,&quot;2&quot;:&quot;01&quot;,&quot;6&quot;:1}">01</td>
+  </tr>
+</table>
diff --git a/sc/qa/filter/html/html.cxx b/sc/qa/filter/html/html.cxx
new file mode 100644
index 000000000000..76413c6455b4
--- /dev/null
+++ b/sc/qa/filter/html/html.cxx
@@ -0,0 +1,62 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#include <test/unoapixml_test.hxx>
+#include <test/htmltesttools.hxx>
+
+#include <com/sun/star/beans/XPropertySet.hpp>
+#include <com/sun/star/sheet/XSpreadsheetDocument.hpp>
+#include <com/sun/star/table/XCellRange.hpp>
+
+#include <comphelper/propertyvalue.hxx>
+
+using namespace com::sun::star;
+
+namespace
+{
+/// Covers sc/source/filter/html/ fixes.
+class Test : public UnoApiXmlTest, public HtmlTestTools
+{
+public:
+    Test()
+        : UnoApiXmlTest("/sc/qa/filter/html/data/")
+    {
+    }
+};
+
+CPPUNIT_TEST_FIXTURE(Test, testTdAsText)
+{
+    // Given a document with an A2 cell that contains "01" as text:
+    OUString aURL = createFileURL(u"text.html");
+
+    // When loading that document to Calc:
+    uno::Sequence<beans::PropertyValue> aParams = {
+        comphelper::makePropertyValue("DocumentService",
+                                      OUString("com.sun.star.sheet.SpreadsheetDocument")),
+    };
+    loadWithParams(aURL, aParams);
+
+    // Then make sure "01" is not auto-converted to 1, as a number:
+    uno::Reference<sheet::XSpreadsheetDocument> xDocument(mxComponent, uno::UNO_QUERY);
+    uno::Reference<container::XIndexAccess> xSheets(xDocument->getSheets(), uno::UNO_QUERY);
+    uno::Reference<table::XCellRange> xSheet(xSheets->getByIndex(0), uno::UNO_QUERY);
+    uno::Reference<beans::XPropertySet> xCell(xSheet->getCellByPosition(0, 1), uno::UNO_QUERY);
+    table::CellContentType eType{};
+    xCell->getPropertyValue("CellContentType") >>= eType;
+    // Without the accompanying fix in place, this test would have failed with:
+    // - Expected: 2 (TEXT)
+    // - Actual  : 1 (VALUE)
+    // i.e. data-sheets-value was ignored on import.
+    CPPUNIT_ASSERT_EQUAL(table::CellContentType_TEXT, eType);
+}
+}
+
+CPPUNIT_PLUGIN_IMPLEMENT();
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sc/source/filter/html/htmlpars.cxx b/sc/source/filter/html/htmlpars.cxx
index f5f890081592..666ab7427e1a 100644
--- a/sc/source/filter/html/htmlpars.cxx
+++ b/sc/source/filter/html/htmlpars.cxx
@@ -61,6 +61,7 @@
 #include <rangelst.hxx>
 
 #include <orcus/css_parser.hpp>
+#include <boost/property_tree/json_parser.hpp>
 
 #include <com/sun/star/document/XDocumentProperties.hpp>
 #include <com/sun/star/document/XDocumentPropertiesSupplier.hpp>
@@ -2116,6 +2117,27 @@ void ScHTMLTable::DataOn( const HtmlImportInfo& rInfo )
                     }
                 }
                 break;
+                case HtmlOptionId::DSVAL:
+                {
+                    // data-sheets-value from google sheets, value is a JSON.
+                    OString aEncodedOption = rOption.GetString().toUtf8();
+                    const char* pEncodedOption = aEncodedOption.getStr();
+                    std::stringstream aStream(pEncodedOption);
+                    boost::property_tree::ptree aTree;
+                    boost::property_tree::read_json(aStream, aTree);
+                    // The "1" key describes the original data type.
+                    auto it = aTree.find("1");
+                    if (it != aTree.not_found())
+                    {
+                        int nValueType = std::stoi(it->second.get_value<std::string>());
+                        // 2 is text.
+                        if (nValueType == 2)
+                        {
+                            nNumberFormat = NF_STANDARD_FORMAT_TEXT;
+                        }
+                    }
+                }
+                break;
                 default: break;
             }
         }
diff --git a/svtools/source/svhtml/htmlkywd.cxx b/svtools/source/svhtml/htmlkywd.cxx
index d1b0ea2ee03e..f5799434b72a 100644
--- a/svtools/source/svhtml/htmlkywd.cxx
+++ b/svtools/source/svhtml/htmlkywd.cxx
@@ -524,6 +524,7 @@ static HTML_OptionEntry aHTMLOptionTab[] = {
     {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_title),     HtmlOptionId::TITLE},
     {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_value),     HtmlOptionId::VALUE},
     {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_SDval),     HtmlOptionId::SDVAL}, // StarDiv NumberValue
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_DSval),     HtmlOptionId::DSVAL},
     {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_SDnum),     HtmlOptionId::SDNUM}, // StarDiv NumberFormat
     {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_sdlibrary), HtmlOptionId::SDLIBRARY},
     {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_sdmodule),  HtmlOptionId::SDMODULE},

Reply via email to