sc/CppunitTest_sc_filter_html.mk   |    1 
 sc/qa/filter/html/html.cxx         |   32 +++++++++++++++++++++-
 sc/source/filter/html/htmlpars.cxx |   52 ++++++++++++++++++++++++-------------
 sc/source/filter/inc/htmlpars.hxx  |    3 ++
 4 files changed, 69 insertions(+), 19 deletions(-)

New commits:
commit f5f6db55a5938f37a1c136be904ad7f10a3438ef
Author:     Miklos Vajna <[email protected]>
AuthorDate: Wed Feb 7 08:12:02 2024 +0100
Commit:     Xisco Fauli <[email protected]>
CommitDate: Thu Feb 8 21:35:51 2024 +0100

    tdf#159483 sc HTML paste: handle data-sheets-value here, too
    
    HTML import into Calc could already create text cells, but HTML paste
    with the same content remained auto-converted to numbers
    unconditionally.
    
    Turns out HTML paste goes via ScHTMLLayoutParser instead of the HTML
    import's ScHTMLQueryParser, so the data-sheets-value was ignored for
    paste.
    
    Fix the problem by extracting the old data-sheets-value handler from
    ScHTMLQueryParser to a separate ParseDataSheetsValue(), and use it also
    in ScHTMLLayoutParser.
    
    For the actual handling, still only text is handled, no other formats
    yet.
    
    Change-Id: I0b2bf4665af331d07624ed42e30a24e31bfca331
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/163068
    Reviewed-by: Miklos Vajna <[email protected]>
    Tested-by: Jenkins
    (cherry picked from commit 543e52481e764b8e0eea6cf0123a77cf492bdf8e)
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/163130
    Reviewed-by: Caolán McNamara <[email protected]>
    Reviewed-by: Xisco Fauli <[email protected]>

diff --git a/sc/CppunitTest_sc_filter_html.mk b/sc/CppunitTest_sc_filter_html.mk
index b78349d64703..f3dec22c0866 100644
--- a/sc/CppunitTest_sc_filter_html.mk
+++ b/sc/CppunitTest_sc_filter_html.mk
@@ -58,6 +58,7 @@ $(eval $(call gb_CppunitTest_use_libraries,sc_filter_html, \
 $(eval $(call gb_CppunitTest_set_include,sc_filter_html,\
     -I$(SRCDIR)/sc/source/ui/inc \
     -I$(SRCDIR)/sc/inc \
+    -I$(SRCDIR)/sc/qa/unit \
     $$(INCLUDE) \
 ))
 
diff --git a/sc/qa/filter/html/html.cxx b/sc/qa/filter/html/html.cxx
index 76413c6455b4..ba50361e927e 100644
--- a/sc/qa/filter/html/html.cxx
+++ b/sc/qa/filter/html/html.cxx
@@ -16,16 +16,19 @@
 
 #include <comphelper/propertyvalue.hxx>
 
+#include <helper/qahelper.hxx>
+#include <impex.hxx>
+
 using namespace com::sun::star;
 
 namespace
 {
 /// Covers sc/source/filter/html/ fixes.
-class Test : public UnoApiXmlTest, public HtmlTestTools
+class Test : public ScModelTestBase, public HtmlTestTools
 {
 public:
     Test()
-        : UnoApiXmlTest("/sc/qa/filter/html/data/")
+        : ScModelTestBase("/sc/qa/filter/html/data/")
     {
     }
 };
@@ -55,6 +58,31 @@ CPPUNIT_TEST_FIXTURE(Test, testTdAsText)
     // i.e. data-sheets-value was ignored on import.
     CPPUNIT_ASSERT_EQUAL(table::CellContentType_TEXT, eType);
 }
+
+CPPUNIT_TEST_FIXTURE(Test, testPasteTdAsText)
+{
+    // Given an empty Calc document:
+    createScDoc();
+    ScDocument* pDocument = getScDoc();
+
+    // When the HTML test file, whose A2 cell holds "01" as text, is pasted at A1:
+    SvFileStream aHtmlFile(createFileURL(u"text.html"), StreamMode::READ);
+    SvMemoryStream aMemory;
+    aMemory.WriteStream(aHtmlFile);
+    aMemory.Seek(0);
+    const ScAddress aPastePos(/*nColP=*/0, /*nRowP=*/0, /*nTabP=*/0);
+    ScImportExport aImporter(*pDocument, aPastePos);
+    CPPUNIT_ASSERT(aImporter.ImportStream(aMemory, OUString(), SotClipboardFormatId::HTML));
+
+    // Then A2 must stay a string cell instead of being auto-converted to the number 1:
+    const ScAddress aA2(/*nColP=*/0, /*nRowP=*/1, /*nTabP=*/0);
+    const CellType eA2Type = pDocument->GetCellType(aA2);
+    // Without the accompanying fix in place, this test would have failed with:
+    // - Expected: 2 (CELLTYPE_STRING)
+    // - Actual  : 1 (CELLTYPE_VALUE)
+    // i.e. data-sheets-value was ignored on paste.
+    CPPUNIT_ASSERT_EQUAL(CELLTYPE_STRING, eA2Type);
+}
 }
 
 CPPUNIT_PLUGIN_IMPLEMENT();
diff --git a/sc/source/filter/html/htmlpars.cxx b/sc/source/filter/html/htmlpars.cxx
index 666ab7427e1a..c31e97c54621 100644
--- a/sc/source/filter/html/htmlpars.cxx
+++ b/sc/source/filter/html/htmlpars.cxx
@@ -72,6 +72,31 @@
 using ::editeng::SvxBorderLine;
 using namespace ::com::sun::star;
 
+namespace
+{
+/// Parses Google Sheets' data-sheets-value JSON; sets rNumberFormat to text for text cells.
+/// Malformed input (e.g. pasted from the clipboard) is ignored, leaving rNumberFormat as is.
+void ParseDataSheetsValue(const OUString& rDataSheetsValue, sal_uInt32& rNumberFormat)
+{
+    try
+    {
+        std::stringstream aStream(rDataSheetsValue.toUtf8().getStr());
+        boost::property_tree::ptree aTree;
+        boost::property_tree::read_json(aStream, aTree);
+        // The "1" key describes the original data type, 2 is text.
+        auto it = aTree.find("1");
+        if (it == aTree.not_found())
+            return;
+        if (std::stoi(it->second.get_value<std::string>()) == 2)
+            rNumberFormat = NF_STANDARD_FORMAT_TEXT;
+    }
+    catch (const std::exception&)
+    {
+        // Not well-formed JSON or a non-numeric data type: ignore the attribute.
+    }
+}
+}
+
 ScHTMLStyles::ScHTMLStyles() : maEmpty() {}
 
 void ScHTMLStyles::add(const char* pElemName, size_t nElemName, const char* pClassName, size_t nClassName,
@@ -911,6 +936,7 @@ void ScHTMLLayoutParser::TableDataOn( HtmlImportInfo* pInfo )
     bInCell = true;
     bool bHorJustifyCenterTH = (pInfo->nToken == HtmlTokenId::TABLEHEADER_ON);
     const HTMLOptions& rOptions = static_cast<HTMLParser*>(pInfo->pParser)->GetOptions();
+    sal_uInt32 nNumberFormat = NUMBERFORMAT_ENTRY_NOT_FOUND;
     for (const auto & rOption : rOptions)
     {
         switch( rOption.GetToken() )
@@ -979,10 +1005,18 @@ void ScHTMLLayoutParser::TableDataOn( HtmlImportInfo* pInfo )
                 mxActEntry->pNumStr = rOption.GetString();
             }
             break;
+            case HtmlOptionId::DSVAL:
+            {
+                ParseDataSheetsValue(rOption.GetString(), nNumberFormat);
+            }
+            break;
             default: break;
         }
     }
 
+    if (nNumberFormat != NUMBERFORMAT_ENTRY_NOT_FOUND)
+        mxActEntry->aItemSet.Put(SfxUInt32Item(ATTR_VALUE_FORMAT, nNumberFormat));
+
     mxActEntry->nCol = nColCnt;
     mxActEntry->nRow = nRowCnt;
     mxActEntry->nTab = nTable;
@@ -2119,23 +2153,7 @@ void ScHTMLTable::DataOn( const HtmlImportInfo& rInfo )
                 break;
                 case HtmlOptionId::DSVAL:
                 {
-                    // data-sheets-value from google sheets, value is a JSON.
-                    OString aEncodedOption = rOption.GetString().toUtf8();
-                    const char* pEncodedOption = aEncodedOption.getStr();
-                    std::stringstream aStream(pEncodedOption);
-                    boost::property_tree::ptree aTree;
-                    boost::property_tree::read_json(aStream, aTree);
-                    // The "1" key describes the original data type.
-                    auto it = aTree.find("1");
-                    if (it != aTree.not_found())
-                    {
-                        int nValueType = std::stoi(it->second.get_value<std::string>());
-                        // 2 is text.
-                        if (nValueType == 2)
-                        {
-                            nNumberFormat = NF_STANDARD_FORMAT_TEXT;
-                        }
-                    }
+                    ParseDataSheetsValue(rOption.GetString(), nNumberFormat);
                 }
                 break;
                 default: break;
diff --git a/sc/source/filter/inc/htmlpars.hxx b/sc/source/filter/inc/htmlpars.hxx
index fcdf6b4443fa..5b2d441098f3 100644
--- a/sc/source/filter/inc/htmlpars.hxx
+++ b/sc/source/filter/inc/htmlpars.hxx
@@ -149,6 +149,7 @@ class HTMLOption;
 typedef ::std::map<SCROW, SCROW> InnerMap;
 typedef ::std::map<sal_uInt16, InnerMap*> OuterMap;
 
+/// HTML parser used during paste into Calc.
 class ScHTMLLayoutParser : public ScHTMLParser
 {
 private:
@@ -575,6 +576,8 @@ public:
 
     Builds the table structure correctly, ignores extended formatting like
     pictures or column widths.
+
+    Used during file load / import into Calc.
  */
 class ScHTMLQueryParser : public ScHTMLParser
 {

Reply via email to