configure.ac | 2 vcl/inc/sft.hxx | 6 vcl/source/font/PhysicalFontFace.cxx | 220 ++++++++++++++++++++++++++--------- vcl/source/fontsubset/sft.cxx | 2 vcl/source/pdf/pdfwriter_impl.cxx | 33 +++-- 5 files changed, 198 insertions(+), 65 deletions(-)
New commits: commit 76095f9252f8e88e995415ac94ce9920df539086 Author: Khaled Hosny <[email protected]> AuthorDate: Tue Feb 17 21:32:47 2026 +0200 Commit: Khaled Hosny <[email protected]> CommitDate: Fri Feb 20 22:28:55 2026 +0100 Use HarfBuzz subsetter to subset fonts for PDF This gives us a more up-to-date subsetter and would allow dropping most of our low-level, CVE-happy, font subsetting code. Update required HarfBuzz version to the version that provides all new APIs we are now using. Change-Id: I774ddf4f7af448a3cf17b64f89e8b1f36097775a Reviewed-on: https://gerrit.libreoffice.org/c/core/+/199572 Reviewed-by: Khaled Hosny <[email protected]> Tested-by: Jenkins diff --git a/configure.ac b/configure.ac index d18caa997ba8..9d02bbce9900 100644 --- a/configure.ac +++ b/configure.ac @@ -11679,7 +11679,7 @@ AC_SUBST(SYSTEM_LIBORCUS) dnl =================================================================== dnl HarfBuzz dnl =================================================================== -harfbuzz_required_version=5.1.0 +harfbuzz_required_version=7.3.0 GRAPHITE_CFLAGS_internal="-I${WORKDIR}/UnpackedTarball/graphite/include -DGRAPHITE2_STATIC" HARFBUZZ_CFLAGS_internal="-I${WORKDIR}/UnpackedTarball/harfbuzz/src" diff --git a/vcl/inc/sft.hxx b/vcl/inc/sft.hxx index 324d9994e0ff..a196ff2358f0 100644 --- a/vcl/inc/sft.hxx +++ b/vcl/inc/sft.hxx @@ -582,6 +582,12 @@ class TrueTypeFont; const sal_uInt8* pEncoding, int nGlyphCount, FontSubsetInfo& rInfo); + bool CreateCFFfontSubset(const unsigned char* pFontBytes, int nByteLength, + std::vector<sal_uInt8>& rOutBuffer, + const sal_GlyphId* pGlyphIds, + const sal_uInt8* pEncoding, + int nGlyphCount, FontSubsetInfo& rInfo); + /** * Returns global font information about the TrueType font. 
* @see TTGlobalFontInfo diff --git a/vcl/source/font/PhysicalFontFace.cxx b/vcl/source/font/PhysicalFontFace.cxx index 5a666b01322f..35205a1eab81 100644 --- a/vcl/source/font/PhysicalFontFace.cxx +++ b/vcl/source/font/PhysicalFontFace.cxx @@ -21,6 +21,7 @@ #include <sal/types.h> #include <tools/fontenum.hxx> +#include <tools/stream.hxx> #include <unotools/fontdefs.hxx> #include <osl/file.hxx> #include <osl/thread.h> @@ -33,10 +34,12 @@ #include <font/FontSelectPattern.hxx> #include <font/PhysicalFontFace.hxx> #include <o3tl/string_view.hxx> +#include <comphelper/scopeguard.hxx> #include <string_view> #include <hb-ot.h> +#include <hb-subset.h> namespace vcl::font { @@ -294,77 +297,190 @@ bool PhysicalFontFace::GetFontCapabilities(vcl::FontCapabilities& rFontCapabilit return rFontCapabilities.oUnicodeRange || rFontCapabilities.oCodePageRange; } -namespace -{ -class RawFace +// These are “private” HarfBuzz metrics tags, they are supported but not exposed +// in the public header. They are safe to use, HarfBuzz just does not want to +// advertise them. 
+constexpr auto ASCENT_OS2 = static_cast<hb_ot_metrics_tag_t>(HB_TAG('O', 'a', 's', 'c')); +constexpr auto DESCENT_OS2 = static_cast<hb_ot_metrics_tag_t>(HB_TAG('O', 'd', 's', 'c')); +constexpr auto ASCENT_HHEA = static_cast<hb_ot_metrics_tag_t>(HB_TAG('H', 'a', 's', 'c')); +constexpr auto DESCENT_HHEA = static_cast<hb_ot_metrics_tag_t>(HB_TAG('H', 'd', 's', 'c')); + +bool PhysicalFontFace::CreateFontSubset(std::vector<sal_uInt8>& rOutBuffer, + const sal_GlyphId* pGlyphIds, const sal_uInt8* pEncoding, + const int nGlyphCount, FontSubsetInfo& rInfo) const { -public: - RawFace(hb_face_t* pFace) - : mpFace(hb_face_reference(pFace)) - { - } + // Create subset input + hb_subset_input_t* pInput = hb_subset_input_create_or_fail(); + comphelper::ScopeGuard aInputGuard([&]() { hb_subset_input_destroy(pInput); }); + if (!pInput) + return false; - RawFace(const RawFace& rOther) - : mpFace(hb_face_reference(rOther.mpFace)) + // Add the requested glyph IDs to the subset input, and set up + // old-to-new glyph ID mapping so that each glyph appears at the + // GID position matching its encoding byte. + hb_set_t* pGlyphSet = hb_subset_input_glyph_set(pInput); + hb_map_t* pGlyphMap = hb_subset_input_old_to_new_glyph_mapping(pInput); + for (int i = 0; i < nGlyphCount; ++i) { + hb_set_add(pGlyphSet, pGlyphIds[i]); + hb_map_set(pGlyphMap, pGlyphIds[i], pEncoding[i]); } - ~RawFace() { hb_face_destroy(mpFace); } + // Keep only tables needed for PDF embedding, drop everything else. + // By default hb-subset keeps many tables; we use the DROP_TABLE set to + // remove all tables we don't need. 
+ static constexpr hb_tag_t aKeepTables[] = { + HB_TAG('h', 'e', 'a', 'd'), HB_TAG('h', 'h', 'e', 'a'), HB_TAG('h', 'm', 't', 'x'), + HB_TAG('l', 'o', 'c', 'a'), HB_TAG('m', 'a', 'x', 'p'), HB_TAG('g', 'l', 'y', 'f'), + HB_TAG('C', 'F', 'F', ' '), HB_TAG('p', 'o', 's', 't'), HB_TAG('n', 'a', 'm', 'e'), + HB_TAG('O', 'S', '/', '2'), HB_TAG('c', 'v', 't', ' '), HB_TAG('f', 'p', 'g', 'm'), + HB_TAG('p', 'r', 'e', 'p'), + }; + + hb_set_t* pDropTableSet = hb_subset_input_set(pInput, HB_SUBSET_SETS_DROP_TABLE_TAG); + // Drop all tables except the ones we need + hb_set_invert(pDropTableSet); + for (auto nKeep : aKeepTables) + hb_set_del(pDropTableSet, nKeep); + + // Perform the subsetting + hb_face_t* pSubsetFace = hb_subset_or_fail(GetHbFace(), pInput); + comphelper::ScopeGuard aSubsetFaceGuard([&]() { hb_face_destroy(pSubsetFace); }); + if (!pSubsetFace) + return false; - RawFontData GetTable(uint32_t nTag) const + // Fill FontSubsetInfo + rInfo.m_aPSName = GetName(NAME_ID_POSTSCRIPT_NAME); + + auto nUPEM = UnitsPerEm(); + + hb_font_t* pSubsetFont = hb_font_create(pSubsetFace); + comphelper::ScopeGuard aSubsetFontGuard([&]() { hb_font_destroy(pSubsetFont); }); + hb_position_t nAscent, nDescent, nCapHeight; + // Try hhea first, then OS/2 similar to old FillFontSubsetInfo() + if (hb_ot_metrics_get_position(pSubsetFont, ASCENT_HHEA, &nAscent) + || hb_ot_metrics_get_position(pSubsetFont, ASCENT_OS2, &nAscent)) + rInfo.m_nAscent = XUnits(nUPEM, nAscent); + if (hb_ot_metrics_get_position(pSubsetFont, DESCENT_HHEA, &nDescent) + || hb_ot_metrics_get_position(pSubsetFont, DESCENT_OS2, &nDescent)) + rInfo.m_nDescent = XUnits(nUPEM, -nDescent); + if (hb_ot_metrics_get_position(pSubsetFont, HB_OT_METRICS_TAG_CAP_HEIGHT, &nCapHeight)) + rInfo.m_nCapHeight = XUnits(nUPEM, nCapHeight); + + hb_blob_t* pHeadBlob = hb_face_reference_table(pSubsetFace, HB_TAG('h', 'e', 'a', 'd')); + comphelper::ScopeGuard aHeadBlobGuard([&]() { hb_blob_destroy(pHeadBlob); }); + + unsigned int nHeadLen; + 
const char* pHead = hb_blob_get_data(pHeadBlob, &nHeadLen); + SvMemoryStream aStream(const_cast<char*>(pHead), nHeadLen, StreamMode::READ); + // Font data are big endian. + aStream.SetEndian(SvStreamEndian::BIG); + if (aStream.Seek(vcl::HEAD_yMax_offset) == vcl::HEAD_yMax_offset) { - return RawFontData(hb_face_reference_table(mpFace, nTag)); + sal_Int16 xMin, yMin, xMax, yMax; + aStream.Seek(vcl::HEAD_xMin_offset); + aStream.ReadInt16(xMin); + aStream.ReadInt16(yMin); + aStream.ReadInt16(xMax); + aStream.ReadInt16(yMax); + rInfo.m_aFontBBox = tools::Rectangle(Point(XUnits(nUPEM, xMin), XUnits(nUPEM, yMin)), + Point(XUnits(nUPEM, xMax), XUnits(nUPEM, yMax))); } -private: - hb_face_t* mpFace; -}; + rInfo.m_bFilled = true; -class TrueTypeFace final : public AbstractTrueTypeFont -{ - const RawFace m_aFace; - mutable std::array<RawFontData, NUM_TAGS> m_aTableList; + hb_blob_t* pSubsetBlob = nullptr; + comphelper::ScopeGuard aBuilderBlobGuard([&]() { hb_blob_destroy(pSubsetBlob); }); - const RawFontData& table(sal_uInt32 nIdx) const + // HarfBuzz creates a Unicode cmap, but we need a fake cmap based on pEncoding, + // so we use the face builder to construct a new face based on the subset tables, + // and create a new cmap table and add it to the new face. 
{ - assert(nIdx < NUM_TAGS); - static const uint32_t aTags[NUM_TAGS] = { - T_maxp, T_glyf, T_head, T_loca, T_name, T_hhea, T_hmtx, T_cmap, - T_vhea, T_vmtx, T_OS2, T_post, T_cvt, T_prep, T_fpgm, T_CFF, - }; - if (m_aTableList[nIdx].empty()) - m_aTableList[nIdx] = std::move(m_aFace.GetTable(aTags[nIdx])); - return m_aTableList[nIdx]; - } + hb_face_t* pBuilderFace = hb_face_builder_create(); + comphelper::ScopeGuard aBuilderFaceGuard([&]() { hb_face_destroy(pBuilderFace); }); + unsigned int nSubsetTableCount = hb_face_get_table_tags(pSubsetFace, 0, nullptr, nullptr); + std::vector<hb_tag_t> aSubsetTableTags(nSubsetTableCount); + hb_face_get_table_tags(pSubsetFace, 0, &nSubsetTableCount, aSubsetTableTags.data()); + for (unsigned int i = 0; i < nSubsetTableCount; ++i) + { + hb_blob_t* pTableBlob = hb_face_reference_table(pSubsetFace, aSubsetTableTags[i]); + hb_face_builder_add_table(pBuilderFace, aSubsetTableTags[i], pTableBlob); + hb_blob_destroy(pTableBlob); + } -public: - TrueTypeFace(RawFace aFace, const FontCharMapRef rCharMap) - : AbstractTrueTypeFont(nullptr, rCharMap) - , m_aFace(std::move(aFace)) - { + // Build a cmap table with a format 0 subtable + SvMemoryStream aCmapStream; + aCmapStream.SetEndian(SvStreamEndian::BIG); + + // cmap header + aCmapStream.WriteUInt16(0); // version + aCmapStream.WriteUInt16(1); // numTables + + // Encoding record + aCmapStream.WriteUInt16(1); // platformID (Mac: 1) + aCmapStream.WriteUInt16(0); // encodingID (Roman: 0) + aCmapStream.WriteUInt32(12); // subtable offset + + // Format 0 subtable + aCmapStream.WriteUInt16(0); // format + aCmapStream.WriteUInt16(262); // length + aCmapStream.WriteUInt16(0); // language + + // glyphIdArray + for (int i = 0; i < 256; ++i) + { + if (i < nGlyphCount) + aCmapStream.WriteUInt8(pEncoding[i]); + else + aCmapStream.WriteUInt8(0); + } + + hb_blob_t* pCmapBlob + = hb_blob_create(static_cast<const char*>(aCmapStream.GetData()), aCmapStream.Tell(), + HB_MEMORY_MODE_DUPLICATE, nullptr, nullptr); 
+ hb_face_builder_add_table(pBuilderFace, HB_TAG('c', 'm', 'a', 'p'), pCmapBlob); + hb_blob_destroy(pCmapBlob); + + pSubsetBlob = hb_face_reference_blob(pBuilderFace); } - bool hasTable(sal_uInt32 nIdx) const override { return !table(nIdx).empty(); } - const sal_uInt8* table(sal_uInt32 nIdx, sal_uInt32& nSize) const override + hb_blob_t* pCFFBlob = hb_face_reference_table(pSubsetFace, HB_TAG('C', 'F', 'F', ' ')); + comphelper::ScopeGuard aCFFBlobGuard([&]() { hb_blob_destroy(pCFFBlob); }); + if (pCFFBlob != hb_blob_get_empty()) { - auto& rTable = table(nIdx); - nSize = rTable.size(); - return rTable.data(); + // Ideally we should be outputting a CFF (Type1C) font here, but I couldn’t get it to work. + // So we convert it to a Type1 font instead. + // TODO: simplify CreateCFFfontSubset() to only do the conversion, since we already + // have the subsetted font. + rInfo.m_nFontType = FontType::TYPE1_PFB; + + unsigned int nCffLen; + const unsigned char* pCffData + = reinterpret_cast<const unsigned char*>(hb_blob_get_data(pCFFBlob, &nCffLen)); + + std::vector<sal_GlyphId> aNewGlyphIds(nGlyphCount); + for (int i = 0; i < nGlyphCount; ++i) + aNewGlyphIds[i] = i; + + if (!CreateCFFfontSubset(pCffData, nCffLen, rOutBuffer, aNewGlyphIds.data(), pEncoding, + nGlyphCount, rInfo)) + return false; } -}; -} + else + { + rInfo.m_nFontType = FontType::SFNT_TTF; -bool PhysicalFontFace::CreateFontSubset(std::vector<sal_uInt8>& rOutBuffer, - const sal_GlyphId* pGlyphIds, const sal_uInt8* pEncoding, - const int nGlyphCount, FontSubsetInfo& rInfo) const -{ - // Prepare data for font subsetter. 
- TrueTypeFace aSftFont(RawFace(GetHbFace()), GetFontCharMap()); - if (aSftFont.initialize() != SFErrCodes::Ok) - return false; + unsigned int nSubsetLength; + const char* pSubsetData = nullptr; + pSubsetData = hb_blob_get_data(pSubsetBlob, &nSubsetLength); + if (!pSubsetData || !nSubsetLength) + return false; + + rOutBuffer.assign(reinterpret_cast<const sal_uInt8*>(pSubsetData), + reinterpret_cast<const sal_uInt8*>(pSubsetData) + nSubsetLength); + } - // write subset into destination file - return CreateTTFfontSubset(aSftFont, rOutBuffer, pGlyphIds, pEncoding, nGlyphCount, rInfo); + return true; } bool PhysicalFontFace::HasColorLayers() const diff --git a/vcl/source/fontsubset/sft.cxx b/vcl/source/fontsubset/sft.cxx index 5e7da45adb64..dc03041dafcf 100644 --- a/vcl/source/fontsubset/sft.cxx +++ b/vcl/source/fontsubset/sft.cxx @@ -1436,6 +1436,7 @@ void FillFontSubsetInfo(const AbstractTrueTypeFont* ttf, FontSubsetInfo& rInfo) rInfo.m_bFilled = true; } +} bool CreateCFFfontSubset(const unsigned char* pFontBytes, int nByteLength, std::vector<sal_uInt8>& rOutBuffer, const sal_GlyphId* pGlyphIds, @@ -1462,7 +1463,6 @@ bool CreateCFFfontSubset(const unsigned char* pFontBytes, int nByteLength, return bRet; } -} bool CreateTTFfontSubset(vcl::AbstractTrueTypeFont& rTTF, std::vector<sal_uInt8>& rOutBuffer, const sal_GlyphId* pGlyphIds, const sal_uInt8* pEncoding, diff --git a/vcl/source/pdf/pdfwriter_impl.cxx b/vcl/source/pdf/pdfwriter_impl.cxx index ec91e73bc07f..0305bf5c1403 100644 --- a/vcl/source/pdf/pdfwriter_impl.cxx +++ b/vcl/source/pdf/pdfwriter_impl.cxx @@ -2030,6 +2030,9 @@ sal_Int32 PDFWriterImpl::emitFontDescriptor( const vcl::font::PhysicalFontFace* case FontType::SFNT_TTF: aLine.append( '2' ); break; + case FontType::CFF_FONT: + aLine.append( "3" ); + break; case FontType::TYPE1_PFA: case FontType::TYPE1_PFB: case FontType::ANY_TYPE1: @@ -2100,18 +2103,17 @@ bool PDFWriterImpl::emitFonts() + OString::number( nStreamLengthObject ) ); if 
(!g_bDebugDisableCompression) aLine.append( " 0 R" - "/Filter/FlateDecode" - "/Length1 " ); + "/Filter/FlateDecode"); else - aLine.append( " 0 R" - "/Length1 " ); + aLine.append( " 0 R"); sal_uInt64 nStartPos = 0; if( aSubsetInfo.m_nFontType == FontType::SFNT_TTF ) { - aLine.append( OString::number(aBuffer.size()) - + ">> " - "stream " ); + aLine.append("/Length1 " + + OString::number(aBuffer.size()) + + ">> " + "stream " ); if ( !writeBuffer( aLine ) ) return false; if ( osl::File::E_None != m_aFile.getPos(nStartPos) ) return false; @@ -2123,8 +2125,16 @@ bool PDFWriterImpl::emitFonts() } else if( aSubsetInfo.m_nFontType & FontType::CFF_FONT) { - // TODO: implement - OSL_FAIL( "PDFWriterImpl does not support CFF-font subsets yet!" ); + // CFF subset is embedded as an SFNT font (same as TrueType) + aLine.append("/Subtype/Type1C>> stream "); + if ( !writeBuffer( aLine ) ) return false; + if ( osl::File::E_None != m_aFile.getPos(nStartPos) ) return false; + + // copy font file + beginCompression(); + checkAndEnableStreamEncryption( nFontStream ); + if (!writeBufferBytes(aBuffer.data(), aBuffer.size())) + return false; } else if( aSubsetInfo.m_nFontType & FontType::TYPE1_PFB) // TODO: also support PFA? { @@ -2133,7 +2143,8 @@ bool PDFWriterImpl::emitFonts() getPfbSegmentLengths(aBuffer.data(), aBuffer.size(), aSegmentLengths); // the lengths below are mandatory for PDF-exported Type1 fonts // because the PFB segment headers get stripped! WhyOhWhy. - aLine.append( OString::number(aSegmentLengths[0] ) + aLine.append("/Length1 " + + OString::number(aSegmentLengths[0] ) + "/Length2 " + OString::number( aSegmentLengths[1] ) + "/Length3 " @@ -2185,7 +2196,7 @@ bool PDFWriterImpl::emitFonts() if ( !updateObject( nFontObject ) ) return false; aLine.setLength( 0 ); aLine.append( OString::number(nFontObject) + " 0 obj " ); - aLine.append( (aSubsetInfo.m_nFontType & FontType::ANY_TYPE1) ? + aLine.append( (aSubsetInfo.m_nFontType & (FontType::ANY_TYPE1 | FontType::CFF_FONT)) ? 
"<</Type/Font/Subtype/Type1/BaseFont/" : "<</Type/Font/Subtype/TrueType/BaseFont/" ); appendSubsetName( s_subset.m_nFontID, aSubsetInfo.m_aPSName, aLine );
