utils/HtmlOutputDev.cc | 22 ++++++++++++++++++---- utils/pdftohtml.1 | 3 +++ utils/pdftohtml.cc | 4 ++++ 3 files changed, 25 insertions(+), 4 deletions(-)
New commits: commit bcd89bc0abb2cc05d3dc428074bb24b450ab7cf0 Author: Thibaut Brard <[email protected]> Date: Sat Jul 21 00:17:58 2018 +0200 pdftohtml: Add option to not round coordinates when outputting as xml diff --git a/utils/HtmlOutputDev.cc b/utils/HtmlOutputDev.cc index ace303b3..8a088c22 100644 --- a/utils/HtmlOutputDev.cc +++ b/utils/HtmlOutputDev.cc @@ -41,6 +41,7 @@ // Copyright (C) 2016 Vincent Le Garrec <[email protected]> // Copyright (C) 2017 Caolán McNamara <[email protected]> // Copyright (C) 2018 Klarälvdalens Datakonsult AB, a KDAB Group company, <[email protected]>. Work sponsored by the LiMux project of the city of Munich +// Copyright (C) 2018 Thibaut Brard <[email protected]> // // To see a description of the changes please see the Changelog file that // came with your tarball or type make ChangeLog if you are building from git @@ -109,6 +110,7 @@ extern GBool printHtml; extern GBool noframes; extern GBool stout; extern GBool xml; +extern GBool noRoundedCoordinates; extern GBool showHidden; extern GBool noMerge; @@ -760,16 +762,28 @@ void HtmlPage::dumpAsXML(FILE* f,int page){ int listlen=imgList->getLength(); for (int i = 0; i < listlen; i++) { HtmlImage *img = (HtmlImage*)imgList->del(0); - fprintf(f,"<image top=\"%d\" left=\"%d\" ",xoutRound(img->yMin),xoutRound(img->xMin)); - fprintf(f,"width=\"%d\" height=\"%d\" ",xoutRound(img->xMax-img->xMin),xoutRound(img->yMax-img->yMin)); + if (!noRoundedCoordinates) { + fprintf(f, "<image top=\"%d\" left=\"%d\" ", xoutRound(img->yMin), xoutRound(img->xMin)); + fprintf(f, "width=\"%d\" height=\"%d\" ", xoutRound(img->xMax - img->xMin), xoutRound(img->yMax - img->yMin)); + } + else { + fprintf(f, "<image top=\"%f\" left=\"%f\" ", img->yMin, img->xMin); + fprintf(f, "width=\"%f\" height=\"%f\" ", img->xMax - img->xMin, img->yMax - img->yMin); + } fprintf(f,"src=\"%s\"/>\n",img->fName->getCString()); delete img; } for(HtmlString *tmp=yxStrings;tmp;tmp=tmp->yxNext){ if (tmp->htext){ - fprintf(f,"<text 
top=\"%d\" left=\"%d\" ",xoutRound(tmp->yMin),xoutRound(tmp->xMin)); - fprintf(f,"width=\"%d\" height=\"%d\" ",xoutRound(tmp->xMax-tmp->xMin),xoutRound(tmp->yMax-tmp->yMin)); + if (!noRoundedCoordinates) { + fprintf(f, "<text top=\"%d\" left=\"%d\" ", xoutRound(tmp->yMin), xoutRound(tmp->xMin)); + fprintf(f, "width=\"%d\" height=\"%d\" ", xoutRound(tmp->xMax - tmp->xMin), xoutRound(tmp->yMax - tmp->yMin)); + } + else { + fprintf(f, "<text top=\"%f\" left=\"%f\" ", tmp->yMin, tmp->xMin); + fprintf(f, "width=\"%f\" height=\"%f\" ", tmp->xMax - tmp->xMin, tmp->yMax - tmp->yMin); + } fprintf(f,"font=\"%d\">", tmp->fontpos); fputs(tmp->htext->getCString(),f); fputs("</text>\n",f); diff --git a/utils/pdftohtml.1 b/utils/pdftohtml.1 index 5de42880..5d711ba9 100644 --- a/utils/pdftohtml.1 +++ b/utils/pdftohtml.1 @@ -58,6 +58,9 @@ zoom the PDF document (default 1.5) .B \-xml output for XML post-processing .TP +.B \-noroundcoord +do not round coordinates (with XML output only) +.TP .B \-enc <string> output text encoding name .TP diff --git a/utils/pdftohtml.cc b/utils/pdftohtml.cc index 04aeb1bc..b82c2552 100644 --- a/utils/pdftohtml.cc +++ b/utils/pdftohtml.cc @@ -26,6 +26,7 @@ // Copyright (C) 2015 William Bader <[email protected]> // Copyright (C) 2017 Adrian Johnson <[email protected]> // Copyright (C) 2018 Klarälvdalens Datakonsult AB, a KDAB Group company, <[email protected]>. 
Work sponsored by the LiMux project of the city of Munich +// Copyright (C) 2018 Thibaut Brard <[email protected]> // // To see a description of the changes please see the Changelog file that // came with your tarball or type make ChangeLog if you are building from git @@ -81,6 +82,7 @@ static double scale=1.5; GBool noframes=gFalse; GBool stout=gFalse; GBool xml=gFalse; +GBool noRoundedCoordinates = gFalse; static GBool errQuiet=gFalse; static GBool noDrm=gFalse; double wordBreakThreshold=10; // 10%, below converted into a coefficient - 0.1 @@ -130,6 +132,8 @@ static const ArgDesc argDesc[] = { "zoom the pdf document (default 1.5)"}, {"-xml", argFlag, &xml, 0, "output for XML post-processing"}, + {"-noroundcoord", argFlag, &noRoundedCoordinates, 0, + "do not round coordinates (with XML output only)"}, {"-hidden", argFlag, &showHidden, 0, "output hidden text"}, {"-nomerge", argFlag, &noMerge, 0, _______________________________________________ poppler mailing list [email protected] https://lists.freedesktop.org/mailman/listinfo/poppler
