 poppler/TextOutputDev.cc | 23 ++++++++++++++++-------
 1 file changed, 16 insertions(+), 7 deletions(-)
New commits:
commit 51ca2b7c7dec5430d29860fd887ad5c5d9b3f574
Author: Albert Astals Cid <[email protected]>
Date:   Thu Dec 15 00:26:09 2011 +0100

    [xpdf303] Some more changes in TextOutputDev

diff --git a/poppler/TextOutputDev.cc b/poppler/TextOutputDev.cc
index 0c451ec..15fdf99 100644
--- a/poppler/TextOutputDev.cc
+++ b/poppler/TextOutputDev.cc
@@ -4669,10 +4669,9 @@ GBool TextPage::findCharRange(int pos, int length,
     return gFalse;
   }

-  //~ this doesn't correctly handle:
-  //~ - ranges split across multiple lines (the highlighted region
-  //~ is the bounding box of all the parts of the range)
-  //~ - cases where characters don't convert one-to-one into Unicode
+  //~ this doesn't correctly handle ranges split across multiple lines
+  //~ (the highlighted region is the bounding box of all the parts of
+  //~ the range)
   first = gTrue;
   xMin0 = xMax0 = yMin0 = yMax0 = 0; // make gcc happy
   xMin1 = xMax1 = yMin1 = yMax1 = 0; // make gcc happy
@@ -4791,7 +4790,9 @@ void TextPage::dump(void *outputStream, TextOutputFunc outputFunc,
       delete s;
       if (word->next &&
           fabs(word->next->base - word->base) <
-          maxIntraLineDelta * word->fontSize) {
+          maxIntraLineDelta * word->fontSize &&
+          word->next->xMin >
+          word->xMax - minDupBreakOverlap * word->fontSize) {
         if (word->next->xMin > word->xMax + minWordSpacing * word->fontSize) {
           (*outputFunc)(outputStream, space, spaceLen);
         }
@@ -5041,7 +5042,9 @@ int TextPage::dumpFragment(Unicode *text, int len, UnicodeMap *uMap,
       }
       i = j;
       // output a right-to-left section
-      for (j = i; j < len && !unicodeTypeL(text[j]); ++j) ;
+      for (j = i;
+           j < len && !(unicodeTypeL(text[j]) || unicodeTypeNum(text[j]));
+           ++j) ;
       if (j > i) {
         s->append(rle, rleLen);
         for (k = j - 1; k >= i; --k) {
@@ -5056,11 +5059,17 @@ int TextPage::dumpFragment(Unicode *text, int len, UnicodeMap *uMap,

   } else {

+    // Note: This code treats numeric characters (European and
+    // Arabic/Indic) as left-to-right, which isn't strictly correct
+    // (incurs extra LRE/POPDF pairs), but does produce correct
+    // visual formatting.
     s->append(rle, rleLen);
     i = len - 1;
     while (i >= 0) {
       // output a right-to-left section
-      for (j = i; j >= 0 && !unicodeTypeL(text[j]); --j) ;
+      for (j = i;
+           j >= 0 && !(unicodeTypeL(text[j]) || unicodeTypeNum(text[j]));
+           --j) ;
       for (k = i; k > j; --k) {
         n = uMap->mapUnicode(text[k], buf, sizeof(buf));
         s->append(buf, n);
_______________________________________________
poppler mailing list
[email protected]
http://lists.freedesktop.org/mailman/listinfo/poppler
