poppler/Annot.cc | 2 +- poppler/Annot.h | 4 ++-- poppler/TextOutputDev.cc | 23 ++++++++++++++++++++--- qt4/src/poppler-page.cc | 11 +++++++++-- qt4/src/poppler-qt4.h | 21 +++++++++++++++++++++ qt4/tests/CMakeLists.txt | 1 + qt4/tests/Makefile.am | 7 ++++++- 7 files changed, 60 insertions(+), 9 deletions(-)
New commits: commit a86f9d90be99a36c41c6932fb4d9a202c4ff6d05 Author: Albert Astals Cid <[email protected]> Date: Wed Sep 1 20:20:48 2010 +0100 Clarify the ownership diff --git a/qt4/src/poppler-qt4.h b/qt4/src/poppler-qt4.h index cb4ec39..5ed7218 100644 --- a/qt4/src/poppler-qt4.h +++ b/qt4/src/poppler-qt4.h @@ -799,6 +799,8 @@ delete it; Note that this follows the PDF standard of being zero based - if you want the first page, then you need an index of zero. + + The caller gets the ownership of the returned object. \param index the page number index */ commit 33ad3a17ac26879fcd6a7fad2023dd219bc5919f Author: Suzuki Toshiya <[email protected]> Date: Wed Sep 1 20:19:54 2010 +0100 Add a way to access the raw text diff --git a/poppler/TextOutputDev.cc b/poppler/TextOutputDev.cc index c840aef..576bcc9 100644 --- a/poppler/TextOutputDev.cc +++ b/poppler/TextOutputDev.cc @@ -23,6 +23,7 @@ // Copyright (C) 2009 Ross Moore <[email protected]> // Copyright (C) 2009 Kovid Goyal <[email protected]> // Copyright (C) 2010 Brian Ewins <[email protected]> +// Copyright (C) 2010 Suzuki Toshiya <[email protected]> // // To see a description of the changes please see the Changelog file that // came with your tarball or type make ChangeLog if you are building from git @@ -3605,14 +3606,30 @@ GooString *TextPage::getText(double xMin, double yMin, s = new GooString(); - if (rawOrder) { + // get the output encoding + if (!(uMap = globalParams->getTextEncoding())) { return s; } - // get the output encoding - if (!(uMap = globalParams->getTextEncoding())) { + if (rawOrder) { + TextWord* word; + char mbc[16]; + int mbc_len; + + for (word = rawWords; word && word <= rawLastWord; word = word->next) { + for (j = 0; j < word->getLength(); ++j) { + double gXMin, gXMax, gYMin, gYMax; + word->getCharBBox(j, &gXMin, &gYMin, &gXMax, &gYMax); + if (xMin <= gXMin && gXMax <= xMax && yMin <= gYMin && gYMax <= yMax) + { + mbc_len = uMap->mapUnicode( *(word->getChar(j)), mbc, sizeof(mbc) ); + s->append(mbc, mbc_len); + } + } + } return s; } + isUnicode = uMap->isUnicode(); spaceLen = uMap->mapUnicode(0x20, space, sizeof(space)); eolLen = 0; // make gcc happy diff --git a/qt4/src/poppler-page.cc b/qt4/src/poppler-page.cc index ae67b11..49a0a77 100644 --- a/qt4/src/poppler-page.cc +++ b/qt4/src/poppler-page.cc @@ -7,6 +7,7 @@ * Copyright (C) 2008 Carlos Garcia Campos <[email protected]> * Copyright (C) 2009 Shawn Rutledge <[email protected]> * Copyright (C) 2010, Guillermo Amaral <[email protected]> + * Copyright (C) 2010 Suzuki Toshiya <[email protected]> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -295,14 +296,15 @@ QImage Page::thumbnail() const return ret; } -QString Page::text(const QRectF &r) const +QString Page::text(const QRectF &r, TextLayout textLayout) const { TextOutputDev *output_dev; GooString *s; PDFRectangle *rect; QString result; - output_dev = new TextOutputDev(0, gFalse, gFalse, gFalse); + const GBool rawOrder = textLayout == RawOrder; + output_dev = new TextOutputDev(0, gFalse, rawOrder, gFalse); m_page->parentDoc->doc->displayPageSlice(output_dev, m_page->index + 1, 72, 72, 0, false, true, false, -1, -1, -1, -1); if (r.isNull()) @@ -322,6 +324,11 @@ QString Page::text(const QRectF &r) const return result; } +QString Page::text(const QRectF &r) const +{ + return text(r, PhysicalLayout); +} + bool Page::search(const QString &text, double &sLeft, double &sTop, double &sRight, double &sBottom, SearchDirection direction, SearchMode caseSensitive, Rotation rotate) const { const QChar * str = text.unicode(); diff --git a/qt4/src/poppler-qt4.h b/qt4/src/poppler-qt4.h index 117dc43..cb4ec39 100644 --- a/qt4/src/poppler-qt4.h +++ b/qt4/src/poppler-qt4.h @@ -5,6 +5,7 @@ * Copyright (C) 2005, Stefan Kebekus <[email protected]> * Copyright (C) 2006-2009, Pino Toscano <[email protected]> * Copyright (C) 2009 Shawn Rutledge <[email protected]> + * Copyright (C) 2010 Suzuki Toshiya <[email protected]> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -387,6 +388,14 @@ delete it; Opening, ///< The action when a page is "opened" Closing ///< The action when a page is "closed" }; + + /** + How the text is going to be returned + */ + enum TextLayout { + PhysicalLayout, ///< The text is layouted to resemble the real page layout + RawOrder ///< The text is returned without any type of processing + }; /** Render the page to a QImage using the current @@ -445,6 +454,16 @@ delete it; with coordinates given in points, i.e., 1/72th of an inch. If rect is null, all text on the page is given **/ + QString text(const QRectF &rect, TextLayout textLayout) const; + + /** + Returns the text that is inside a specified rectangle. + The text is returned using the physical layout of the page + + \param rect the rectangle specifying the area of interest, + with coordinates given in points, i.e., 1/72th of an inch. + If rect is null, all text on the page is given + **/ QString text(const QRectF &rect) const; /** diff --git a/qt4/tests/CMakeLists.txt b/qt4/tests/CMakeLists.txt index 892ec66..3a67614 100644 --- a/qt4/tests/CMakeLists.txt +++ b/qt4/tests/CMakeLists.txt @@ -41,6 +41,7 @@ qt4_add_simpletest(poppler-fonts poppler-fonts.cpp) qt4_add_simpletest(poppler_attachments poppler-attachments.cpp) qt4_add_simpletest(stress-poppler-qt4 stress-poppler-qt4.cpp) qt4_add_simpletest(stress-poppler-dir stress-poppler-dir.cpp) +qt4_add_simpletest(poppler-texts poppler-texts.cpp) qt4_add_qtest(check_attachments check_attachments.cpp) qt4_add_qtest(check_dateConversion check_dateConversion.cpp) diff --git a/qt4/tests/Makefile.am b/qt4/tests/Makefile.am index 7bc16d7..244097c 100644 --- a/qt4/tests/Makefile.am +++ b/qt4/tests/Makefile.am @@ -21,7 +21,7 @@ SUFFIXES: .moc noinst_PROGRAMS = test-poppler-qt4 stress-poppler-qt4 \ poppler-fonts test-password-qt4 stress-poppler-dir \ - poppler-attachments + poppler-attachments poppler-texts test_poppler_qt4_SOURCES = \ @@ -46,6 +46,11 @@ poppler_attachments_SOURCES = \ poppler_attachments_LDADD = $(LDADDS) +poppler_texts_SOURCES = \ + poppler-texts.cpp + +poppler_texts_LDADD = $(LDADDS) + stress_poppler_qt4_SOURCES = \ stress-poppler-qt4.cpp commit 46e89248b3c5b1789baa3bd9bfa012570720ddb5 Author: Albert Astals Cid <[email protected]> Date: Wed Sep 1 19:54:02 2010 +0100 quadding is not a GBool but an int diff --git a/poppler/Annot.cc b/poppler/Annot.cc index 6a18d7f..0cb6516 100644 --- a/poppler/Annot.cc +++ b/poppler/Annot.cc @@ -3400,7 +3400,7 @@ void AnnotWidget::drawText(GooString *text, GooString *da, GfxFontDict *fontDict // Draw the variable text or caption for a field. void AnnotWidget::drawListBox(GooString **text, GBool *selection, int nOptions, int topIdx, - GooString *da, GfxFontDict *fontDict, GBool quadding) { + GooString *da, GfxFontDict *fontDict, int quadding) { GooList *daToks; GooString *tok, *convertedText; GfxFont *font; diff --git a/poppler/Annot.h b/poppler/Annot.h index a21b55e..a392267 100644 --- a/poppler/Annot.h +++ b/poppler/Annot.h @@ -21,7 +21,7 @@ // Copyright (C) 2008 Hugo Mercier <[email protected]> // Copyright (C) 2008 Pino Toscano <[email protected]> // Copyright (C) 2008 Tomas Are Haavet <[email protected]> -// Copyright (C) 2009 Albert Astals Cid <[email protected]> +// Copyright (C) 2009, 2010 Albert Astals Cid <[email protected]> // // To see a description of the changes please see the Changelog file that // came with your tarball or type make ChangeLog if you are building from git @@ -1175,7 +1175,7 @@ private: GBool password=false); void drawListBox(GooString **text, GBool *selection, int nOptions, int topIdx, - GooString *da, GfxFontDict *fontDict, GBool quadding); + GooString *da, GfxFontDict *fontDict, int quadding); void layoutText(GooString *text, GooString *outBuf, int *i, GfxFont *font, double *width, double widthLimit, int *charCount, GBool noReencode); _______________________________________________ poppler mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/poppler
