tags 533138 +patch +upstream thanks Dear poppler maintainers,
This bug seems to be fixed in 0.14. However, there is a patch attached to the upstream bug [1] which I tried and adapted to 0.12.4. It works fine and solves this scaling issue, which makes some PDF files (from scientific publishers in particular) barely readable. Could you consider including it in Debian if upgrading to 0.14 is not planned before squeeze? Cheers, Denis [1] https://bugs.freedesktop.org/show_bug.cgi?id=5589
Fix scaling issue for scanned PDF files. Adapted from: https://bugs.freedesktop.org/attachment.cgi?id=32750 found in poppler bug: https://bugs.freedesktop.org/show_bug.cgi?id=5589 Index: poppler-0.12.4/poppler/CairoOutputDev.cc =================================================================== --- poppler-0.12.4.orig/poppler/CairoOutputDev.cc 2010-02-16 18:11:46.000000000 -0500 +++ poppler-0.12.4/poppler/CairoOutputDev.cc 2010-07-29 17:40:17.120406565 -0400 @@ -58,6 +58,7 @@ #include <splash/SplashBitmap.h> #include "CairoOutputDev.h" #include "CairoFontEngine.h" +#include "CairoRescaleBox.h" //------------------------------------------------------------------------ // #define LOG_CAIRO @@ -1291,6 +1292,82 @@ clearSoftMask(state); } +cairo_surface_t *CairoOutputDev::downscaleSurface(cairo_surface_t *orig_surface) { + cairo_surface_t *dest_surface; + unsigned char *dest_buffer; + int dest_stride; + unsigned char *orig_buffer; + int orig_width, orig_height; + int orig_stride; + GBool res; + + if (printing) + return NULL; + + cairo_matrix_t matrix; + cairo_get_matrix(cairo, &matrix); + + /* this whole computation should be factored out */ + double xScale = matrix.xx; + double yScale = matrix.yy; + int tx, tx2, ty, ty2; /* the integer co-oridinates of the resulting image */ + int scaledHeight; + int scaledWidth; + if (xScale >= 0) { + tx = splashRound(matrix.x0 - 0.01); + tx2 = splashRound(matrix.x0 + xScale + 0.01) - 1; + } else { + tx = splashRound(matrix.x0 + 0.01) - 1; + tx2 = splashRound(matrix.x0 + xScale - 0.01); + } + scaledWidth = abs(tx2 - tx) + 1; + //scaledWidth = splashRound(fabs(xScale)); + if (scaledWidth == 0) { + // technically, this should draw nothing, but it generally seems + // better to draw a one-pixel-wide stripe rather than throwing it + // away + scaledWidth = 1; + } + if (yScale >= 0) { + ty = splashFloor(matrix.y0 + 0.01); + ty2 = splashCeil(matrix.y0 + yScale - 0.01); + } else { + ty = splashCeil(matrix.y0 - 0.01); + ty2 = 
splashFloor(matrix.y0 + yScale + 0.01); + } + scaledHeight = abs(ty2 - ty); + if (scaledHeight == 0) { + scaledHeight = 1; + } + + orig_width = cairo_image_surface_get_width (orig_surface); + orig_height = cairo_image_surface_get_height (orig_surface); + if (scaledWidth >= orig_width || scaledHeight >= orig_height) + return NULL; + + dest_surface = cairo_surface_create_similar (orig_surface, + cairo_surface_get_content (orig_surface), + scaledWidth, scaledHeight); + dest_buffer = cairo_image_surface_get_data (dest_surface); + dest_stride = cairo_image_surface_get_stride (dest_surface); + + orig_buffer = cairo_image_surface_get_data (orig_surface); + orig_stride = cairo_image_surface_get_stride (orig_surface); + + res = downscale_box_filter((uint32_t *)orig_buffer, + orig_stride, orig_width, orig_height, + scaledWidth, scaledHeight, 0, 0, + scaledWidth, scaledHeight, + (uint32_t *)dest_buffer, dest_stride); + if (!res) { + cairo_surface_destroy (dest_surface); + return NULL; + } + + return dest_surface; + +} + void CairoOutputDev::drawImageMask(GfxState *state, Object *ref, Stream *str, int width, int height, GBool invert, GBool interpolate, GBool inlineImg) { @@ -2043,6 +2120,18 @@ } gfree(lookup); + cairo_surface_t *scaled_surface; + + scaled_surface = downscaleSurface (image); + if (scaled_surface) { + if (cairo_surface_status (scaled_surface)) + goto cleanup; + cairo_surface_destroy (image); + image = scaled_surface; + width = cairo_image_surface_get_width (image); + height = cairo_image_surface_get_height (image); + } + cairo_surface_mark_dirty (image); pattern = cairo_pattern_create_for_surface (image); cairo_surface_destroy (image); Index: poppler-0.12.4/poppler/CairoOutputDev.h =================================================================== --- poppler-0.12.4.orig/poppler/CairoOutputDev.h 2010-01-16 19:06:57.000000000 -0500 +++ poppler-0.12.4/poppler/CairoOutputDev.h 2010-07-29 17:40:17.120406565 -0400 @@ -268,6 +268,7 @@ protected: void doPath(cairo_t 
*cairo, GfxState *state, GfxPath *path); + cairo_surface_t *downscaleSurface(cairo_surface_t *orig_surface); GfxRGB fill_color, stroke_color; cairo_pattern_t *fill_pattern, *stroke_pattern; Index: poppler-0.12.4/poppler/CairoRescaleBox.cc =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ poppler-0.12.4/poppler/CairoRescaleBox.cc 2010-07-29 17:40:17.168125215 -0400 @@ -0,0 +1,352 @@ +/* -*- Mode: c; c-basic-offset: 4; tab-width: 8; indent-tabs-mode: t; -*- */ +/* + * Copyright © 2009 Mozilla Corporation + * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that + * copyright notice and this permission notice appear in supporting + * documentation, and that the name of Mozilla Corporation not be used in + * advertising or publicity pertaining to distribution of the software without + * specific, written prior permission. Mozilla Corporation makes no + * representations about the suitability of this software for any purpose. It + * is provided "as is" without express or implied warranty. + * + * MOZILLA CORPORATION DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, + * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT + * SHALL MOZILLA CORPORATION BE LIABLE FOR ANY SPECIAL, INDIRECT OR + * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, + * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER + * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE + * OF THIS SOFTWARE. + * + * Author: Jeff Muizelaar, Mozilla Corp. 
+ */ + +/* This implements a box filter that supports non-integer box sizes */ + +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif + +#include <stdint.h> +#include <stdio.h> +#include <assert.h> +#include <stdlib.h> +#include <math.h> +#include "goo/gmem.h" +#include "CairoRescaleBox.h" + +typedef unsigned short int uint16_t; +typedef unsigned int uint32_t; + +/* we work in fixed point where 1. == 1 << 24 */ +#define FIXED_SHIFT 24 + +static void downsample_row_box_filter ( + int start, int width, + uint32_t *src, uint32_t *dest, + int coverage[], int pixel_coverage) +{ + /* we need an array of the pixel contribution of each destination pixel on the boundaries. + * we invert the value to get the value on the other size of the box */ + /* + + value = a * contribution * 1/box_size + value += a * 1/box_size + value += a * 1/box_size + value += a * 1/box_size + value += a * (1 - contribution) * 1/box_size + a * (1/box_size - contribution * 1/box_size) + + box size is constant + + + value = a * contribtion_a * 1/box_size + b * contribution_b * 1/box_size + contribution_b = (1 - contribution_a) + = (1 - contribution_a_next) + */ + + /* box size = ceil(src_width/dest_width) */ + int x = 0; + + /* skip to start */ + /* XXX: it might be possible to do this directly instead of iteratively, however + * the iterative solution is simple */ + while (x < start) + { + int box = 1 << FIXED_SHIFT; + int start_coverage = coverage[x]; + box -= start_coverage; + src++; + while (box >= pixel_coverage) + { + src++; + box -= pixel_coverage; + } + x++; + } + + while (x < start + width) + { + uint32_t a = 0; + uint32_t r = 0; + uint32_t g = 0; + uint32_t b = 0; + int box = 1 << FIXED_SHIFT; + int start_coverage = coverage[x]; + + a = ((*src >> 24) & 0xff) * start_coverage; + r = ((*src >> 16) & 0xff) * start_coverage; + g = ((*src >> 8) & 0xff) * start_coverage; + b = ((*src >> 0) & 0xff) * start_coverage; + src++; + x++; + box -= start_coverage; + + while (box >= pixel_coverage) + { + a 
+= ((*src >> 24) & 0xff) * pixel_coverage; + r += ((*src >> 16) & 0xff) * pixel_coverage; + g += ((*src >> 8) & 0xff) * pixel_coverage; + b += ((*src >> 0) & 0xff) * pixel_coverage; + src++; + + box -= pixel_coverage; + } + + /* multiply by whatever is leftover + * this ensures that we don't bias down. + * i.e. start_coverage + n*pixel_coverage + box == 1 << 24 */ + if (box > 0) + { + a += ((*src >> 24) & 0xff) * box; + r += ((*src >> 16) & 0xff) * box; + g += ((*src >> 8) & 0xff) * box; + b += ((*src >> 0) & 0xff) * box; + } + + a >>= FIXED_SHIFT; + r >>= FIXED_SHIFT; + g >>= FIXED_SHIFT; + b >>= FIXED_SHIFT; + + *dest = (a << 24) | (r << 16) | (g << 8) | b; + dest++; + } +} + +static void downsample_columns_box_filter ( + int n, + int start_coverage, + int pixel_coverage, + uint32_t *src, uint32_t *dest) +{ + int stride = n; + while (n--) { + uint32_t a = 0; + uint32_t r = 0; + uint32_t g = 0; + uint32_t b = 0; + uint32_t *column_src = src; + int box = 1 << FIXED_SHIFT; + + a = ((*column_src >> 24) & 0xff) * start_coverage; + r = ((*column_src >> 16) & 0xff) * start_coverage; + g = ((*column_src >> 8) & 0xff) * start_coverage; + b = ((*column_src >> 0) & 0xff) * start_coverage; + column_src += stride; + box -= start_coverage; + + while (box >= pixel_coverage) + { + a += ((*column_src >> 24) & 0xff) * pixel_coverage; + r += ((*column_src >> 16) & 0xff) * pixel_coverage; + g += ((*column_src >> 8) & 0xff) * pixel_coverage; + b += ((*column_src >> 0) & 0xff) * pixel_coverage; + column_src += stride; + box -= pixel_coverage; + } + + if (box > 0) { + a += ((*column_src >> 24) & 0xff) * box; + r += ((*column_src >> 16) & 0xff) * box; + g += ((*column_src >> 8) & 0xff) * box; + b += ((*column_src >> 0) & 0xff) * box; + } + + a >>= FIXED_SHIFT; + r >>= FIXED_SHIFT; + g >>= FIXED_SHIFT; + b >>= FIXED_SHIFT; + + *dest = (a << 24) | (r << 16) | (g << 8) | b; + dest++; + src++; + } +} + +static int compute_coverage (int coverage[], int src_length, int dest_length) +{ + int 
i; + /* num = src_length/dest_length + total = sum(pixel) / num + + pixel * 1/num == pixel * dest_length / src_length + */ + /* the average contribution of each source pixel */ + int ratio = ((1 << 24)*(long long int)dest_length)/src_length; + /* because ((1 << 24)*(long long int)dest_length) won't always be divisible by src_length + * we'll need someplace to put the other bits. + * + * We want to ensure a + n*ratio < 1<<24 + * + * 1<<24 + * */ + + double scale = (double)src_length/dest_length; + + /* for each destination pixel compute the coverage of the left most pixel included in the box */ + /* I have a proof of this, which this margin is too narrow to contain */ + for (i=0; i<dest_length; i++) + { + float left_side = i*scale; + float right_side = (i+1)*scale; + float right_fract = right_side - floor (right_side); + float left_fract = ceil (left_side) - left_side; + int overage; + /* find out how many source pixels will be used to fill the box */ + int count = floor (right_side) - ceil (left_side); + /* what's the maximum value this expression can become? + floor((i+1)*scale) - ceil(i*scale) + + (i+1)*scale - i*scale == scale + + since floor((i+1)*scale) <= (i+1)*scale + and ceil(i*scale) >= i*scale + + floor((i+1)*scale) - ceil(i*scale) <= scale + + further since: floor((i+1)*scale) - ceil(i*scale) is an integer + + therefore: + floor((i+1)*scale) - ceil(i*scale) <= floor(scale) + */ + + if (left_fract == 0.) 
+ count--; + + /* compute how much the right-most pixel contributes */ + overage = ratio*(right_fract); + + /* the remainder is the the amount that the left-most pixel + * contributes */ + coverage[i] = (1<<24) - (count * ratio + overage); + } + + return ratio; +} + +GBool downscale_box_filter(uint32_t *orig, int orig_stride, unsigned orig_width, unsigned orig_height, + signed scaled_width, signed scaled_height, + uint16_t start_column, uint16_t start_row, + uint16_t width, uint16_t height, + uint32_t *dest, int dst_stride) +{ + int pixel_coverage_x, pixel_coverage_y; + int dest_y; + int src_y = 0; + uint32_t *scanline = orig; + int *x_coverage = NULL; + int *y_coverage = NULL; + uint32_t *temp_buf = NULL; + GBool retval = gFalse; + + x_coverage = (int *)gmallocn3 (orig_width, 1, sizeof(int)); + y_coverage = (int *)gmallocn3 (orig_height, 1, sizeof(int)); + + /* we need to allocate enough room for ceil(src_height/dest_height)+1 + Example: + src_height = 140 + dest_height = 50 + src_height/dest_height = 2.8 + + |-------------| 2.8 pixels + |----|----|----|----| 4 pixels + need to sample 3 pixels + + |-------------| 2.8 pixels + |----|----|----|----| 4 pixels + need to sample 4 pixels + */ + + temp_buf = (uint32_t *)gmallocn3 ((orig_height + scaled_height-1)/scaled_height+1, scaled_width, sizeof(uint32_t)); + + if (!x_coverage || !y_coverage || !scanline || !temp_buf) + goto cleanup; + + pixel_coverage_x = compute_coverage (x_coverage, orig_width, scaled_width); + pixel_coverage_y = compute_coverage (y_coverage, orig_height, scaled_height); + + assert (width + start_column <= scaled_width); + + /* skip the rows at the beginning */ + for (dest_y = 0; dest_y < start_row; dest_y++) + { + int box = 1 << FIXED_SHIFT; + int start_coverage_y = y_coverage[dest_y]; + box -= start_coverage_y; + src_y++; + while (box >= pixel_coverage_y) + { + box -= pixel_coverage_y; + src_y++; + } + } + + for (; dest_y < start_row + height; dest_y++) + { + int columns = 0; + int box = 1 << 
FIXED_SHIFT; + int start_coverage_y = y_coverage[dest_y]; + + scanline = orig + src_y * orig_stride / 4; + downsample_row_box_filter (start_column, width, scanline, temp_buf + width * columns, x_coverage, pixel_coverage_x); + columns++; + src_y++; + box -= start_coverage_y; + + while (box >= pixel_coverage_y) + { + scanline = orig + src_y * orig_stride / 4; + downsample_row_box_filter (start_column, width, scanline, temp_buf + width * columns, x_coverage, pixel_coverage_x); + columns++; + src_y++; + box -= pixel_coverage_y; + } + + /* downsample any leftovers */ + if (box > 0) + { + scanline = orig + src_y * orig_stride / 4; + downsample_row_box_filter (start_column, width, scanline, temp_buf + width * columns, x_coverage, pixel_coverage_x); + columns++; + } + + /* now scale the rows we just downsampled in the y direction */ + downsample_columns_box_filter (width, start_coverage_y, pixel_coverage_y, temp_buf, dest); + dest += dst_stride / 4; + +// assert(width*columns <= ((orig_height + scaled_height-1)/scaled_height+1) * width); + } +// assert (src_y<=orig_height); + + retval = gTrue; + +cleanup: + free (x_coverage); + free (y_coverage); + free (temp_buf); + + return gTrue; +} Index: poppler-0.12.4/poppler/CairoRescaleBox.h =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ poppler-0.12.4/poppler/CairoRescaleBox.h 2010-07-29 17:40:17.168125215 -0400 @@ -0,0 +1,12 @@ +#ifndef CAIRO_RESCALE_BOX_H +#define CAIRO_RESCALE_BOX_H + +#include "goo/gtypes.h" + +GBool downscale_box_filter(unsigned int *orig, int orig_stride, unsigned orig_width, unsigned orig_height, + signed scaled_width, signed scaled_height, + unsigned short int start_column, unsigned short int start_row, + unsigned short int width, unsigned short int height, + unsigned int *dest, int dst_stride); + +#endif /* CAIRO_RESCALE_BOX_H */ Index: poppler-0.12.4/poppler/Makefile.am 
=================================================================== --- poppler-0.12.4.orig/poppler/Makefile.am 2010-01-16 19:06:57.000000000 -0500 +++ poppler-0.12.4/poppler/Makefile.am 2010-07-29 17:40:17.168125215 -0400 @@ -47,7 +47,9 @@ CairoFontEngine.cc \ CairoFontEngine.h \ CairoOutputDev.cc \ - CairoOutputDev.h + CairoOutputDev.h \ + CairoRescaleBox.cc \ + CairoRescaleBox.h endif Index: poppler-0.12.4/poppler/Makefile.in =================================================================== --- poppler-0.12.4.orig/poppler/Makefile.in 2010-07-29 17:40:34.795906715 -0400 +++ poppler-0.12.4/poppler/Makefile.in 2010-07-29 17:42:39.647908735 -0400 @@ -88,9 +88,9 @@ @BUILD_POPPLER_QT4_TRUE@@build_splash_output_t...@am_libpoppler_arthur_la_rpath = libpoppler_cairo_la_LIBADD = am__libpoppler_cairo_la_SOURCES_DIST = CairoFontEngine.cc \ - CairoFontEngine.h CairoOutputDev.cc CairoOutputDev.h + CairoFontEngine.h CairoOutputDev.cc CairoOutputDev.h CairoRescaleBox.cc CairoRescaleBox.h @build_cairo_output_t...@am_libpoppler_cairo_la_objects = \ -...@build_cairo_output_true@ CairoFontEngine.lo CairoOutputDev.lo +...@build_cairo_output_true@ CairoFontEngine.lo CairoOutputDev.lo CairoRescaleBox.lo libpoppler_cairo_la_OBJECTS = $(am_libpoppler_cairo_la_OBJECTS) @build_cairo_output_t...@am_libpoppler_cairo_la_rpath = am__DEPENDENCIES_1 = @@ -426,7 +426,9 @@ @BUILD_CAIRO_OUTPUT_TRUE@ CairoFontEngine.cc \ @BUILD_CAIRO_OUTPUT_TRUE@ CairoFontEngine.h \ @BUILD_CAIRO_OUTPUT_TRUE@ CairoOutputDev.cc \ -...@build_cairo_output_true@ CairoOutputDev.h +...@build_cairo_output_true@ CairoOutputDev.h \ +...@build_cairo_output_true@ CairoRescaleBox.cc \ +...@build_cairo_output_true@ CairoRescaleBox.h @build_libjpeg_t...@libjpeg_sources = \ @BUILD_LIBJPEG_TRUE@ DCTStream.h \ @@ -739,6 +741,7 @@ @AMDEP_TRUE@@am__include@ @am__qu...@./$(DEPDIR)/cmap....@am__quote@ @AMDEP_TRUE@@am__include@ @am__qu...@./$(DEPDIR)/cairofontengine....@am__quote@ @AMDEP_TRUE@@am__include@ 
@am__qu...@./$(DEPDIR)/cairooutputdev....@am__quote@ +...@amdep_true@@am__include@ @am__qu...@./$(DEPDIR)/cairorescalebox....@am__quote@ @AMDEP_TRUE@@am__include@ @am__qu...@./$(DEPDIR)/catalog....@am__quote@ @AMDEP_TRUE@@am__include@ @am__qu...@./$(DEPDIR)/charcodetounicode....@am__quote@ @AMDEP_TRUE@@am__include@ @am__qu...@./$(DEPDIR)/dctstream....@am__quote@