https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111551
--- Comment #4 from Jan Hubicka <hubicka at gcc dot gnu.org> ---
From the gcov dump, the normal train run exercises this loop:

     742632: 2953:    switch ( method ) {
     742632: 2954:      case ConvolveMorphology:
          -: 2955:        /* Weighted Average of pixels using reflected kernel
          -: 2956:        **
          -: 2957:        ** NOTE for correct working of this operation for asymetrical
          -: 2958:        ** kernels, the kernel needs to be applied in its reflected form.
          -: 2959:        ** That is its values needs to be reversed.
          -: 2960:        **
          -: 2961:        ** Correlation is actually the same as this but without reflecting
          -: 2962:        ** the kernel, and thus 'lower-level' that Convolution. However
          -: 2963:        ** as Convolution is the more common method used, and it does not
          -: 2964:        ** really cost us much in terms of processing to use a reflected
          -: 2965:        ** kernel, so it is Convolution that is implemented.
          -: 2966:        **
          -: 2967:        ** Correlation will have its kernel reflected before calling
          -: 2968:        ** this function to do a Convolve.
          -: 2969:        **
          -: 2970:        ** For more details of Correlation vs Convolution see
          -: 2971:        ** http://www.cs.umd.edu/~djacobs/CMSC426/Convolution.pdf
          -: 2972:        */
     742632: 2973:        k = &kernel->values[ kernel->width*kernel->height-1 ];
     742632: 2974:        k_pixels = p;
     742632: 2975:        k_indexes = p_indexes;
     742632: 2976:        if ( ((channel & SyncChannels) == 0 ) ||
     742632: 2977:             (image->matte == MagickFalse) )
          -: 2978:          { /* No 'Sync' involved.
          -: 2979:            ** Convolution is simple greyscale channel operation
          -: 2980:            */
      #####: 2981:            for (v=0; v < (ssize_t) kernel->height; v++) {
      #####: 2982:              for (u=0; u < (ssize_t) kernel->width; u++, k--) {
      #####: 2983:                if ( IsNaN(*k) ) continue;
      #####: 2984:                result.red += (*k)*k_pixels[u].red;
      #####: 2985:                result.green += (*k)*k_pixels[u].green;
      #####: 2986:                result.blue += (*k)*k_pixels[u].blue;
      #####: 2987:                result.opacity += (*k)*k_pixels[u].opacity;
      #####: 2988:                if ( image->colorspace == CMYKColorspace)
      #####: 2989:                  result.index += (*k)*GetPixelIndex(k_indexes+u);
          -: 2990:              }
      #####: 2991:              k_pixels += virt_width;
      #####: 2992:              k_indexes += virt_width;
          -: 2993:            }
      #####: 2994:            if ((channel & RedChannel) != 0)
      #####: 2995:              SetPixelRed(q,ClampToQuantum((MagickRealType) result.red));
      #####: 2996:            if ((channel & GreenChannel) != 0)
      #####: 2997:              SetPixelGreen(q,ClampToQuantum((MagickRealType) result.green));
      #####: 2998:            if ((channel & BlueChannel) != 0)
      #####: 2999:              SetPixelBlue(q,ClampToQuantum((MagickRealType) result.blue));
      #####: 3000:            if (((channel & OpacityChannel) != 0) &&
      #####: 3001:                (image->matte != MagickFalse))
      #####: 3002:              SetPixelOpacity(q,ClampToQuantum((MagickRealType) result.opacity));
      #####: 3003:            if (((channel & IndexChannel) != 0) &&
          -: 3004:                (image->colorspace == CMYKColorspace))
      #####: 3005:              SetPixelIndex(q_indexes+x,ClampToQuantum(result.index));
          -: 3006:          }
          -: 3007:        else
          -: 3008:          { /* Channel 'Sync' Flag, and Alpha Channel enabled.
          -: 3009:            ** Weight the color channels with Alpha Channel so that
          -: 3010:            ** transparent pixels are not part of the results.
          -: 3011:            */
          -: 3012:            double
          -: 3013:              alpha, /* alpha weighting for colors : alpha */
          -: 3014:              gamma; /* divisor, sum of color alpha weighting */
          -: 3015:
          -: 3016:            size_t
          -: 3017:              count; /* alpha valus collected, number kernel values */
          -: 3018:
          -: 3019:            count=0;
          -: 3020:            gamma=0.0;
   46090224: 3021:            for (v=0; v < (ssize_t) kernel->height; v++) {
 9358048944: 3022:              for (u=0; u < (ssize_t) kernel->width; u++, k--) {
 9312701352: 3023:                if ( IsNaN(*k) ) continue;
 9312701352: 3024:                alpha=QuantumScale*(QuantumRange-k_pixels[u].opacity);
 9312701352: 3025:                count++; /* number of alpha values collected */
 9312701352: 3026:                alpha*=(*k); /* include kernel weighting now */
 9312701352: 3027:                gamma += alpha; /* normalize alpha weights only */
 9312701352: 3028:                result.red += alpha*k_pixels[u].red;
 9312701352: 3029:                result.green += alpha*k_pixels[u].green;
 9312701352: 3030:                result.blue += alpha*k_pixels[u].blue;
 9312701352: 3031:                result.opacity += (*k)*k_pixels[u].opacity;
 9312701352: 3032:                if ( image->colorspace == CMYKColorspace)
      #####: 3033:                  result.index+=alpha*GetPixelIndex(k_indexes+u);
          -: 3034:              }
   45347592: 3035:              k_pixels += virt_width;
   45347592: 3036:              k_indexes += virt_width;
          -: 3037:            }
          -: 3038:            /* Sync'ed channels, all channels are modified */

The refrate run, on the other hand, exercises a different variant of the loop
in the same function:

   20889171: 2954:      case ConvolveMorphology:
          -: 2955:        /* Weighted Average of pixels using reflected kernel
          -: 2956:        **
          -: 2957:        ** NOTE for correct working of this operation for asymetrical
          -: 2958:        ** kernels, the kernel needs to be applied in its reflected form.
          -: 2959:        ** That is its values needs to be reversed.
          -: 2960:        **
          -: 2961:        ** Correlation is actually the same as this but without reflecting
          -: 2962:        ** the kernel, and thus 'lower-level' that Convolution. However
          -: 2963:        ** as Convolution is the more common method used, and it does not
          -: 2964:        ** really cost us much in terms of processing to use a reflected
          -: 2965:        ** kernel, so it is Convolution that is implemented.
          -: 2966:        **
          -: 2967:        ** Correlation will have its kernel reflected before calling
          -: 2968:        ** this function to do a Convolve.
          -: 2969:        **
          -: 2970:        ** For more details of Correlation vs Convolution see
          -: 2971:        ** http://www.cs.umd.edu/~djacobs/CMSC426/Convolution.pdf
          -: 2972:        */
   20889171: 2973:        k = &kernel->values[ kernel->width*kernel->height-1 ];
   20889171: 2974:        k_pixels = p;
   20889171: 2975:        k_indexes = p_indexes;
   20889171: 2976:        if ( ((channel & SyncChannels) == 0 ) ||
   20889171: 2977:             (image->matte == MagickFalse) )
          -: 2978:          { /* No 'Sync' involved.
          -: 2979:            ** Convolution is simple greyscale channel operation
          -: 2980:            */
 1393983744: 2981:            for (v=0; v < (ssize_t) kernel->height; v++) {
92615427072: 2982:              for (u=0; u < (ssize_t) kernel->width; u++, k--) {
91242332499: 2983:                if ( IsNaN(*k) ) continue;
91242332499: 2984:                result.red += (*k)*k_pixels[u].red;
91242332499: 2985:                result.green += (*k)*k_pixels[u].green;
91242332499: 2986:                result.blue += (*k)*k_pixels[u].blue;
91242332499: 2987:                result.opacity += (*k)*k_pixels[u].opacity;
91242332499: 2988:                if ( image->colorspace == CMYKColorspace)
      #####: 2989:                  result.index += (*k)*GetPixelIndex(k_indexes+u);
          -: 2990:              }
 1373094573: 2991:              k_pixels += virt_width;
 1373094573: 2992:              k_indexes += virt_width;
          -: 2993:            }
   20889171: 2994:            if ((channel & RedChannel) != 0)
   32778616: 2995:              SetPixelRed(q,ClampToQuantum((MagickRealType) result.red));
   20889171: 2996:            if ((channel & GreenChannel) != 0)
   33106046: 2997:              SetPixelGreen(q,ClampToQuantum((MagickRealType) result.green));
   20889171: 2998:            if ((channel & BlueChannel) != 0)
   33557482: 2999:              SetPixelBlue(q,ClampToQuantum((MagickRealType) result.blue));
  20889171*: 3000:            if (((channel & OpacityChannel) != 0) &&
      #####: 3001:                (image->matte != MagickFalse))
      #####: 3002:              SetPixelOpacity(q,ClampToQuantum((MagickRealType) result.opacity));
   20889171: 3003:            if (((channel & IndexChannel) != 0) &&
          -: 3004:                (image->colorspace == CMYKColorspace))
      #####: 3005:              SetPixelIndex(q_indexes+x,ClampToQuantum(result.index));

So indeed the train run and the refrate run simply execute different code...
This cannot be mitigated with partial training here, since both variants live
in the same function. I wonder if we want to come up with some meaningful
definition of an aggressive form of partial training, where even code with a
0 execution count inside a profiled function is somehow considered possibly
hot, but I have a hard time seeing what can be done here.
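To make the pattern concrete in isolation, here is a minimal sketch (file,
function names and inputs are made up, not taken from the benchmark): two
loop variants sit behind one data-dependent branch in a single function, so
whichever branch the train input never takes ends up with a zero profile
count, and a -fprofile-use build is then expected to lay it out and optimize
it as cold, even when a later input makes it the hot path:

/* pgo-two-variants.c: hypothetical reproducer of the pattern above.  */
#include <stdio.h>
#include <stdlib.h>

static double
convolve (const double *k, const double *px, size_t n, int sync)
{
  double r = 0.0;
  if (!sync)
    {
      /* Plain variant; only the "refrate"-style input below reaches it.  */
      for (size_t i = 0; i < n; i++)
        r += k[i] * px[i];
    }
  else
    {
      /* Alpha-weighted variant, loosely modelled on the SyncChannels
         path; only the "train"-style input below reaches it.  */
      double gamma = 0.0;
      for (size_t i = 0; i < n; i++)
        {
          double alpha = k[i] * px[i];
          gamma += alpha;
          r += alpha * px[i];
        }
      if (gamma != 0.0)
        r /= gamma;
    }
  return r;
}

int
main (int argc, char **argv)
{
  static const double k[4] = { 0.25, 0.25, 0.25, 0.25 };
  static const double px[4] = { 1.0, 2.0, 3.0, 4.0 };
  int sync = argc > 1 && atoi (argv[1]) != 0;

  /* Train:     gcc -O2 -fprofile-generate pgo-two-variants.c && ./a.out 1
     Feedback:  gcc -O2 -fprofile-use pgo-two-variants.c
     "Refrate": ./a.out 0 now runs the branch whose profile count is 0.  */
  printf ("%f\n", convolve (k, px, 4, sync));
  return 0;
}

Partial training as it stands can only help code that has no profile at all
(e.g. whole functions never seen during training); here both branches share
one profiled function, so the zero-count variant stays cold, which is exactly
the gap an "aggressive" variant would have to fill.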