https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111551
--- Comment #4 from Jan Hubicka <hubicka at gcc dot gnu.org> ---
From the gcov dump, the normal train run exercises this loop:

     742632: 2953:    switch ( method ) {
     742632: 2954:      case ConvolveMorphology:
          -: 2955:        /* Weighted Average of pixels using reflected kernel
          -: 2956:        **
          -: 2957:        ** NOTE for correct working of this operation for asymetrical
          -: 2958:        ** kernels, the kernel needs to be applied in its reflected form.
          -: 2959:        ** That is its values needs to be reversed.
          -: 2960:        **
          -: 2961:        ** Correlation is actually the same as this but without reflecting
          -: 2962:        ** the kernel, and thus 'lower-level' that Convolution. However
          -: 2963:        ** as Convolution is the more common method used, and it does not
          -: 2964:        ** really cost us much in terms of processing to use a reflected
          -: 2965:        ** kernel, so it is Convolution that is implemented.
          -: 2966:        **
          -: 2967:        ** Correlation will have its kernel reflected before calling
          -: 2968:        ** this function to do a Convolve.
          -: 2969:        **
          -: 2970:        ** For more details of Correlation vs Convolution see
          -: 2971:        ** http://www.cs.umd.edu/~djacobs/CMSC426/Convolution.pdf
          -: 2972:        */
     742632: 2973:        k = &kernel->values[ kernel->width*kernel->height-1 ];
     742632: 2974:        k_pixels = p;
     742632: 2975:        k_indexes = p_indexes;
     742632: 2976:        if ( ((channel & SyncChannels) == 0 ) ||
     742632: 2977:             (image->matte == MagickFalse) )
          -: 2978:          { /* No 'Sync' involved.
          -: 2979:            ** Convolution is simple greyscale channel operation
          -: 2980:            */
      #####: 2981:            for (v=0; v < (ssize_t) kernel->height; v++) {
      #####: 2982:              for (u=0; u < (ssize_t) kernel->width; u++, k--) {
      #####: 2983:                if ( IsNaN(*k) ) continue;
      #####: 2984:                result.red += (*k)*k_pixels[u].red;
      #####: 2985:                result.green += (*k)*k_pixels[u].green;
      #####: 2986:                result.blue += (*k)*k_pixels[u].blue;
      #####: 2987:                result.opacity += (*k)*k_pixels[u].opacity;
      #####: 2988:                if ( image->colorspace == CMYKColorspace)
      #####: 2989:                  result.index += (*k)*GetPixelIndex(k_indexes+u);
          -: 2990:              }
      #####: 2991:              k_pixels += virt_width;
      #####: 2992:              k_indexes += virt_width;
          -: 2993:            }
      #####: 2994:            if ((channel & RedChannel) != 0)
      #####: 2995:              SetPixelRed(q,ClampToQuantum((MagickRealType) result.red));
      #####: 2996:            if ((channel & GreenChannel) != 0)
      #####: 2997:              SetPixelGreen(q,ClampToQuantum((MagickRealType) result.green));
      #####: 2998:            if ((channel & BlueChannel) != 0)
      #####: 2999:              SetPixelBlue(q,ClampToQuantum((MagickRealType) result.blue));
      #####: 3000:            if (((channel & OpacityChannel) != 0) &&
      #####: 3001:                (image->matte != MagickFalse))
      #####: 3002:              SetPixelOpacity(q,ClampToQuantum((MagickRealType) result.opacity));
      #####: 3003:            if (((channel & IndexChannel) != 0) &&
          -: 3004:                (image->colorspace == CMYKColorspace))
      #####: 3005:              SetPixelIndex(q_indexes+x,ClampToQuantum(result.index));
          -: 3006:          }
          -: 3007:        else
          -: 3008:          { /* Channel 'Sync' Flag, and Alpha Channel enabled.
          -: 3009:            ** Weight the color channels with Alpha Channel so that
          -: 3010:            ** transparent pixels are not part of the results.
          -: 3011:            */
          -: 3012:            double
          -: 3013:              alpha, /* alpha weighting for colors : alpha */
          -: 3014:              gamma; /* divisor, sum of color alpha weighting */
          -: 3015:
          -: 3016:            size_t
          -: 3017:              count; /* alpha valus collected, number kernel values */
          -: 3018:
          -: 3019:            count=0;
          -: 3020:            gamma=0.0;
   46090224: 3021:            for (v=0; v < (ssize_t) kernel->height; v++) {
 9358048944: 3022:              for (u=0; u < (ssize_t) kernel->width; u++, k--) {
 9312701352: 3023:                if ( IsNaN(*k) ) continue;
 9312701352: 3024:                alpha=QuantumScale*(QuantumRange-k_pixels[u].opacity);
 9312701352: 3025:                count++; /* number of alpha values collected */
 9312701352: 3026:                alpha*=(*k); /* include kernel weighting now */
 9312701352: 3027:                gamma += alpha; /* normalize alpha weights only */
 9312701352: 3028:                result.red += alpha*k_pixels[u].red;
 9312701352: 3029:                result.green += alpha*k_pixels[u].green;
 9312701352: 3030:                result.blue += alpha*k_pixels[u].blue;
 9312701352: 3031:                result.opacity += (*k)*k_pixels[u].opacity;
 9312701352: 3032:                if ( image->colorspace == CMYKColorspace)
      #####: 3033:                  result.index+=alpha*GetPixelIndex(k_indexes+u);
          -: 3034:              }
   45347592: 3035:              k_pixels += virt_width;
   45347592: 3036:              k_indexes += virt_width;
          -: 3037:            }
          -: 3038:            /* Sync'ed channels, all channels are modified */

The refrate run, on the other hand, exercises a different variant of the loop
in the same function:

   20889171: 2954:      case ConvolveMorphology:
          -: 2955:        /* Weighted Average of pixels using reflected kernel
          -: 2956:        **
          -: 2957:        ** NOTE for correct working of this operation for asymetrical
          -: 2958:        ** kernels, the kernel needs to be applied in its reflected form.
          -: 2959:        ** That is its values needs to be reversed.
          -: 2960:        **
          -: 2961:        ** Correlation is actually the same as this but without reflecting
          -: 2962:        ** the kernel, and thus 'lower-level' that Convolution. However
          -: 2963:        ** as Convolution is the more common method used, and it does not
          -: 2964:        ** really cost us much in terms of processing to use a reflected
          -: 2965:        ** kernel, so it is Convolution that is implemented.
          -: 2966:        **
          -: 2967:        ** Correlation will have its kernel reflected before calling
          -: 2968:        ** this function to do a Convolve.
          -: 2969:        **
          -: 2970:        ** For more details of Correlation vs Convolution see
          -: 2971:        ** http://www.cs.umd.edu/~djacobs/CMSC426/Convolution.pdf
          -: 2972:        */
   20889171: 2973:        k = &kernel->values[ kernel->width*kernel->height-1 ];
   20889171: 2974:        k_pixels = p;
   20889171: 2975:        k_indexes = p_indexes;
   20889171: 2976:        if ( ((channel & SyncChannels) == 0 ) ||
   20889171: 2977:             (image->matte == MagickFalse) )
          -: 2978:          { /* No 'Sync' involved.
          -: 2979:            ** Convolution is simple greyscale channel operation
          -: 2980:            */
 1393983744: 2981:            for (v=0; v < (ssize_t) kernel->height; v++) {
92615427072: 2982:              for (u=0; u < (ssize_t) kernel->width; u++, k--) {
91242332499: 2983:                if ( IsNaN(*k) ) continue;
91242332499: 2984:                result.red += (*k)*k_pixels[u].red;
91242332499: 2985:                result.green += (*k)*k_pixels[u].green;
91242332499: 2986:                result.blue += (*k)*k_pixels[u].blue;
91242332499: 2987:                result.opacity += (*k)*k_pixels[u].opacity;
91242332499: 2988:                if ( image->colorspace == CMYKColorspace)
      #####: 2989:                  result.index += (*k)*GetPixelIndex(k_indexes+u);
          -: 2990:              }
 1373094573: 2991:              k_pixels += virt_width;
 1373094573: 2992:              k_indexes += virt_width;
          -: 2993:            }
   20889171: 2994:            if ((channel & RedChannel) != 0)
   32778616: 2995:              SetPixelRed(q,ClampToQuantum((MagickRealType) result.red));
   20889171: 2996:            if ((channel & GreenChannel) != 0)
   33106046: 2997:              SetPixelGreen(q,ClampToQuantum((MagickRealType) result.green));
   20889171: 2998:            if ((channel & BlueChannel) != 0)
   33557482: 2999:              SetPixelBlue(q,ClampToQuantum((MagickRealType) result.blue));
  20889171*: 3000:            if (((channel & OpacityChannel) != 0) &&
      #####: 3001:                (image->matte != MagickFalse))
      #####: 3002:              SetPixelOpacity(q,ClampToQuantum((MagickRealType) result.opacity));
   20889171: 3003:            if (((channel & IndexChannel) != 0) &&
          -: 3004:                (image->colorspace == CMYKColorspace))
      #####: 3005:              SetPixelIndex(q_indexes+x,ClampToQuantum(result.index));

So indeed the train run and the refrate run simply execute different code...
This cannot be mitigated with partial training here, since both variants live
in the same function. I wonder if we want to come up with some meaningful
definition of an aggressive form of partial training, where even code with a
0 execution count inside a profiled function is somehow considered possibly
hot, but I have a hard time seeing what can be done here.
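To make the pattern concrete in isolation, here is a minimal sketch (file,
function names and inputs are made up, not taken from the benchmark): two
loop variants sit behind one data-dependent branch in a single function, so
whichever branch the train input never takes ends up with a zero profile
count, and a -fprofile-use build is then expected to lay it out and optimize
it as cold, even when a later input makes it the hot path:

/* pgo-two-variants.c: hypothetical reproducer of the pattern above.  */
#include <stdio.h>
#include <stdlib.h>

static double
convolve (const double *k, const double *px, size_t n, int sync)
{
  double r = 0.0;
  if (!sync)
    {
      /* Plain variant; only the "refrate"-style input below reaches it.  */
      for (size_t i = 0; i < n; i++)
        r += k[i] * px[i];
    }
  else
    {
      /* Alpha-weighted variant, loosely modelled on the SyncChannels
         path; only the "train"-style input below reaches it.  */
      double gamma = 0.0;
      for (size_t i = 0; i < n; i++)
        {
          double alpha = k[i] * px[i];
          gamma += alpha;
          r += alpha * px[i];
        }
      if (gamma != 0.0)
        r /= gamma;
    }
  return r;
}

int
main (int argc, char **argv)
{
  static const double k[4] = { 0.25, 0.25, 0.25, 0.25 };
  static const double px[4] = { 1.0, 2.0, 3.0, 4.0 };
  int sync = argc > 1 && atoi (argv[1]) != 0;

  /* Train:     gcc -O2 -fprofile-generate pgo-two-variants.c && ./a.out 1
     Feedback:  gcc -O2 -fprofile-use pgo-two-variants.c
     "Refrate": ./a.out 0 now runs the branch whose profile count is 0.  */
  printf ("%f\n", convolve (k, px, 4, sync));
  return 0;
}

Partial training as it stands can only help code that has no profile at all
(e.g. whole functions never seen during training); here both branches share
one profiled function, so the zero-count variant stays cold, which is exactly
the gap an "aggressive" variant would have to fill.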