On 9/9/2017 1:27 PM, Michael Niedermayer wrote:
+ // If the image is sufficiently aligned, compute 8 samples at once
+ if (!(((uintptr_t)dst) & 7)) {
+ uint64_t *dst64 = (uint64_t *)dst;
+ int w = avctx->width>>1;
+ for (x = 0; x < w; x++) {
+ dst64[x] = (dst64[x] << 3) & 0xFCFCFCFCFCFCFCFCULL;
+ }
+ x *= 8;
+ } else
+ x = 0;
+ for (; x < avctx->width * 4; x++) {
dst[x] = dst[x] << 3;
}
Forgive me if I'm misunderstanding the code, but couldn't you always apply
the optimization by first processing the (up to) 7 unaligned leading samples
one at a time, so that the 64-bit loop always starts on an aligned address?
Pseudocode:
uint64_t *dst64 = (uint64_t *)dst;
int w = avctx->width>>1;
x=0
// compute un-aligned beginning samples
for (; x < (avctx->width * 4) && (((uintptr_t)dst) & 7); x++) {
dst[x] = dst[x] << 3;
}
// compute aligned samples
for (; x < w; x+=8) {
dst64[x] = (dst64[x] << 3) & 0xFCFCFCFCFCFCFCFCULL;
}
x -= 8;
// compute un-aligned ending samples
for (; x < avctx->width * 4; x++) {
dst[x] = dst[x] << 3;
}
_______________________________________________
ffmpeg-devel mailing list
[email protected]
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel