Dana 13. 6. 2015. 18:28 osoba "James Almer" <[email protected]> napisala je: > > Signed-off-by: James Almer <[email protected]> > --- > Only sample i could find using reversible wavelet transform is http://www.fnordware.com/j2k/relax.jp2 > > libavcodec/jpeg2000.c | 1 + > libavcodec/x86/jpeg2000dsp.asm | 36 ++++++++++++++++++++++++++++++++++++ > libavcodec/x86/jpeg2000dsp_init.c | 10 ++++++++++ > 3 files changed, 47 insertions(+) > > diff --git a/libavcodec/jpeg2000.c b/libavcodec/jpeg2000.c > index af24e99..ec00ebc 100644 > --- a/libavcodec/jpeg2000.c > +++ b/libavcodec/jpeg2000.c > @@ -221,6 +221,7 @@ int ff_jpeg2000_init_component(Jpeg2000Component *comp, > if (!comp->f_data) > return AVERROR(ENOMEM); > } else { > + csize += FF_INPUT_BUFFER_PADDING_SIZE / sizeof(*comp->i_data); > comp->f_data = NULL; > comp->i_data = av_mallocz_array(csize, sizeof(*comp->i_data)); > if (!comp->i_data) > diff --git a/libavcodec/x86/jpeg2000dsp.asm b/libavcodec/x86/jpeg2000dsp.asm > index 0d79ab7..712a298 100644 > --- a/libavcodec/x86/jpeg2000dsp.asm > +++ b/libavcodec/x86/jpeg2000dsp.asm > @@ -106,3 +106,39 @@ INIT_XMM sse > ICT_FLOAT 10 > INIT_YMM avx > ICT_FLOAT 9 > + > +;*************************************************************************** > +; ff_rct_int_<opt>(int32_t *src0, int32_t *src1, int32_t *src2, int csize) > +;*************************************************************************** > +%macro RCT_INT 0 > +cglobal rct_int, 4, 4, 4, src0, src1, src2, csize > + shl csized, 2 > + add src0q, csizeq > + add src1q, csizeq > + add src2q, csizeq > + neg csizeq > + > +align 16 > +.loop: > + mova m1, [src1q+csizeq] > + mova m2, [src2q+csizeq] > + mova m0, [src0q+csizeq] > + paddd m3, m1, m2 > + psrad m3, 2 > + psubd m0, m3 > + paddd m1, m0 > + paddd m2, m0 > + mova [src1q+csizeq], m0 > + mova [src2q+csizeq], m1 > + mova [src0q+csizeq], m2 > + add csizeq, mmsize > + jl .loop > + REP_RET > +%endmacro > + > +INIT_XMM sse2 > +RCT_INT > +%if HAVE_AVX2_EXTERNAL > +INIT_YMM 
avx2 > +RCT_INT > +%endif > diff --git a/libavcodec/x86/jpeg2000dsp_init.c b/libavcodec/x86/jpeg2000dsp_init.c > index 43b9ccd..0dbd2db 100644 > --- a/libavcodec/x86/jpeg2000dsp_init.c > +++ b/libavcodec/x86/jpeg2000dsp_init.c > @@ -26,6 +26,8 @@ > > void ff_ict_float_sse(void *src0, void *src1, void *src2, int csize); > void ff_ict_float_avx(void *src0, void *src1, void *src2, int csize); > +void ff_rct_int_sse2 (void *src0, void *src1, void *src2, int csize); > +void ff_rct_int_avx2 (void *src0, void *src1, void *src2, int csize); > > av_cold void ff_jpeg2000dsp_init_x86(Jpeg2000DSPContext *c) > { > @@ -34,7 +36,15 @@ av_cold void ff_jpeg2000dsp_init_x86(Jpeg2000DSPContext *c) > c->mct_decode[FF_DWT97] = ff_ict_float_sse; > } > > + if (EXTERNAL_SSE2(cpu_flags)) { > + c->mct_decode[FF_DWT53] = ff_rct_int_sse2; > + } > + > if (EXTERNAL_AVX_FAST(cpu_flags)) { > c->mct_decode[FF_DWT97] = ff_ict_float_avx; > } > + > + if (EXTERNAL_AVX2(cpu_flags)) { > + c->mct_decode[FF_DWT53] = ff_rct_int_avx2; > + } > } > -- > 2.4.3 > > _______________________________________________ > ffmpeg-devel mailing list > [email protected] > http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
How much faster is this? _______________________________________________ ffmpeg-devel mailing list [email protected] http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
