Hi, 2014-06-06 8:27 GMT+02:00 Zhao, Yakui <[email protected]>: > On Thu, 2014-06-05 at 17:46 -0600, Gwenole Beauchesne wrote: >> In strict MVC decoding mode, when only the necessary set of inter-view >> reference pictures are passed to the ReferenceFrames array for decoding >> the current picture, we should not re-use a frame store id that might >> be needed for decoding another view component in the same access unit. >> >> One way to solve this problem is to track when the VA surface in a >> specified frame store id was last referenced. So, a "ref_poc" (RefPOC) >> field is introduced to the GenFrameStore struct and is updated whenever >> the surface is being referenced. >> > > The purpose of this patch is to sort the retired free slots so that it > can defer the usage of reference picture later used to hold another new > reference picture in DPB although it is not used when decoding current > frame. > > But I don't think that the POC delta is good way to sort the retired > free_slot.
Agreed, my initial intention behind the GenFrameStoreContext was to store a serial id in it, and use it incrementally (on POC change). But using the POC could still be fine. We are only interested in relative difference in this algorithm. >> Additionally, the list of retired refs candidates (free_refs) is kept >> ordered by increasing RefPOC. That way, we can immediately know what >> is the oldest frame store id to recycle. >> >> Let dPOC = |RefPOC - CurrPOC| > > Why not uses the concept of POC difference in H264 spec? > >> If dPOC > 1, we know for sure that the VA surface is gone ; > > Why do you mean that the VA surface is gone when the dPOC > 1 ? Is it > from the spec? No, it's from my algorithm. :) I think I will go back to the initial GenFrameSoreContext struct as otherwise, the Frame Store logic data is going to be spread over various data structures. Here, for instance, if I want to use a serial, I'd need to add it in the decode_state. Though, that approach would have the benefit to pre-initialize less data for each generation, since it would be fine to have the "age" (s/ref_poc/age BTW) starting off zero, which is the case when the decode_state is calloc()'ed. So, that second option would be a better interim solution. I will test this right now. Regards, Gwenole. > BTW: For one I P B B sequence, the corresponding POC can also be "0, 12, > 4, 8" based on the spec. > > >> If dPOC = 1, the surface could be re-used for inter prediction ; >> If dPOC = 0, the surface could be re-used for inter-view prediction. >> >> In practice, we are only interested in keeping the relative order. >> The exact difference is not maintained due to possible wrap arounds >> that would complicate the algorithm for no benefit. >> >> Signed-off-by: Gwenole Beauchesne <[email protected]> >> --- >> src/gen6_mfd.c | 2 + >> src/gen75_mfd.c | 2 + >> src/gen7_mfd.c | 2 + >> src/gen8_mfd.c | 7 ++-- >> src/i965_avc_bsd.c | 1 + >> src/i965_decoder.h | 1 + >> src/i965_decoder_utils.c | 103 >> +++++++++++++++++++++++++++++++---------------- >> src/i965_media_h264.c | 8 +--- >> src/intel_media.h | 1 + >> src/sysdeps.h | 1 + >> 10 files changed, 83 insertions(+), 45 deletions(-) >> >> diff --git a/src/gen6_mfd.c b/src/gen6_mfd.c >> index 437ad3b..ebf13e5 100755 >> --- a/src/gen6_mfd.c >> +++ b/src/gen6_mfd.c >> @@ -61,6 +61,7 @@ gen6_mfd_init_avc_surface(VADriverContextP ctx, >> >> if (!gen6_avc_surface) { >> gen6_avc_surface = calloc(sizeof(GenAvcSurface), 1); >> + gen6_avc_surface->frame_store_id = -1; >> assert((obj_surface->size & 0x3f) == 0); >> obj_surface->private_data = gen6_avc_surface; >> } >> @@ -1926,6 +1927,7 @@ gen6_dec_hw_context_init(VADriverContextP ctx, struct >> object_config *obj_config) >> gen6_mfd_context->reference_surface[i].surface_id = VA_INVALID_ID; >> gen6_mfd_context->reference_surface[i].frame_store_id = -1; >> gen6_mfd_context->reference_surface[i].obj_surface = NULL; >> + gen6_mfd_context->reference_surface[i].ref_poc = INT_MAX; >> } >> >> gen6_mfd_context->wa_mpeg2_slice_vertical_position = -1; >> diff --git a/src/gen75_mfd.c b/src/gen75_mfd.c >> index 67d4c51..2ebc25b 100644 >> --- a/src/gen75_mfd.c >> +++ b/src/gen75_mfd.c >> @@ -67,6 +67,7 @@ gen75_mfd_init_avc_surface(VADriverContextP ctx, >> >> if (!gen7_avc_surface) { >> gen7_avc_surface = calloc(sizeof(GenAvcSurface), 1); >> + gen7_avc_surface->frame_store_id = -1; >> assert((obj_surface->size & 0x3f) == 0); >> obj_surface->private_data = gen7_avc_surface; >> } >> @@ -3233,6 +3234,7 @@ gen75_dec_hw_context_init(VADriverContextP ctx, struct >> object_config *obj_config >> gen7_mfd_context->reference_surface[i].surface_id = VA_INVALID_ID; >> gen7_mfd_context->reference_surface[i].frame_store_id = -1; >> gen7_mfd_context->reference_surface[i].obj_surface = NULL; >> + gen7_mfd_context->reference_surface[i].ref_poc = INT_MAX; >> } >> >> gen7_mfd_context->jpeg_wa_surface_id = VA_INVALID_SURFACE; >> diff --git a/src/gen7_mfd.c b/src/gen7_mfd.c >> index bda104e..fab83e7 100755 >> --- a/src/gen7_mfd.c >> +++ b/src/gen7_mfd.c >> @@ -65,6 +65,7 @@ gen7_mfd_init_avc_surface(VADriverContextP ctx, >> >> if (!gen7_avc_surface) { >> gen7_avc_surface = calloc(sizeof(GenAvcSurface), 1); >> + gen7_avc_surface->frame_store_id = -1; >> assert((obj_surface->size & 0x3f) == 0); >> obj_surface->private_data = gen7_avc_surface; >> } >> @@ -2678,6 +2679,7 @@ gen7_dec_hw_context_init(VADriverContextP ctx, struct >> object_config *obj_config) >> gen7_mfd_context->reference_surface[i].surface_id = VA_INVALID_ID; >> gen7_mfd_context->reference_surface[i].frame_store_id = -1; >> gen7_mfd_context->reference_surface[i].obj_surface = NULL; >> + gen7_mfd_context->reference_surface[i].ref_poc = INT_MAX; >> } >> >> gen7_mfd_context->jpeg_wa_surface_id = VA_INVALID_SURFACE; >> diff --git a/src/gen8_mfd.c b/src/gen8_mfd.c >> index 065302d..3800fd2 100644 >> --- a/src/gen8_mfd.c >> +++ b/src/gen8_mfd.c >> @@ -27,10 +27,7 @@ >> * >> */ >> >> -#include <stdio.h> >> -#include <stdlib.h> >> -#include <string.h> >> -#include <assert.h> >> +#include "sysdeps.h" >> #include <math.h> >> #include <va/va_dec_jpeg.h> >> #include <va/va_dec_vp8.h> >> @@ -74,6 +71,7 @@ gen8_mfd_init_avc_surface(VADriverContextP ctx, >> >> if (!gen7_avc_surface) { >> gen7_avc_surface = calloc(sizeof(GenAvcSurface), 1); >> + gen7_avc_surface->frame_store_id = -1; >> assert((obj_surface->size & 0x3f) == 0); >> obj_surface->private_data = gen7_avc_surface; >> } >> @@ -3160,6 +3158,7 @@ gen8_dec_hw_context_init(VADriverContextP ctx, struct >> object_config *obj_config) >> for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) { >> gen7_mfd_context->reference_surface[i].surface_id = VA_INVALID_ID; >> gen7_mfd_context->reference_surface[i].frame_store_id = -1; >> + gen7_mfd_context->reference_surface[i].ref_poc = INT_MAX; >> } >> >> gen7_mfd_context->jpeg_wa_surface_id = VA_INVALID_SURFACE; >> diff --git a/src/i965_avc_bsd.c b/src/i965_avc_bsd.c >> index aca3c01..09f7591 100644 >> --- a/src/i965_avc_bsd.c >> +++ b/src/i965_avc_bsd.c >> @@ -51,6 +51,7 @@ i965_avc_bsd_init_avc_bsd_surface(VADriverContextP ctx, >> >> if (!avc_bsd_surface) { >> avc_bsd_surface = calloc(sizeof(GenAvcSurface), 1); >> + avc_bsd_surface->frame_store_id = -1; >> assert((obj_surface->size & 0x3f) == 0); >> obj_surface->private_data = avc_bsd_surface; >> } >> diff --git a/src/i965_decoder.h b/src/i965_decoder.h >> index 01c093f..c568417 100644 >> --- a/src/i965_decoder.h >> +++ b/src/i965_decoder.h >> @@ -38,6 +38,7 @@ typedef struct gen_frame_store GenFrameStore; >> struct gen_frame_store { >> VASurfaceID surface_id; >> int frame_store_id; >> + int ref_poc; /* only used for H.264 on earlier generations >> (<HSW) */ >> struct object_surface *obj_surface; >> }; >> >> diff --git a/src/i965_decoder_utils.c b/src/i965_decoder_utils.c >> index 7833919..4accbc9 100644 >> --- a/src/i965_decoder_utils.c >> +++ b/src/i965_decoder_utils.c >> @@ -26,6 +26,7 @@ >> #include <alloca.h> >> >> #include "intel_batchbuffer.h" >> +#include "intel_media.h" >> #include "i965_drv_video.h" >> #include "i965_decoder_utils.h" >> #include "i965_defines.h" >> @@ -254,6 +255,21 @@ avc_gen_default_iq_matrix(VAIQMatrixBufferH264 >> *iq_matrix) >> memset(&iq_matrix->ScalingList8x8, 16, >> sizeof(iq_matrix->ScalingList8x8)); >> } >> >> +/* Returns the POC of the supplied VA picture */ >> +static int >> +avc_get_picture_poc(const VAPictureH264 *va_pic) >> +{ >> + int structure, field_poc[2]; >> + >> + structure = va_pic->flags & >> + (VA_PICTURE_H264_TOP_FIELD | VA_PICTURE_H264_BOTTOM_FIELD); >> + field_poc[0] = structure != VA_PICTURE_H264_BOTTOM_FIELD ? >> + va_pic->TopFieldOrderCnt : INT_MAX; >> + field_poc[1] = structure != VA_PICTURE_H264_TOP_FIELD ? >> + va_pic->BottomFieldOrderCnt : INT_MAX; >> + return MIN(field_poc[0], field_poc[1]); >> +} >> + >> /* Returns a unique picture ID that represents the supplied VA surface >> object */ >> int >> avc_get_picture_id(struct object_surface *obj_surface) >> @@ -471,6 +487,20 @@ gen6_send_avc_ref_idx_state( >> ); >> } >> >> +/* Comparison function for sorting out the array of free frame store >> entries */ >> +static int >> +compare_avc_ref_store_func(const void *p1, const void *p2) >> +{ >> + const GenFrameStore * const fs1 = *((GenFrameStore **)p1); >> + const GenFrameStore * const fs2 = *((GenFrameStore **)p2); >> + >> + if (fs1->ref_poc == INT_MAX) >> + return -1; >> + if (fs2->ref_poc == INT_MAX) >> + return 1; >> + return fs1->ref_poc - fs2->ref_poc; >> +} >> + >> void >> intel_update_avc_frame_store_index( >> VADriverContextP ctx, >> @@ -480,59 +510,62 @@ intel_update_avc_frame_store_index( >> ) >> { >> GenFrameStore *free_refs[MAX_GEN_REFERENCE_FRAMES]; >> - int i, j, n, num_free_refs; >> + uint32_t used_refs = 0, add_refs = 0; >> + int i, n, num_free_refs; >> >> - /* Remove obsolete entries from the internal DPB */ >> - for (i = 0, n = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) { >> - GenFrameStore * const fs = &frame_store[i]; >> - if (fs->surface_id == VA_INVALID_ID || !fs->obj_surface) { >> - free_refs[n++] = fs; >> + const int poc = avc_get_picture_poc(&pic_param->CurrPic); >> + >> + /* Tag entries that are still available in our Frame Store */ >> + for (i = 0; i < ARRAY_ELEMS(decode_state->reference_objects); i++) { >> + struct object_surface * const obj_surface = >> + decode_state->reference_objects[i]; >> + if (!obj_surface) >> continue; >> - } >> >> - // Find whether the current entry is still a valid reference frame >> - for (j = 0; j < ARRAY_ELEMS(decode_state->reference_objects); j++) { >> - struct object_surface * const obj_surface = >> - decode_state->reference_objects[j]; >> - if (obj_surface && obj_surface == fs->obj_surface) >> - break; >> + GenAvcSurface * const avc_surface = obj_surface->private_data; >> + if (avc_surface->frame_store_id >= 0) { >> + GenFrameStore * const fs = >> + &frame_store[avc_surface->frame_store_id]; >> + if (fs->surface_id == obj_surface->base.id) { >> + fs->obj_surface = obj_surface; >> + fs->ref_poc = poc; >> + used_refs |= 1 << fs->frame_store_id; >> + continue; >> + } >> } >> + add_refs |= 1 << i; >> + } >> >> - // ... or remove it >> - if (j == ARRAY_ELEMS(decode_state->reference_objects)) { >> - fs->surface_id = VA_INVALID_ID; >> + /* Build and sort out the list of retired candidates. The resulting >> + list is ordered by increasing POC when they were last used */ >> + for (i = 0, n = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) { >> + if (!(used_refs & (1 << i))) { >> + GenFrameStore * const fs = &frame_store[i]; >> fs->obj_surface = NULL; >> - fs->frame_store_id = -1; >> free_refs[n++] = fs; >> } >> } >> num_free_refs = n; >> + qsort(&free_refs[0], n, sizeof(free_refs[0]), >> compare_avc_ref_store_func); >> >> /* Append the new reference frames */ >> for (i = 0, n = 0; i < ARRAY_ELEMS(decode_state->reference_objects); >> i++) { >> struct object_surface * const obj_surface = >> decode_state->reference_objects[i]; >> - if (!obj_surface) >> + if (!obj_surface || !(add_refs & (1 << i))) >> continue; >> >> - // Find whether the current frame is not already in our frame store >> - for (j = 0; j < MAX_GEN_REFERENCE_FRAMES; j++) { >> - GenFrameStore * const fs = &frame_store[j]; >> - if (fs->obj_surface == obj_surface) >> - break; >> - } >> - >> - // ... or add it >> - if (j == MAX_GEN_REFERENCE_FRAMES) { >> - if (n < num_free_refs) { >> - GenFrameStore * const fs = free_refs[n++]; >> - fs->surface_id = obj_surface->base.id; >> - fs->obj_surface = obj_surface; >> - fs->frame_store_id = fs - frame_store; >> - continue; >> - } >> - WARN_ONCE("No free slot found for DPB reference list!!!\n"); >> + GenAvcSurface * const avc_surface = obj_surface->private_data; >> + if (n < num_free_refs) { >> + GenFrameStore * const fs = free_refs[n++]; >> + fs->surface_id = obj_surface->base.id; >> + fs->obj_surface = obj_surface; >> + fs->frame_store_id = fs - frame_store; >> + fs->ref_poc = poc; >> + avc_surface->frame_store_id = fs->frame_store_id; >> + continue; >> } >> + WARN_ONCE("No free slot found for DPB reference list!!!\n"); >> } >> } >> >> diff --git a/src/i965_media_h264.c b/src/i965_media_h264.c >> index 8ec7e4f..4b37fcb 100644 >> --- a/src/i965_media_h264.c >> +++ b/src/i965_media_h264.c >> @@ -1,9 +1,4 @@ >> -#include <stdlib.h> >> -#include <stdio.h> >> -#include <string.h> >> -#include <assert.h> >> - >> - >> +#include "sysdeps.h" >> #include "intel_batchbuffer.h" >> #include "intel_driver.h" >> >> @@ -870,6 +865,7 @@ i965_media_h264_dec_context_init(VADriverContextP ctx, >> struct i965_media_context >> for (i = 0; i < 16; i++) { >> i965_h264_context->fsid_list[i].surface_id = VA_INVALID_ID; >> i965_h264_context->fsid_list[i].frame_store_id = -1; >> + i965_h264_context->fsid_list[i].ref_poc = INT_MAX; >> } >> >> i965_h264_context->batch = media_context->base.batch; >> diff --git a/src/intel_media.h b/src/intel_media.h >> index b30740a..55136d6 100644 >> --- a/src/intel_media.h >> +++ b/src/intel_media.h >> @@ -39,6 +39,7 @@ struct gen_avc_surface >> dri_bo *dmv_top; >> dri_bo *dmv_bottom; >> int dmv_bottom_flag; >> + int frame_store_id; /* only used for H.264 on earlier generations >> (<HSW) */ >> }; >> >> extern void gen_free_avc_surface(void **data); >> diff --git a/src/sysdeps.h b/src/sysdeps.h >> index 71bfb4d..cc7144a 100644 >> --- a/src/sysdeps.h >> +++ b/src/sysdeps.h >> @@ -43,5 +43,6 @@ >> #include <string.h> >> #include <stdint.h> >> #include <assert.h> >> +#include <limits.h> >> >> #endif /* SYSDEPS_H */ > > _______________________________________________ Libva mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/libva
