any update on that?
I could take care of extracting the bits myself, I was just expecting that you would take care of that. On Thu, Jun 28, 2018 at 6:53 PM Manolova, Plamena <[email protected]> wrote: > > Hi Karol, > Thank you for reviewing! I'll go ahead and push the changes you need from > nir_lower_system_values.c to master. > > Thank you, > Pam > > On Thu, Jun 28, 2018 at 5:50 AM, Karol Herbst <[email protected]> wrote: >> >> Hi, >> >> if the changes inside "src/compiler/nir/nir_lower_system_values.c" are >> extracted into a seperate patch, this patch with the equal changes >> would be >> >> Reviewed-by: Karol Herbst <[email protected]> >> >> I would need that for a nir to codegen pass for Nouveau and maybe it >> will help other drivers implementing this extension as well. I don't >> think it would hurt to extract those, right? >> >> Thanks! >> >> On Thu, Jun 7, 2018 at 5:34 PM, Plamena Manolova >> <[email protected]> wrote: >> > This patch adds the implementation of ARB_compute_variable_group_size >> > for i965. We do this by storing the group size in a buffer surface, >> > similarly to the work group number. >> > >> > v2: Fix some indentation inconsistencies (Jordan, Ilia) >> > Do DIV_ROUND_UP correctly in brw_nir_lower_cs_intrinsics.c (Jordan) >> > Use alphabetical order in features.txt (Matt) >> > Set the extension constants properly in brw_context.c >> > >> > Signed-off-by: Plamena Manolova <[email protected]> >> > --- >> > docs/features.txt | 2 +- >> > docs/relnotes/18.2.0.html | 1 + >> > src/compiler/nir/nir_lower_system_values.c | 13 ++++ >> > src/intel/compiler/brw_compiler.h | 2 + >> > src/intel/compiler/brw_fs.cpp | 45 ++++++++---- >> > src/intel/compiler/brw_fs_nir.cpp | 20 ++++++ >> > src/intel/compiler/brw_nir_lower_cs_intrinsics.c | 88 >> > +++++++++++++++++------- >> > src/mesa/drivers/dri/i965/brw_compute.c | 25 ++++++- >> > src/mesa/drivers/dri/i965/brw_context.c | 6 ++ >> > src/mesa/drivers/dri/i965/brw_context.h | 1 + >> > src/mesa/drivers/dri/i965/brw_cs.c | 4 ++ >> > src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 27 +++++++- >> > src/mesa/drivers/dri/i965/intel_extensions.c | 1 + >> > 13 files changed, 193 insertions(+), 42 deletions(-) >> > >> > diff --git a/docs/features.txt b/docs/features.txt >> > index ed4050cf98..81b6663288 100644 >> > --- a/docs/features.txt >> > +++ b/docs/features.txt >> > @@ -298,7 +298,7 @@ Khronos, ARB, and OES extensions that are not part of >> > any OpenGL or OpenGL ES ve >> > >> > GL_ARB_bindless_texture DONE (nvc0, >> > radeonsi) >> > GL_ARB_cl_event not started >> > - GL_ARB_compute_variable_group_size DONE (nvc0, >> > radeonsi) >> > + GL_ARB_compute_variable_group_size DONE (i965, nvc0, >> > radeonsi) >> > GL_ARB_ES3_2_compatibility DONE (i965/gen8+) >> > GL_ARB_fragment_shader_interlock DONE (i965) >> > GL_ARB_gpu_shader_int64 DONE (i965/gen8+, >> > nvc0, radeonsi, softpipe, llvmpipe) >> > diff --git a/docs/relnotes/18.2.0.html b/docs/relnotes/18.2.0.html >> > index 0db37b620d..7475a56633 100644 >> > --- a/docs/relnotes/18.2.0.html >> > +++ b/docs/relnotes/18.2.0.html >> > @@ -52,6 +52,7 @@ Note: some of the new features are only available with >> > certain drivers. >> > >> > <ul> >> > <li>GL_ARB_fragment_shader_interlock on i965</li> >> > +<li>GL_ARB_compute_variable_group_size on i965</li> >> > </ul> >> > >> > <h2>Bug fixes</h2> >> > diff --git a/src/compiler/nir/nir_lower_system_values.c >> > b/src/compiler/nir/nir_lower_system_values.c >> > index 487da04262..7ab005b000 100644 >> > --- a/src/compiler/nir/nir_lower_system_values.c >> > +++ b/src/compiler/nir/nir_lower_system_values.c >> > @@ -57,6 +57,14 @@ convert_block(nir_block *block, nir_builder *b) >> > * gl_WorkGroupID * gl_WorkGroupSize + gl_LocalInvocationID" >> > */ >> > >> > + /* >> > + * If the local work group size is variable we can't lower the >> > global >> > + * invocation id here. >> > + */ >> > + if (b->shader->info.cs.local_size_variable) { >> > + break; >> > + } >> > + >> > nir_const_value local_size; >> > memset(&local_size, 0, sizeof(local_size)); >> > local_size.u32[0] = b->shader->info.cs.local_size[0]; >> > @@ -102,6 +110,11 @@ convert_block(nir_block *block, nir_builder *b) >> > } >> > >> > case SYSTEM_VALUE_LOCAL_GROUP_SIZE: { >> > + /* If the local work group size is variable we can't lower it >> > here */ >> > + if (b->shader->info.cs.local_size_variable) { >> > + break; >> > + } >> > + >> > nir_const_value local_size; >> > memset(&local_size, 0, sizeof(local_size)); >> > local_size.u32[0] = b->shader->info.cs.local_size[0]; >> > diff --git a/src/intel/compiler/brw_compiler.h >> > b/src/intel/compiler/brw_compiler.h >> > index 8b4e6fe2e2..f54952c28f 100644 >> > --- a/src/intel/compiler/brw_compiler.h >> > +++ b/src/intel/compiler/brw_compiler.h >> > @@ -759,6 +759,7 @@ struct brw_cs_prog_data { >> > unsigned threads; >> > bool uses_barrier; >> > bool uses_num_work_groups; >> > + bool uses_variable_group_size; >> > >> > struct { >> > struct brw_push_const_block cross_thread; >> > @@ -771,6 +772,7 @@ struct brw_cs_prog_data { >> > * surface indices the CS-specific surfaces >> > */ >> > uint32_t work_groups_start; >> > + uint32_t work_group_size_start; >> > /** @} */ >> > } binding_table; >> > }; >> > diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp >> > index d67c0a4192..28730af47b 100644 >> > --- a/src/intel/compiler/brw_fs.cpp >> > +++ b/src/intel/compiler/brw_fs.cpp >> > @@ -7228,18 +7228,32 @@ brw_compile_cs(const struct brw_compiler >> > *compiler, void *log_data, >> > int shader_time_index, >> > char **error_str) >> > { >> > - prog_data->local_size[0] = src_shader->info.cs.local_size[0]; >> > - prog_data->local_size[1] = src_shader->info.cs.local_size[1]; >> > - prog_data->local_size[2] = src_shader->info.cs.local_size[2]; >> > - unsigned local_workgroup_size = >> > - src_shader->info.cs.local_size[0] * >> > src_shader->info.cs.local_size[1] * >> > - src_shader->info.cs.local_size[2]; >> > - >> > - unsigned min_dispatch_width = >> > - DIV_ROUND_UP(local_workgroup_size, >> > compiler->devinfo->max_cs_threads); >> > - min_dispatch_width = MAX2(8, min_dispatch_width); >> > - min_dispatch_width = util_next_power_of_two(min_dispatch_width); >> > - assert(min_dispatch_width <= 32); >> > + unsigned min_dispatch_width; >> > + >> > + if (!src_shader->info.cs.local_size_variable) { >> > + unsigned local_workgroup_size = >> > + src_shader->info.cs.local_size[0] * >> > src_shader->info.cs.local_size[1] * >> > + src_shader->info.cs.local_size[2]; >> > + >> > + min_dispatch_width = >> > + DIV_ROUND_UP(local_workgroup_size, >> > compiler->devinfo->max_cs_threads); >> > + min_dispatch_width = MAX2(8, min_dispatch_width); >> > + min_dispatch_width = util_next_power_of_two(min_dispatch_width); >> > + assert(min_dispatch_width <= 32); >> > + >> > + prog_data->local_size[0] = src_shader->info.cs.local_size[0]; >> > + prog_data->local_size[1] = src_shader->info.cs.local_size[1]; >> > + prog_data->local_size[2] = src_shader->info.cs.local_size[2]; >> > + prog_data->uses_variable_group_size = false; >> > + } else { >> > + /* >> > + * If the local work group size is variable we have to use a >> > dispatch >> > + * width of 32 here, since at this point we don't know the actual >> > size of >> > + * the workload. >> > + */ >> > + min_dispatch_width = 32; >> > + prog_data->uses_variable_group_size = true; >> > + } >> > >> > fs_visitor *v8 = NULL, *v16 = NULL, *v32 = NULL; >> > cfg_t *cfg = NULL; >> > @@ -7324,7 +7338,12 @@ brw_compile_cs(const struct brw_compiler *compiler, >> > void *log_data, >> > } >> > } else { >> > cfg = v32->cfg; >> > - cs_set_simd_size(prog_data, 32); >> > + if (!src_shader->info.cs.local_size_variable) { >> > + cs_set_simd_size(prog_data, 32); >> > + } else { >> > + prog_data->simd_size = 32; >> > + prog_data->threads = compiler->devinfo->max_cs_threads; >> > + } >> > cs_fill_push_const_info(compiler->devinfo, prog_data); >> > promoted_constants = v32->promoted_constants; >> > } >> > diff --git a/src/intel/compiler/brw_fs_nir.cpp >> > b/src/intel/compiler/brw_fs_nir.cpp >> > index 166da0aa6d..c4948c2347 100644 >> > --- a/src/intel/compiler/brw_fs_nir.cpp >> > +++ b/src/intel/compiler/brw_fs_nir.cpp >> > @@ -3766,6 +3766,26 @@ fs_visitor::nir_emit_cs_intrinsic(const fs_builder >> > &bld, >> > break; >> > } >> > >> > + case nir_intrinsic_load_local_group_size: { >> > + const unsigned surface = >> > + cs_prog_data->binding_table.work_group_size_start; >> > + >> > + fs_reg surf_index = brw_imm_ud(surface); >> > + brw_mark_surface_used(prog_data, surface); >> > + >> > + /* Read the 3 GLuint components of gl_NumWorkGroups */ >> > + for (unsigned i = 0; i < 3; i++) { >> > + fs_reg read_result = >> > + emit_untyped_read(bld, surf_index, >> > + brw_imm_ud(i << 2), >> > + 1 /* dims */, 1 /* size */, >> > + BRW_PREDICATE_NONE); >> > + read_result.type = dest.type; >> > + bld.MOV(dest, read_result); >> > + dest = offset(dest, bld, 1); >> > + } >> > + break; >> > + } >> > default: >> > nir_emit_intrinsic(bld, instr); >> > break; >> > diff --git a/src/intel/compiler/brw_nir_lower_cs_intrinsics.c >> > b/src/intel/compiler/brw_nir_lower_cs_intrinsics.c >> > index bfbdea0e8f..096e86db19 100644 >> > --- a/src/intel/compiler/brw_nir_lower_cs_intrinsics.c >> > +++ b/src/intel/compiler/brw_nir_lower_cs_intrinsics.c >> > @@ -58,10 +58,12 @@ lower_cs_intrinsics_convert_block(struct >> > lower_intrinsics_state *state, >> > * cs_thread_local_id + subgroup_invocation; >> > */ >> > nir_ssa_def *subgroup_id; >> > - if (state->local_workgroup_size <= state->dispatch_width) >> > + if ((state->local_workgroup_size <= state->dispatch_width) && >> > + !state->nir->info.cs.local_size_variable) { >> > subgroup_id = nir_imm_int(b, 0); >> > - else >> > + } else { >> > subgroup_id = nir_load_subgroup_id(b); >> > + } >> > >> > nir_ssa_def *thread_local_id = >> > nir_imul(b, subgroup_id, nir_imm_int(b, >> > state->dispatch_width)); >> > @@ -84,43 +86,81 @@ lower_cs_intrinsics_convert_block(struct >> > lower_intrinsics_state *state, >> > * (gl_WorkGroupSize.x * gl_WorkGroupSize.y)) % >> > * gl_WorkGroupSize.z; >> > */ >> > - unsigned *size = nir->info.cs.local_size; >> > - >> > nir_ssa_def *local_index = nir_load_local_invocation_index(b); >> > - >> > - nir_const_value uvec3; >> > - memset(&uvec3, 0, sizeof(uvec3)); >> > - uvec3.u32[0] = 1; >> > - uvec3.u32[1] = size[0]; >> > - uvec3.u32[2] = size[0] * size[1]; >> > - nir_ssa_def *div_val = nir_build_imm(b, 3, 32, uvec3); >> > - uvec3.u32[0] = size[0]; >> > - uvec3.u32[1] = size[1]; >> > - uvec3.u32[2] = size[2]; >> > - nir_ssa_def *mod_val = nir_build_imm(b, 3, 32, uvec3); >> > - >> > - sysval = nir_umod(b, nir_udiv(b, local_index, div_val), mod_val); >> > + if (!state->nir->info.cs.local_size_variable) { >> > + unsigned *size = nir->info.cs.local_size; >> > + >> > + nir_const_value uvec3; >> > + memset(&uvec3, 0, sizeof(uvec3)); >> > + uvec3.u32[0] = 1; >> > + uvec3.u32[1] = size[0]; >> > + uvec3.u32[2] = size[0] * size[1]; >> > + nir_ssa_def *div_val = nir_build_imm(b, 3, 32, uvec3); >> > + uvec3.u32[0] = size[0]; >> > + uvec3.u32[1] = size[1]; >> > + uvec3.u32[2] = size[2]; >> > + nir_ssa_def *mod_val = nir_build_imm(b, 3, 32, uvec3); >> > + >> > + sysval = nir_umod(b, nir_udiv(b, local_index, div_val), >> > mod_val); >> > + } else { >> > + nir_ssa_def *group_size_xyz = nir_load_local_group_size(b); >> > + nir_ssa_def *group_size_x = nir_channel(b, group_size_xyz, 0); >> > + nir_ssa_def *group_size_y = nir_channel(b, group_size_xyz, 1); >> > + nir_ssa_def *group_size_z = nir_channel(b, group_size_xyz, 2); >> > + nir_ssa_def *result[3]; >> > + result[0] = nir_umod(b, local_index, group_size_x); >> > + result[1] = nir_umod(b, nir_udiv(b, local_index, >> > group_size_x), >> > + group_size_y); >> > + result[2] = nir_umod(b, nir_udiv(b, local_index, >> > + nir_umul_high(b, group_size_x, group_size_y)), >> > group_size_z); >> > + >> > + sysval = nir_vec(b, result, 3); >> > + } >> > break; >> > } >> > >> > case nir_intrinsic_load_subgroup_id: >> > - if (state->local_workgroup_size > 8) >> > + if (state->local_workgroup_size > 8 || >> > + state->nir->info.cs.local_size_variable) { >> > continue; >> > + } >> > >> > /* For small workgroup sizes, we know subgroup_id will be zero */ >> > sysval = nir_imm_int(b, 0); >> > break; >> > >> > case nir_intrinsic_load_num_subgroups: { >> > - unsigned local_workgroup_size = >> > - nir->info.cs.local_size[0] * nir->info.cs.local_size[1] * >> > - nir->info.cs.local_size[2]; >> > - unsigned num_subgroups = >> > - DIV_ROUND_UP(local_workgroup_size, state->dispatch_width); >> > - sysval = nir_imm_int(b, num_subgroups); >> > + if (!state->nir->info.cs.local_size_variable) { >> > + unsigned num_subgroups; >> > + unsigned local_workgroup_size = >> > + nir->info.cs.local_size[0] * nir->info.cs.local_size[1] * >> > + nir->info.cs.local_size[2]; >> > + num_subgroups = >> > + DIV_ROUND_UP(local_workgroup_size, state->dispatch_width); >> > + sysval = nir_imm_int(b, num_subgroups); >> > + } else { >> > + nir_ssa_def *group_size_xyz = nir_load_local_group_size(b); >> > + nir_ssa_def *group_size_x = nir_channel(b, group_size_xyz, 0); >> > + nir_ssa_def *group_size_y = nir_channel(b, group_size_xyz, 1); >> > + nir_ssa_def *group_size_z = nir_channel(b, group_size_xyz, 2); >> > + nir_ssa_def *group_size = nir_imul(b, group_size_x, >> > nir_imul(b, >> > + group_size_y, group_size_z)); >> > + nir_ssa_def *dispatch_width = nir_imm_int(b, >> > + state->dispatch_width - 1); >> > + >> > + sysval = nir_udiv(b, group_size, dispatch_width); >> > + } >> > break; >> > } >> > >> > + case nir_intrinsic_load_global_invocation_id: { >> > + nir_ssa_def *group_id = nir_load_work_group_id(b); >> > + nir_ssa_def *local_id = nir_load_local_invocation_id(b); >> > + nir_ssa_def *group_size = nir_load_local_group_size(b); >> > + >> > + sysval = nir_iadd(b, nir_imul(b, group_id, group_size), >> > local_id); >> > + break; >> > + } >> > default: >> > continue; >> > } >> > diff --git a/src/mesa/drivers/dri/i965/brw_compute.c >> > b/src/mesa/drivers/dri/i965/brw_compute.c >> > index de08fc3ac1..7949e0ff51 100644 >> > --- a/src/mesa/drivers/dri/i965/brw_compute.c >> > +++ b/src/mesa/drivers/dri/i965/brw_compute.c >> > @@ -121,8 +121,11 @@ brw_emit_gpgpu_walker(struct brw_context *brw) >> > } >> > >> > const unsigned simd_size = prog_data->simd_size; >> > - unsigned group_size = prog_data->local_size[0] * >> > - prog_data->local_size[1] * prog_data->local_size[2]; >> > + unsigned group_size = brw->compute.group_size != NULL ? >> > + brw->compute.group_size[0] * brw->compute.group_size[1] * >> > + brw->compute.group_size[2] : prog_data->local_size[0] * >> > + prog_data->local_size[1] * prog_data->local_size[2]; >> > + >> > unsigned thread_width_max = >> > (group_size + simd_size - 1) / simd_size; >> > >> > @@ -229,6 +232,7 @@ brw_dispatch_compute(struct gl_context *ctx, const >> > GLuint *num_groups) { >> > >> > brw->compute.num_work_groups_bo = NULL; >> > brw->compute.num_work_groups = num_groups; >> > + brw->compute.group_size = NULL; >> > ctx->NewDriverState |= BRW_NEW_CS_WORK_GROUPS; >> > >> > brw_dispatch_compute_common(ctx); >> > @@ -248,6 +252,22 @@ brw_dispatch_compute_indirect(struct gl_context *ctx, >> > GLintptr indirect) >> > brw->compute.num_work_groups_bo = bo; >> > brw->compute.num_work_groups_offset = indirect; >> > brw->compute.num_work_groups = indirect_group_counts; >> > + brw->compute.group_size = NULL; >> > + ctx->NewDriverState |= BRW_NEW_CS_WORK_GROUPS; >> > + >> > + brw_dispatch_compute_common(ctx); >> > +} >> > + >> > +static void >> > +brw_dispatch_compute_group_size(struct gl_context *ctx, >> > + const GLuint *num_groups, >> > + const GLuint *group_size) >> > +{ >> > + struct brw_context *brw = brw_context(ctx); >> > + >> > + brw->compute.num_work_groups_bo = NULL; >> > + brw->compute.num_work_groups = num_groups; >> > + brw->compute.group_size = group_size; >> > ctx->NewDriverState |= BRW_NEW_CS_WORK_GROUPS; >> > >> > brw_dispatch_compute_common(ctx); >> > @@ -258,4 +278,5 @@ brw_init_compute_functions(struct dd_function_table >> > *functions) >> > { >> > functions->DispatchCompute = brw_dispatch_compute; >> > functions->DispatchComputeIndirect = brw_dispatch_compute_indirect; >> > + functions->DispatchComputeGroupSize = brw_dispatch_compute_group_size; >> > } >> > diff --git a/src/mesa/drivers/dri/i965/brw_context.c >> > b/src/mesa/drivers/dri/i965/brw_context.c >> > index 9ced230ec1..25d354e155 100644 >> > --- a/src/mesa/drivers/dri/i965/brw_context.c >> > +++ b/src/mesa/drivers/dri/i965/brw_context.c >> > @@ -766,6 +766,12 @@ brw_initialize_cs_context_constants(struct >> > brw_context *brw) >> > ctx->Const.MaxComputeWorkGroupSize[2] = max_invocations; >> > ctx->Const.MaxComputeWorkGroupInvocations = max_invocations; >> > ctx->Const.MaxComputeSharedMemorySize = 64 * 1024; >> > + >> > + /* ARB_compute_variable_group_size constants */ >> > + ctx->Const.MaxComputeVariableGroupSize[0] = max_invocations; >> > + ctx->Const.MaxComputeVariableGroupSize[1] = max_invocations; >> > + ctx->Const.MaxComputeVariableGroupSize[2] = max_invocations; >> > + ctx->Const.MaxComputeVariableGroupInvocations = max_invocations; >> > } >> > >> > /** >> > diff --git a/src/mesa/drivers/dri/i965/brw_context.h >> > b/src/mesa/drivers/dri/i965/brw_context.h >> > index 2613b9fda2..0fb533c369 100644 >> > --- a/src/mesa/drivers/dri/i965/brw_context.h >> > +++ b/src/mesa/drivers/dri/i965/brw_context.h >> > @@ -931,6 +931,7 @@ struct brw_context >> > struct brw_bo *num_work_groups_bo; >> > GLintptr num_work_groups_offset; >> > const GLuint *num_work_groups; >> > + const GLuint *group_size; >> > } compute; >> > >> > struct { >> > diff --git a/src/mesa/drivers/dri/i965/brw_cs.c >> > b/src/mesa/drivers/dri/i965/brw_cs.c >> > index e3f8fc67a4..007273390b 100644 >> > --- a/src/mesa/drivers/dri/i965/brw_cs.c >> > +++ b/src/mesa/drivers/dri/i965/brw_cs.c >> > @@ -43,6 +43,10 @@ assign_cs_binding_table_offsets(const struct >> > gen_device_info *devinfo, >> > prog_data->binding_table.work_groups_start = next_binding_table_offset; >> > next_binding_table_offset++; >> > >> > + /* May not be used if the work group size is not variable. */ >> > + prog_data->binding_table.work_group_size_start = >> > next_binding_table_offset; >> > + next_binding_table_offset++; >> > + >> > brw_assign_common_binding_table_offsets(devinfo, prog, >> > &prog_data->base, >> > next_binding_table_offset); >> > } >> > diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c >> > b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c >> > index 73cae9ef7c..fa8851e2b4 100644 >> > --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c >> > +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c >> > @@ -1634,7 +1634,7 @@ const struct brw_tracked_state brw_wm_image_surfaces >> > = { >> > }; >> > >> > static void >> > -brw_upload_cs_work_groups_surface(struct brw_context *brw) >> > +brw_upload_cs_variable_surfaces(struct brw_context *brw) >> > { >> > struct gl_context *ctx = &brw->ctx; >> > /* _NEW_PROGRAM */ >> > @@ -1671,6 +1671,29 @@ brw_upload_cs_work_groups_surface(struct >> > brw_context *brw) >> > RELOC_WRITE); >> > brw->ctx.NewDriverState |= BRW_NEW_SURFACES; >> > } >> > + >> > + if (prog && cs_prog_data->uses_variable_group_size) { >> > + const unsigned surf_idx = >> > + cs_prog_data->binding_table.work_group_size_start; >> > + uint32_t *surf_offset = &brw->cs.base.surf_offset[surf_idx]; >> > + struct brw_bo *bo; >> > + uint32_t bo_offset; >> > + >> > + bo = NULL; >> > + brw_upload_data(&brw->upload, >> > + (void *)brw->compute.group_size, >> > + 3 * sizeof(GLuint), >> > + sizeof(GLuint), >> > + &bo, >> > + &bo_offset); >> > + >> > + brw_emit_buffer_surface_state(brw, surf_offset, >> > + bo, bo_offset, >> > + ISL_FORMAT_RAW, >> > + 3 * sizeof(GLuint), 1, >> > + RELOC_WRITE); >> > + brw->ctx.NewDriverState |= BRW_NEW_SURFACES; >> > + } >> > } >> > >> > const struct brw_tracked_state brw_cs_work_groups_surface = { >> > @@ -1678,5 +1701,5 @@ const struct brw_tracked_state >> > brw_cs_work_groups_surface = { >> > .brw = BRW_NEW_CS_PROG_DATA | >> > BRW_NEW_CS_WORK_GROUPS >> > }, >> > - .emit = brw_upload_cs_work_groups_surface, >> > + .emit = brw_upload_cs_variable_surfaces, >> > }; >> > diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c >> > b/src/mesa/drivers/dri/i965/intel_extensions.c >> > index 5a9369d7b4..f213360ed8 100644 >> > --- a/src/mesa/drivers/dri/i965/intel_extensions.c >> > +++ b/src/mesa/drivers/dri/i965/intel_extensions.c >> > @@ -258,6 +258,7 @@ intelInitExtensions(struct gl_context *ctx) >> > ctx->Extensions.ARB_compute_shader = true; >> > ctx->Extensions.ARB_ES3_1_compatibility = >> > devinfo->gen >= 8 || devinfo->is_haswell; >> > + ctx->Extensions.ARB_compute_variable_group_size = true; >> > } >> > >> > if (can_do_predicate_writes(brw->screen)) { >> > -- >> > 2.11.0 >> > _______________________________________________ >> > mesa-dev mailing list >> > [email protected] >> > https://lists.freedesktop.org/mailman/listinfo/mesa-dev > > _______________________________________________ mesa-dev mailing list [email protected] https://lists.freedesktop.org/mailman/listinfo/mesa-dev
