fs: Introduce regioning lowering pass.

Francisco Jerez Sat, 05 Jan 2019 13:51:44 -0800

Francisco Jerez <[email protected]> writes:

> Iago Toral <[email protected]> writes:
>
>> On Sat, 2018-12-29 at 12:39 -0800, Francisco Jerez wrote:
>>> This legalization pass is meant to handle situations where the source
>>> or destination regioning controls of an instruction are unsupported
>>> by
>>> the hardware and need to be lowered away into separate instructions.
>>> This should be more reliable and future-proof than the current
>>> approach of handling CHV/BXT restrictions manually all over the
>>> visitor.  The same mechanism is leveraged to lower unsupported type
>>> conversions easily, which obsoletes the lower_conversions pass.
>>> ---
>>>  src/intel/Makefile.sources                    |   1 +
>>>  src/intel/compiler/brw_fs.cpp                 |   5 +-
>>>  src/intel/compiler/brw_fs.h                   |  21 +-
>>>  src/intel/compiler/brw_fs_lower_regioning.cpp | 382
>>> ++++++++++++++++++
>>>  src/intel/compiler/brw_ir_fs.h                |  10 +
>>>  src/intel/compiler/meson.build                |   1 +
>>>  6 files changed, 401 insertions(+), 19 deletions(-)
>>>  create mode 100644 src/intel/compiler/brw_fs_lower_regioning.cpp
>>> 
>>> diff --git a/src/intel/Makefile.sources b/src/intel/Makefile.sources
>>> index 5e7d32293b7..6b9874d2b80 100644
>>> --- a/src/intel/Makefile.sources
>>> +++ b/src/intel/Makefile.sources
>>> @@ -64,6 +64,7 @@ COMPILER_FILES = \
>>>     compiler/brw_fs_live_variables.h \
>>>     compiler/brw_fs_lower_conversions.cpp \
>>>     compiler/brw_fs_lower_pack.cpp \
>>> +   compiler/brw_fs_lower_regioning.cpp \
>>>     compiler/brw_fs_nir.cpp \
>>>     compiler/brw_fs_reg_allocate.cpp \
>>>     compiler/brw_fs_register_coalesce.cpp \
>>> diff --git a/src/intel/compiler/brw_fs.cpp
>>> b/src/intel/compiler/brw_fs.cpp
>>> index 889509badab..caa7a798332 100644
>>> --- a/src/intel/compiler/brw_fs.cpp
>>> +++ b/src/intel/compiler/brw_fs.cpp
>>> @@ -6471,7 +6471,10 @@ fs_visitor::optimize()
>>>        OPT(dead_code_eliminate);
>>>     }
>>>  
>>> -   if (OPT(lower_conversions)) {
>>> +   progress = false;
>>> +   OPT(lower_conversions);
>>> +   OPT(lower_regioning);
>>> +   if (progress) {
>>>        OPT(opt_copy_propagation);
>>>        OPT(dead_code_eliminate);
>>>        OPT(lower_simd_width);
>>> diff --git a/src/intel/compiler/brw_fs.h
>>> b/src/intel/compiler/brw_fs.h
>>> index dc36ecc21ac..36825754931 100644
>>> --- a/src/intel/compiler/brw_fs.h
>>> +++ b/src/intel/compiler/brw_fs.h
>>> @@ -164,6 +164,7 @@ public:
>>>     void lower_uniform_pull_constant_loads();
>>>     bool lower_load_payload();
>>>     bool lower_pack();
>>> +   bool lower_regioning();
>>>     bool lower_conversions();
>>>     bool lower_logical_sends();
>>>     bool lower_integer_multiplication();
>>> @@ -536,24 +537,8 @@ namespace brw {
>>>        }
>>>     }
>>>  
>>> -   /**
>>> -    * Remove any modifiers from the \p i-th source region of the
>>> instruction,
>>> -    * including negate, abs and any implicit type conversion to the
>>> execution
>>> -    * type.  Instead any source modifiers will be implemented as a
>>> separate
>>> -    * MOV instruction prior to the original instruction.
>>> -    */
>>> -   inline bool
>>> -   lower_src_modifiers(fs_visitor *v, bblock_t *block, fs_inst
>>> *inst, unsigned i)
>>> -   {
>>> -      assert(inst->components_read(i) == 1);
>>> -      const fs_builder ibld(v, block, inst);
>>> -      const fs_reg tmp = ibld.vgrf(get_exec_type(inst));
>>> -
>>> -      ibld.MOV(tmp, inst->src[i]);
>>> -      inst->src[i] = tmp;
>>> -
>>> -      return true;
>>> -   }
>>> +   bool
>>> +   lower_src_modifiers(fs_visitor *v, bblock_t *block, fs_inst
>>> *inst, unsigned i);
>>>  }
>>>  
>>>  void shuffle_from_32bit_read(const brw::fs_builder &bld,
>>> diff --git a/src/intel/compiler/brw_fs_lower_regioning.cpp
>>> b/src/intel/compiler/brw_fs_lower_regioning.cpp
>>> new file mode 100644
>>> index 00000000000..9578622401d
>>> --- /dev/null
>>> +++ b/src/intel/compiler/brw_fs_lower_regioning.cpp
>>> @@ -0,0 +1,382 @@
>>> +/*
>>> + * Copyright © 2018 Intel Corporation
>>> + *
>>> + * Permission is hereby granted, free of charge, to any person
>>> obtaining a
>>> + * copy of this software and associated documentation files (the
>>> "Software"),
>>> + * to deal in the Software without restriction, including without
>>> limitation
>>> + * the rights to use, copy, modify, merge, publish, distribute,
>>> sublicense,
>>> + * and/or sell copies of the Software, and to permit persons to whom
>>> the
>>> + * Software is furnished to do so, subject to the following
>>> conditions:
>>> + *
>>> + * The above copyright notice and this permission notice (including
>>> the next
>>> + * paragraph) shall be included in all copies or substantial
>>> portions of the
>>> + * Software.
>>> + *
>>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
>>> EXPRESS OR
>>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
>>> MERCHANTABILITY,
>>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO
>>> EVENT SHALL
>>> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES
>>> OR OTHER
>>> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
>>> ARISING
>>> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
>>> OTHER DEALINGS
>>> + * IN THE SOFTWARE.
>>> + */
>>> +
>>> +#include "brw_fs.h"
>>> +#include "brw_cfg.h"
>>> +#include "brw_fs_builder.h"
>>> +
>>> +using namespace brw;
>>> +
>>> +namespace {
>>> +   /* From the SKL PRM Vol 2a, "Move":
>>> +    *
>>> +    * "A mov with the same source and destination type, no source
>>> modifier,
>>> +    *  and no saturation is a raw move. A packed byte destination
>>> region (B
>>> +    *  or UB type with HorzStride == 1 and ExecSize > 1) can only be
>>> written
>>> +    *  using raw move."
>>> +    */
>>> +   bool
>>> +   is_byte_raw_mov(const fs_inst *inst)
>>> +   {
>>> +      return type_sz(inst->dst.type) == 1 &&
>>> +             inst->opcode == BRW_OPCODE_MOV &&
>>> +             inst->src[0].type == inst->dst.type &&
>>> +             !inst->saturate &&
>>> +             !inst->src[0].negate &&
>>> +             !inst->src[0].abs;
>>> +   }
>>> +
>>> +   /*
>>> +    * Return an acceptable byte stride for the destination of an
>>> instruction
>>> +    * that requires it to have some particular alignment.
>>> +    */
>>> +   unsigned
>>> +   required_dst_byte_stride(const fs_inst *inst)
>>> +   {
>>> +      if (type_sz(inst->dst.type) < get_exec_type_size(inst) &&
>>> +          !is_byte_raw_mov(inst)) {
>>> +         return get_exec_type_size(inst);
>>> +      } else {
>>> +         unsigned stride = inst->dst.stride * type_sz(inst-
>>> >dst.type);
>>> +
>>> +         for (unsigned i = 0; i < inst->sources; i++) {
>>> +            if (!is_uniform(inst->src[i]))
>>> +               stride = MAX2(stride, inst->src[i].stride *
>>> +                             type_sz(inst->src[i].type));
>>> +         }
>>> +
>>> +         return stride;
>>> +      }
>>> +   }
>>> +
>>> +   /*
>>> +    * Return an acceptable byte sub-register offset for the
>>> destination of an
>>> +    * instruction that requires it to be aligned to the sub-register 
>>> offset of
>>> +    * the sources.
>>> +    */
>>> +   unsigned
>>> +   required_dst_byte_offset(const fs_inst *inst)
>>> +   {
>>> +      for (unsigned i = 0; i < inst->sources; i++) {
>>> +         if (!is_uniform(inst->src[i]))
>>> +            if (reg_offset(inst->src[i]) % REG_SIZE !=
>>> +                reg_offset(inst->dst) % REG_SIZE)
>>> +               return 0;
>>> +      }
>>> +
>>> +      return reg_offset(inst->dst) % REG_SIZE;
>>> +   }
>>> +
>>> +   /*
>>> +    * Return whether the instruction has an unsupported channel bit
>>> layout
>>> +    * specified for the i-th source region.
>>> +    */
>>> +   bool
>>> +   has_invalid_src_region(const gen_device_info *devinfo, const
>>> fs_inst *inst,
>>> +                          unsigned i)
>>> +   {
>>> +      if (is_unordered(inst)) {
>>> +         return false;
>>> +      } else {
>>> +         const unsigned dst_byte_stride = inst->dst.stride *
>>> type_sz(inst->dst.type);
>>> +         const unsigned src_byte_stride = inst->src[i].stride *
>>> +            type_sz(inst->src[i].type);
>>> +         const unsigned dst_byte_offset = reg_offset(inst->dst) %
>>> REG_SIZE;
>>> +         const unsigned src_byte_offset = reg_offset(inst->src[i]) %
>>> REG_SIZE;
>>> +
>>> +         return has_dst_aligned_region_restriction(devinfo, inst) &&
>>> +                !is_uniform(inst->src[i]) &&
>>> +                (src_byte_stride != dst_byte_stride ||
>>> +                 src_byte_offset != dst_byte_offset);
>>> +      }
>>> +   }
>>> +
>>> +   /*
>>> +    * Return whether the instruction has an unsupported channel bit
>>> layout
>>> +    * specified for the destination region.
>>> +    */
>>> +   bool
>>> +   has_invalid_dst_region(const gen_device_info *devinfo,
>>> +                          const fs_inst *inst)
>>> +   {
>>> +      if (is_unordered(inst)) {
>>> +         return false;
>>> +      } else {
>>> +         const brw_reg_type exec_type = get_exec_type(inst);
>>> +         const unsigned dst_byte_offset = reg_offset(inst->dst) %
>>> REG_SIZE;
>>> +         const unsigned dst_byte_stride = inst->dst.stride *
>>> type_sz(inst->dst.type);
>>> +         const bool is_narrowing_conversion = !is_byte_raw_mov(inst)
>>> &&
>>> +            type_sz(inst->dst.type) < type_sz(exec_type);
>>> +
>>> +         return (has_dst_aligned_region_restriction(devinfo, inst)
>>> &&
>>> +                 (required_dst_byte_stride(inst) != dst_byte_stride
>>> ||
>>> +                  required_dst_byte_offset(inst) !=
>>> dst_byte_offset)) ||
>>> +                (is_narrowing_conversion &&
>>> +                 required_dst_byte_stride(inst) != dst_byte_stride);
>>> +      }
>>> +   }
>>> +
>>> +   /*
>>> +    * Return whether the instruction has unsupported source
>>> modifiers
>>> +    * specified for the i-th source region.
>>> +    */
>>> +   bool
>>> +   has_invalid_src_modifiers(const gen_device_info *devinfo, const
>>> fs_inst *inst,
>>> +                             unsigned i)
>>> +   {
>>> +      return !inst->can_do_source_mods(devinfo) &&
>>> +             (inst->src[i].negate || inst->src[i].abs);
>>> +   }
>>> +
>>> +   /*
>>> +    * Return whether the instruction has an unsupported type
>>> conversion
>>> +    * specified for the destination.
>>> +    */
>>> +   bool
>>> +   has_invalid_conversion(const gen_device_info *devinfo, const
>>> fs_inst *inst)
>>> +   {
>>> +      switch (inst->opcode) {
>>> +      case BRW_OPCODE_MOV:
>>> +         return false;
>>> +      case BRW_OPCODE_SEL:
>>> +         return inst->dst.type != get_exec_type(inst);
>>> +      case SHADER_OPCODE_BROADCAST:
>>> +      case SHADER_OPCODE_MOV_INDIRECT:
>>> +         /* The source and destination types of these may are hard-
>>> coded to
>>> +          * integer at codegen time due to hardware limitations of
>>> 64-bit
>>> +          * types.
>>> +          */
>>> +         return ((devinfo->gen == 7 && !devinfo->is_haswell) ||
>>> +                 devinfo->is_cherryview ||
>>> gen_device_info_is_9lp(devinfo)) &&
>>> +                type_sz(inst->src[0].type) > 4 &&
>>> +                inst->dst.type != inst->src[0].type;
>>> +      default:
>>> +         /* FIXME: We assume the opcodes don't explicitly mentioned
>>> before
>>> +          * just work fine with arbitrary conversions.
>>> +          */
>>> +         return false;
>>> +      }
>>> +   }
>>> +
>>> +   bool
>>> +   lower_instruction(fs_visitor *v, bblock_t *block, fs_inst *inst);
>>> +}
>>> +
>>> +namespace brw {
>>> +   /**
>>> +    * Remove any modifiers from the \p i-th source region of the
>>> instruction,
>>> +    * including negate, abs and any implicit type conversion to the
>>> execution
>>> +    * type.  Instead any source modifiers will be implemented as a
>>> separate
>>> +    * MOV instruction prior to the original instruction.
>>> +    */
>>> +   bool
>>> +   lower_src_modifiers(fs_visitor *v, bblock_t *block, fs_inst
>>> *inst, unsigned i)
>>> +   {
>>> +      assert(inst->components_read(i) == 1);
>>> +      const fs_builder ibld(v, block, inst);
>>> +      const fs_reg tmp = ibld.vgrf(get_exec_type(inst));
>>> +
>>> +      lower_instruction(v, block, ibld.MOV(tmp, inst->src[i]));
>>> +      inst->src[i] = tmp;
>>> +
>>> +      return true;
>>> +   }
>>> +}
>>> +
>>> +namespace {
>>> +   /**
>>> +    * Remove any modifiers from the destination region of the
>>> instruction,
>>> +    * including saturate, conditional mod and any implicit type
>>> conversion
>>> +    * from the execution type.  Instead any destination modifiers
>>> will be
>>> +    * implemented as a separate MOV instruction after the original
>>> +    * instruction.
>>> +    */
>>> +   bool
>>> +   lower_dst_modifiers(fs_visitor *v, bblock_t *block, fs_inst
>>> *inst)
>>> +   {
>>> +      const fs_builder ibld(v, block, inst);
>>> +      const brw_reg_type type = get_exec_type(inst);
>>> +      /* Not strictly necessary, but if possible use a temporary
>>> with the same
>>> +       * channel alignment as the current destination in order to
>>> avoid
>>> +       * violating the restrictions enforced later on by
>>> lower_src_region()
>>> +       * and lower_dst_region(), which would introduce additional
>>> copy
>>> +       * instructions into the program unnecessarily.
>>> +       */
>>> +      const unsigned stride =
>>> +         type_sz(inst->dst.type) * inst->dst.stride <= type_sz(type)
>>> ? 1 :
>>> +         type_sz(inst->dst.type) * inst->dst.stride / type_sz(type);
>>> +      const fs_reg tmp = horiz_stride(ibld.vgrf(type, stride),
>>> stride);
>>> +
>>> +      /* Emit a MOV taking care of all the destination modifiers. */
>>> +      fs_inst *mov = ibld.at(block, inst->next).MOV(inst->dst, tmp);
>>> +      mov->saturate = inst->saturate;
>>> +      mov->conditional_mod = inst->conditional_mod;
>>> +      if (inst->opcode != BRW_OPCODE_SEL) {
>>> +         mov->predicate = inst->predicate;
>>> +         mov->predicate_inverse = inst->predicate_inverse;
>>> +      }
>>> +      mov->flag_subreg = inst->flag_subreg;
>>> +      lower_instruction(v, block, mov);
>>> +
>>> +      /* Point the original instruction at the temporary, and clean
>>> up any
>>> +       * destination modifiers.
>>> +       */
>>> +      assert(inst->size_written == inst->dst.component_size(inst-
>>> >exec_size));
>>> +      inst->dst = tmp;
>>> +      inst->size_written = inst->dst.component_size(inst-
>>> >exec_size);
>>> +      inst->saturate = false;
>>> +      inst->conditional_mod = BRW_CONDITIONAL_NONE;
>>
>> I think this is not correct if if inst is a SEL instruction. Here is an
>> example that is causing a regresion for me:
>>
>> With the original lower_conversions pass I have:
>>
>> sel.l(8) vgrf15+0.0:W, vgrf3+0.0:B, vgrf5+0.0:B 
>> mov(8) vgrf16+0.0<2>:B, vgrf15+0.0:W 
>>
>> Now, with this pass this is turned into:
>>
>> sel(8) vgrf15+0.0:W, vgrf3+0.0:B, vgrf5+0.0:B 
>> mov.l.f0.0(8) vgrf16+0.0<2>:B, vgrf15+0.0:W 
>>
>> So I guess the SEL instruction no longer works since it is not
>> predicated and we have removed the conditional modifier as well.
>>
>> Maybe only move the conditional modifier to the MOV when the
>> instruction is not a SEL like we do for the predicate?
>>
>
> Yeah good point, the semantics of conditional mods are inconsistent for
> the SEL instruction -- Fixed up locally in the same way predicates are
> handled for SEL in the same function.
>


It just occurred to me that there are a couple other ISA instructions
with non-standard semantics for the conditional mod that need to be
special-cased here as well.   I'll reply with a fix in a minute.

>>> +      return true;
>>> +   }
>>> +
>>> +   /**
>>> +    * Remove any non-trivial shuffling of data from the \p i-th
>>> source region
>>> +    * of the instruction.  Instead implement the region as a series
>>> of integer
>>> +    * copies into a temporary with the same channel layout as the
>>> destination.
>>> +    */
>>> +   bool
>>> +   lower_src_region(fs_visitor *v, bblock_t *block, fs_inst *inst,
>>> unsigned i)
>>> +   {
>>> +      assert(inst->components_read(i) == 1);
>>> +      const fs_builder ibld(v, block, inst);
>>> +      const unsigned stride = type_sz(inst->dst.type) * inst-
>>> >dst.stride /
>>> +                              type_sz(inst->src[i].type);
>>> +      assert(stride > 0);
>>> +      const fs_reg tmp = horiz_stride(ibld.vgrf(inst->src[i].type,
>>> stride),
>>> +                                      stride);
>>> +
>>> +      /* Emit a series of 32-bit integer copies with any source
>>> modifiers
>>> +       * cleaned up (because their semantics are dependent on the
>>> type).
>>> +       */
>>> +      const brw_reg_type raw_type =
>>> brw_int_type(MIN2(type_sz(tmp.type), 4),
>>> +                                                 false);
>>> +      const unsigned n = type_sz(tmp.type) / type_sz(raw_type);
>>> +      fs_reg raw_src = inst->src[i];
>>> +      raw_src.negate = false;
>>> +      raw_src.abs = false;
>>> +
>>> +      for (unsigned j = 0; j < n; j++)
>>> +         ibld.MOV(subscript(tmp, raw_type, j), subscript(raw_src,
>>> raw_type, j));
>>> +
>>> +      /* Point the original instruction at the temporary, making
>>> sure to keep
>>> +       * any source modifiers in the instruction.
>>> +       */
>>> +      fs_reg lower_src = tmp;
>>> +      lower_src.negate = inst->src[i].negate;
>>> +      lower_src.abs = inst->src[i].abs;
>>> +      inst->src[i] = lower_src;
>>> +
>>> +      return true;
>>> +   }
>>> +
>>> +   /**
>>> +    * Remove any non-trivial shuffling of data from the destination
>>> region of
>>> +    * the instruction.  Instead implement the region as a series of
>>> integer
>>> +    * copies from a temporary with a channel layout compatible with
>>> the
>>> +    * sources.
>>> +    */
>>> +   bool
>>> +   lower_dst_region(fs_visitor *v, bblock_t *block, fs_inst *inst)
>>> +   {
>>> +      const fs_builder ibld(v, block, inst);
>>> +      const unsigned stride = required_dst_byte_stride(inst) /
>>> +                              type_sz(inst->dst.type);
>>> +      assert(stride > 0);
>>> +      const fs_reg tmp = horiz_stride(ibld.vgrf(inst->dst.type,
>>> stride),
>>> +                                      stride);
>>> +
>>> +      /* Emit a series of 32-bit integer copies from the temporary
>>> into the
>>> +       * original destination.
>>> +       */
>>> +      const brw_reg_type raw_type =
>>> brw_int_type(MIN2(type_sz(tmp.type), 4),
>>> +                                                 false);
>>> +      const unsigned n = type_sz(tmp.type) / type_sz(raw_type);
>>> +
>>> +      if (inst->predicate && inst->opcode != BRW_OPCODE_SEL) {
>>> +         /* Note that in general we cannot simply predicate the
>>> copies on the
>>> +          * same flag register as the original instruction, since it
>>> may have
>>> +          * been overwritten by the instruction itself.  Instead
>>> initialize
>>> +          * the temporary with the previous contents of the
>>> destination
>>> +          * register.
>>> +          */
>>> +         for (unsigned j = 0; j < n; j++)
>>> +            ibld.MOV(subscript(tmp, raw_type, j),
>>> +                     subscript(inst->dst, raw_type, j));
>>> +      }
>>> +
>>> +      for (unsigned j = 0; j < n; j++)
>>> +         ibld.at(block, inst->next).MOV(subscript(inst->dst,
>>> raw_type, j),
>>> +                                        subscript(tmp, raw_type,
>>> j));
>>> +
>>> +      /* Point the original instruction at the temporary, making
>>> sure to keep
>>> +       * any destination modifiers in the instruction.
>>> +       */
>>> +      assert(inst->size_written == inst->dst.component_size(inst-
>>> >exec_size));
>>> +      inst->dst = tmp;
>>> +      inst->size_written = inst->dst.component_size(inst-
>>> >exec_size);
>>> +
>>> +      return true;
>>> +   }
>>> +
>>> +   /**
>>> +    * Legalize the source and destination regioning controls of the
>>> specified
>>> +    * instruction.
>>> +    */
>>> +   bool
>>> +   lower_instruction(fs_visitor *v, bblock_t *block, fs_inst *inst)
>>> +   {
>>> +      const gen_device_info *devinfo = v->devinfo;
>>> +      bool progress = false;
>>> +
>>> +      if (has_invalid_conversion(devinfo, inst))
>>> +         progress |= lower_dst_modifiers(v, block, inst);
>>> +
>>> +      if (has_invalid_dst_region(devinfo, inst))
>>> +         progress |= lower_dst_region(v, block, inst);
>>> +
>>> +      for (unsigned i = 0; i < inst->sources; i++) {
>>> +         if (has_invalid_src_modifiers(devinfo, inst, i))
>>> +            progress |= lower_src_modifiers(v, block, inst, i);
>>> +
>>> +         if (has_invalid_src_region(devinfo, inst, i))
>>> +            progress |= lower_src_region(v, block, inst, i);
>>> +      }
>>> +
>>> +      return progress;
>>> +   }
>>> +}
>>> +
>>> +bool
>>> +fs_visitor::lower_regioning()
>>> +{
>>> +   bool progress = false;
>>> +
>>> +   foreach_block_and_inst(block, fs_inst, inst, cfg)
>>> +      progress |= lower_instruction(this, block, inst);
>>> +
>>> +   if (progress)
>>> +      invalidate_live_intervals();
>>> +
>>> +   return progress;
>>> +}
>>> diff --git a/src/intel/compiler/brw_ir_fs.h
>>> b/src/intel/compiler/brw_ir_fs.h
>>> index 5bb92e4cc86..3c23fb375e4 100644
>>> --- a/src/intel/compiler/brw_ir_fs.h
>>> +++ b/src/intel/compiler/brw_ir_fs.h
>>> @@ -486,6 +486,16 @@ get_exec_type_size(const fs_inst *inst)
>>>     return type_sz(get_exec_type(inst));
>>>  }
>>>  
>>> +/**
>>> + * Return whether the instruction isn't an ALU instruction and
>>> cannot be
>>> + * assumed to complete in-order.
>>> + */
>>> +static inline bool
>>> +is_unordered(const fs_inst *inst)
>>> +{
>>> +   return inst->mlen || inst->is_send_from_grf() || inst->is_math();
>>> +}
>>> +
>>>  /**
>>>   * Return whether the following regioning restriction applies to the
>>> specified
>>>   * instruction.  From the Cherryview PRM Vol 7. "Register Region
>>> diff --git a/src/intel/compiler/meson.build
>>> b/src/intel/compiler/meson.build
>>> index 69ce2eab4cf..4af134b418e 100644
>>> --- a/src/intel/compiler/meson.build
>>> +++ b/src/intel/compiler/meson.build
>>> @@ -57,6 +57,7 @@ libintel_compiler_files = files(
>>>    'brw_fs_live_variables.h',
>>>    'brw_fs_lower_conversions.cpp',
>>>    'brw_fs_lower_pack.cpp',
>>> +  'brw_fs_lower_regioning.cpp',
>>>    'brw_fs_nir.cpp',
>>>    'brw_fs_reg_allocate.cpp',
>>>    'brw_fs_register_coalesce.cpp',
> _______________________________________________
> mesa-dev mailing list
> [email protected]
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev

signature.asc
Description: PGP signature

_______________________________________________
mesa-dev mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 07/10] intel/fs: Introduce regioning lowering pass.

Reply via email to