On 04:17 PM - Apr 17 2016, Ilia Mirkin wrote: > On Sun, Apr 17, 2016 at 4:07 PM, Pierre Moreau <[email protected]> wrote: > > Ping :-) > > > > On 10:56 PM - Mar 19 2016, Pierre Moreau wrote: > >> Generating a `cvt u32 $r0 u64 $r1d` or a `cvt u64 $r0d u32 $r2` makes the > >> GPU > >> unhappy. Instead, manually handle the conversion between 64-bit and 32-bit > >> values, and use `cvt` to convert between the original target (resp. source) > >> and 32-bit value. This happens to be the behaviour of NVIDIA's driver. > >> > >> Signed-off-by: Pierre Moreau <[email protected]> > >> --- > >> .../nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 59 > >> ++++++++++++++++++++++ > >> .../nouveau/codegen/nv50_ir_lowering_nvc0.h | 1 + > >> 2 files changed, 60 insertions(+) > >> > >> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp > >> b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp > >> index 2719f2c..c419a68 100644 > >> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp > >> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp > >> @@ -1859,6 +1859,63 @@ NVC0LoweringPass::handleOUT(Instruction *i) > >> return true; > >> } > >> > >> +bool > >> +NVC0LoweringPass::handleCVT(Instruction *i) > >> +{ > >> + if (isFloatType(i->dType) || isFloatType(i->sType) || > >> + isSignedIntType(i->dType) xor isSignedIntType(i->sType)) > > I know pre-C89 features are cool, but let's avoid using them. I know > characters like ^ were uncommon on the 1960's and 1970's teletypes, > but I think we're past those days now.
Yeah… Will fix that.
>
> >> + return false;
> >> +
> >> + if (typeSizeof(i->sType) == 8) {
> >> + Value *toSplit = i->getSrc(0);
> >> + if (i->saturate) {
> >> + Value *minValue = bld.loadImm(bld.getSSA(8), 0ul);
> >> + Value *maxValue = bld.loadImm(bld.getSSA(8), UINT32_MAX);
> >> + if (isSignedType(i->sType)) {
> >> + minValue = bld.loadImm(bld.getSSA(8), INT32_MIN);
> >> + maxValue = bld.loadImm(bld.getSSA(8), INT32_MAX);
> >> + }
> >> + Value *minRes = bld.mkOp2v(OP_MAX, i->sType, bld.getSSA(8),
> >> toSplit,
> >> + minValue);
> >> + toSplit = bld.mkOp2v(OP_MIN, i->sType, bld.getSSA(8), minRes,
> >> + maxValue);
>
> Aren't you assuming that i->dType == 4 here? It could be an unsigned
> <-> signed conversion, at 64-bit. So the clamp values would be
I am assuming `typeSizeof(i->dType) <= 4`: remember the ^ (xor) check from
before, which already bails out on signed <=> unsigned conversions! ;-) But it
could still be a U64 <=> U64 or S64 <=> S64 conversion, which would then fail…
> different. Handling ALL the cases is quite annoying... can you figure
> out what the hw doesn't support and just handle that? I doubt it'll be
> any slower, and definitely simpler.
I don’t remember whether I checked U64 <=> S64 conversions… I will need to
refresh my memory and log which combinations fail.
Thanks!
Pierre
>
> -ilia
>
> >> + }
> >> +
> >> + Value *value32[2] = { bld.getSSA(), bld.getSSA() };
> >> + bld.mkSplit(value32, 4, toSplit);
> >> + if (typeSizeof(i->dType) == 4) {
> >> + bld.mkMov(i->getDef(0), value32[0], i->dType);
> >> + delete_Instruction(prog, i);
> >> + return true;
> >> + }
> >> +
> >> + i->setSrc(0, bld.getSSA());
> >> + i->sType = isSignedIntType(i->dType) ? TYPE_S32 : TYPE_U32;
> >> + bld.mkMov(i->getSrc(0), value32[0], i->sType);
> >> + } else if (typeSizeof(i->dType) == 8) {
> >> + bld.setPosition(i, true);
> >> + Value *res = i->getDef(0);
> >> + Value *high32 = bld.loadImm(bld.getSSA(),
> >> + isSignedType(i->sType) ? UINT32_MAX :
> >> 0u);
> >> + Value *low32 = i->getSrc(0);
> >> + DataType resType = i->dType;
> >> +
> >> + if (typeSizeof(i->sType) <= 2) {
> >> + i->dType = isSignedIntType(i->dType) ? TYPE_S32 : TYPE_U32;
> >> + low32 = bld.getSSA();
> >> + i->setDef(0, low32);
> >> + } else if (typeSizeof(i->sType) == 4) {
> >> + delete_Instruction(prog, i);
> >> + }
> >> +
> >> + Value *merged64 = bld.mkOp2v(OP_MERGE, resType, bld.getSSA(8),
> >> low32,
> >> + high32);
> >> + bld.mkMov(res, merged64, resType);
> >> + }
> >> +
> >> + return true;
> >> +}
> >> +
> >> // Generate a binary predicate if an instruction is predicated by
> >> // e.g. an f32 value.
> >> void
> >> @@ -1894,6 +1951,8 @@ NVC0LoweringPass::visit(Instruction *i)
> >> checkPredicate(i);
> >>
> >> switch (i->op) {
> >> + case OP_CVT:
> >> + return handleCVT(i);
> >> case OP_TEX:
> >> case OP_TXB:
> >> case OP_TXL:
> >> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
> >> b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
> >> index 6eb8aff..9fc24d9 100644
> >> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
> >> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
> >> @@ -96,6 +96,7 @@ protected:
> >> bool handleMOD(Instruction *);
> >> bool handleSQRT(Instruction *);
> >> bool handlePOW(Instruction *);
> >> + bool handleCVT(Instruction *);
> >> bool handleTEX(TexInstruction *);
> >> bool handleTXD(TexInstruction *);
> >> bool handleTXQ(TexInstruction *);
> >> --
> >> 2.7.4
> >>
> >> _______________________________________________
> >> mesa-dev mailing list
> >> [email protected]
> >> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
> >
> > _______________________________________________
> > mesa-dev mailing list
> > [email protected]
> > https://lists.freedesktop.org/mailman/listinfo/mesa-dev
> >
> _______________________________________________
> mesa-dev mailing list
> [email protected]
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
signature.asc
Description: PGP signature
_______________________________________________ mesa-dev mailing list [email protected] https://lists.freedesktop.org/mailman/listinfo/mesa-dev
