---
.../drivers/swr/rasterizer/jitter/fetch_jit.cpp | 485 +++++++++++++--------
.../drivers/swr/rasterizer/jitter/fetch_jit.h | 24 +-
2 files changed, 336 insertions(+), 173 deletions(-)
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp
b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp
index 58cafb5..0b805bc 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp
+++ b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp
@@ -61,13 +61,14 @@ struct FetchJit : public Builder
Value* GetSimdValid8bitIndices(Value* vIndices, Value* pLastIndex);
// package up Shuffle*bpcGatherd args into a tuple for convenience
- typedef std::tuple<Value*&, Value*, const Instruction::CastOps, const
ConversionType,
- uint32_t&, uint32_t&, const ComponentEnable, const
ComponentControl(&)[4], Value*(&)[4],
- const uint32_t (&)[4]> Shuffle8bpcArgs;
+ typedef std::tuple<Value*&, Value*, const Instruction::CastOps, const
ConversionType,
+ uint32_t&, uint32_t&, const ComponentEnable, const
ComponentControl(&)[4], Value*(&)[4],
+ const uint32_t(&)[4], Value*, bool, uint32_t, bool, uint32_t>
Shuffle8bpcArgs;
void Shuffle8bpcGatherd(Shuffle8bpcArgs &args);
typedef std::tuple<Value*(&)[2], Value*, const Instruction::CastOps, const
ConversionType,
- uint32_t&, uint32_t&, const ComponentEnable, const
ComponentControl(&)[4], Value*(&)[4]> Shuffle16bpcArgs;
+ uint32_t&, uint32_t&, const ComponentEnable, const
ComponentControl(&)[4], Value*(&)[4],
+ Value*, bool, uint32_t, bool, uint32_t> Shuffle16bpcArgs;
void Shuffle16bpcGather(Shuffle16bpcArgs &args);
void StoreVertexElements(Value* pVtxOut, const uint32_t outputElt, const
uint32_t numEltsToStore, Value* (&vVertexElements)[4]);
@@ -226,7 +227,7 @@ Function* FetchJit::Create(const FETCH_COMPILE_STATE&
fetchState)
/// @brief Loads attributes from memory using LOADs, shuffling the
/// components into SOA form.
/// *Note* currently does not support component control,
-/// component packing, or instancing
+/// component packing, instancing, InstanceID SGVs, or VertexID SGVs
/// @param fetchState - info about attributes to be fetched from memory
/// @param streams - value pointer to the current vertex stream
/// @param vIndices - vector value of indices to load
@@ -786,6 +787,23 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE
&fetchState, Value* f
CreateGatherOddFormats((SWR_FORMAT)ied.Format, pStreamBase,
vOffsets, pResults);
ConvertFormat((SWR_FORMAT)ied.Format, pResults);
+ // check for InstanceID SGV
+ if (fetchState.InstanceIdEnable &&
(fetchState.InstanceIdElementOffset == nInputElt))
+ {
+ SWR_ASSERT(fetchState.InstanceIdComponentNumber <
(sizeof(pResults) / sizeof(pResults[0])));
+
+ // Load a SIMD of InstanceIDs
+ pResults[fetchState.InstanceIdComponentNumber] =
VBROADCAST(LOAD(GEP(fetchInfo, { 0, SWR_FETCH_CONTEXT_CurInstance }))); //
InstanceID
+ }
+ // check for VertexID SGV
+ else if (fetchState.VertexIdEnable &&
(fetchState.VertexIdElementOffset == nInputElt))
+ {
+ SWR_ASSERT(fetchState.VertexIdComponentNumber <
(sizeof(pResults) / sizeof(pResults[0])));
+
+ // Load a SIMD of VertexIDs
+ pResults[fetchState.VertexIdComponentNumber] =
LOAD(GEP(fetchInfo, { 0, SWR_FETCH_CONTEXT_VertexID }));
+ }
+
StoreVertexElements(pVtxOut, outputElt++, 4, pResults);
currentVertexElement = 0;
}
@@ -832,8 +850,13 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE
&fetchState, Value* f
// if we have at least one component to shuffle into place
if(compMask){
+ const bool instanceIdEnable =
(fetchState.InstanceIdEnable) && (fetchState.InstanceIdElementOffset ==
nInputElt);
+ const bool vertexIdEnable =
(fetchState.VertexIdEnable) && (fetchState.VertexIdElementOffset == nInputElt);
+
Shuffle16bpcArgs args =
std::forward_as_tuple(vGatherResult, pVtxOut, Instruction::CastOps::FPExt,
CONVERT_NONE,
-
currentVertexElement, outputElt, compMask, compCtrl, vVertexElements);
+ currentVertexElement, outputElt, compMask,
compCtrl, vVertexElements, fetchInfo, instanceIdEnable,
+ fetchState.InstanceIdComponentNumber,
vertexIdEnable, fetchState.VertexIdComponentNumber);
+
// Shuffle gathered components into place in
simdvertex struct
Shuffle16bpcGather(args); // outputs to
vVertexElements ref
}
@@ -841,30 +864,43 @@ void FetchJit::JitGatherVertices(const
FETCH_COMPILE_STATE &fetchState, Value* f
break;
case 32:
{
- for(uint32_t i = 0; i < 4; i++)
+ for (uint32_t i = 0; i < 4; i++)
{
- if(!isComponentEnabled(compMask, i)){
- // offset base to the next component in the vertex
to gather
- pStreamBase = GEP(pStreamBase, C((char)4));
- continue;
- }
-
- // if we need to gather the component
- if(compCtrl[i] == StoreSrc){
- // save mask as it is zero'd out after each gather
- Value *vMask = vGatherMask;
-
- // Gather a SIMD of vertices
- vVertexElements[currentVertexElement++] =
GATHERPS(gatherSrc, pStreamBase, vOffsets, vMask, C((char)1));
- }
- else{
- vVertexElements[currentVertexElement++] =
GenerateCompCtrlVector(compCtrl[i]);
- }
+ if (isComponentEnabled(compMask, i))
+ {
+ // check for InstanceID SGV
+ if ((fetchState.InstanceIdEnable) &&
(fetchState.InstanceIdElementOffset == nInputElt) &&
(fetchState.InstanceIdComponentNumber == currentVertexElement))
+ {
+ // Load a SIMD of InstanceIDs
+ vVertexElements[currentVertexElement++] =
VBROADCAST(LOAD(GEP(fetchInfo, { 0, SWR_FETCH_CONTEXT_CurInstance }))); //
InstanceID
+ }
+ // check for VertexID SGV
+ else if ((fetchState.VertexIdEnable) &&
(fetchState.VertexIdElementOffset == nInputElt) &&
(fetchState.VertexIdComponentNumber == currentVertexElement))
+ {
+ // Load a SIMD of VertexIDs
+ vVertexElements[currentVertexElement++] =
LOAD(GEP(fetchInfo, { 0, SWR_FETCH_CONTEXT_VertexID }));
+ }
+ // if we need to gather the component
+ else if (compCtrl[i] == StoreSrc)
+ {
+ // save mask as it is zero'd out after each
gather
+ Value *vMask = vGatherMask;
+
+ // Gather a SIMD of vertices
+ vVertexElements[currentVertexElement++] =
GATHERPS(gatherSrc, pStreamBase, vOffsets, vMask, C((char)1));
+ }
+ else
+ {
+ vVertexElements[currentVertexElement++] =
GenerateCompCtrlVector(compCtrl[i]);
+ }
+
+ if (currentVertexElement > 3)
+ {
+ StoreVertexElements(pVtxOut, outputElt++, 4,
vVertexElements);
+ // reset to the next vVertexElement to output
+ currentVertexElement = 0;
+ }
- if(currentVertexElement > 3){
- StoreVertexElements(pVtxOut, outputElt++, 4,
vVertexElements);
- // reset to the next vVertexElement to output
- currentVertexElement = 0;
}
// offset base to the next component in the vertex to
gather
@@ -918,14 +954,20 @@ void FetchJit::JitGatherVertices(const
FETCH_COMPILE_STATE &fetchState, Value* f
case 8:
{
// if we have at least one component to fetch
- if(compMask){
+ if(compMask)
+ {
Value* vGatherResult = GATHERDD(gatherSrc,
pStreamBase, vOffsets, vGatherMask, C((char)1));
// e.g. result of an 8x32bit integer gather for 8bit
components
// 256i - 0 1 2 3 4 5 6 7
// xyzw xyzw xyzw xyzw xyzw xyzw xyzw xyzw
+ const bool instanceIdEnable =
fetchState.InstanceIdEnable && (fetchState.InstanceIdElementOffset ==
nInputElt);
+ const bool vertexIdEnable = fetchState.VertexIdEnable
&& (fetchState.VertexIdElementOffset == nInputElt);
+
Shuffle8bpcArgs args =
std::forward_as_tuple(vGatherResult, pVtxOut, extendCastType, conversionType,
-
currentVertexElement, outputElt, compMask, compCtrl, vVertexElements,
info.swizzle);
+ currentVertexElement, outputElt, compMask,
compCtrl, vVertexElements, info.swizzle, fetchInfo,
+ instanceIdEnable,
fetchState.InstanceIdComponentNumber, vertexIdEnable,
fetchState.VertexIdComponentNumber);
+
// Shuffle gathered components into place in
simdvertex struct
Shuffle8bpcGatherd(args); // outputs to
vVertexElements ref
}
@@ -963,8 +1005,13 @@ void FetchJit::JitGatherVertices(const
FETCH_COMPILE_STATE &fetchState, Value* f
// if we have at least one component to shuffle into place
if(compMask){
+ const bool instanceIdEnable =
fetchState.InstanceIdEnable && (fetchState.InstanceIdElementOffset ==
nInputElt);
+ const bool vertexIdEnable = fetchState.VertexIdEnable
&& (fetchState.VertexIdElementOffset == nInputElt);
+
Shuffle16bpcArgs args =
std::forward_as_tuple(vGatherResult, pVtxOut, extendCastType, conversionType,
-
currentVertexElement, outputElt, compMask, compCtrl, vVertexElements);
+ currentVertexElement, outputElt, compMask,
compCtrl, vVertexElements, fetchInfo, instanceIdEnable,
+ fetchState.InstanceIdComponentNumber,
vertexIdEnable, fetchState.VertexIdComponentNumber);
+
// Shuffle gathered components into place in
simdvertex struct
Shuffle16bpcGather(args); // outputs to
vVertexElements ref
}
@@ -975,33 +1022,46 @@ void FetchJit::JitGatherVertices(const
FETCH_COMPILE_STATE &fetchState, Value* f
SWR_ASSERT(conversionType == CONVERT_NONE);
// Gathered components into place in simdvertex struct
- for(uint32_t i = 0; i < 4; i++)
+ for (uint32_t i = 0; i < 4; i++)
{
- if(!isComponentEnabled(compMask, i)){
- // offset base to the next component in the vertex
to gather
- pStreamBase = GEP(pStreamBase, C((char)4));
- continue;
- }
+ if (isComponentEnabled(compMask, i))
+ {
+ // check for InstanceID SGV
+ if (fetchState.InstanceIdEnable &&
(fetchState.InstanceIdElementOffset == nInputElt) &&
(fetchState.InstanceIdComponentNumber == currentVertexElement))
+ {
+ // Load a SIMD of InstanceIDs
+ vVertexElements[currentVertexElement++] =
VBROADCAST(LOAD(GEP(fetchInfo, { 0, SWR_FETCH_CONTEXT_CurInstance }))); //
InstanceID
+ }
+ // check for VertexID SGV
+ else if (fetchState.VertexIdEnable &&
(fetchState.VertexIdElementOffset == nInputElt) &&
(fetchState.VertexIdComponentNumber == currentVertexElement))
+ {
+ // Load a SIMD of VertexIDs
+ vVertexElements[currentVertexElement++] =
LOAD(GEP(fetchInfo, { 0, SWR_FETCH_CONTEXT_VertexID }));
+ }
+ // if we need to gather the component
+ else if (compCtrl[i] == StoreSrc)
+ {
+ // save mask as it is zero'd out after each
gather
+ Value *vMask = vGatherMask;
+
+ vVertexElements[currentVertexElement++] =
GATHERDD(gatherSrc, pStreamBase, vOffsets, vMask, C((char)1));
+
+ // e.g. result of a single 8x32bit integer
gather for 32bit components
+ // 256i - 0 1 2 3 4 5 6 7
+ // xxxx xxxx xxxx xxxx xxxx xxxx xxxx
xxxx
+ }
+ else
+ {
+ vVertexElements[currentVertexElement++] =
GenerateCompCtrlVector(compCtrl[i]);
+ }
+
+ if (currentVertexElement > 3)
+ {
+ StoreVertexElements(pVtxOut, outputElt++, 4,
vVertexElements);
+ // reset to the next vVertexElement to output
+ currentVertexElement = 0;
+ }
- // if we need to gather the component
- if(compCtrl[i] == StoreSrc){
- // save mask as it is zero'd out after each gather
- Value *vMask = vGatherMask;
-
- vVertexElements[currentVertexElement++] =
GATHERDD(gatherSrc, pStreamBase, vOffsets, vMask, C((char)1));
-
- // e.g. result of a single 8x32bit integer gather
for 32bit components
- // 256i - 0 1 2 3 4 5 6 7
- // xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx
- }
- else{
- vVertexElements[currentVertexElement++] =
GenerateCompCtrlVector(compCtrl[i]);
- }
-
- if(currentVertexElement > 3){
- StoreVertexElements(pVtxOut, outputElt++, 4,
vVertexElements);
- // reset to the next vVertexElement to output
- currentVertexElement = 0;
}
// offset base to the next component in the vertex to
gather
@@ -1140,6 +1200,11 @@ Value* FetchJit::GetSimdValid32bitIndices(Value*
pIndices, Value* pLastIndex)
/// @param compCtrl - component control val
/// @param vVertexElements[4] - vertex components to output
/// @param swizzle[4] - component swizzle location
+/// @param fetchInfo - fetch shader info
+/// @param instanceIdEnable - InstanceID enabled?
+/// @param instanceIdComponentNumber - InstanceID component override
+/// @param vertexIdEnable - VertexID enabled?
+/// @param vertexIdComponentNumber - VertexID component override
void FetchJit::Shuffle8bpcGatherd(Shuffle8bpcArgs &args)
{
// Unpack tuple args
@@ -1153,6 +1218,11 @@ void FetchJit::Shuffle8bpcGatherd(Shuffle8bpcArgs &args)
const ComponentControl (&compCtrl)[4] = std::get<7>(args);
Value* (&vVertexElements)[4] = std::get<8>(args);
const uint32_t (&swizzle)[4] = std::get<9>(args);
+ Value *fetchInfo = std::get<10>(args);
+ const bool instanceIdEnable = std::get<11>(args);
+ const uint32_t instanceIdComponentNumber = std::get<12>(args);
+ const bool vertexIdEnable = std::get<13>(args);
+ const uint32_t vertexIdComponentNumber = std::get<14>(args);
// cast types
Type* vGatherTy = mSimdInt32Ty;
@@ -1219,34 +1289,50 @@ void FetchJit::Shuffle8bpcGatherd(Shuffle8bpcArgs &args)
}
// sign extend all enabled components. If we have a fill
vVertexElements, output to current simdvertex
- for(uint32_t i = 0; i < 4; i++){
- if(!isComponentEnabled(compMask, i)){
- continue;
- }
+ for (uint32_t i = 0; i < 4; i++)
+ {
+ if (isComponentEnabled(compMask, i))
+ {
+ // check for InstanceID SGV
+ if (instanceIdEnable && (instanceIdComponentNumber ==
currentVertexElement))
+ {
+ // Load a SIMD of InstanceIDs
+ vVertexElements[currentVertexElement++] =
VBROADCAST(LOAD(GEP(fetchInfo, { 0, SWR_FETCH_CONTEXT_CurInstance }))); //
InstanceID
+ }
+ // check for VertexID SGV
+ else if (vertexIdEnable && (vertexIdComponentNumber ==
currentVertexElement))
+ {
+ // Load a SIMD of VertexIDs
+ vVertexElements[currentVertexElement++] =
LOAD(GEP(fetchInfo, { 0, SWR_FETCH_CONTEXT_VertexID }));
+ }
+ else if (compCtrl[i] == ComponentControl::StoreSrc)
+ {
+ // if x or z, extract 128bits from lane 0, else for y or
w, extract from lane 1
+ uint32_t lane = ((i == 0) || (i == 2)) ? 0 : 1;
+ // if x or y, use vi128XY permute result, else use vi128ZW
+ Value* selectedPermute = (i < 2) ? vi128XY : vi128ZW;
+
+ // sign extend
+ vVertexElements[currentVertexElement] =
PMOVSXBD(BITCAST(VEXTRACT(selectedPermute, C(lane)), v16x8Ty));
- if(compCtrl[i] == ComponentControl::StoreSrc){
- // if x or z, extract 128bits from lane 0, else for y or w,
extract from lane 1
- uint32_t lane = ((i == 0) || (i == 2)) ? 0 : 1;
- // if x or y, use vi128XY permute result, else use vi128ZW
- Value* selectedPermute = (i < 2) ? vi128XY : vi128ZW;
-
- // sign extend
- vVertexElements[currentVertexElement] =
PMOVSXBD(BITCAST(VEXTRACT(selectedPermute, C(lane)), v16x8Ty));
-
- // denormalize if needed
- if(conversionType != CONVERT_NONE){
- vVertexElements[currentVertexElement] = FMUL(CAST(fpCast,
vVertexElements[currentVertexElement], mSimdFP32Ty), conversionFactor);
+ // denormalize if needed
+ if (conversionType != CONVERT_NONE)
+ {
+ vVertexElements[currentVertexElement] =
FMUL(CAST(fpCast, vVertexElements[currentVertexElement], mSimdFP32Ty),
conversionFactor);
+ }
+ currentVertexElement++;
+ }
+ else
+ {
+ vVertexElements[currentVertexElement++] =
GenerateCompCtrlVector(compCtrl[i]);
}
- currentVertexElement++;
- }
- else{
- vVertexElements[currentVertexElement++] =
GenerateCompCtrlVector(compCtrl[i]);
- }
- if(currentVertexElement > 3){
- StoreVertexElements(pVtxOut, outputElt++, 4, vVertexElements);
- // reset to the next vVertexElement to output
- currentVertexElement = 0;
+ if (currentVertexElement > 3)
+ {
+ StoreVertexElements(pVtxOut, outputElt++, 4,
vVertexElements);
+ // reset to the next vVertexElement to output
+ currentVertexElement = 0;
+ }
}
}
}
@@ -1278,59 +1364,76 @@ void FetchJit::Shuffle8bpcGatherd(Shuffle8bpcArgs &args)
}
// shuffle enabled components into lower byte of each 32bit lane, 0
extending to 32 bits
- for(uint32_t i = 0; i < 4; i++){
- if(!isComponentEnabled(compMask, i)){
- continue;
- }
-
- if(compCtrl[i] == ComponentControl::StoreSrc){
- // pshufb masks for each component
- Value* vConstMask;
- switch(swizzle[i]){
+ for (uint32_t i = 0; i < 4; i++)
+ {
+ if (isComponentEnabled(compMask, i))
+ {
+ // check for InstanceID SGV
+ if (instanceIdEnable && (instanceIdComponentNumber ==
currentVertexElement))
+ {
+ // Load a SIMD of InstanceIDs
+ vVertexElements[currentVertexElement++] =
VBROADCAST(LOAD(GEP(fetchInfo, { 0, SWR_FETCH_CONTEXT_CurInstance }))); //
InstanceID
+ }
+ // check for VertexID SGV
+ else if (vertexIdEnable && (vertexIdComponentNumber ==
currentVertexElement))
+ {
+ // Load a SIMD of VertexIDs
+ vVertexElements[currentVertexElement++] =
LOAD(GEP(fetchInfo, { 0, SWR_FETCH_CONTEXT_VertexID }));
+ }
+ else if (compCtrl[i] == ComponentControl::StoreSrc)
+ {
+ // pshufb masks for each component
+ Value* vConstMask;
+ switch (swizzle[i])
+ {
case 0:
// x shuffle mask
- vConstMask = C<char>({0, -1, -1, -1, 4, -1, -1, -1, 8,
-1, -1, -1, 12, -1, -1, -1,
- 0, -1, -1, -1, 4, -1, -1, -1, 8,
-1, -1, -1, 12, -1, -1, -1});
+ vConstMask = C<char>({ 0, -1, -1, -1, 4, -1, -1, -1,
8, -1, -1, -1, 12, -1, -1, -1,
+ 0, -1, -1, -1, 4, -1, -1, -1,
8, -1, -1, -1, 12, -1, -1, -1 });
break;
case 1:
// y shuffle mask
- vConstMask = C<char>({1, -1, -1, -1, 5, -1, -1, -1, 9,
-1, -1, -1, 13, -1, -1, -1,
- 1, -1, -1, -1, 5, -1, -1, -1, 9,
-1, -1, -1, 13, -1, -1, -1});
+ vConstMask = C<char>({ 1, -1, -1, -1, 5, -1, -1, -1,
9, -1, -1, -1, 13, -1, -1, -1,
+ 1, -1, -1, -1, 5, -1, -1, -1,
9, -1, -1, -1, 13, -1, -1, -1 });
break;
case 2:
// z shuffle mask
- vConstMask = C<char>({2, -1, -1, -1, 6, -1, -1, -1,
10, -1, -1, -1, 14, -1, -1, -1,
- 2, -1, -1, -1, 6, -1, -1, -1,
10, -1, -1, -1, 14, -1, -1, -1});
+ vConstMask = C<char>({ 2, -1, -1, -1, 6, -1, -1, -1,
10, -1, -1, -1, 14, -1, -1, -1,
+ 2, -1, -1, -1, 6, -1, -1, -1,
10, -1, -1, -1, 14, -1, -1, -1 });
break;
case 3:
// w shuffle mask
- vConstMask = C<char>({3, -1, -1, -1, 7, -1, -1, -1,
11, -1, -1, -1, 15, -1, -1, -1,
- 3, -1, -1, -1, 7, -1, -1, -1,
11, -1, -1, -1, 15, -1, -1, -1});
+ vConstMask = C<char>({ 3, -1, -1, -1, 7, -1, -1, -1,
11, -1, -1, -1, 15, -1, -1, -1,
+ 3, -1, -1, -1, 7, -1, -1, -1,
11, -1, -1, -1, 15, -1, -1, -1 });
break;
default:
vConstMask = nullptr;
break;
- }
+ }
- vVertexElements[currentVertexElement] =
BITCAST(PSHUFB(BITCAST(vGatherResult, v32x8Ty), vConstMask), vGatherTy);
- // after pshufb for x channel
- // 256i - 0 1 2 3 4 5 6 7
- // x000 x000 x000 x000 x000 x000 x000 x000
+ vVertexElements[currentVertexElement] =
BITCAST(PSHUFB(BITCAST(vGatherResult, v32x8Ty), vConstMask), vGatherTy);
+ // after pshufb for x channel
+ // 256i - 0 1 2 3 4 5 6 7
+ // x000 x000 x000 x000 x000 x000 x000 x000
- // denormalize if needed
- if (conversionType != CONVERT_NONE){
- vVertexElements[currentVertexElement] = FMUL(CAST(fpCast,
vVertexElements[currentVertexElement], mSimdFP32Ty), conversionFactor);
+ // denormalize if needed
+ if (conversionType != CONVERT_NONE)
+ {
+ vVertexElements[currentVertexElement] =
FMUL(CAST(fpCast, vVertexElements[currentVertexElement], mSimdFP32Ty),
conversionFactor);
+ }
+ currentVertexElement++;
+ }
+ else
+ {
+ vVertexElements[currentVertexElement++] =
GenerateCompCtrlVector(compCtrl[i]);
}
- currentVertexElement++;
- }
- else{
- vVertexElements[currentVertexElement++] =
GenerateCompCtrlVector(compCtrl[i]);
- }
- if(currentVertexElement > 3){
- StoreVertexElements(pVtxOut, outputElt++, 4, vVertexElements);
- // reset to the next vVertexElement to output
- currentVertexElement = 0;
+ if (currentVertexElement > 3)
+ {
+ StoreVertexElements(pVtxOut, outputElt++, 4,
vVertexElements);
+ // reset to the next vVertexElement to output
+ currentVertexElement = 0;
+ }
}
}
}
@@ -1354,6 +1457,11 @@ void FetchJit::Shuffle8bpcGatherd(Shuffle8bpcArgs &args)
/// @param compMask - component packing mask
/// @param compCtrl - component control val
/// @param vVertexElements[4] - vertex components to output
+/// @param fetchInfo - fetch shader info
+/// @param instanceIdEnable - InstanceID enabled?
+/// @param instanceIdComponentNumber - InstanceID component override
+/// @param vertexIdEnable - VertexID enabled?
+/// @param vertexIdComponentNumber - VertexID component override
void FetchJit::Shuffle16bpcGather(Shuffle16bpcArgs &args)
{
// Unpack tuple args
@@ -1366,6 +1474,11 @@ void FetchJit::Shuffle16bpcGather(Shuffle16bpcArgs &args)
const ComponentEnable compMask = std::get<6>(args);
const ComponentControl(&compCtrl)[4] = std::get<7>(args);
Value* (&vVertexElements)[4] = std::get<8>(args);
+ Value *fetchInfo = std::get<9>(args);
+ const bool instanceIdEnable = std::get<10>(args);
+ const uint32_t instanceIdComponentNumber = std::get<11>(args);
+ const bool vertexIdEnable = std::get<12>(args);
+ const uint32_t vertexIdComponentNumber = std::get<13>(args);
// cast types
Type* vGatherTy = VectorType::get(IntegerType::getInt32Ty(JM()->mContext),
mVWidth);
@@ -1429,43 +1542,57 @@ void FetchJit::Shuffle16bpcGather(Shuffle16bpcArgs
&args)
}
// sign extend all enabled components. If we have a fill
vVertexElements, output to current simdvertex
- for(uint32_t i = 0; i < 4; i++){
- if(!isComponentEnabled(compMask, i)){
- continue;
- }
-
- if(compCtrl[i] == ComponentControl::StoreSrc){
- // if x or z, extract 128bits from lane 0, else for y or w,
extract from lane 1
- uint32_t lane = ((i == 0) || (i == 2)) ? 0 : 1;
- // if x or y, use vi128XY permute result, else use vi128ZW
- Value* selectedPermute = (i < 2) ? vi128XY : vi128ZW;
-
- if(bFP) {
- // extract 128 bit lanes to sign extend each component
- vVertexElements[currentVertexElement] =
CVTPH2PS(BITCAST(VEXTRACT(selectedPermute, C(lane)), v8x16Ty));
+ for (uint32_t i = 0; i < 4; i++)
+ {
+ if (isComponentEnabled(compMask, i))
+ {
+ // check for InstanceID SGV
+ if (instanceIdEnable && (instanceIdComponentNumber ==
currentVertexElement))
+ {
+ // Load a SIMD of InstanceIDs
+ vVertexElements[currentVertexElement++] =
VBROADCAST(LOAD(GEP(fetchInfo, { 0, SWR_FETCH_CONTEXT_CurInstance }))); //
InstanceID
+ }
+ // check for VertexID SGV
+ else if (vertexIdEnable && (vertexIdComponentNumber ==
currentVertexElement))
+ {
+ // Load a SIMD of VertexIDs
+ vVertexElements[currentVertexElement++] =
LOAD(GEP(fetchInfo, { 0, SWR_FETCH_CONTEXT_VertexID }));
}
- else {
- // extract 128 bit lanes to sign extend each component
- vVertexElements[currentVertexElement] =
PMOVSXWD(BITCAST(VEXTRACT(selectedPermute, C(lane)), v8x16Ty));
+ else if (compCtrl[i] == ComponentControl::StoreSrc)
+ {
+ // if x or z, extract 128bits from lane 0, else for y or
w, extract from lane 1
+ uint32_t lane = ((i == 0) || (i == 2)) ? 0 : 1;
+ // if x or y, use vi128XY permute result, else use vi128ZW
+ Value* selectedPermute = (i < 2) ? vi128XY : vi128ZW;
+
+ if (bFP) {
+ // extract 128 bit lanes to sign extend each component
+ vVertexElements[currentVertexElement] =
CVTPH2PS(BITCAST(VEXTRACT(selectedPermute, C(lane)), v8x16Ty));
+ }
+ else {
+ // extract 128 bit lanes to sign extend each component
+ vVertexElements[currentVertexElement] =
PMOVSXWD(BITCAST(VEXTRACT(selectedPermute, C(lane)), v8x16Ty));
- // denormalize if needed
- if(conversionType != CONVERT_NONE){
- vVertexElements[currentVertexElement] =
FMUL(CAST(IntToFpCast, vVertexElements[currentVertexElement], mSimdFP32Ty),
conversionFactor);
+ // denormalize if needed
+ if (conversionType != CONVERT_NONE) {
+ vVertexElements[currentVertexElement] =
FMUL(CAST(IntToFpCast, vVertexElements[currentVertexElement], mSimdFP32Ty),
conversionFactor);
+ }
}
+ currentVertexElement++;
+ }
+ else
+ {
+ vVertexElements[currentVertexElement++] =
GenerateCompCtrlVector(compCtrl[i]);
}
- currentVertexElement++;
- }
- else{
- vVertexElements[currentVertexElement++] =
GenerateCompCtrlVector(compCtrl[i]);
- }
- if(currentVertexElement > 3){
- StoreVertexElements(pVtxOut, outputElt++, 4, vVertexElements);
- // reset to the next vVertexElement to output
- currentVertexElement = 0;
+ if (currentVertexElement > 3)
+ {
+ StoreVertexElements(pVtxOut, outputElt++, 4,
vVertexElements);
+ // reset to the next vVertexElement to output
+ currentVertexElement = 0;
+ }
}
}
-
}
// else zero extend
else if ((extendType == Instruction::CastOps::ZExt) || (extendType ==
Instruction::CastOps::UIToFP))
@@ -1509,36 +1636,52 @@ void FetchJit::Shuffle16bpcGather(Shuffle16bpcArgs
&args)
}
// shuffle enabled components into lower word of each 32bit lane, 0
extending to 32 bits
- for(uint32_t i = 0; i < 4; i++){
- if(!isComponentEnabled(compMask, i)){
- continue;
- }
-
- if(compCtrl[i] == ComponentControl::StoreSrc){
- // select correct constMask for x/z or y/w pshufb
- uint32_t selectedMask = ((i == 0) || (i == 2)) ? 0 : 1;
- // if x or y, use vi128XY permute result, else use vi128ZW
- uint32_t selectedGather = (i < 2) ? 0 : 1;
+ for (uint32_t i = 0; i < 4; i++)
+ {
+ if (isComponentEnabled(compMask, i))
+ {
+ // check for InstanceID SGV
+ if (instanceIdEnable && (instanceIdComponentNumber ==
currentVertexElement))
+ {
+ // Load a SIMD of InstanceIDs
+ vVertexElements[currentVertexElement++] =
VBROADCAST(LOAD(GEP(fetchInfo, { 0, SWR_FETCH_CONTEXT_CurInstance }))); //
InstanceID
+ }
+ // check for VertexID SGV
+ else if (vertexIdEnable && (vertexIdComponentNumber ==
currentVertexElement))
+ {
+ // Load a SIMD of VertexIDs
+ vVertexElements[currentVertexElement++] =
LOAD(GEP(fetchInfo, { 0, SWR_FETCH_CONTEXT_VertexID }));
+ }
+ else if (compCtrl[i] == ComponentControl::StoreSrc)
+ {
+ // select correct constMask for x/z or y/w pshufb
+ uint32_t selectedMask = ((i == 0) || (i == 2)) ? 0 : 1;
+ // if x or y, use vi128XY permute result, else use vi128ZW
+ uint32_t selectedGather = (i < 2) ? 0 : 1;
- vVertexElements[currentVertexElement] =
BITCAST(PSHUFB(BITCAST(vGatherResult[selectedGather], v32x8Ty),
vConstMask[selectedMask]), vGatherTy);
- // after pshufb mask for x channel; z uses the same shuffle
from the second gather
- // 256i - 0 1 2 3 4 5 6 7
- // xx00 xx00 xx00 xx00 xx00 xx00 xx00 xx00
+ vVertexElements[currentVertexElement] =
BITCAST(PSHUFB(BITCAST(vGatherResult[selectedGather], v32x8Ty),
vConstMask[selectedMask]), vGatherTy);
+ // after pshufb mask for x channel; z uses the same
shuffle from the second gather
+ // 256i - 0 1 2 3 4 5 6 7
+ // xx00 xx00 xx00 xx00 xx00 xx00 xx00 xx00
- // denormalize if needed
- if(conversionType != CONVERT_NONE){
- vVertexElements[currentVertexElement] = FMUL(CAST(fpCast,
vVertexElements[currentVertexElement], mSimdFP32Ty), conversionFactor);
+ // denormalize if needed
+ if (conversionType != CONVERT_NONE)
+ {
+ vVertexElements[currentVertexElement] =
FMUL(CAST(fpCast, vVertexElements[currentVertexElement], mSimdFP32Ty),
conversionFactor);
+ }
+ currentVertexElement++;
+ }
+ else
+ {
+ vVertexElements[currentVertexElement++] =
GenerateCompCtrlVector(compCtrl[i]);
}
- currentVertexElement++;
- }
- else{
- vVertexElements[currentVertexElement++] =
GenerateCompCtrlVector(compCtrl[i]);
- }
- if(currentVertexElement > 3){
- StoreVertexElements(pVtxOut, outputElt++, 4, vVertexElements);
- // reset to the next vVertexElement to output
- currentVertexElement = 0;
+ if (currentVertexElement > 3)
+ {
+ StoreVertexElements(pVtxOut, outputElt++, 4,
vVertexElements);
+ // reset to the next vVertexElement to output
+ currentVertexElement = 0;
+ }
}
}
}
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.h
b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.h
index ea3625d..12d15d5 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.h
+++ b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.h
@@ -97,13 +97,20 @@ struct FETCH_COMPILE_STATE
SWR_FORMAT indexType;
uint32_t cutIndex{ 0xffffffff };
+ bool InstanceIdEnable{ false }; // InstanceID SGV override enable; defaulted because the ctor does not set it and operator== reads it
+ uint32_t InstanceIdElementOffset{ 0 }; // index of the input element whose component is replaced by InstanceID
+ uint32_t InstanceIdComponentNumber{ 0 }; // component slot (asserted < 4 by the JIT) overwritten with InstanceID
+ bool VertexIdEnable{ false }; // VertexID SGV override enable; defaulted for the same reason as InstanceIdEnable
+ uint32_t VertexIdElementOffset{ 0 }; // index of the input element whose component is replaced by VertexID
+ uint32_t VertexIdComponentNumber{ 0 }; // component slot (asserted < 4 by the JIT) overwritten with VertexID
+
// Options that effect the JIT'd code
bool bDisableVGATHER; // if enabled, FetchJit will generate
loads/shuffles instead of VGATHERs
bool bDisableIndexOOBCheck; // if enabled, FetchJit will exclude index
OOB check
bool bEnableCutIndex{ false }; // compares indices with the cut index and
returns a cut mask
- FETCH_COMPILE_STATE(bool useVGATHER = false, bool indexOOBCheck = false) :
- bDisableVGATHER(useVGATHER), bDisableIndexOOBCheck(indexOOBCheck){};
+ FETCH_COMPILE_STATE(bool disableVGATHER = false, bool disableIndexOOBCheck = false) : // seeds the two bDisable* JIT options; both default to false
+ bDisableVGATHER(disableVGATHER), bDisableIndexOOBCheck(disableIndexOOBCheck) {}
bool operator==(const FETCH_COMPILE_STATE &other) const
{
@@ -114,6 +121,19 @@ struct FETCH_COMPILE_STATE
if (bEnableCutIndex != other.bEnableCutIndex) return false;
if (cutIndex != other.cutIndex) return false;
+ if (InstanceIdEnable != other.InstanceIdEnable) return false;
+ if (InstanceIdEnable)
+ {
+ if (InstanceIdComponentNumber != other.InstanceIdComponentNumber)
return false;
+ if (InstanceIdElementOffset != other.InstanceIdElementOffset)
return false;
+ }
+ if (VertexIdEnable != other.VertexIdEnable) return false;
+ if (VertexIdEnable)
+ {
+ if (VertexIdComponentNumber != other.VertexIdComponentNumber)
return false;
+ if (VertexIdElementOffset != other.VertexIdElementOffset) return
false;
+ }
+
for(uint32_t i = 0; i < numAttribs; ++i)
{
if((layout[i].bits != other.layout[i].bits) ||
--
1.9.1
_______________________________________________
mesa-dev mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/mesa-dev