Jerome Glisse wrote:
Well, I'm in mixed minds about this myself now. But if you have a way to gain extra
I use a similar approach. v_swiz contains all the native r300 swizzle
values,
as well as a couple of cases where we have to handle them specially. The
non-native cases have v_swiz->native set to GL_FALSE.
I saw that in the code, but you still have a loop and a test case. Your approach is well thought out, but I think it may be hard to understand if, say, you look at it again in 2 or 3 months.
Thus the only argument in favour of the simple two-table translation I propose is that it is the easiest thing to understand; moreover, you can easily optimize some swizzling cases and not bother too much about the others... But apart from the difficulty of understanding your mixed approach, I don't have a strong opinion on which solution is the best...
The code is only translated once. In the case of the texenv stuff, whenever
it needs to be regenerated Mesa will call r300ProgramStringNotify to tell
us that the program has changed.
This is why I think table lookup speed isn't really relevant in selecting this. I will try to adapt my swizzle function to your code (it shouldn't be difficult) so you can see it. Basically, argument checking in emit_arith looks like this:
id = reg & MASK_XYZCHANNEL reg_fpi0_mask = tab1[id]; if (reg_fpi0_mask ^ ffe0 ) { swizzle -> copy tab2[id>>5] -> r300_instruct t = get_temp for i<tab2[id>>5].length r300_instr[p-i] |= t } Just a memcpy of instruction and a small loop to set the correct temp reg allocated.
speed out of this without consuming a heap of RAM, I'm all for it :)
Extra speed could be useful, as some programs may have a lot of swizzles per-frame (UT2004
has a few when MaxTextureUnits is set to 8 in ut2004.ini) and every extra bit helps.
I've attached another patch (applies on top of the last one) which /should/ take
Care must be taken not to cause texture indirections by reusing an
already
used temp as the destination for a TEX instruction (that's what
rp->used_this_node
is/was for)
Yes, I saw this possible issue but didn't think much about a solution to handle it. I will take a deeper look at your code this evening.
care of this, and handle the case where a program does a TEX directly to an
output. It also frees up temps in the cases where we need one for a swizzle, or
an LRP.
What I'd like to be able to do is re-use temps used by the Mesa program, and also
to free up the hardware temp that a Mesa INPUT uses once they're no longer needed.
We'd need to find out when the temp/input was last used so we didn't destroy it
prematurely.
Should be easy enough to do, as my code already pre-parses the Mesa program. I
originally considered this a thing I needed to fix. But it could be useful for some things.
You could possibly do this for some cases right now. In t_src you'd need to skip
No, you haven't missed anything. This was another case where I quickly
hacked
something up. Constant swizzles should/could be handled exactly the same as
temp swizzles. In the swizzling code you just have to be careful that
reg->type is
set correctly depending on whether or not it's a native swizzle.
I was thinking of tracking constants to see if they are always used
swizzled. If so, it is easier to emit the constant already swizzled, like you do. As I said, this is maybe too advanced and complex an optimization, which
may involve complex tracking of operands in the program.
the emit_const4fv for constant sources if they're swizzles, and call swizzle_const
instead. This would eliminate some of them.
The patch I attached doesn't call swizzle_const at all, instead, it uses the same
method as temps do. I didn't see anything too nasty in ut2004 from doing this.
I didn't test the patch in great detail, so there's probably something I missed :/
Ben Skeggs.
Jerome Glisse
------------------------------------------------------- This SF.Net email is sponsored by Oracle Space Sweepstakes Want to be the first software developer in space? Enter now for the Oracle Space Sweepstakes! http://ads.osdn.com/?ad_idt12&alloc_id344&op=click -- _______________________________________________ Dri-devel mailing list [email protected] https://lists.sourceforge.net/lists/listinfo/dri-devel
--- diff/r300/r300_fragprog.c 2005-05-19 04:23:09.000000000 +1000
+++ r300_wip/r300/r300_fragprog.c 2005-05-20 03:21:23.000000000 +1000
@@ -172,10 +172,21 @@
rp->hwreg_flag |= (1 << r);
rp->hwreg_used |= (1 << r);
-
+
return r;
}
+static void free_hw_temp(struct r300_fragment_program *rp, int idx)
+{
+ rp->hwreg_flag &= ~(1<<idx);
+}
+
+static void free_temp(struct r300_fragment_program *rp, int idx)
+{
+ free_hw_temp(rp, rp->temps[idx]);
+ rp->temp_flag &= ~(1<<idx);
+}
+
static pfs_reg_t emit_const4fv(struct r300_fragment_program *rp, GLfloat *cp)
{
pfs_reg_t r = pfs_default_reg;
@@ -209,6 +220,23 @@
return r;
}
+static pfs_reg_t get_temp_tex(struct r300_fragment_program *rp)
+{
+ pfs_reg_t r = get_temp_reg(rp);
+ int hw_r;
+
+ hw_r = ffs(~(rp->hwreg_flag | rp->used_in_node));
+ if (!hw_r || (--hw_r == r.index))
+ return r;
+
+ free_hw_temp(rp, rp->temps[r.index]);
+ r.index = hw_r;
+
+ rp->hwreg_flag |= (1 << hw_r);
+ rp->hwreg_used |= (1 << hw_r);
+ return r;
+}
+
static pfs_reg_t negate(pfs_reg_t r)
{
r.negate = 1;
@@ -278,9 +306,10 @@
case SWIZZLE_W:
ssrc = get_temp_reg(rp);
src.v_swz = 6; /* wzy */
- if (cnt == 3 && src.index != REG_TYPE_CONST) {
+ if (cnt == 3) {
emit_arith(rp, PFS_OP_MAD, ssrc, WRITEMASK_XYZW, src,
pfs_one, pfs_zero, 0);
r->index = ssrc.index;
+ r->type = src.type;
r->v_swz = 1; /* xxx */
r->has_w = GL_TRUE;
r->valid = GL_TRUE;
@@ -291,6 +320,7 @@
r->valid = GL_TRUE;
ssrc.v_swz = 1; /* xxx */
emit_arith(rp, PFS_OP_MAD, *r, s_mask[mask].mask, ssrc,
pfs_one, pfs_zero, 0);
+ free_temp(rp, ssrc.index); /* don't need this anymore */
}
break;
case SWIZZLE_ONE:
@@ -313,12 +343,6 @@
int v_matched = 0;
int w_swizzle = GET_SWZ(arbswz, 3);
- /* Is this the best way? Or should we just do this as we do
- * for temps..
- */
- if (src.type == REG_TYPE_CONST)
- return swizzle_const(rp, src, arbswz);
-
do {
do {
#define CUR_HASH (v_swiz[r.v_swz].hash & s_mask[c_mask].hash)
@@ -424,9 +448,25 @@
int opcode)
{
pfs_reg_t coord = t_src(rp, fpi->SrcReg[0]);
- pfs_reg_t dest = t_dst(rp, fpi->DstReg);
+ pfs_reg_t dest = pfs_default_reg, rdest = pfs_default_reg;
int unit = fpi->TexSrcUnit;
int hwsrc, hwdest, flags = 0;
+ int to_output = 0;
+
+ switch (fpi->DstReg.File) {
+ case PROGRAM_TEMPORARY:
+ dest.index = fpi->DstReg.Index;
+ dest.valid = GL_TRUE;
+ break;
+ case PROGRAM_OUTPUT: /* AFAIK r300 doesn't support this directly */
+ to_output = 1;
+ dest = get_temp_tex(rp);
+ rdest = t_dst(rp, fpi->DstReg);
+ break;
+ default:
+ ERROR("Unknown DstReg.File for tex result\n");
+ return;
+ }
if ((coord.type == REG_TYPE_TEMP && (rp->used_in_node & (1 <<
coord.index))) ||
(dest.type == REG_TYPE_TEMP && (rp->used_in_node & (1 <<
dest.index)))) {
@@ -469,6 +509,9 @@
| flags;
rp->node[rp->cur_node].tex_end++;
+
+ if (to_output)
+ emit_arith(rp, PFS_OP_MAD, rdest, WRITEMASK_XYZW, dest,
pfs_one, pfs_zero, 0);
}
static void emit_arith(struct r300_fragment_program *rp, int op,
@@ -510,7 +553,7 @@
s_idx = v_idx;
hwsrc[i] = rp->temps[src[i].index];
- rp->used_in_node |= (1 << src[i].index);
+ rp->used_in_node |= (1 << hwsrc[i]);
break;
case REG_TYPE_CONST:
hwsrc[i] = src[i].index;
@@ -525,7 +568,7 @@
switch (dest.type) {
case REG_TYPE_TEMP:
hwdest = rp->temps[dest.index];
- rp->used_in_node |= (1 << src[i].index);
+ rp->used_in_node |= (1 << hwdest);
break;
case REG_TYPE_OUTPUT:
hwdest = 0;
@@ -649,6 +692,7 @@
negate(src0), src2,
src2, 0);
emit_arith(rp, PFS_OP_MAD, t_dst(rp, fpi->DstReg),
fpi->DstReg.WriteMask,
src0, src1, temp,
flags);
+ free_temp(rp, temp.index);
break;
case FP_OPCODE_MAD:
emit_arith(rp, PFS_OP_MAD, t_dst(rp, fpi->DstReg),
fpi->DstReg.WriteMask,
