Jerome Glisse wrote:
On 5/19/05, Keith Whitwell <[EMAIL PROTECTED]> wrote:
Vladimir Dergachev wrote:
Hi Aapo, Ben, Jerome, Nicolai:
I recently checked fresh code from CVS and was pleasantly surprised to see that all Quake3 levels that were broken are now perfect - in fact I cannot find anything that is amiss !
Do you think it would be a good idea to tag the current code and make
a snapshot ?
Sounds like a good idea.
Why not :)
So have you guys given any consideration to moving the r300 driver into
mesa proper? CVS access shouldn't be a problem, fwiw...
I think too few of us have access to mesa cvs (at least I don't have it); anyway, I could ask for it. But there are still missing parts. I would like to know if anyone knows what is still not working, so that we can make a to-do list of that...
What I see missing is: (I may not see everything :-)
-deeper testing of tcl program generated with mesa
-tex env
-fragment program
Also, I think there's still some weirdness with a couple of texture formats, namely GL_ALPHA and GL_LUMINANCE_ALPHA. This is clearly seen in Mesa/progs/demos/texenv.c.
Someone, I believe it was Aapo, said that they see white lines across the
screen when the framerate is fairly high. I didn't see this up until yesterday
when I had to change from my 9600pro to a 9600XT (I killed the card moving
it between machines somehow).
Does ... work ?
I've also been working on some fragment program stuff. I have attached what
-z offset -stencil
Right now I am on the pixel shader. After doing some tests, I don't think we can
use something similar to the i915 emit_arith; i915 hardware is far easier
to program than r300. I am coding another approach and hope to have it
done by the end of this week.
I've done so far, which works quite well with Keith's texenv program generation
that's in Mesa cvs. Not all arb_f_p opcodes are implemented, but I think everything's
there that the texenv stuff needs.
I was planning on committing this soon, but you may have a better approach than I
took so I'll wait a bit.
Ben Skeggs.
Moreover i see that 9800 are reported to crash with the driver ? Is this still true ?
Jerome Glisse
------------------------------------------------------- This SF.Net email is sponsored by Oracle Space Sweepstakes Want to be the first software developer in space? Enter now for the Oracle Space Sweepstakes! http://ads.osdn.com/?ad_idt12&alloc_id344&op=click -- _______________________________________________ Dri-devel mailing list [email protected] https://lists.sourceforge.net/lists/listinfo/dri-devel
Gemeinsame Unterverzeichnisse: r300.old/CVS und r300/CVS.
diff -Nu r300.old/Makefile r300/Makefile
--- r300.old/Makefile 2005-04-24 19:08:31.000000000 +1000
+++ r300/Makefile 2005-05-19 03:24:11.000000000 +1000
@@ -37,6 +37,7 @@
r300_texstate.c \
r300_texprog.c \
r300_vertexprog.c \
+ r300_fragprog.c \
r300_shader.c \
r300_maos.c
# \
diff -Nu r300.old/r300_context.c r300/r300_context.c
--- r300.old/r300_context.c 2005-05-19 23:50:38.000000000 +1000
+++ r300/r300_context.c 2005-05-19 23:47:41.000000000 +1000
@@ -80,7 +80,9 @@
"GL_ARB_texture_mirrored_repeat",
"GL_ARB_vertex_buffer_object",
"GL_ARB_vertex_program",
- //"GL_ARB_fragment_program",
+#if USE_ARB_F_P == 1
+ "GL_ARB_fragment_program",
+#endif
"GL_EXT_blend_equation_separate",
"GL_EXT_blend_func_separate",
"GL_EXT_blend_minmax",
@@ -325,7 +327,19 @@
ctx->Const.MaxVertexProgramLocalParams=256; // r420
ctx->Const.MaxVertexProgramEnvParams=256; // r420
ctx->Const.MaxVertexProgramAddressRegs=1;
-
+
+#if USE_ARB_F_P
+ ctx->Const.MaxFragmentProgramTemps = PFS_NUM_TEMP_REGS;
+ ctx->Const.MaxFragmentProgramAttribs = 11; /* copy i915... */
+ ctx->Const.MaxFragmentProgramLocalParams = PFS_NUM_CONST_REGS;
+ ctx->Const.MaxFragmentProgramEnvParams = PFS_NUM_CONST_REGS;
+ ctx->Const.MaxFragmentProgramAluInstructions = PFS_MAX_ALU_INST;
+ ctx->Const.MaxFragmentProgramTexInstructions = PFS_MAX_TEX_INST;
+ ctx->Const.MaxFragmentProgramInstructions =
PFS_MAX_ALU_INST+PFS_MAX_TEX_INST;
+ ctx->Const.MaxFragmentProgramAddressRegs = 0; /* and these are?? */
+ ctx->_MaintainTexEnvProgram = GL_TRUE;
+#endif
+
driInitExtensions(ctx, card_extensions, GL_TRUE);
radeonInitSpanFuncs(ctx);
diff -Nu r300.old/r300_context.h r300/r300_context.h
--- r300.old/r300_context.h 2005-05-19 23:50:38.000000000 +1000
+++ r300/r300_context.h 2005-05-19 23:47:41.000000000 +1000
@@ -47,6 +47,8 @@
#include "colormac.h"
#include "radeon_context.h"
+#define USE_ARB_F_P 1
+
struct r300_context;
typedef struct r300_context r300ContextRec;
typedef struct r300_context *r300ContextPtr;
@@ -83,7 +85,11 @@
and pixel_shader structure later on */
#define CARD32 GLuint
#include "vertex_shader.h"
+#if USE_ARB_F_P == 1
+#include "r300_fragprog.h"
+#else
#include "pixel_shader.h"
+#endif
#undef CARD32
static __inline__ uint32_t r300PackFloat32(float fl)
@@ -588,6 +594,69 @@
int tex_regs[8];
};
+#if USE_ARB_F_P == 1
+#define PFS_MAX_ALU_INST 64
+#define PFS_MAX_TEX_INST 64
+#define PFS_MAX_TEX_INDIRECT 4
+#define PFS_NUM_TEMP_REGS 32
+#define PFS_NUM_CONST_REGS 32
+/* Per-program state for the ARB_fragment_program path: the Mesa program
+ * being translated, the generated r300 TEX/ALU instruction words, and the
+ * bookkeeping used while generating them.
+ * Array sizes use the PFS_* limits defined above instead of repeating
+ * the literal values. */
+struct r300_fragment_program {
+    struct fragment_program mesa_program;
+
+    GLcontext *ctx;
+    GLboolean translated;       /* set by translate_fragment_shader() on success */
+    GLboolean error;            /* latched by the ERROR() macro */
+
+    struct {
+        int length;
+        GLuint inst[PFS_MAX_TEX_INST];
+    } tex;
+
+    struct {
+        int length;
+        struct {
+            GLuint inst0;
+            GLuint inst1;
+            GLuint inst2;
+            GLuint inst3;
+        } inst[PFS_MAX_ALU_INST];
+    } alu;
+    int v_pos;                  /* next slot for a vector write (inst0/inst1) */
+    int s_pos;                  /* next slot for a scalar write (inst2/inst3) */
+
+    struct {
+        int tex_offset;
+        int tex_end;
+        int alu_offset;
+        int alu_end;
+    } node[PFS_MAX_TEX_INDIRECT];
+    int cur_node;
+    int active_nodes;
+    int first_node_has_tex;
+
+    int alu_offset;
+    int alu_end;
+    int tex_offset;
+    int tex_end;
+
+    struct {
+        float x;
+        float y;
+        float z;
+        float w;
+    } param[PFS_NUM_CONST_REGS];
+    int param_length;           /* number of param[] entries in use */
+
+    GLuint temps[PFS_NUM_TEMP_REGS];    /* program temp index -> hw temp register */
+    int temp_flag;              /* bitmask of program temp indices in use */
+    GLuint used_in_node;        /* bitmask of temps touched by ALU code in the current node */
+    GLuint inputs[32]; /* probably don't need 32... */
+
+    int hwreg_flag;             /* bitmask of hw registers currently allocated */
+    int hwreg_used;             /* bitmask of hw registers ever allocated */
+    int max_temp_idx;           /* highest bit set in hwreg_used */
+};
+#else
/* 64 appears to be the maximum */
#define PSF_MAX_PROGRAM_LENGTH 64
@@ -652,6 +721,7 @@
float w;
} param[MAX_PIXEL_SHADER_PARAMS];
};
+#endif // USE_ARB_F_P
/* 8 is somewhat bogus... it is probably something like 24 */
#define R300_MAX_AOS_ARRAYS 8
@@ -682,8 +752,9 @@
struct r300_texture_state texture;
struct r300_vap_reg_state vap_reg;
struct r300_vertex_shader_state vertex_shader;
+#if USE_ARB_F_P == 0
struct r300_pixel_shader_state pixel_shader;
-
+#endif
struct r300_dma_region aos[R300_MAX_AOS_ARRAYS];
int aos_count;
diff -Nu r300.old/r300_fragprog.c r300/r300_fragprog.c
--- r300.old/r300_fragprog.c 1970-01-01 11:00:00.000000000 +1100
+++ r300/r300_fragprog.c 2005-05-19 04:23:09.000000000 +1000
@@ -0,0 +1,885 @@
+
+/*TODO'S
+ *
+ * - Negate on individual components (implement with swizzle code?)
+ * - Implement remaining arb_f_p opcodes
+ * - Reuse input/temp regs, if they're no longer needed
+ * - If a constant 0.0/1.0/0.5 is emitted, replace with r300 native types
rather
+ * than consuming a param reg.
+ */
+
+#include "glheader.h"
+#include "macros.h"
+#include "enums.h"
+
+#include "program.h"
+#include "nvfragprog.h"
+#include "r300_context.h"
+#if USE_ARB_F_P == 1
+#include "r300_fragprog.h"
+#include "r300_reg.h"
+
+#define PFS_INVAL 0xFFFFFFFF
+
+static void dump_program(struct r300_fragment_program *rp);
+static void emit_arith(struct r300_fragment_program *rp, int op,
+ pfs_reg_t dest, int mask,
+ pfs_reg_t src0, pfs_reg_t src1, pfs_reg_t src2,
+ int flags);
+
+/***************************************
+ * begin: useful data structures for fragment program generation
+ ***************************************/
+
+/* description of r300 native hw instructions
+ *
+ * Indexed by the PFS_OP_* value passed to emit_arith(). argc is the number
+ * of source operands read; v_op and s_op are OR'd into the vector (inst0)
+ * and scalar (inst2) instruction words. PFS_INVAL fills the slot of an
+ * opcode that has no entry for that half; emit_arith() does not check for
+ * it, so callers must mask the write accordingly.
+ */
+const struct {
+ const char *name;
+ int argc;
+ int v_op;
+ int s_op;
+} r300_fpop[] = {
+ { "MAD", 3, R300_FPI0_OUTC_MAD, R300_FPI2_OUTA_MAD },
+ { "DP3", 2, R300_FPI0_OUTC_DP3, PFS_INVAL },
+ { "DP4", 2, R300_FPI0_OUTC_DP4, R300_FPI2_OUTA_DP4 },
+ { "MIN", 2, R300_FPI0_OUTC_MIN, R300_FPI2_OUTA_MIN },
+ { "MAX", 2, R300_FPI0_OUTC_MAX, R300_FPI2_OUTA_MAX },
+ { "CMP", 3, R300_FPI0_OUTC_CMP, R300_FPI2_OUTA_CMP },
+ { "FRC", 1, R300_FPI0_OUTC_FRC, R300_FPI2_OUTA_FRC },
+/* should the vector insns below be REPL_ALPHA? */
+ { "EX2", 1, PFS_INVAL, R300_FPI2_OUTA_EX2 },
+ { "LG2", 1, PFS_INVAL, R300_FPI2_OUTA_LG2 },
+ { "RCP", 1, PFS_INVAL, R300_FPI2_OUTA_RCP },
+ { "RSQ", 1, PFS_INVAL, R300_FPI2_OUTA_RSQ },
+};
+
+/* Build a Mesa swizzle word from three component names (X, Y, Z, W, ZERO,
+ * ONE); the fourth slot is always filled with SWIZZLE_ZERO. */
+#define MAKE_SWZ3(x, y, z) (MAKE_SWIZZLE4(SWIZZLE_##x, \
+                                          SWIZZLE_##y, \
+                                          SWIZZLE_##z, \
+                                          SWIZZLE_ZERO))
+
+/* vector swizzles r300 can support natively, with a couple of
+ * cases we handle specially
+ *
+ * pfs_reg_t.v_swz/pfs_reg_t.s_swz is an index into this table
+ *
+ * emit_arith() forms the hw argument field as base + argument number *
+ * stride. Entries 6 (wzy), 8 (constant one) and 9 (constant zero) are
+ * referenced by literal index elsewhere in this file, so the order of
+ * the entries must not change. The PFS_INVAL entry terminates the table.
+ **/
+static const struct r300_pfv_swizzle {
+ const char *name;
+ GLuint hash; /* swizzle value this matches */
+ GLboolean native;
+ GLuint base; /* base value for hw swizzle */
+ GLuint stride; /* difference in base between arg0/1/2 */
+ GLboolean dep_sca;
+} v_swiz[] = {
+/* native swizzles */
+ { "xyz", MAKE_SWZ3(X, Y, Z), GL_TRUE, R300_FPI0_ARGC_SRC0C_XYZ, 4,
GL_FALSE },
+ { "xxx", MAKE_SWZ3(X, X, X), GL_TRUE, R300_FPI0_ARGC_SRC0C_XXX, 4,
GL_FALSE },
+ { "yyy", MAKE_SWZ3(Y, Y, Y), GL_TRUE, R300_FPI0_ARGC_SRC0C_YYY, 4,
GL_FALSE },
+ { "zzz", MAKE_SWZ3(Z, Z, Z), GL_TRUE, R300_FPI0_ARGC_SRC0C_ZZZ, 4,
GL_FALSE },
+ { "yzx", MAKE_SWZ3(Y, Z, X), GL_TRUE, R300_FPI0_ARGC_SRC0C_YZX, 1,
GL_FALSE },
+ { "zxy", MAKE_SWZ3(Z, X, Y), GL_TRUE, R300_FPI0_ARGC_SRC0C_ZXY, 1,
GL_FALSE },
+ { "wzy", MAKE_SWZ3(W, Z, Y), GL_TRUE, R300_FPI0_ARGC_SRC0CA_WZY, 1,
GL_TRUE },
+/* special cases */
+ { NULL, MAKE_SWZ3(W, W, W), GL_FALSE, 0, 0, GL_FALSE},
+ { NULL, MAKE_SWZ3(ONE, ONE, ONE), GL_FALSE, R300_FPI0_ARGC_ONE, 0,
GL_FALSE},
+ { NULL, MAKE_SWZ3(ZERO, ZERO, ZERO), GL_FALSE, R300_FPI0_ARGC_ZERO, 0,
GL_FALSE},
+ { NULL, PFS_INVAL, GL_FALSE, 0, 0, 0 },
+};
+
+
+#define SWZ_X_MASK (7 << 0)
+#define SWZ_Y_MASK (7 << 3)
+#define SWZ_Z_MASK (7 << 6)
+#define SWZ_W_MASK (7 << 9)
+/* used during matching of non-native swizzles
+ *
+ * swizzle() walks this table in order, so the full xyz group is tried
+ * first, then pairs, then single components. The PFS_INVAL entry
+ * terminates the walk.
+ */
+static const struct {
+ GLuint hash; /* used to mask matching swizzle components */
+ int mask; /* actual outmask */
+ int count; /* count of components matched */
+} s_mask[] = {
+ { SWZ_X_MASK|SWZ_Y_MASK|SWZ_Z_MASK, 1|2|4, 3},
+ { SWZ_X_MASK|SWZ_Y_MASK, 1|2, 2},
+ { SWZ_X_MASK|SWZ_Z_MASK, 1|4, 2},
+ { SWZ_Y_MASK|SWZ_Z_MASK, 2|4, 2},
+ { SWZ_X_MASK, 1, 1},
+ { SWZ_Y_MASK, 2, 1},
+ { SWZ_Z_MASK, 4, 1},
+ { PFS_INVAL, PFS_INVAL, PFS_INVAL}
+};
+
+/* mapping from SWIZZLE_* to r300 native values for scalar insns
+ *
+ * Indexed by pfs_reg_t.s_swz; emit_arith() forms the hw argument field as
+ * base + argument number * stride. The entries are listed in the order
+ * x, y, z, w, 0, 1.
+ */
+static const struct {
+ const char *name;
+ int base; /* hw value of swizzle */
+ int stride; /* difference between SRC0/1/2 */
+ GLboolean dep_vec;
+} s_swiz[] = {
+ { "x", R300_FPI2_ARGA_SRC0C_X, 1, GL_TRUE },
+ { "y", R300_FPI2_ARGA_SRC0C_Y, 1, GL_TRUE },
+ { "z", R300_FPI2_ARGA_SRC0C_Z, 1, GL_TRUE },
+ { "w", R300_FPI2_ARGA_SRC0A , 1, GL_FALSE },
+ { "0", R300_FPI2_ARGA_ZERO , 0, GL_FALSE },
+ { "1", R300_FPI2_ARGA_ONE , 0, GL_FALSE },
+};
+
+/* Template register that the helpers in this file copy and then adjust:
+ * a temp at index 0, reading xyz for the vector part and w for the scalar
+ * part, with no flags set and not yet marked valid. */
+const pfs_reg_t pfs_default_reg = {
+    .type   = REG_TYPE_TEMP,
+    .index  = 0,
+    .v_swz  = 0,            /* matches XYZ in table */
+    .s_swz  = SWIZZLE_W,
+    .vcross = 0,
+    .scross = 0,
+    .negate = 0,
+    .has_w  = GL_FALSE,
+    .valid  = GL_FALSE
+};
+
+/* constant one source */
+const pfs_reg_t pfs_one = {
+ type: REG_TYPE_CONST,
+ index: 0,
+ v_swz: 8 /* matches constant one in table */,
+ s_swz: SWIZZLE_ONE,
+ valid: GL_TRUE
+};
+
+/* constant zero source */
+const pfs_reg_t pfs_zero = {
+ type: REG_TYPE_CONST,
+ index: 0,
+ v_swz: 9 /* matches constant zero in table */,
+ s_swz: SWIZZLE_ZERO,
+ valid: GL_TRUE
+};
+
+/***************************************
+ * end: data structures
+ ***************************************/
+/* Print a translation error to stderr, prefixed with file and function, and
+ * latch rp->error. Requires a variable named rp in the caller's scope. */
+#define ERROR(fmt, args...) do { \
+ fprintf(stderr, "%s::%s(): " fmt "\n", __FILE__, __func__,
##args); \
+ rp->error = GL_TRUE; \
+} while(0)
+
+/* Claim the lowest-numbered hardware register not set in rp->hwreg_flag.
+ * Marks it in both hwreg_flag and hwreg_used and returns its number.
+ * When none is free, flags an error and returns 0. */
+static int get_hw_temp(struct r300_fragment_program *rp)
+{
+    int bit = ffs(~rp->hwreg_flag);     /* 1-based; 0 means nothing free */
+    int hwreg;
+
+    if (bit == 0) {
+        ERROR("Out of hardware temps\n");
+        return 0;
+    }
+
+    hwreg = bit - 1;
+    rp->hwreg_flag |= (1 << hwreg);
+    rp->hwreg_used |= (1 << hwreg);
+
+    return hwreg;
+}
+
+/* Append the four floats at cp to the program's constant table and return
+ * a CONST register that refers to the new slot.
+ *
+ * If the table is already full, an error is flagged and the returned
+ * register is left invalid instead of writing past the end of rp->param[].
+ */
+static pfs_reg_t emit_const4fv(struct r300_fragment_program *rp, GLfloat *cp)
+{
+    pfs_reg_t r = pfs_default_reg;
+    const int max_params = (int)(sizeof(rp->param) / sizeof(rp->param[0]));
+
+    r.type = REG_TYPE_CONST;
+
+    if (rp->param_length >= max_params) {
+        ERROR("Out of constant registers\n");
+        return r;       /* r.valid is still GL_FALSE */
+    }
+
+    r.index = rp->param_length++;
+    r.valid = GL_TRUE;
+
+    rp->param[r.index].x = cp[0];
+    rp->param[r.index].y = cp[1];
+    rp->param[r.index].z = cp[2];
+    rp->param[r.index].w = cp[3];
+
+    return r;
+}
+
+/* Allocate a program-level temporary: take the lowest index not set in
+ * rp->temp_flag, back it with a hardware register from get_hw_temp(), and
+ * return a valid TEMP register for it. When no index is free, an error is
+ * flagged and the returned register is not valid. */
+static pfs_reg_t get_temp_reg(struct r300_fragment_program *rp)
+{
+    pfs_reg_t reg = pfs_default_reg;
+    int slot = ffs(~rp->temp_flag);     /* 1-based; 0 means nothing free */
+
+    if (slot == 0) {
+        ERROR("Out of program temps\n");
+        return reg;
+    }
+
+    slot -= 1;
+    reg.index = slot;
+    rp->temp_flag |= (1 << slot);
+    rp->temps[slot] = get_hw_temp(rp);
+
+    reg.valid = GL_TRUE;
+    return reg;
+}
+
+/* Return r with its sign flipped.
+ *
+ * The flag is toggled rather than set: t_src() already sets r.negate for a
+ * source written as "-reg", so negating such an operand again (for example
+ * the second operand of SUB, or the first of LRP) must yield the
+ * un-negated value.
+ */
+static pfs_reg_t negate(pfs_reg_t r)
+{
+    r.negate = !r.negate;
+    return r;
+}
+
+/* Apply an ARB swizzle to a constant at translation time.
+ *
+ * Reads the four values of the constant src refers to, builds the swizzled
+ * vector (SWIZZLE_ZERO/SWIZZLE_ONE become literal 0.0/1.0) and emits it as
+ * a new constant, so the returned register can be read unswizzled.
+ */
+static pfs_reg_t swizzle_const(struct r300_fragment_program *rp,
+                               pfs_reg_t src, GLuint arbswz)
+{
+    GLfloat oc[4], nc[4];
+    int i;
+
+    oc[0] = rp->param[src.index].x;
+    oc[1] = rp->param[src.index].y;
+    oc[2] = rp->param[src.index].z;
+    oc[3] = rp->param[src.index].w;
+
+    for (i = 0; i < 4; i++) {
+        int c = GET_SWZ(arbswz, i);
+        switch (c) {
+        case SWIZZLE_ONE:
+            nc[i] = 1.0;
+            break;
+        case SWIZZLE_ZERO:
+            nc[i] = 0.0;
+            break;
+        default:
+            nc[i] = oc[c];
+            break;
+        }
+    }
+
+    /* emit the swizzled values; emitting oc would discard the swizzle */
+    return emit_const4fv(rp, nc);
+}
+
+/* Full xyz match against a native swizzle: no instruction is needed, the
+ * result register simply aliases the source register. Always reports three
+ * matched components. rp is not used. */
+static int swz_native(struct r300_fragment_program *rp,
+                      pfs_reg_t src, pfs_reg_t *r)
+{
+    (void)rp;
+
+    r->type  = src.type;
+    r->index = src.index;
+    r->has_w = GL_TRUE;
+    r->valid = GL_TRUE;
+
+    return 3;
+}
+
+/* Partial match: copy the components selected by s_mask[mask] from src,
+ * read through the candidate swizzle currently held in r->v_swz, into the
+ * result register r. A temp is allocated for r on first use.
+ * Returns the number of components this match covered.
+ *
+ * Two defects of the original are fixed here:
+ *  - the MAD source was an uninitialised local; it is now src with the
+ *    candidate swizzle applied;
+ *  - r->index received a hardware register number from get_hw_temp(), but
+ *    emit_arith() looks TEMP indices up in rp->temps[], so a program temp
+ *    from get_temp_reg() is required.
+ */
+static int swz_emit_partial(struct r300_fragment_program *rp,
+                            pfs_reg_t src, pfs_reg_t *r, int mask)
+{
+    pfs_reg_t ssrc = src;
+
+    ssrc.v_swz = r->v_swz;
+
+    if (!r->valid) {
+        pfs_reg_t tmp = get_temp_reg(rp);
+        r->index = tmp.index;
+    }
+    r->valid = GL_TRUE;
+
+    emit_arith(rp, PFS_OP_MAD, *r, s_mask[mask].mask,
+               ssrc, pfs_one, pfs_zero, 0);
+
+    return s_mask[mask].count;
+}
+
+/* Handle a match against one of the non-native entries of v_swiz[].
+ *
+ * Only the "www" case is implemented: src is first copied through the
+ * native "wzy" swizzle into a scratch temp, whose x component then holds
+ * the original w, and that temp is read back as "xxx".
+ * Returns the number of components covered, or 0 after flagging an error
+ * for the unimplemented constant ONE/ZERO swizzles.
+ *
+ * Fixes relative to the original:
+ *  - the constant test compared src.index with REG_TYPE_CONST; it is the
+ *    register type that must be tested;
+ *  - r->index received a hardware register number from get_hw_temp(), but
+ *    emit_arith() looks TEMP indices up in rp->temps[], so a program temp
+ *    from get_temp_reg() is required.
+ *
+ * NOTE(review): in the full-match branch r aliases the scratch temp and
+ * has_w is set, so a later scalar read of r.x/y/z sees the wzy-permuted
+ * values rather than the source's - confirm whether has_w should stay
+ * GL_FALSE here.
+ */
+static int swz_special_case(struct r300_fragment_program *rp,
+                            pfs_reg_t src, pfs_reg_t *r, int mask)
+{
+    pfs_reg_t ssrc;
+    int cnt = s_mask[mask].count;
+
+    switch (GET_SWZ(v_swiz[r->v_swz].hash, 0)) {
+    case SWIZZLE_W:
+        ssrc = get_temp_reg(rp);
+        src.v_swz = 6; /* wzy */
+        if (cnt == 3 && src.type != REG_TYPE_CONST) {
+            emit_arith(rp, PFS_OP_MAD, ssrc, WRITEMASK_XYZW,
+                       src, pfs_one, pfs_zero, 0);
+            r->index = ssrc.index;
+            r->v_swz = 1; /* xxx */
+            r->has_w = GL_TRUE;
+            r->valid = GL_TRUE;
+        } else {
+            emit_arith(rp, PFS_OP_MAD, ssrc, WRITEMASK_XYZ,
+                       src, pfs_one, pfs_zero, 0);
+            if (!r->valid) {
+                pfs_reg_t tmp = get_temp_reg(rp);
+                r->index = tmp.index;
+            }
+            r->valid = GL_TRUE;
+            ssrc.v_swz = 1; /* xxx */
+            emit_arith(rp, PFS_OP_MAD, *r, s_mask[mask].mask,
+                       ssrc, pfs_one, pfs_zero, 0);
+        }
+        break;
+    case SWIZZLE_ONE:
+    case SWIZZLE_ZERO:
+    default:
+        ERROR("unknown/unimplemented swizzle\n");
+        return 0;
+    }
+
+    return cnt;
+}
+
+/* Translate an ARB swizzle of src into something the hardware can read.
+ *
+ * Constants are swizzled at translation time. For other registers the xyz
+ * part of arbswz is matched against v_swiz[], trying the full xyz group
+ * first and then progressively smaller component groups from s_mask[].
+ * A full native match aliases the source; anything else is assembled
+ * component-wise in a temp with MAD instructions. The w component is
+ * handled once all three of x, y and z have been matched.
+ *
+ * Fixes relative to the original:
+ *  - the candidate index r.v_swz is reset after every pass over v_swiz[];
+ *    previously the second pass started on the terminator entry and then
+ *    read past the end of the table;
+ *  - components that have been matched are overwritten with all-ones
+ *    instead of zero, because zero is SWIZZLE_X and would match again in
+ *    a later pass and over-count v_matched;
+ *  - a result assembled in a temp is returned with the identity (xyz)
+ *    swizzle instead of the last candidate that happened to match.
+ */
+static pfs_reg_t swizzle(struct r300_fragment_program *rp,
+                         pfs_reg_t src,
+                         GLuint arbswz)
+{
+    pfs_reg_t r = pfs_default_reg;
+
+    int c_mask = 0;
+    int v_matched = 0;
+    int w_swizzle = GET_SWZ(arbswz, 3);
+
+    /* Is this the best way? Or should we just do this as we do
+     * for temps..
+     */
+    if (src.type == REG_TYPE_CONST)
+        return swizzle_const(rp, src, arbswz);
+
+    do {
+        do {
+#define CUR_HASH (v_swiz[r.v_swz].hash & s_mask[c_mask].hash)
+            if (CUR_HASH == (arbswz & s_mask[c_mask].hash)) {
+                if (v_swiz[r.v_swz].native == GL_FALSE)
+                    v_matched += swz_special_case(rp, src, &r, c_mask);
+                else if (s_mask[c_mask].count == 3)
+                    v_matched += swz_native(rp, src, &r);
+                else
+                    v_matched += swz_emit_partial(rp, src, &r, c_mask);
+
+                if (v_matched == 3) {
+                    /* a partial match means r was built up in a temp,
+                     * whose components are already in xyz order */
+                    if (s_mask[c_mask].count != 3)
+                        r.v_swz = 0;
+
+                    if (!r.has_w) { /* only if we need to copy w */
+                        src.s_swz = w_swizzle;
+                        r.valid = GL_TRUE;
+                        r.s_swz = SWIZZLE_W;
+                        emit_arith(rp, PFS_OP_MAD, r, WRITEMASK_W,
+                                   src, pfs_one, pfs_zero, 0);
+                    } else
+                        r.s_swz = w_swizzle;
+
+                    if (r.v_swz == 6 /* WZY */)
+                        r.vcross = GL_TRUE;
+                    if (r.s_swz >= SWIZZLE_X && r.s_swz <= SWIZZLE_Z)
+                        r.scross = GL_TRUE;
+                    return r;
+                }
+
+                /* mark the matched components with a value no table
+                 * entry uses, so they cannot match again */
+                arbswz |= (PFS_INVAL & s_mask[c_mask].hash);
+            }
+        } while (v_swiz[++r.v_swz].hash != PFS_INVAL);
+        r.v_swz = 0;    /* restart the candidate walk for the next mask */
+    } while (s_mask[++c_mask].hash != PFS_INVAL);
+#undef CUR_HASH
+
+    ERROR("should NEVER get here\n");
+    return r;
+}
+
+/* Convert a Mesa source operand into a pfs_reg_t.
+ * Temporaries and inputs keep their Mesa index; every kind of parameter is
+ * copied into the program's constant table first. The operand's swizzle is
+ * then applied (which may emit instructions) and finally its negate flag.
+ * An unknown register file flags an error and returns an invalid register. */
+static inline pfs_reg_t t_src(struct r300_fragment_program *rp,
+                              struct fp_src_register fpsrc) {
+    pfs_reg_t reg = pfs_default_reg;
+    GLfloat *values = 0;
+    GLboolean is_param = GL_FALSE;
+
+    switch (fpsrc.File) {
+    case PROGRAM_TEMPORARY:
+        reg.index = fpsrc.Index;
+        break;
+    case PROGRAM_INPUT:
+        reg.type = REG_TYPE_INPUT;
+        reg.index = fpsrc.Index;
+        break;
+    case PROGRAM_LOCAL_PARAM:
+        values = rp->mesa_program.Base.LocalParams[fpsrc.Index];
+        is_param = GL_TRUE;
+        break;
+    case PROGRAM_ENV_PARAM:
+        values = rp->ctx->FragmentProgram.Parameters[fpsrc.Index];
+        is_param = GL_TRUE;
+        break;
+    case PROGRAM_STATE_VAR:
+    case PROGRAM_NAMED_PARAM:
+        values = rp->mesa_program.Parameters->ParameterValues[fpsrc.Index];
+        is_param = GL_TRUE;
+        break;
+    default:
+        ERROR("unknown SrcReg->File %x\n", fpsrc.File);
+        return reg;
+    }
+
+    if (is_param)
+        reg = emit_const4fv(rp, values);
+
+    reg.valid = GL_TRUE;
+    reg = swizzle(rp, reg, fpsrc.Swizzle);
+
+    if (fpsrc.NegateBase)
+        reg.negate = GL_TRUE;
+
+    return reg;
+}
+
+/* Convert a Mesa destination operand into a pfs_reg_t.
+ * Temporaries map straight across; of the outputs only index 0 is
+ * accepted. Anything else flags an error and yields an invalid register. */
+static inline pfs_reg_t t_dst(struct r300_fragment_program *rp,
+                              struct fp_dst_register dest) {
+    pfs_reg_t reg = pfs_default_reg;
+
+    if (dest.File == PROGRAM_TEMPORARY) {
+        reg.index = dest.Index;
+        reg.valid = GL_TRUE;
+        return reg;
+    }
+
+    if (dest.File != PROGRAM_OUTPUT) {
+        ERROR("Bad DstReg->File 0x%x\n", dest.File);
+        return reg;
+    }
+
+    reg.type = REG_TYPE_OUTPUT;
+    if (dest.Index == 0) {
+        reg.valid = GL_TRUE;
+    } else if (dest.Index == 1) {
+        ERROR("I don't know how to write depth!\n");
+    } else {
+        ERROR("Bad DstReg->Index 0x%x\n", dest.Index);
+    }
+    return reg;
+}
+
+/* Emit one texture instruction for fpi using the given hw opcode.
+ *
+ * If the coordinate or the destination temp has already been touched by
+ * ALU code in the current node, a new node (one more level of texture
+ * indirection) is started first.
+ *
+ * Fixes relative to the original:
+ *  - the node limit test was off by one and allowed a write to
+ *    rp->node[4], one past the end of the array;
+ *  - rp->tex.inst[] is no longer written once it is full;
+ *  - nothing is emitted when translating the operands already failed.
+ *
+ * NOTE(review): the destination is always looked up in rp->temps[], even
+ * when t_dst() returned an OUTPUT register - confirm how a texture fetch
+ * straight into an output should be handled.
+ */
+static void emit_tex(struct r300_fragment_program *rp,
+                     struct fp_instruction *fpi,
+                     int opcode)
+{
+    const int max_nodes = (int)(sizeof(rp->node) / sizeof(rp->node[0]));
+    const int max_tex = (int)(sizeof(rp->tex.inst) / sizeof(rp->tex.inst[0]));
+    pfs_reg_t coord = t_src(rp, fpi->SrcReg[0]);
+    pfs_reg_t dest = t_dst(rp, fpi->DstReg);
+    int unit = fpi->TexSrcUnit;
+    int hwsrc, hwdest, flags = 0;
+
+    if (rp->error)
+        return;
+
+    if ((coord.type == REG_TYPE_TEMP && (rp->used_in_node & (1 << coord.index))) ||
+        (dest.type == REG_TYPE_TEMP && (rp->used_in_node & (1 << dest.index)))) {
+
+        if (rp->cur_node >= max_nodes - 1) {
+            ERROR("too many levels of texture indirection\n");
+            return;
+        }
+        rp->cur_node++;
+
+        rp->node[rp->cur_node].tex_offset = rp->tex.length;
+        rp->node[rp->cur_node].alu_offset = rp->alu.length;
+        rp->node[rp->cur_node].tex_end = -1;
+        rp->node[rp->cur_node].alu_end = -1;
+        rp->active_nodes++;
+
+        rp->v_pos = rp->alu.length;
+        rp->s_pos = rp->alu.length;
+        rp->used_in_node = 0;
+    }
+
+    if (rp->tex.length >= max_tex) {
+        ERROR("too many texture instructions\n");
+        return;
+    }
+
+    if (coord.type == REG_TYPE_TEMP) {
+        hwsrc = rp->temps[coord.index];
+    } else if (coord.type == REG_TYPE_CONST) {
+        hwsrc = coord.index;
+        flags = R300_FPITX_SRC_CONST;
+    } else {
+        hwsrc = rp->inputs[coord.index];
+    }
+
+    hwdest = rp->temps[dest.index];
+
+    if (rp->cur_node == 0) rp->first_node_has_tex = 1;
+
+    rp->tex.inst[rp->tex.length++] = 0
+        | (hwsrc << R300_FPITX_SRC_SHIFT)
+        | (hwdest << R300_FPITX_DST_SHIFT)
+        | (unit << R300_FPITX_IMAGE_SHIFT)
+        | (opcode << R300_FPITX_OPCODE_SHIFT) /* not entirely sure about this */
+        | flags;
+
+    rp->node[rp->cur_node].tex_end++;
+}
+
+/* Emit one ALU instruction.
+ *
+ * op     - PFS_OP_* index into r300_fpop[]
+ * dest   - TEMP or OUTPUT register to write
+ * mask   - WRITEMASK_* bits; xyz selects the vector half (inst0/inst1),
+ *          w selects the scalar half (inst2/inst3)
+ * src0-2 - sources; all three must be valid even when the opcode reads
+ *          fewer (callers pass pfs_zero/pfs_one for the unused ones)
+ * flags  - extra bits OR'd into the opcode words (e.g. saturate)
+ *
+ * The vector and scalar halves are placed independently at rp->v_pos and
+ * rp->s_pos. On any error rp->error is set and nothing is written.
+ *
+ * Fixes relative to the original:
+ *  - the destination temp is recorded in used_in_node using dest.index;
+ *    the old code indexed src[] with the loop counter left over from the
+ *    source loop (src[3] for a three-operand opcode);
+ *  - for unused arguments the "read constant" bit now goes into inst[3];
+ *    it was OR'd into inst[2], corrupting the scalar opcode word;
+ *  - negative opcodes and a full instruction array are rejected;
+ *  - the stray ';' after the function body is gone.
+ */
+static void emit_arith(struct r300_fragment_program *rp, int op,
+                       pfs_reg_t dest, int mask,
+                       pfs_reg_t src0, pfs_reg_t src1, pfs_reg_t src2,
+                       int flags)
+{
+    pfs_reg_t src[3] = { src0, src1, src2 };
+    int hwdest, hwsrc[3] = { 0, 0, 0 };
+    int argc;
+    int v_idx = rp->v_pos, s_idx = rp->s_pos;
+    GLuint inst[4] = { 0, 0, 0, 0 };
+    int i;
+
+    if (!dest.valid || !src0.valid || !src1.valid || !src2.valid) {
+        ERROR("invalid register. dest/src0/src1/src2 valid = %d/%d/%d/%d\n",
+              dest.valid, src0.valid, src1.valid, src2.valid);
+        return;
+    }
+
+    /* check opcode */
+    if (op < 0 || op > MAX_PFS_OP) {
+        ERROR("unknown opcode!\n");
+        return;
+    }
+    argc = r300_fpop[op].argc;
+
+    /* grab hwregs of sources */
+    for (i = 0; i < argc; i++) {
+        switch (src[i].type) {
+        case REG_TYPE_INPUT:
+            hwsrc[i] = rp->inputs[src[i].index];
+            break;
+        case REG_TYPE_TEMP:
+            /* make sure insn ordering is right... */
+            if (src[i].vcross && v_idx < s_idx)
+                v_idx = s_idx;
+            if (src[i].scross && s_idx < v_idx)
+                s_idx = v_idx;
+
+            hwsrc[i] = rp->temps[src[i].index];
+            rp->used_in_node |= (1 << src[i].index);
+            break;
+        case REG_TYPE_CONST:
+            hwsrc[i] = src[i].index;
+            break;
+        default:
+            ERROR("invalid source reg\n");
+            return;
+        }
+    }
+
+    /* grab hwregs of dest */
+    switch (dest.type) {
+    case REG_TYPE_TEMP:
+        hwdest = rp->temps[dest.index];
+        rp->used_in_node |= (1 << dest.index);
+        break;
+    case REG_TYPE_OUTPUT:
+        hwdest = 0;
+        break;
+    default:
+        ERROR("invalid dest reg type %d\n", dest.type);
+        return;
+    }
+
+    /* never write past the end of the instruction array */
+    if (((mask & 7) && v_idx >= PFS_MAX_ALU_INST) ||
+        ((mask & 8) && s_idx >= PFS_MAX_ALU_INST)) {
+        ERROR("too many ALU instructions\n");
+        return;
+    }
+
+    for (i = 0; i < 3; i++) {
+        if (i < argc) {
+            inst[0] |= (v_swiz[src[i].v_swz].base + (i * v_swiz[src[i].v_swz].stride)) << (i * 7);
+            inst[2] |= (s_swiz[src[i].s_swz].base + (i * s_swiz[src[i].s_swz].stride)) << (i * 7);
+            if (src[i].negate) {
+                inst[0] |= (1<<5) << (i*7);
+                inst[2] |= (1<<5) << (i*7);
+            }
+            inst[1] |= hwsrc[i] << (i*6);
+            inst[3] |= hwsrc[i] << (i*6);
+            if (src[i].type == REG_TYPE_CONST) {
+                inst[1] |= (1<<5) << (i*6);
+                inst[3] |= (1<<5) << (i*6);
+            }
+        } else {
+            /* read constant zero, may aswell use a ZERO swizzle aswell.. */
+            inst[0] |= R300_FPI0_ARGC_ZERO << (i*7);
+            inst[2] |= R300_FPI2_ARGA_ZERO << (i*7);
+            inst[1] |= (1<<5) << (i*6);
+            inst[3] |= (1<<5) << (i*6);
+        }
+    }
+
+    if (mask & 7) {
+        rp->alu.inst[v_idx].inst0 = inst[0] | r300_fpop[op].v_op | flags;
+        rp->alu.inst[v_idx].inst1 = inst[1] |
+            (hwdest << R300_FPI1_DSTC_SHIFT) |
+            ((mask & WRITEMASK_XYZ) << (dest.type == REG_TYPE_OUTPUT ? 26 : 23));
+        rp->v_pos = v_idx + 1;
+    }
+    if (mask & 8) {
+        rp->alu.inst[s_idx].inst2 = inst[2] | r300_fpop[op].s_op | flags;
+        rp->alu.inst[s_idx].inst3 = inst[3] |
+            (hwdest << R300_FPI3_DSTA_SHIFT) |
+            (1 << (dest.type == REG_TYPE_OUTPUT ? 24 : 23));
+        rp->s_pos = s_idx + 1;
+    }
+
+    i = rp->v_pos > rp->s_pos ? rp->v_pos : rp->s_pos;
+    if (i > rp->alu.length) {
+        rp->alu.length++;
+        rp->node[rp->cur_node].alu_end++;
+    }
+
+    return;
+}
+
+/* Walk the Mesa instruction list and emit r300 code for each instruction.
+ * Returns GL_FALSE if there is no program, the program is empty, or any
+ * instruction could not be translated (rp->error set); GL_TRUE otherwise.
+ *
+ * ADD, SUB, MOV and MUL are all expressed through the hw MAD opcode.
+ *
+ * Fixes relative to the original:
+ *  - the saturate flag is taken from the instruction being translated;
+ *    the old code read it from the first instruction for every one;
+ *  - source operands are translated into locals before emit_arith() is
+ *    called. t_src() may itself emit constants and instructions, and as
+ *    function arguments the order of those calls was unspecified.
+ */
+static GLboolean parse_program(struct r300_fragment_program *rp)
+{
+    struct fragment_program *mp = &rp->mesa_program;
+    const struct fp_instruction *inst = mp->Instructions;
+    struct fp_instruction *fpi;
+    pfs_reg_t src0, src1, src2, temp;
+    int flags;
+
+    if (!inst)
+        return GL_FALSE;
+
+    if (inst[0].Opcode == FP_OPCODE_END) {
+        ERROR("empty program?\n");
+        return GL_FALSE;
+    }
+
+    for (fpi = mp->Instructions; fpi->Opcode != FP_OPCODE_END; fpi++) {
+        flags = 0;
+        if (fpi->Saturate) flags = R300_FPI0_OUTC_SAT; /* same for OUTA */
+
+        switch (fpi->Opcode) {
+        case FP_OPCODE_ADD:
+            src0 = t_src(rp, fpi->SrcReg[0]);
+            src1 = t_src(rp, fpi->SrcReg[1]);
+            emit_arith(rp, PFS_OP_MAD, t_dst(rp, fpi->DstReg),
+                       fpi->DstReg.WriteMask,
+                       src0, pfs_one, src1,
+                       flags);
+            break;
+        case FP_OPCODE_DP3:
+            src0 = t_src(rp, fpi->SrcReg[0]);
+            src1 = t_src(rp, fpi->SrcReg[1]);
+            emit_arith(rp, PFS_OP_DP3, t_dst(rp, fpi->DstReg),
+                       (fpi->DstReg.WriteMask & WRITEMASK_XYZ),
+                       src0, src1, pfs_zero,
+                       flags);
+            break;
+        case FP_OPCODE_LRP:
+            /* TODO: use the special LRP form if possible */
+            src0 = t_src(rp, fpi->SrcReg[0]);
+            src1 = t_src(rp, fpi->SrcReg[1]);
+            src2 = t_src(rp, fpi->SrcReg[2]);
+            // result = tmp0tmp1 + (1 - tmp0)tmp2
+            //        = tmp0tmp1 + tmp2 + (-tmp0)tmp2
+            // MAD temp, -tmp0, tmp2, tmp2
+            // MAD result, tmp0, tmp1, temp
+            temp = get_temp_reg(rp);
+            emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_XYZW,
+                       negate(src0), src2, src2, 0);
+            emit_arith(rp, PFS_OP_MAD, t_dst(rp, fpi->DstReg),
+                       fpi->DstReg.WriteMask,
+                       src0, src1, temp,
+                       flags);
+            break;
+        case FP_OPCODE_MAD:
+            src0 = t_src(rp, fpi->SrcReg[0]);
+            src1 = t_src(rp, fpi->SrcReg[1]);
+            src2 = t_src(rp, fpi->SrcReg[2]);
+            emit_arith(rp, PFS_OP_MAD, t_dst(rp, fpi->DstReg),
+                       fpi->DstReg.WriteMask,
+                       src0, src1, src2,
+                       flags);
+            break;
+        case FP_OPCODE_MOV:
+            src0 = t_src(rp, fpi->SrcReg[0]);
+            emit_arith(rp, PFS_OP_MAD, t_dst(rp, fpi->DstReg),
+                       fpi->DstReg.WriteMask,
+                       src0, pfs_one, pfs_zero,
+                       flags);
+            break;
+        case FP_OPCODE_MUL:
+            src0 = t_src(rp, fpi->SrcReg[0]);
+            src1 = t_src(rp, fpi->SrcReg[1]);
+            emit_arith(rp, PFS_OP_MAD, t_dst(rp, fpi->DstReg),
+                       fpi->DstReg.WriteMask,
+                       src0, src1, pfs_zero,
+                       flags);
+            break;
+        case FP_OPCODE_SUB:
+            src0 = t_src(rp, fpi->SrcReg[0]);
+            src1 = t_src(rp, fpi->SrcReg[1]);
+            emit_arith(rp, PFS_OP_MAD, t_dst(rp, fpi->DstReg),
+                       fpi->DstReg.WriteMask,
+                       src0, pfs_one, negate(src1),
+                       flags);
+            break;
+        case FP_OPCODE_TEX:
+            emit_tex(rp, fpi, R300_FPITX_OP_TEX);
+            break;
+        case FP_OPCODE_TXB:
+            emit_tex(rp, fpi, R300_FPITX_OP_TXB);
+            break;
+        case FP_OPCODE_TXP:
+            emit_tex(rp, fpi, R300_FPITX_OP_TXP);
+            break;
+        /* opcodes that are not translated yet */
+        case FP_OPCODE_ABS:
+        case FP_OPCODE_CMP:
+        case FP_OPCODE_COS:
+        case FP_OPCODE_DP4:
+        case FP_OPCODE_DPH:
+        case FP_OPCODE_DST:
+        case FP_OPCODE_EX2:
+        case FP_OPCODE_FLR:
+        case FP_OPCODE_FRC:
+        case FP_OPCODE_KIL:
+        case FP_OPCODE_LG2:
+        case FP_OPCODE_LIT:
+        case FP_OPCODE_MAX:
+        case FP_OPCODE_MIN:
+        case FP_OPCODE_POW:
+        case FP_OPCODE_RCP:
+        case FP_OPCODE_RSQ:
+        case FP_OPCODE_SCS:
+        case FP_OPCODE_SGE:
+        case FP_OPCODE_SIN:
+        case FP_OPCODE_SLT:
+        case FP_OPCODE_SWZ:
+        case FP_OPCODE_XPD:
+        default:
+            ERROR("unknown fpi->Opcode %d\n", fpi->Opcode);
+            break;
+        }
+
+        if (rp->error)
+            return GL_FALSE;
+    }
+
+    return GL_TRUE;
+}
+
+/* - Init structures
+ * - Determine what hwregs each input corresponds to
+ * - Give every Mesa temporary the program references a hw temp, so temps
+ *   requested later during translation do not clobber them
+ * - Ask Mesa to refresh the state-derived parameter values
+ *
+ * Flags an error and returns early, before the last two steps, when the
+ * program has no instruction list.
+ */
+void init_program(struct r300_fragment_program *rp)
+{
+ struct fragment_program *mp = &rp->mesa_program;
+ struct fp_instruction *fpi;
+ GLuint InputsRead = mp->InputsRead;
+ GLuint fp_reg = 0;
+ GLuint temps_used = 0; /* for rp->temps[] */
+ int i;
+
+ rp->translated = GL_FALSE;
+ rp->tex.length = 0;
+ rp->alu.length = 0;
+ rp->v_pos = 0;
+ rp->s_pos = 0;
+ rp->cur_node = 0;
+ rp->node[0].alu_offset = 0;
+ rp->node[0].alu_end = -1;
+ rp->node[0].tex_offset = 0;
+ rp->node[0].tex_end = -1;
+ rp->active_nodes = 1;
+ rp->first_node_has_tex = 0;
+ rp->alu_offset = 0;
+ rp->alu_end = 0;
+ rp->tex_offset = 0;
+ rp->tex_end = 0;
+ rp->param_length = 0;
+ rp->temp_flag = 0;
+ rp->used_in_node = 0;
+ rp->hwreg_used = 0;
+ rp->hwreg_flag = 0;
+ rp->max_temp_idx = 0;
+ rp->error = 0;
+
+ /* Work out what temps the Mesa inputs correspond to, this must match
+ * what setup_rs_unit does, which shouldn't be a problem as rs_unit
+ * configures itself based on the fragprog's InputsRead
+ */
+
+ /* Texcoords come first */
+ for (i=0;i<8;i++) {
+ if (InputsRead & (FRAG_BIT_TEX0 << i)) {
+ rp->hwreg_flag |= (1<<fp_reg);
+ rp->inputs[FRAG_ATTRIB_TEX0+i] = fp_reg++;
+ }
+ }
+ InputsRead &= ~FRAG_BITS_TEX_ANY;
+
+ /* The primary colour */
+ if (InputsRead & FRAG_BIT_COL0) {
+ rp->hwreg_flag |= (1<<fp_reg);
+ rp->inputs[FRAG_ATTRIB_COL0] = fp_reg++;
+ }
+ InputsRead &= ~FRAG_BIT_COL0;
+
+ /* Anything else */
+ if (InputsRead) {
+ WARN_ONCE("Don't know how to handle inputs 0x%x\n", InputsRead);
+ /* force read from hwreg 0 for now */
+ for (i=0;i<32;i++)
+ if (InputsRead & (1<<i)) rp->inputs[i] = 0;
+ }
+
+ rp->hwreg_used = rp->hwreg_flag;
+
+ /* Possibly the worst part of how I went about this... Find out what
+ * temps are used by the mesa program so we don't clobber something
+ * when we need a temp for other reasons.
+ *
+ * Possibly not too bad actually, as we could add to this later and
+ * find out when inputs are last used so we can reuse them as temps.
+ */
+ if (!mp->Instructions) {
+ ERROR("No instructions found in program\n");
+ return;
+ }
+ for (fpi=mp->Instructions;fpi->Opcode != FP_OPCODE_END; fpi++) {
+ for (i=0;i<3;i++) {
+ if (fpi->SrcReg[i].File == PROGRAM_TEMPORARY) {
+ if (!(temps_used & (1 <<
fpi->SrcReg[i].Index))) {
+ temps_used |= (1 <<
fpi->SrcReg[i].Index);
+ rp->temps[fpi->SrcReg[i].Index] =
get_hw_temp(rp);
+ }
+ }
+ }
+ /* needed? surely if a program writes a temp it'll read it
again */
+ if (fpi->DstReg.File == PROGRAM_TEMPORARY) {
+ if (!(temps_used & (1 << fpi->DstReg.Index))) {
+ temps_used |= (1 << fpi->DstReg.Index);
+ rp->temps[fpi->DstReg.Index] = get_hw_temp(rp);
+ }
+ }
+ }
+ rp->temp_flag = temps_used;
+
+ /* Ask Mesa nicely to fill in ParameterValues for us */
+ _mesa_load_state_parameters(rp->ctx, rp->mesa_program.Parameters);
+}
+
+/* Entry point: translate rp->mesa_program into r300 instruction words.
+ * On failure the program is dumped to stderr and rp->translated stays
+ * GL_FALSE. On success the overall ALU/TEX ranges and the highest hardware
+ * register used are recorded and rp->translated is set. */
+void translate_fragment_shader(struct r300_fragment_program *rp)
+{
+    int reg;
+
+    init_program(rp);
+
+    /* parse mesa program */
+    if (parse_program(rp) == GL_FALSE) {
+        dump_program(rp);
+        return;
+    }
+
+    if (rp->v_pos > rp->s_pos)
+        rp->alu.length = rp->v_pos;
+    else
+        rp->alu.length = rp->s_pos;
+
+    rp->alu_offset = 0;
+    rp->tex_offset = 0;
+    rp->alu_end = rp->alu.length - 1;
+    rp->tex_end = rp->tex.length - 1;
+
+    /* the last set bit found is the highest register in use */
+    for (reg = 0; reg < 32; reg++) {
+        if (rp->hwreg_used & (1 << reg))
+            rp->max_temp_idx = reg;
+    }
+    rp->translated = GL_TRUE;
+
+    if (0) dump_program(rp);
+}
+
+/* just some random things...
+ *
+ * Debug aid, output goes to stderr: the Mesa instructions, the offsets of
+ * every active node, then each of the four ALU instruction word arrays
+ * preceded by a header word built from the instruction count and the
+ * matching R300_PFS_INSTRn_0 register, and a terminating zero word.
+ */
+static void dump_program(struct r300_fragment_program *rp)
+{
+ int i;
+
+ fprintf(stderr, "Mesa program:\n");
+ fprintf(stderr, "-------------\n");
+ _mesa_debug_fp_inst(rp->mesa_program.NumTexInstructions +
+
rp->mesa_program.NumAluInstructions,
+ rp->mesa_program.Instructions);
+ fflush(stderr); fflush(stdout);
+
+ fprintf(stderr, "Hardware program\n");
+ fprintf(stderr, "----------------\n");
+ for (i=0;i<rp->active_nodes;i++) {
+ fprintf(stderr, "NODE %d: alu_offset: %d, tex_offset: %d,
alu_end: %d, tex_end: %d\n", i,
+ rp->node[i].alu_offset,
+ rp->node[i].tex_offset,
+ rp->node[i].alu_end,
+ rp->node[i].tex_end);
+ }
+
+/* dump program in pretty_print_command_stream.tcl-readable format */
+ fprintf(stderr, "%08x\n", (((rp->alu.length-1) << 16) | (R300_PFS_INSTR0_0
>> 2)));
+ for (i=0;i<rp->alu.length;i++)
+ fprintf(stderr, "%08x\n", rp->alu.inst[i].inst0);
+ fprintf(stderr, "%08x\n", (((rp->alu.length-1) << 16) | (R300_PFS_INSTR1_0
>> 2)));
+ for (i=0;i<rp->alu.length;i++)
+ fprintf(stderr, "%08x\n", rp->alu.inst[i].inst1);
+ fprintf(stderr, "%08x\n", (((rp->alu.length-1) << 16) | (R300_PFS_INSTR2_0
>> 2)));
+ for (i=0;i<rp->alu.length;i++)
+ fprintf(stderr, "%08x\n", rp->alu.inst[i].inst2);
+ fprintf(stderr, "%08x\n", (((rp->alu.length-1) << 16) | (R300_PFS_INSTR3_0
>> 2)));
+ for (i=0;i<rp->alu.length;i++)
+ fprintf(stderr, "%08x\n", rp->alu.inst[i].inst3);
+ fprintf(stderr, "00000000\n");
+
+}
+#endif // USE_ARB_F_P == 1
diff -Nu r300.old/r300_fragprog.h r300/r300_fragprog.h
--- r300.old/r300_fragprog.h 1970-01-01 11:00:00.000000000 +1100
+++ r300/r300_fragprog.h 2005-05-19 03:29:23.000000000 +1000
@@ -0,0 +1,46 @@
+#ifndef __R300_FRAGPROG_H_
+#define __R300_FRAGPROG_H_
+
+#include "glheader.h"
+#include "macros.h"
+#include "enums.h"
+
+#include "program.h"
+#include "r300_context.h"
+#include "nvfragprog.h"
+
+/* representation of a register for emit_arith/swizzle
+ *
+ * index is interpreted according to type: for TEMP it is looked up in
+ * rp->temps[], for INPUT in rp->inputs[], and for CONST it is used
+ * directly. v_swz indexes the v_swiz[] table and s_swz the s_swiz[]
+ * table in r300_fragprog.c. vcross/scross make emit_arith() place the
+ * instruction no earlier than the other half's current position. valid
+ * must be set on every register handed to emit_arith().
+ */
+typedef struct _pfs_reg_t {
+ enum {
+ REG_TYPE_INPUT,
+ REG_TYPE_OUTPUT,
+ REG_TYPE_TEMP,
+ REG_TYPE_CONST
+ } type:2;
+ GLuint index:6;
+ GLuint v_swz:5;
+ GLuint s_swz:5;
+ GLboolean vcross:1;
+ GLboolean scross:1;
+ GLuint negate:1; //XXX: we need to handle negate individually
+ GLboolean has_w:1;
+ GLboolean valid:1;
+} pfs_reg_t;
+
+/* supported hw opcodes
+ *
+ * These values index the r300_fpop[] table in r300_fragprog.c and must
+ * stay in the same order as its entries; MAX_PFS_OP is the highest valid
+ * index.
+ */
+#define PFS_OP_MAD 0
+#define PFS_OP_DP3 1
+#define PFS_OP_DP4 2
+#define PFS_OP_MIN 3
+#define PFS_OP_MAX 4
+#define PFS_OP_CMP 5
+#define PFS_OP_FRC 6
+#define PFS_OP_EX2 7
+#define PFS_OP_LG2 8
+#define PFS_OP_RCP 9
+#define PFS_OP_RSQ 10
+#define MAX_PFS_OP 10
+#define OP(n) PFS_OP_##n
+
+#endif
+
diff -Nu r300.old/r300_reg.h r300/r300_reg.h
--- r300.old/r300_reg.h 2005-05-19 23:50:38.000000000 +1000
+++ r300/r300_reg.h 2005-05-19 23:47:41.000000000 +1000
@@ -823,6 +823,13 @@
# define R300_FPITX_DST_MASK (31 << 6)
# define R300_FPITX_IMAGE_SHIFT 11
# define R300_FPITX_IMAGE_MASK (15 << 11) /* GUESS based on
layout and native limits */
+/* Unsure if these are opcodes, or some kind of bitfields, but this is how
+ * they were set when I checked
+ */
+# define R300_FPITX_OPCODE_SHIFT 15
+# define R300_FPITX_OP_TEX 1
+# define R300_FPITX_OP_TXP 3
+# define R300_FPITX_OP_TXB 4
/* ALU
// The ALU instructions register blocks are enumerated according to the order
@@ -997,13 +1004,13 @@
# define R300_FPI2_ARG1A_MASK (31 << 7)
# define R300_FPI2_ARG1A_NEG (1 << 12)
# define R300_FPI2_ARG2A_SHIFT 14
-# define R300_FPI2_AEG2A_MASK (31 << 14)
+# define R300_FPI2_ARG2A_MASK (31 << 14)
# define R300_FPI2_ARG2A_NEG (1 << 19)
# define R300_FPI2_SPECIAL_LRP (1 << 21)
# define R300_FPI2_OUTA_MAD (0 << 23)
# define R300_FPI2_OUTA_DP4 (1 << 23)
-# define R300_RPI2_OUTA_MIN (2 << 23)
-# define R300_RPI2_OUTA_MAX (3 << 23)
+# define R300_FPI2_OUTA_MIN (2 << 23)
+# define R300_FPI2_OUTA_MAX (3 << 23)
# define R300_FPI2_OUTA_CMP (6 << 23)
# define R300_FPI2_OUTA_FRC (7 << 23)
# define R300_FPI2_OUTA_EX2 (8 << 23)
diff -Nu r300.old/r300_shader.c r300/r300_shader.c
--- r300.old/r300_shader.c 2005-05-12 03:28:13.000000000 +1000
+++ r300/r300_shader.c 2005-05-19 04:04:40.000000000 +1000
@@ -5,6 +5,9 @@
#include "program.h"
#include "r300_context.h"
#include "nvvertprog.h"
+#if USE_ARB_F_P == 1
+#include "r300_fragprog.h"
+#endif
static void r300BindProgram(GLcontext *ctx, GLenum target, struct program
*prog)
{
@@ -13,6 +16,9 @@
switch(target){
case GL_VERTEX_PROGRAM_ARB:
+#if USE_ARB_F_P == 1
+ case GL_FRAGMENT_PROGRAM_ARB:
+#endif
//rmesa->current_vp = vp;
break;
default:
@@ -24,7 +30,11 @@
static struct program *r300NewProgram(GLcontext *ctx, GLenum target, GLuint id)
{
struct r300_vertex_program *vp;
+#if USE_ARB_F_P == 1
+ struct r300_fragment_program *fp;
+#else
struct fragment_program *fp;
+#endif
struct ati_fragment_shader *afs;
switch(target){
@@ -33,9 +43,14 @@
return _mesa_init_vertex_program(ctx, &vp->mesa_program,
target, id);
case GL_FRAGMENT_PROGRAM_ARB:
+#if USE_ARB_F_P == 1
+ fp=CALLOC_STRUCT(r300_fragment_program);
+ fp->ctx = ctx;
+ return _mesa_init_fragment_program(ctx, &fp->mesa_program,
target, id);
+#else
fp=CALLOC_STRUCT(fragment_program);
return _mesa_init_fragment_program(ctx, fp, target, id);
-
+#endif
case GL_FRAGMENT_PROGRAM_NV:
fp=CALLOC_STRUCT(fragment_program);
return _mesa_init_fragment_program(ctx, fp, target, id);
@@ -64,15 +79,20 @@
struct program *prog)
{
struct r300_vertex_program *vp=(void *)prog;
-
+#if USE_ARB_F_P == 1
+ struct r300_fragment_program *fp=(void *)prog;
+#endif
+
switch(target) {
case GL_VERTEX_PROGRAM_ARB:
/*vp->translated=GL_FALSE;
translate_vertex_shader(vp);*/
//debug_vp(ctx, vp);
break;
-
case GL_FRAGMENT_PROGRAM_ARB:
+#if USE_ARB_F_P == 1
+ fp->translated = GL_FALSE;
+#endif
break;
}
}
diff -Nu r300.old/r300_state.c r300/r300_state.c
--- r300.old/r300_state.c 2005-05-19 23:50:38.000000000 +1000
+++ r300/r300_state.c 2005-05-19 23:47:41.000000000 +1000
@@ -58,10 +58,14 @@
#include "r300_reg.h"
#include "r300_program.h"
#include "r300_emit.h"
+#if USE_ARB_F_P == 1
+#include "r300_fragprog.h"
+#else
#include "r300_fixed_pipelines.h"
+#include "r300_texprog.h"
+#endif
#include "r300_tex.h"
#include "r300_maos.h"
-#include "r300_texprog.h"
static void r300AlphaFunc(GLcontext * ctx, GLenum func, GLfloat ref)
{
@@ -1094,6 +1098,100 @@
fprintf(stderr, "TX_ENABLE: %08x max_texture_unit=%d\n",
r300->hw.txe.cmd[R300_TXE_ENABLE], max_texture_unit);
}
+#if USE_ARB_F_P == 1
+void r300_setup_rs_unit(GLcontext *ctx)
+{
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+ int i, vp_reg, fp_reg, in_texcoords;
+ /* I'm still unsure if these are needed */
+ GLuint interp_magic[8] = {
+ 0x00,
+ 0x40,
+ 0x80,
+ 0xC0,
+ 0x00,
+ 0x00,
+ 0x00,
+ 0x00
+ };
+ GLuint OutputsWritten;
+ GLuint InputsRead;
+
+ if(hw_tcl_on)
+ OutputsWritten = CURRENT_VERTEX_SHADER(ctx)->OutputsWritten;
+ else
+ OutputsWritten = r300->state.render_inputs;
+
+ if (ctx->FragmentProgram._Current)
+ InputsRead = ctx->FragmentProgram._Current->InputsRead;
+ else {
+ fprintf(stderr, "No ctx->FragmentProgram._Current!!\n");
+ return; /* This should only ever happen once.. */
+ }
+ /* This needs to be rewritten - it is a hack at best */
+ R300_STATECHANGE(r300, ri);
+ R300_STATECHANGE(r300, rc);
+ R300_STATECHANGE(r300, rr);
+
+ vp_reg = fp_reg = in_texcoords = 0;
+ r300->hw.rr.cmd[R300_RR_ROUTE_0] = 0;
+
+ for (i=0;i<ctx->Const.MaxTextureUnits;i++) {
+ if (OutputsWritten & (hw_tcl_on ? (VERT_BIT_TEX0<<i) :
(_TNL_BIT_TEX0<<i)))
+ in_texcoords++;
+
+ r300->hw.ri.cmd[R300_RI_INTERP_0+i] = 0
+ | R300_RS_INTERP_USED
+ | (vp_reg << R300_RS_INTERP_SRC_SHIFT)
+ | interp_magic[i];
+
+ if (InputsRead & (FRAG_BIT_TEX0<<i)) {
+ assert(r300->state.texture.tc_count != 0);
+ r300->hw.rr.cmd[R300_RR_ROUTE_0 + fp_reg] = 0
+ | R300_RS_ROUTE_ENABLE
+ | i /* source INTERP */
+ | (fp_reg << R300_RS_ROUTE_DEST_SHIFT);
+
+ if (OutputsWritten & (hw_tcl_on ? (VERT_BIT_TEX0<<i) :
(_TNL_BIT_TEX0<<i))) {
+ vp_reg++;
+ } else {
+ /* Unsure of how to handle this situation, for
now print errors and
+ * the program will just receive bogus data
+ */
+ fprintf(stderr, "fragprog wants coords for
tex%d, vp doesn't provide them!\n", i);
+ }
+ InputsRead &= ~(FRAG_BIT_TEX0<<i);
+ fp_reg++;
+ }
+ }
+
+ if (InputsRead & FRAG_BIT_COL0) {
+ if (!(OutputsWritten & (hw_tcl_on ? VERT_BIT_COLOR0 :
_TNL_BIT_COLOR0)))
+ fprintf(stderr, "fragprog wants col0, vp doesn't
provide it\n");
+
+ r300->hw.rr.cmd[R300_RR_ROUTE_0] |= 0
+ | R300_RS_ROUTE_0_COLOR
+ | (fp_reg << R300_RS_ROUTE_0_COLOR_DEST_SHIFT);
+ InputsRead &= ~FRAG_BIT_COL0;
+ }
+
+ r300->hw.rc.cmd[1] = 0
+ | (in_texcoords << R300_RS_CNTL_TC_CNT_SHIFT)
+ | R300_RS_CNTL_0_UNKNOWN_7
+ | R300_RS_CNTL_0_UNKNOWN_18;
+
+ if (r300->state.texture.tc_count > 0) {
+ r300->hw.rr.cmd[R300_RR_CMD_0] =
cmducs(R300_RS_ROUTE_0, fp_reg);
+ r300->hw.rc.cmd[2] = 0xC0 | (fp_reg-1); /* index of
highest RS_ROUTE used*/
+ } else {
+ r300->hw.rr.cmd[R300_RR_CMD_0] =
cmducs(R300_RS_ROUTE_0, 1);
+ r300->hw.rc.cmd[2] = 0x0;
+ }
+
+ if (InputsRead)
+ WARN_ONCE("Don't know how to satisfy InputsRead=0x%08x\n",
InputsRead);
+}
+#else
void r300_setup_rs_unit(GLcontext *ctx)
{
r300ContextPtr r300 = R300_CONTEXT(ctx);
@@ -1120,7 +1218,6 @@
R300_STATECHANGE(r300, rc);
R300_STATECHANGE(r300, rr);
-#if 1
cur_reg = 0;
r300->hw.rr.cmd[R300_RR_ROUTE_0] = 0;
@@ -1163,48 +1260,8 @@
// fprintf(stderr, "rendering with %d texture co-ordinate sets\n",
cur_reg);
-
-#else
- for(i = 1; i <= 8; ++i)
- r300->hw.ri.cmd[i] = 0x00d10000;
- r300->hw.ri.cmd[R300_RI_INTERP_1] |= R300_RS_INTERP_1_UNKNOWN;
- r300->hw.ri.cmd[R300_RI_INTERP_2] |= R300_RS_INTERP_2_UNKNOWN;
- r300->hw.ri.cmd[R300_RI_INTERP_3] |= R300_RS_INTERP_3_UNKNOWN;
-
-#if 1
- for(i = 2; i <= 8; ++i)
- r300->hw.ri.cmd[i] |= 4;
-#endif
-
- for(i = 1; i <= 8; ++i)
- r300->hw.rr.cmd[i] = 0;
- /* textures enabled ? */
- if(r300->state.texture.tc_count>0){
-
- /* This code only really works with one set of texture
coordinates */
-
- /* The second constant is needed to get glxgears display
anything .. */
- r300->hw.rc.cmd[1] = R300_RS_CNTL_0_UNKNOWN_7
- | R300_RS_CNTL_0_UNKNOWN_18
- |
(r300->state.texture.tc_count<<R300_RS_CNTL_TC_CNT_SHIFT);
- r300->hw.rc.cmd[2] = 0xc0;
-
-
-
((drm_r300_cmd_header_t*)r300->hw.rr.cmd)->unchecked_state.count = 1;
- r300->hw.rr.cmd[R300_RR_ROUTE_0] = 0x24008;
-
- } else {
-
- /* The second constant is needed to get glxgears display
anything .. */
- r300->hw.rc.cmd[1] = R300_RS_CNTL_0_UNKNOWN_7 |
R300_RS_CNTL_0_UNKNOWN_18;
- r300->hw.rc.cmd[2] = 0;
-
-
((drm_r300_cmd_header_t*)r300->hw.rr.cmd)->unchecked_state.count = 1;
- r300->hw.rr.cmd[R300_RR_ROUTE_0] = 0x4000;
-
- }
-#endif
}
+#endif // USE_ARB_F_P
#define vpucount(ptr) (((drm_r300_cmd_header_t*)(ptr))->vpu.count)
@@ -1477,7 +1534,84 @@
#endif
}
+#if USE_ARB_F_P == 1
+void r300SetupPixelShader(r300ContextPtr rmesa)
+{
+ GLcontext *ctx = rmesa->radeon.glCtx;
+ struct r300_fragment_program *rp = ctx->FragmentProgram._Current;
+ int i,k;
+ if (!rp) /* should only happen once, just after context is
created */
+ return;
+
+ if (!rp->translated) {
+ translate_fragment_shader(ctx->FragmentProgram._Current);
+ if (!rp->translated) {
+ fprintf(stderr, "%s: No valid fragment shader,
exiting\n", __func__);
+ exit(-1);
+ }
+ }
+
+ R300_STATECHANGE(rmesa, fpt);
+ for(i=0;i<rp->tex.length;i++)
+ rmesa->hw.fpt.cmd[R300_FPT_INSTR_0+i]=rp->tex.inst[i];
+ rmesa->hw.fpt.cmd[R300_FPT_CMD_0]=cmducs(R300_PFS_TEXI_0,
rp->tex.length);
+
+#define OUTPUT_FIELD(st, reg, field) \
+ R300_STATECHANGE(rmesa, st); \
+ for(i=0;i<rp->alu.length;i++) \
+
rmesa->hw.st.cmd[R300_FPI_INSTR_0+i]=rp->alu.inst[i].field;\
+ rmesa->hw.st.cmd[R300_FPI_CMD_0]=cmducs(reg, rp->alu.length);
+
+ OUTPUT_FIELD(fpi[0], R300_PFS_INSTR0_0, inst0);
+ OUTPUT_FIELD(fpi[1], R300_PFS_INSTR1_0, inst1);
+ OUTPUT_FIELD(fpi[2], R300_PFS_INSTR2_0, inst2);
+ OUTPUT_FIELD(fpi[3], R300_PFS_INSTR3_0, inst3);
+#undef OUTPUT_FIELD
+
+ R300_STATECHANGE(rmesa, fp);
+ /* I just want to say, the way these nodes are stored.. weird.. */
+ for (i=0,k=(4-rp->active_nodes);i<4;i++,k++) {
+ if (i<rp->active_nodes) {
+ rmesa->hw.fp.cmd[R300_FP_NODE0+k]=
+ (rp->node[i].alu_offset <<
R300_PFS_NODE_ALU_OFFSET_SHIFT)
+ | (rp->node[i].alu_end <<
R300_PFS_NODE_ALU_END_SHIFT)
+ | (rp->node[i].tex_offset <<
R300_PFS_NODE_TEX_OFFSET_SHIFT)
+ | (rp->node[i].tex_end <<
R300_PFS_NODE_TEX_END_SHIFT)
+ | ( (k==3) ? R300_PFS_NODE_LAST_NODE : 0);
+ } else {
+ rmesa->hw.fp.cmd[R300_FP_NODE0+(3-i)] = 0;
+ }
+ }
+
+ /* PFS_CNTL_0 */
+ rmesa->hw.fp.cmd[R300_FP_CNTL0]=
+ (rp->active_nodes-1)
+ | (rp->first_node_has_tex<<3);
+ /* PFS_CNTL_1 */
+ rmesa->hw.fp.cmd[R300_FP_CNTL1]=rp->max_temp_idx;
+ /* PFS_CNTL_2 */
+ rmesa->hw.fp.cmd[R300_FP_CNTL2]=
+ (rp->alu_offset << R300_PFS_CNTL_ALU_OFFSET_SHIFT)
+ | (rp->alu_end << R300_PFS_CNTL_ALU_END_SHIFT)
+ | (rp->tex_offset << R300_PFS_CNTL_TEX_OFFSET_SHIFT)
+ | (rp->tex_end << R300_PFS_CNTL_TEX_END_SHIFT);
+
+ /* This is /wrong/ The float values from rp->param[] need to be
converted
+ * to a 24-bit float (sign bit, 7 bit exponent, bias 63, 16 bit
mantissa).
+ * I'm unsure of a good way to do this.
+ *
+ * The >>8 is a dirty hack so I can actually see something in ut2004 */
+ R300_STATECHANGE(rmesa, fpp);
+ for(i=0;i<rp->param_length;i++){
+
rmesa->hw.fpp.cmd[R300_FPP_PARAM_0+4*i+0]=(r300PackFloat32(rp->param[i].x)>>8);
+
rmesa->hw.fpp.cmd[R300_FPP_PARAM_0+4*i+1]=(r300PackFloat32(rp->param[i].y)>>8);
+
rmesa->hw.fpp.cmd[R300_FPP_PARAM_0+4*i+2]=(r300PackFloat32(rp->param[i].z)>>8);
+
rmesa->hw.fpp.cmd[R300_FPP_PARAM_0+4*i+3]=(r300PackFloat32(rp->param[i].w)>>8);
+ }
+ rmesa->hw.fpp.cmd[R300_FPP_CMD_0]=cmducs(R300_PFS_PARAM_0_X,
rp->param_length*4);
+}
+#else
/* just a skeleton for now.. */
void r300GenerateTexturePixelShader(r300ContextPtr r300)
{
@@ -1652,6 +1786,7 @@
rmesa->hw.fpp.cmd[R300_FPP_CMD_0]=cmducs(R300_PFS_PARAM_0_X,
rmesa->state.pixel_shader.param_length);
}
+#endif
/**
* Called by Mesa after an internal state update.
@@ -2118,3 +2253,4 @@
functions->PolygonOffset = r300PolygonOffset;
functions->PolygonMode = r300PolygonMode;
}
+
diff -Nu r300.old/r300_texprog.c r300/r300_texprog.c
--- r300.old/r300_texprog.c 2005-05-14 03:44:22.000000000 +1000
+++ r300/r300_texprog.c 2005-05-19 03:44:24.000000000 +1000
@@ -17,6 +17,7 @@
#include "radeon_ioctl.h"
#include "radeon_state.h"
#include "r300_context.h"
+#if USE_ARB_F_P == 0
#include "r300_ioctl.h"
#include "r300_state.h"
#include "r300_reg.h"
@@ -265,4 +266,5 @@
p->alu_end = ps->program.alu.length - 1;
p->alu_offset = 0;
}
+#endif // USE_ARB_F_P == 0
