From 59f55a3c598cfd36318551b5933ca656ef7092ce Mon Sep 17 00:00:00 2001
From: Martin Andersson <g02maran@gmail.com>
Date: Fri, 12 Apr 2013 21:04:17 +0200
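Subject: [PATCH] r600g: HACK: Fix nested loops on Cayman

tgsi_bgnloop() now sets two file-scope flags. The first IF that follows
is emitted as PRED_SETE_INT carrying the BREAK execute-mask op, added as
a plain ALU instruction (no ALU_PUSH_BEFORE) and followed by a CF NOP
instead of a JUMP. The first ENDIF that follows skips its pop. The body
of tgsi_loop_brk_cont() is commented out, so it only returns 0.

The disassembler prints the execute-mask op name instead of the output
modifier for Cayman ALU instructions that have execute_mask set.
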
---
 src/gallium/drivers/r600/r600_asm.c    | 14 ++++++-
 src/gallium/drivers/r600/r600_shader.c | 69 ++++++++++++++++++++++++++++------
 src/gallium/drivers/r600/r600d.h       |  5 +++
 3 files changed, 75 insertions(+), 13 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c
index 26a848a..2874adf 100644
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -1985,6 +1985,7 @@ void r600_bytecode_disasm(struct r600_bytecode *bc)
 		LIST_FOR_EACH_ENTRY(alu, &cf->alu, list) {
 			const char *omod_str[] = {"","*2","*4","/2"};
 			const struct alu_op_info *aop = r600_isa_alu(alu->op);
+			bool cm_execmask_op = alu->execute_mask && bc->chip_class == CAYMAN;
 			int o = 0;
 
 			r600_bytecode_alu_nliterals(bc, alu, literal, &nliteral);
@@ -1997,8 +1998,10 @@ void r600_bytecode_disasm(struct r600_bytecode *bc)
 					alu->update_pred ? 'P':' ',
 					alu->pred_sel ? alu->pred_sel==2 ? '0':'1':' ');
 
-			o += fprintf(stderr, "%s%s%s ", aop->name,
-					omod_str[alu->omod], alu->dst.clamp ? "_sat":"");
+			/* Cayman execute-mask ops carry the mask op in omod, not a modifier. */
+			o += fprintf(stderr, "%s%s%s ", aop->name,
+					cm_execmask_op ? "" : omod_str[alu->omod],
+					alu->dst.clamp ? "_sat":"");
 
 			o += print_indent(o,60);
 			o += print_dst(alu);
@@ -2012,6 +2015,13 @@ void r600_bytecode_disasm(struct r600_bytecode *bc)
 				o += fprintf(stderr, "  BS:%d", alu->bank_swizzle);
 			}
 
+			if (cm_execmask_op && alu->omod) {
+				/* omod 1..3 = SQ_ALU_EXECUTE_MASK_OP_{BREAK,CONTINUE,KILL} */
+				static const char *const cm_em_op_names[] =
+					{"BREAK", "CONTINUE", "KILL"};
+				fprintf(stderr, "  %s", cm_em_op_names[alu->omod - 1]);
+			}
+
 			fprintf(stderr, "\n");
 			id += 2;
 
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index f801707..073d2c9 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -58,6 +58,9 @@ issued in the w slot as well.
 The compiler must issue the source argument to slots z, y, and x
 */
 
+static int hack_if;	/* HACK: set in tgsi_bgnloop(), cleared by the next tgsi_if() */
+static int hack_endif;	/* HACK: set in tgsi_bgnloop(), cleared by the next tgsi_endif() */
+
 static int r600_shader_from_tgsi(struct r600_screen *rscreen,
 				 struct r600_pipe_shader *pipeshader,
 				 struct r600_shader_key key);
@@ -5490,7 +5493,7 @@ static int tgsi_opdst(struct r600_shader_ctx *ctx)
 	return 0;
 }
 
-static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode)
+static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode, int op_break)
 {
 	struct r600_bytecode_alu alu;
 	int r;
@@ -5499,6 +5502,8 @@ static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode)
 	alu.op = opcode;
 	alu.execute_mask = 1;
 	alu.update_pred = 1;
+	if (op_break)
+		alu.omod = SQ_ALU_EXECUTE_MASK_OP_BREAK;
 
 	alu.dst.sel = ctx->temp_reg;
 	alu.dst.write = 1;
@@ -5510,7 +5515,10 @@ static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode)
 
 	alu.last = 1;
 
-	r = r600_bytecode_add_alu_type(ctx->bc, &alu, CF_OP_ALU_PUSH_BEFORE);
+	if (op_break)
+		r = r600_bytecode_add_alu(ctx->bc, &alu);
+	else
+		r = r600_bytecode_add_alu_type(ctx->bc, &alu, CF_OP_ALU_PUSH_BEFORE);
 	if (r)
 		return r;
 	return 0;
@@ -5729,10 +5737,16 @@ static void break_loop_on_flag(struct r600_shader_ctx *ctx, unsigned fc_sp)
 #endif
 
 static int tgsi_if(struct r600_shader_ctx *ctx)
-{
-	emit_logic_pred(ctx, ALU_OP2_PRED_SETNE_INT);
-
-	r600_bytecode_add_cfinst(ctx->bc, CF_OP_JUMP);
+{
+	/* HACK: the first IF after BGNLOOP is a BREAK predicate without ALU_PUSH_BEFORE. */
+	if (hack_if) {
+		hack_if = 0;
+		emit_logic_pred(ctx, ALU_OP2_PRED_SETE_INT, 1);
+		r600_bytecode_add_cfinst(ctx->bc, CF_OP_NOP);
+	} else {
+		emit_logic_pred(ctx, ALU_OP2_PRED_SETNE_INT, 0);
+		r600_bytecode_add_cfinst(ctx->bc, CF_OP_JUMP);
+	}
 
 	fc_pushlevel(ctx, FC_IF);
 
@@ -5752,7 +5766,15 @@ static int tgsi_else(struct r600_shader_ctx *ctx)
 
 static int tgsi_endif(struct r600_shader_ctx *ctx)
 {
-	pops(ctx, 1);
+	/* HACK: hack_endif is armed by tgsi_bgnloop(). The first ENDIF
+	 * reached afterwards disarms it and emits no pop; every other
+	 * ENDIF pops one level as before. */
+	int pop = !hack_endif;
+
+	hack_endif = 0;
+	if (pop)
+		pops(ctx, 1);
+
 	if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_IF) {
 		R600_ERR("if/endif unbalanced in shader\n");
 		return -1;
@@ -5760,7 +5782,7 @@ static int tgsi_endif(struct r600_shader_ctx *ctx)
 
 	if (ctx->bc->fc_stack[ctx->bc->fc_sp].mid == NULL) {
 		ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
-		ctx->bc->fc_stack[ctx->bc->fc_sp].start->pop_count = 1;
+		ctx->bc->fc_stack[ctx->bc->fc_sp].start->pop_count = pop;
 	} else {
 		ctx->bc->fc_stack[ctx->bc->fc_sp].mid[0]->cf_addr = ctx->bc->cf_last->id + 2;
 	}
@@ -5772,6 +5794,9 @@ static int tgsi_endif(struct r600_shader_ctx *ctx)
 
 static int tgsi_bgnloop(struct r600_shader_ctx *ctx)
 {
+	hack_if = 1;
+	hack_endif = 1;
+
 	/* LOOP_START_DX10 ignores the LOOP_CONFIG* registers, so it is not
 	 * limited to 4096 iterations, like the other LOOP_* instructions. */
 	r600_bytecode_add_cfinst(ctx->bc, CF_OP_LOOP_START_DX10);
@@ -5814,7 +5839,7 @@ static int tgsi_endloop(struct r600_shader_ctx *ctx)
 
 static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx)
 {
-	unsigned int fscp;
+/*	unsigned int fscp;
 
 	for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--)
 	{
@@ -5827,9 +5852,31 @@ static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx)
 		return -EINVAL;
 	}
 
-	r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->op);
 
-	fc_set_mid(ctx, fscp);
+	if (ctx->bc->chip_class == CAYMAN) {
+		struct r600_bytecode_alu alu = {};
+		int r;
+
+		alu.op = ALU_OP2_PRED_SETE;
+		alu.src[0].sel = V_SQ_ALU_SRC_0;
+		alu.src[1].sel = V_SQ_ALU_SRC_1;
+
+		if (ctx->inst_info->op == CF_OP_LOOP_BREAK)
+			alu.omod = SQ_ALU_EXECUTE_MASK_OP_BREAK;
+		else
+			alu.omod = SQ_ALU_EXECUTE_MASK_OP_CONTINUE;
+
+		alu.execute_mask = 1;
+		alu.last = 1;
+
+		r = r600_bytecode_add_alu(ctx->bc, &alu);
+		if (r)
+			return r;
+	} else {
+		r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->op);
+	}
+
+	fc_set_mid(ctx, fscp);*/
 
 	return 0;
 }
diff --git a/src/gallium/drivers/r600/r600d.h b/src/gallium/drivers/r600/r600d.h
index 9b31383..679dd81 100644
--- a/src/gallium/drivers/r600/r600d.h
+++ b/src/gallium/drivers/r600/r600d.h
@@ -3698,4 +3698,9 @@
 #define DMA_PACKET_CONSTANT_FILL	0xd /* 7xx only */
 #define DMA_PACKET_NOP			0xf
 
+#define SQ_ALU_EXECUTE_MASK_OP_DEACTIVATE    0x0 /* values stored in alu.omod by emit_logic_pred() */
+#define SQ_ALU_EXECUTE_MASK_OP_BREAK         0x1
+#define SQ_ALU_EXECUTE_MASK_OP_CONTINUE      0x2
+#define SQ_ALU_EXECUTE_MASK_OP_KILL          0x3
+
 #endif
-- 
1.8.2.1

