---
 lib/Target/AMDGPU/AMDGPUIntrinsics.td              |   2 +-
 .../AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp      |  26 +++-
 lib/Target/AMDGPU/R600ISelLowering.cpp             |  57 +++++++-
 lib/Target/AMDGPU/R600Instructions.td              |   4 +-
 lib/Target/AMDGPU/R600RegisterInfo.td              | 149 +++++++++++++++++++--
 lib/Target/AMDGPU/SIInstructions.td                |   6 +-
 6 files changed, 223 insertions(+), 21 deletions(-)
diff --git a/lib/Target/AMDGPU/AMDGPUIntrinsics.td b/lib/Target/AMDGPU/AMDGPUIntrinsics.td
index 2ba2d4b..3bb2eee 100644
--- a/lib/Target/AMDGPU/AMDGPUIntrinsics.td
+++ b/lib/Target/AMDGPU/AMDGPUIntrinsics.td
@@ -13,7 +13,7 @@ let TargetPrefix = "AMDGPU", isTarget = 1 in {
-  def int_AMDGPU_load_const : Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
+  def int_AMDGPU_load_const : Intrinsic<[llvm_float_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
   def int_AMDGPU_load_imm : Intrinsic<[llvm_v4f32_ty], [llvm_i32_ty], [IntrNoMem]>;
   def int_AMDGPU_reserve_reg : Intrinsic<[], [llvm_i32_ty], [IntrNoMem]>;
   def int_AMDGPU_store_output : Intrinsic<[], [llvm_float_ty, llvm_i32_ty], []>;
diff --git a/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp b/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp
index 018234a..94f169d 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp
+++ b/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp
@@ -307,11 +307,29 @@ void R600MCCodeEmitter::EmitSrcISA(const MCInst &MI, unsigned OpIdx,
   // value of the source select is defined in the r600isa docs.
   if (MO.isReg()) {
     unsigned Reg = MO.getReg();
-    if (AMDGPUMCRegisterClasses[AMDGPU::R600_CReg32RegClassID].contains(Reg)) {
-      EmitByte(1, OS);
-    } else {
-      EmitByte(0, OS);
+    unsigned EmittedBank = 0;
+    unsigned BankClassId[16] = {
+      AMDGPU::R600_KC0_CReg32RegClassID,
+      AMDGPU::R600_KC1_CReg32RegClassID,
+      AMDGPU::R600_KC2_CReg32RegClassID,
+      AMDGPU::R600_KC3_CReg32RegClassID,
+      AMDGPU::R600_KC4_CReg32RegClassID,
+      AMDGPU::R600_KC5_CReg32RegClassID,
+      AMDGPU::R600_KC6_CReg32RegClassID,
+      AMDGPU::R600_KC7_CReg32RegClassID,
+      AMDGPU::R600_KC8_CReg32RegClassID,
+      AMDGPU::R600_KC9_CReg32RegClassID,
+      AMDGPU::R600_KC10_CReg32RegClassID,
+      AMDGPU::R600_KC11_CReg32RegClassID,
+      AMDGPU::R600_KC12_CReg32RegClassID,
+      AMDGPU::R600_KC13_CReg32RegClassID,
+      AMDGPU::R600_KC14_CReg32RegClassID,
+      AMDGPU::R600_KC15_CReg32RegClassID};
+    for (unsigned i = 0; i < 16; i++) {
+      if (AMDGPUMCRegisterClasses[BankClassId[i]].contains(Reg))
+        EmittedBank = i + 1;
     }
+    EmitByte(EmittedBank, OS);
 
     if (Reg == AMDGPU::ALU_LITERAL_X) {
       unsigned ImmOpIndex = MI.getNumOperands() - 1;
diff --git a/lib/Target/AMDGPU/R600ISelLowering.cpp b/lib/Target/AMDGPU/R600ISelLowering.cpp
index 3a4283c..16d2280 100644
--- a/lib/Target/AMDGPU/R600ISelLowering.cpp
+++ b/lib/Target/AMDGPU/R600ISelLowering.cpp
@@ -161,7 +161,62 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
   case AMDGPU::R600_LOAD_CONST: {
     int64_t RegIndex = MI->getOperand(1).getImm();
-    unsigned ConstantReg = AMDGPU::R600_CReg32RegClass.getRegister(RegIndex);
+    int64_t BankIndex = MI->getOperand(2).getImm();
+    unsigned ConstantReg;
+    switch (BankIndex) {
+    case 0:
+      ConstantReg = AMDGPU::R600_KC0_CReg32RegClass.getRegister(RegIndex);
+      break;
+    case 1:
+      ConstantReg = AMDGPU::R600_KC1_CReg32RegClass.getRegister(RegIndex);
+      break;
+    case 2:
+      ConstantReg = AMDGPU::R600_KC2_CReg32RegClass.getRegister(RegIndex);
+      break;
+    case 3:
+      ConstantReg = AMDGPU::R600_KC3_CReg32RegClass.getRegister(RegIndex);
+      break;
+    case 4:
+      ConstantReg = AMDGPU::R600_KC4_CReg32RegClass.getRegister(RegIndex);
+      break;
+    case 5:
+      ConstantReg = AMDGPU::R600_KC5_CReg32RegClass.getRegister(RegIndex);
+      break;
+    case 6:
+      ConstantReg = AMDGPU::R600_KC6_CReg32RegClass.getRegister(RegIndex);
+      break;
+    case 7:
+      ConstantReg = AMDGPU::R600_KC7_CReg32RegClass.getRegister(RegIndex);
+      break;
+    case 8:
+      ConstantReg = AMDGPU::R600_KC8_CReg32RegClass.getRegister(RegIndex);
+      break;
+    case 9:
+      ConstantReg = AMDGPU::R600_KC9_CReg32RegClass.getRegister(RegIndex);
+      break;
+    case 10:
+      ConstantReg = AMDGPU::R600_KC10_CReg32RegClass.getRegister(RegIndex);
+      break;
+    case 11:
+      ConstantReg = AMDGPU::R600_KC11_CReg32RegClass.getRegister(RegIndex);
+      break;
+    case 12:
+      ConstantReg = AMDGPU::R600_KC12_CReg32RegClass.getRegister(RegIndex);
+      break;
+    case 13:
+      ConstantReg = AMDGPU::R600_KC13_CReg32RegClass.getRegister(RegIndex);
+      break;
+    case 14:
+      ConstantReg = AMDGPU::R600_KC14_CReg32RegClass.getRegister(RegIndex);
+      break;
+    case 15:
+      ConstantReg = AMDGPU::R600_KC15_CReg32RegClass.getRegister(RegIndex);
+      break;
+    default:
+      assert( 0 && "Not a valid Bank Index !");
+    }
+
     BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::COPY))
             .addOperand(MI->getOperand(0))
             .addReg(ConstantReg);
diff --git a/lib/Target/AMDGPU/R600Instructions.td b/lib/Target/AMDGPU/R600Instructions.td
index d89b03b..621ed09 100644
--- a/lib/Target/AMDGPU/R600Instructions.td
+++ b/lib/Target/AMDGPU/R600Instructions.td
@@ -1572,9 +1572,9 @@ def MASK_WRITE : AMDGPUShaderInst <
 
 def R600_LOAD_CONST : AMDGPUShaderInst <
   (outs R600_Reg32:$dst),
-  (ins i32imm:$src0),
+  (ins i32imm:$src0, i32imm:$src1),
   "R600_LOAD_CONST $dst, $src0",
-  [(set R600_Reg32:$dst, (int_AMDGPU_load_const imm:$src0))]
+  [(set R600_Reg32:$dst, (int_AMDGPU_load_const imm:$src0, imm:$src1))]
 >;
 
 def RESERVE_REG : AMDGPUShaderInst <
diff --git a/lib/Target/AMDGPU/R600RegisterInfo.td b/lib/Target/AMDGPU/R600RegisterInfo.td
index 3b21825..577938d 100644
--- a/lib/Target/AMDGPU/R600RegisterInfo.td
+++ b/lib/Target/AMDGPU/R600RegisterInfo.td
@@ -28,10 +28,6 @@ foreach Index = 0-127 in {
     // 32-bit Temporary Registers
    def T#Index#_#Chan : R600RegWithChan <"T"#Index#"."#Chan, Index, Chan>;
 
-    // 32-bit Constant Registers (There are more than 128, this the number
-    // that is currently supported.
-    def C#Index#_#Chan : R600RegWithChan <"C"#Index#"."#Chan, Index, Chan>;
-
     // Indirect addressing offset registers
     def Addr#Index#_#Chan : R600RegWithChan <"T("#Index#" + AR.x)."#Chan,
                                              Index, Chan>;
@@ -45,6 +41,16 @@ foreach Index = 0-127 in {
                                    Index>;
 }
 
+foreach Index = 0 - 255 in {
+  foreach Bank = 0 - 15 in {
+    foreach Chan = [ "X", "Y", "Z", "W" ] in {
+      // 32-bit Constant Registers (There are more than 128, this the number
+      // that is currently supported.
+      def KC#Bank#_C#Index#_#Chan : R600RegWithChan <"KC"#Bank#"_C"#Index#"."#Chan, Index, Chan>;
+    }
+  }
+}
+
 // Array Base Register holding input in FS
 foreach Index = 448-464 in {
   def ArrayBase#Index : R600Reg<"ARRAY_BASE", Index>;
@@ -80,12 +86,135 @@ def R600_Addr : RegisterClass <"AMDGPU", [i32], 127,
 } // End isAllocatable = 0
 
-def R600_CReg32 : RegisterClass <"AMDGPU", [f32, i32], 32,
-                          (add (interleave
-    (interleave (sequence "C%u_X", 0, 127),
-                (sequence "C%u_Z", 0, 127)),
-    (interleave (sequence "C%u_Y", 0, 127),
-                (sequence "C%u_W", 0, 127))))>;
+def R600_KC0_CReg32 : RegisterClass <"AMDGPU", [f32, i32], 32,
+                          (add (interleave
+    (interleave (sequence "KC0_C%u_X", 0, 255),
+                (sequence "KC0_C%u_Z", 0, 255)),
+    (interleave (sequence "KC0_C%u_Y", 0, 255),
+                (sequence "KC0_C%u_W", 0, 255))))>;
+
+def R600_KC1_CReg32 : RegisterClass <"AMDGPU", [f32, i32], 32,
+                          (add (interleave
+    (interleave (sequence "KC1_C%u_X", 0, 255),
+                (sequence "KC1_C%u_Z", 0, 255)),
+    (interleave (sequence "KC1_C%u_Y", 0, 255),
+                (sequence "KC1_C%u_W", 0, 255))))>;
+
+def R600_KC2_CReg32 : RegisterClass <"AMDGPU", [f32, i32], 32,
+                          (add (interleave
+    (interleave (sequence "KC2_C%u_X", 0, 255),
+                (sequence "KC2_C%u_Z", 0, 255)),
+    (interleave (sequence "KC2_C%u_Y", 0, 255),
+                (sequence "KC2_C%u_W", 0, 255))))>;
+
+def R600_KC3_CReg32 : RegisterClass <"AMDGPU", [f32, i32], 32,
+                          (add (interleave
+    (interleave (sequence "KC3_C%u_X", 0, 255),
+                (sequence "KC3_C%u_Z", 0, 255)),
+    (interleave (sequence "KC3_C%u_Y", 0, 255),
+                (sequence "KC3_C%u_W", 0, 255))))>;
+
+def R600_KC4_CReg32 : RegisterClass <"AMDGPU", [f32, i32], 32,
+                          (add (interleave
+    (interleave (sequence "KC4_C%u_X", 0, 255),
+                (sequence "KC4_C%u_Z", 0, 255)),
+    (interleave (sequence "KC4_C%u_Y", 0, 255),
+                (sequence "KC4_C%u_W", 0, 255))))>;
+
+def R600_KC5_CReg32 : RegisterClass <"AMDGPU", [f32, i32], 32,
+                          (add (interleave
+    (interleave (sequence "KC5_C%u_X", 0, 255),
+                (sequence "KC5_C%u_Z", 0, 255)),
+    (interleave (sequence "KC5_C%u_Y", 0, 255),
+                (sequence "KC5_C%u_W", 0, 255))))>;
+
+def R600_KC6_CReg32 : RegisterClass <"AMDGPU", [f32, i32], 32,
+                          (add (interleave
+    (interleave (sequence "KC6_C%u_X", 0, 255),
+                (sequence "KC6_C%u_Z", 0, 255)),
+    (interleave (sequence "KC6_C%u_Y", 0, 255),
+                (sequence "KC6_C%u_W", 0, 255))))>;
+
+def R600_KC7_CReg32 : RegisterClass <"AMDGPU", [f32, i32], 32,
+                          (add (interleave
+    (interleave (sequence "KC7_C%u_X", 0, 255),
+                (sequence "KC7_C%u_Z", 0, 255)),
+    (interleave (sequence "KC7_C%u_Y", 0, 255),
+                (sequence "KC7_C%u_W", 0, 255))))>;
+
+def R600_KC8_CReg32 : RegisterClass <"AMDGPU", [f32, i32], 32,
+                          (add (interleave
+    (interleave (sequence "KC8_C%u_X", 0, 255),
+                (sequence "KC8_C%u_Z", 0, 255)),
+    (interleave (sequence "KC8_C%u_Y", 0, 255),
+                (sequence "KC8_C%u_W", 0, 255))))>;
+
+def R600_KC9_CReg32 : RegisterClass <"AMDGPU", [f32, i32], 32,
+                          (add (interleave
+    (interleave (sequence "KC9_C%u_X", 0, 255),
+                (sequence "KC9_C%u_Z", 0, 255)),
+    (interleave (sequence "KC9_C%u_Y", 0, 255),
+                (sequence "KC9_C%u_W", 0, 255))))>;
+
+def R600_KC10_CReg32 : RegisterClass <"AMDGPU", [f32, i32], 32,
+                          (add (interleave
+    (interleave (sequence "KC10_C%u_X", 0, 255),
+                (sequence "KC10_C%u_Z", 0, 255)),
+    (interleave (sequence "KC10_C%u_Y", 0, 255),
+                (sequence "KC10_C%u_W", 0, 255))))>;
+
+def R600_KC11_CReg32 : RegisterClass <"AMDGPU", [f32, i32], 32,
+                          (add (interleave
+    (interleave (sequence "KC11_C%u_X", 0, 255),
+                (sequence "KC11_C%u_Z", 0, 255)),
+    (interleave (sequence "KC11_C%u_Y", 0, 255),
+                (sequence "KC11_C%u_W", 0, 255))))>;
+
+def R600_KC12_CReg32 : RegisterClass <"AMDGPU", [f32, i32], 32,
+                          (add (interleave
+    (interleave (sequence "KC12_C%u_X", 0, 255),
+                (sequence "KC12_C%u_Z", 0, 255)),
+    (interleave (sequence "KC12_C%u_Y", 0, 255),
+                (sequence "KC12_C%u_W", 0, 255))))>;
+
+def R600_KC13_CReg32 : RegisterClass <"AMDGPU", [f32, i32], 32,
+                          (add (interleave
+    (interleave (sequence "KC13_C%u_X", 0, 255),
+                (sequence "KC13_C%u_Z", 0, 255)),
+    (interleave (sequence "KC13_C%u_Y", 0, 255),
+                (sequence "KC13_C%u_W", 0, 255))))>;
+
+def R600_KC14_CReg32 : RegisterClass <"AMDGPU", [f32, i32], 32,
+                          (add (interleave
+    (interleave (sequence "KC14_C%u_X", 0, 255),
+                (sequence "KC14_C%u_Z", 0, 255)),
+    (interleave (sequence "KC14_C%u_Y", 0, 255),
+                (sequence "KC14_C%u_W", 0, 255))))>;
+
+def R600_KC15_CReg32 : RegisterClass <"AMDGPU", [f32, i32], 32,
+                          (add (interleave
+    (interleave (sequence "KC15_C%u_X", 0, 255),
+                (sequence "KC15_C%u_Z", 0, 255)),
+    (interleave (sequence "KC15_C%u_Y", 0, 255),
+                (sequence "KC15_C%u_W", 0, 255))))>;
+
+def R600_CReg32 : RegisterClass <"AMDGPU", [f32, i32], 32, (add
+    R600_KC0_CReg32,
+    R600_KC1_CReg32,
+    R600_KC2_CReg32,
+    R600_KC3_CReg32,
+    R600_KC4_CReg32,
+    R600_KC5_CReg32,
+    R600_KC6_CReg32,
+    R600_KC7_CReg32,
+    R600_KC8_CReg32,
+    R600_KC9_CReg32,
+    R600_KC10_CReg32,
+    R600_KC11_CReg32,
+    R600_KC12_CReg32,
+    R600_KC13_CReg32,
+    R600_KC14_CReg32,
+    R600_KC15_CReg32)>;
 
 def R600_TReg32_X : RegisterClass <"AMDGPU", [f32, i32], 32,
                                    (add (sequence "T%u_X", 0, 127))>;
diff --git a/lib/Target/AMDGPU/SIInstructions.td b/lib/Target/AMDGPU/SIInstructions.td
index e9bbe23..bcf0635 100644
--- a/lib/Target/AMDGPU/SIInstructions.td
+++ b/lib/Target/AMDGPU/SIInstructions.td
@@ -1050,9 +1050,9 @@ def SET_M0 : InstSI <
 
 def LOAD_CONST : AMDGPUShaderInst <
   (outs GPRF32:$dst),
-  (ins i32imm:$src),
-  "LOAD_CONST $dst, $src",
-  [(set GPRF32:$dst, (int_AMDGPU_load_const imm:$src))]
+  (ins i32imm:$src0),
+  "LOAD_CONST $dst, $src0",
+  [(set GPRF32:$dst, (int_AMDGPU_load_const imm:$src0, imm))]
 >;
 
 let usesCustomInserter = 1 in {
-- 
1.8.0.2

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev