Réf.: [PATCH] R600: Enable folding of inline literals into REG_SEQUENCE instructions

Vincent Lejeune vljn at ovi.com
Thu Aug 1 05:45:27 PDT 2013






------------------------------
Le jeu. 1 août 2013 04:45 HAEC, Tom Stellard a écrit :

>From: Tom Stellard <thomas.stellard at amd.com>
>
>---
> lib/Target/R600/AMDGPUISelDAGToDAG.cpp          | 37 +++++++++++++------------
> lib/Target/R600/R600OptimizeVectorRegisters.cpp |  3 ++
> test/CodeGen/R600/literals.ll                   | 13 +++++++++
> 3 files changed, 36 insertions(+), 17 deletions(-)
>
>diff --git a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
>index 38a5f24..be21d77 100644
>--- a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
>+++ b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
>@@ -371,29 +371,32 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
>             continue;
>           }
>       } else {
>-        if (!TII->isALUInstr(Use->getMachineOpcode()) ||
>-            (TII->get(Use->getMachineOpcode()).TSFlags &
>-            R600_InstFlag::VECTOR)) {
>-          continue;
>-        }
>-
>-        int ImmIdx = TII->getOperandIdx(Use->getMachineOpcode(),
>-                                        AMDGPU::OpName::literal);
>-        if (ImmIdx == -1) {
>-          continue;
>-        }

Braces are not needed around a single-line statement.

>-
>-        if (TII->getOperandIdx(Use->getMachineOpcode(),
>-                               AMDGPU::OpName::dst) != -1) {
>-          // subtract one from ImmIdx, because the DST operand is usually index
>-          // 0 for MachineInstrs, but we have no DST in the Ops vector.
>-          ImmIdx--;
>+        switch(Use->getMachineOpcode()) {
>+        case AMDGPU::REG_SEQUENCE: break;
>+        default:
>+          if (!TII->isALUInstr(Use->getMachineOpcode()) ||
>+              (TII->get(Use->getMachineOpcode()).TSFlags &
>+               R600_InstFlag::VECTOR)) {
>+            continue;
>+          }
>         }
> 
>         // Check that we aren't already using an immediate.
>         // XXX: It's possible for an instruction to have more than one
>         // immediate operand, but this is not supported yet.
>         if (ImmReg == AMDGPU::ALU_LITERAL_X) {
>+          int ImmIdx = TII->getOperandIdx(Use->getMachineOpcode(),
>+                                          AMDGPU::OpName::literal);
>+          if (ImmIdx == -1) {
>+            continue;
>+          }
>+
>+          if (TII->getOperandIdx(Use->getMachineOpcode(),
>+                                 AMDGPU::OpName::dst) != -1) {
>+            // subtract one from ImmIdx, because the DST operand is usually index
>+            // 0 for MachineInstrs, but we have no DST in the Ops vector.
>+            ImmIdx--;
>+          }
>           ConstantSDNode *C = dyn_cast<ConstantSDNode>(Use->getOperand(ImmIdx));
>           assert(C);
> 
>diff --git a/lib/Target/R600/R600OptimizeVectorRegisters.cpp b/lib/Target/R600/R600OptimizeVectorRegisters.cpp
>index c47bc39..7478635 100644
>--- a/lib/Target/R600/R600OptimizeVectorRegisters.cpp
>+++ b/lib/Target/R600/R600OptimizeVectorRegisters.cpp
>@@ -50,6 +50,9 @@ isImplicitlyDef(MachineRegisterInfo &MRI, unsigned Reg) {
>       E = MRI.def_end(); It != E; ++It) {
>     return (*It).isImplicitDef();
>   }
>+  if (MRI.isReserved(Reg)) {
>+    return false;
>+  }
>   llvm_unreachable("Reg without a def");
>   return false;
> }
>diff --git a/test/CodeGen/R600/literals.ll b/test/CodeGen/R600/literals.ll
>index 77b168e..7a113f1 100644
>--- a/test/CodeGen/R600/literals.ll
>+++ b/test/CodeGen/R600/literals.ll
>@@ -31,3 +31,16 @@ entry:
>   store float %0, float addrspace(1)* %out
>   ret void
> }
>+
>+; Make sure inline literals are folded into REG_SEQUENCE instructions.
>+; CHECK: @inline_literal_reg_sequence
>+; CHECK: MOV T[[GPR:[0-9]]].X, 0.0
>+; CHECK-NEXT: MOV T[[GPR]].Y, 0.0
>+; CHECK-NEXT: MOV T[[GPR]].Z, 0.0
>+; CHECK-NEXT: MOV * T[[GPR]].W, 0.0
>+
>+define void @inline_literal_reg_sequence(<4 x i32> addrspace(1)* %out) {
>+entry:
>+  store <4 x i32> <i32 0, i32 0, i32 0, i32 0>, <4 x i32> addrspace(1)* %out
>+  ret void

You should probably use another constant to check the folding.
I think the 0.0 and 1.0 values can be folded inside the store instruction; we don't do that at the moment, but it's a potential future optimisation.

Vincent 

>+}
>-- 
>1.7.11.4
>
>_______________________________________________
>llvm-commits mailing list
>llvm-commits at cs.uiuc.edu
>http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits





More information about the llvm-commits mailing list