Réf.: [PATCH] R600: Enable folding of inline literals into REQ_SEQUENCE instructions
Tom Stellard
tom at stellard.net
Thu Aug 1 19:14:22 PDT 2013
Hi Vincent,
Here is an updated patch with the coding style fix.
-Tom
On Thu, Aug 01, 2013 at 07:16:32AM -0700, Tom Stellard wrote:
> On Thu, Aug 01, 2013 at 05:45:27AM -0700, Vincent Lejeune wrote:
> >
> >
> >
> >
> >
> > ------------------------------
> > Le jeu. 1 août 2013 04:45 HAEC, Tom Stellard a écrit :
> >
> > >From: Tom Stellard <thomas.stellard at amd.com>
> > >
> > >---
> > > lib/Target/R600/AMDGPUISelDAGToDAG.cpp | 37 +++++++++++++------------
> > > lib/Target/R600/R600OptimizeVectorRegisters.cpp | 3 ++
> > > test/CodeGen/R600/literals.ll | 13 +++++++++
> > > 3 files changed, 36 insertions(+), 17 deletions(-)
> > >
> > >diff --git a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
> > >index 38a5f24..be21d77 100644
> > >--- a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
> > >+++ b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
> > >@@ -371,29 +371,32 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
> > > continue;
> > > }
> > > } else {
> > >- if (!TII->isALUInstr(Use->getMachineOpcode()) ||
> > >- (TII->get(Use->getMachineOpcode()).TSFlags &
> > >- R600_InstFlag::VECTOR)) {
> > >- continue;
> > >- }
> > >-
> > >- int ImmIdx = TII->getOperandIdx(Use->getMachineOpcode(),
> > >- AMDGPU::OpName::literal);
> > >- if (ImmIdx == -1) {
> > >- continue;
> > >- }
> >
> > Parenthesis are not needed for single line statement.
> >
> > >-
> > >- if (TII->getOperandIdx(Use->getMachineOpcode(),
> > >- AMDGPU::OpName::dst) != -1) {
> > >- // subtract one from ImmIdx, because the DST operand is usually index
> > >- // 0 for MachineInstrs, but we have no DST in the Ops vector.
> > >- ImmIdx--;
> > >+ switch(Use->getMachineOpcode()) {
> > >+ case AMDGPU::REG_SEQUENCE: break;
> > >+ default:
> > >+ if (!TII->isALUInstr(Use->getMachineOpcode()) ||
> > >+ (TII->get(Use->getMachineOpcode()).TSFlags &
> > >+ R600_InstFlag::VECTOR)) {
> > >+ continue;
> > >+ }
> > > }
> > >
> > > // Check that we aren't already using an immediate.
> > > // XXX: It's possible for an instruction to have more than one
> > > // immediate operand, but this is not supported yet.
> > > if (ImmReg == AMDGPU::ALU_LITERAL_X) {
> > >+ int ImmIdx = TII->getOperandIdx(Use->getMachineOpcode(),
> > >+ AMDGPU::OpName::literal);
> > >+ if (ImmIdx == -1) {
> > >+ continue;
> > >+ }
> > >+
> > >+ if (TII->getOperandIdx(Use->getMachineOpcode(),
> > >+ AMDGPU::OpName::dst) != -1) {
> > >+ // subtract one from ImmIdx, because the DST operand is usually index
> > >+ // 0 for MachineInstrs, but we have no DST in the Ops vector.
> > >+ ImmIdx--;
> > >+ }
> > > ConstantSDNode *C = dyn_cast<ConstantSDNode>(Use->getOperand(ImmIdx));
> > > assert(C);
> > >
> > >diff --git a/lib/Target/R600/R600OptimizeVectorRegisters.cpp b/lib/Target/R600/R600OptimizeVectorRegisters.cpp
> > >index c47bc39..7478635 100644
> > >--- a/lib/Target/R600/R600OptimizeVectorRegisters.cpp
> > >+++ b/lib/Target/R600/R600OptimizeVectorRegisters.cpp
> > >@@ -50,6 +50,9 @@ isImplicitlyDef(MachineRegisterInfo &MRI, unsigned Reg) {
> > > E = MRI.def_end(); It != E; ++It) {
> > > return (*It).isImplicitDef();
> > > }
> > >+ if (MRI.isReserved(Reg)) {
> > >+ return false;
> > >+ }
> > > llvm_unreachable("Reg without a def");
> > > return false;
> > > }
> > >diff --git a/test/CodeGen/R600/literals.ll b/test/CodeGen/R600/literals.ll
> > >index 77b168e..7a113f1 100644
> > >--- a/test/CodeGen/R600/literals.ll
> > >+++ b/test/CodeGen/R600/literals.ll
> > >@@ -31,3 +31,16 @@ entry:
> > > store float %0, float addrspace(1)* %out
> > > ret void
> > > }
> > >+
> > >+; Make sure inline literals are folded into REG_SEQUENCE instructions.
> > >+; CHECK: @inline_literal_reg_sequence
> > >+; CHECK: MOV T[[GPR:[0-9]]].X, 0.0
> > >+; CHECK-NEXT: MOV T[[GPR]].Y, 0.0
> > >+; CHECK-NEXT: MOV T[[GPR]].Z, 0.0
> > >+; CHECK-NEXT: MOV * T[[GPR]].W, 0.0
> > >+
> > >+define void @inline_literal_reg_sequence(<4 x i32> addrspace(1)* %out) {
> > >+entry:
> > >+ store <4 x i32> <i32 0, i32 0, i32 0, i32 0>, <4 x i32> addrspace(1)* %out
> > >+ ret void
> >
> > You should probably use another constant to check the folding.
> > I think 0.0 and 1.0 value can be folded inside the store instruction ;we don't so that at the moment but it's a potential future optimisation.
> >
>
> I got the terminology wrong in the commit message. This patch adds
> support for inline constants (e.g. -1, 1, 0, 0.5) and not literal constants,
> so I have to use one of the constants listed above. It shouldn't be a
> problem though, because you can't fold constants into global stores.
>
> -Tom
>
> > Vincent
> >
> > >+}
> > >--
> > >1.7.11.4
> > >
> > >_______________________________________________
> > >llvm-commits mailing list
> > >llvm-commits at cs.uiuc.edu
> > >http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
> >
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
-------------- next part --------------
>From af6d2d5ce734b41555c55a8c364b87ec03a70b60 Mon Sep 17 00:00:00 2001
From: Tom Stellard <thomas.stellard at amd.com>
Date: Mon, 29 Jul 2013 09:26:40 -0700
Subject: [PATCH] R600: Enable folding of inline literals into REQ_SEQUENCE
instructions
---
lib/Target/R600/AMDGPUISelDAGToDAG.cpp | 36 +++++++++++++------------
lib/Target/R600/R600OptimizeVectorRegisters.cpp | 3 +++
test/CodeGen/R600/literals.ll | 13 +++++++++
3 files changed, 35 insertions(+), 17 deletions(-)
diff --git a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
index 38a5f24..538bc44 100644
--- a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
+++ b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
@@ -371,29 +371,31 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
continue;
}
} else {
- if (!TII->isALUInstr(Use->getMachineOpcode()) ||
- (TII->get(Use->getMachineOpcode()).TSFlags &
- R600_InstFlag::VECTOR)) {
- continue;
- }
-
- int ImmIdx = TII->getOperandIdx(Use->getMachineOpcode(),
- AMDGPU::OpName::literal);
- if (ImmIdx == -1) {
- continue;
- }
-
- if (TII->getOperandIdx(Use->getMachineOpcode(),
- AMDGPU::OpName::dst) != -1) {
- // subtract one from ImmIdx, because the DST operand is usually index
- // 0 for MachineInstrs, but we have no DST in the Ops vector.
- ImmIdx--;
+ switch(Use->getMachineOpcode()) {
+ case AMDGPU::REG_SEQUENCE: break;
+ default:
+ if (!TII->isALUInstr(Use->getMachineOpcode()) ||
+ (TII->get(Use->getMachineOpcode()).TSFlags &
+ R600_InstFlag::VECTOR)) {
+ continue;
+ }
}
// Check that we aren't already using an immediate.
// XXX: It's possible for an instruction to have more than one
// immediate operand, but this is not supported yet.
if (ImmReg == AMDGPU::ALU_LITERAL_X) {
+ int ImmIdx = TII->getOperandIdx(Use->getMachineOpcode(),
+ AMDGPU::OpName::literal);
+ if (ImmIdx == -1)
+ continue;
+
+ if (TII->getOperandIdx(Use->getMachineOpcode(),
+ AMDGPU::OpName::dst) != -1) {
+ // subtract one from ImmIdx, because the DST operand is usually index
+ // 0 for MachineInstrs, but we have no DST in the Ops vector.
+ ImmIdx--;
+ }
ConstantSDNode *C = dyn_cast<ConstantSDNode>(Use->getOperand(ImmIdx));
assert(C);
diff --git a/lib/Target/R600/R600OptimizeVectorRegisters.cpp b/lib/Target/R600/R600OptimizeVectorRegisters.cpp
index c47bc39..17dc0c5 100644
--- a/lib/Target/R600/R600OptimizeVectorRegisters.cpp
+++ b/lib/Target/R600/R600OptimizeVectorRegisters.cpp
@@ -50,6 +50,9 @@ isImplicitlyDef(MachineRegisterInfo &MRI, unsigned Reg) {
E = MRI.def_end(); It != E; ++It) {
return (*It).isImplicitDef();
}
+ if (MRI.isReserved(Reg))
+ return false;
+
llvm_unreachable("Reg without a def");
return false;
}
diff --git a/test/CodeGen/R600/literals.ll b/test/CodeGen/R600/literals.ll
index 77b168e..7a113f1 100644
--- a/test/CodeGen/R600/literals.ll
+++ b/test/CodeGen/R600/literals.ll
@@ -31,3 +31,16 @@ entry:
store float %0, float addrspace(1)* %out
ret void
}
+
+; Make sure inline literals are folded into REG_SEQUENCE instructions.
+; CHECK: @inline_literal_reg_sequence
+; CHECK: MOV T[[GPR:[0-9]]].X, 0.0
+; CHECK-NEXT: MOV T[[GPR]].Y, 0.0
+; CHECK-NEXT: MOV T[[GPR]].Z, 0.0
+; CHECK-NEXT: MOV * T[[GPR]].W, 0.0
+
+define void @inline_literal_reg_sequence(<4 x i32> addrspace(1)* %out) {
+entry:
+ store <4 x i32> <i32 0, i32 0, i32 0, i32 0>, <4 x i32> addrspace(1)* %out
+ ret void
+}
--
1.7.11.4
More information about the llvm-commits
mailing list