[llvm] aeb63d4 - [RISCV] Teach RISCVTargetLowering::shouldSinkOperands to sink splats for and/or/xor.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 21 10:07:58 PDT 2021
Author: Craig Topper
Date: 2021-09-21T10:07:29-07:00
New Revision: aeb63d464f30db5e32da1449a650eec8c85d95bd
URL: https://github.com/llvm/llvm-project/commit/aeb63d464f30db5e32da1449a650eec8c85d95bd
DIFF: https://github.com/llvm/llvm-project/commit/aeb63d464f30db5e32da1449a650eec8c85d95bd.diff
LOG: [RISCV] Teach RISCVTargetLowering::shouldSinkOperands to sink splats for and/or/xor.
This requires a minor change to CodeGenPrepare so that when
sinkAndCmp0Expression fails to make a change, optimizeInst falls
through and shouldSinkOperands still gets a chance to handle the And.
Reviewed By: frasercrmck
Differential Revision: https://reviews.llvm.org/D110106
Added:
Modified:
llvm/lib/CodeGen/CodeGenPrepare.cpp
llvm/lib/Target/RISCV/RISCVISelLowering.cpp
llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 365e25263ce80..dc772e86ed7bb 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -7859,8 +7859,9 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, bool &ModifiedDT) {
BinaryOperator *BinOp = dyn_cast<BinaryOperator>(I);
- if (BinOp && (BinOp->getOpcode() == Instruction::And) && EnableAndCmpSinking)
- return sinkAndCmp0Expression(BinOp, *TLI, InsertedInsts);
+ if (BinOp && BinOp->getOpcode() == Instruction::And && EnableAndCmpSinking &&
+ sinkAndCmp0Expression(BinOp, *TLI, InsertedInsts))
+ return true;
// TODO: Move this into the switch on opcode - it handles shifts already.
if (BinOp && (BinOp->getOpcode() == Instruction::AShr ||
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index d0d90641e4392..a61ded49dc0ae 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1065,6 +1065,9 @@ bool RISCVTargetLowering::shouldSinkOperands(
case Instruction::Add:
case Instruction::Sub:
case Instruction::Mul:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
case Instruction::FAdd:
case Instruction::FSub:
case Instruction::FMul:
diff --git a/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll b/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll
index ed2f3cf37e542..9d926ab64eba0 100644
--- a/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll
@@ -149,17 +149,16 @@ for.cond.cleanup: ; preds = %vector.body
define void @sink_splat_and(i32* nocapture %a, i32 signext %x) {
; CHECK-LABEL: sink_splat_and:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu
-; CHECK-NEXT: vmv.v.x v25, a1
-; CHECK-NEXT: addi a1, zero, 1024
+; CHECK-NEXT: addi a2, zero, 1024
; CHECK-NEXT: .LBB4_1: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: vle32.v v26, (a0)
-; CHECK-NEXT: vand.vv v26, v26, v25
-; CHECK-NEXT: vse32.v v26, (a0)
-; CHECK-NEXT: addi a1, a1, -4
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu
+; CHECK-NEXT: vle32.v v25, (a0)
+; CHECK-NEXT: vand.vx v25, v25, a1
+; CHECK-NEXT: vse32.v v25, (a0)
+; CHECK-NEXT: addi a2, a2, -4
; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: bnez a1, .LBB4_1
+; CHECK-NEXT: bnez a2, .LBB4_1
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
@@ -186,17 +185,16 @@ for.cond.cleanup: ; preds = %vector.body
define void @sink_splat_or(i32* nocapture %a, i32 signext %x) {
; CHECK-LABEL: sink_splat_or:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu
-; CHECK-NEXT: vmv.v.x v25, a1
-; CHECK-NEXT: addi a1, zero, 1024
+; CHECK-NEXT: addi a2, zero, 1024
; CHECK-NEXT: .LBB5_1: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: vle32.v v26, (a0)
-; CHECK-NEXT: vor.vv v26, v26, v25
-; CHECK-NEXT: vse32.v v26, (a0)
-; CHECK-NEXT: addi a1, a1, -4
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu
+; CHECK-NEXT: vle32.v v25, (a0)
+; CHECK-NEXT: vor.vx v25, v25, a1
+; CHECK-NEXT: vse32.v v25, (a0)
+; CHECK-NEXT: addi a2, a2, -4
; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: bnez a1, .LBB5_1
+; CHECK-NEXT: bnez a2, .LBB5_1
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
@@ -223,17 +221,16 @@ for.cond.cleanup: ; preds = %vector.body
define void @sink_splat_xor(i32* nocapture %a, i32 signext %x) {
; CHECK-LABEL: sink_splat_xor:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu
-; CHECK-NEXT: vmv.v.x v25, a1
-; CHECK-NEXT: addi a1, zero, 1024
+; CHECK-NEXT: addi a2, zero, 1024
; CHECK-NEXT: .LBB6_1: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: vle32.v v26, (a0)
-; CHECK-NEXT: vxor.vv v26, v26, v25
-; CHECK-NEXT: vse32.v v26, (a0)
-; CHECK-NEXT: addi a1, a1, -4
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu
+; CHECK-NEXT: vle32.v v25, (a0)
+; CHECK-NEXT: vxor.vx v25, v25, a1
+; CHECK-NEXT: vse32.v v25, (a0)
+; CHECK-NEXT: addi a2, a2, -4
; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: bnez a1, .LBB6_1
+; CHECK-NEXT: bnez a2, .LBB6_1
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
@@ -632,34 +629,33 @@ for.body: ; preds = %for.body.preheader,
define void @sink_splat_and_scalable(i32* nocapture %a, i32 signext %x) {
; CHECK-LABEL: sink_splat_and_scalable:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: csrr a5, vlenb
-; CHECK-NEXT: srli a7, a5, 1
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: srli a7, a2, 1
; CHECK-NEXT: addi a3, zero, 1024
; CHECK-NEXT: bgeu a3, a7, .LBB11_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a3, zero
+; CHECK-NEXT: mv t0, zero
; CHECK-NEXT: j .LBB11_5
; CHECK-NEXT: .LBB11_2: # %vector.ph
-; CHECK-NEXT: mv a4, zero
+; CHECK-NEXT: mv a5, zero
; CHECK-NEXT: remu a6, a3, a7
-; CHECK-NEXT: sub a3, a3, a6
-; CHECK-NEXT: vsetvli a2, zero, e32, m2, ta, mu
-; CHECK-NEXT: vmv.v.x v26, a1
-; CHECK-NEXT: slli a5, a5, 1
+; CHECK-NEXT: sub t0, a3, a6
+; CHECK-NEXT: slli a4, a2, 1
; CHECK-NEXT: mv a2, a0
; CHECK-NEXT: .LBB11_3: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: vl2re32.v v28, (a2)
-; CHECK-NEXT: vand.vv v28, v28, v26
-; CHECK-NEXT: vs2r.v v28, (a2)
-; CHECK-NEXT: add a4, a4, a7
-; CHECK-NEXT: add a2, a2, a5
-; CHECK-NEXT: bne a4, a3, .LBB11_3
+; CHECK-NEXT: vl2re32.v v26, (a2)
+; CHECK-NEXT: vsetvli a3, zero, e32, m2, ta, mu
+; CHECK-NEXT: vand.vx v26, v26, a1
+; CHECK-NEXT: vs2r.v v26, (a2)
+; CHECK-NEXT: add a5, a5, a7
+; CHECK-NEXT: add a2, a2, a4
+; CHECK-NEXT: bne a5, t0, .LBB11_3
; CHECK-NEXT: # %bb.4: # %middle.block
; CHECK-NEXT: beqz a6, .LBB11_7
; CHECK-NEXT: .LBB11_5: # %for.body.preheader
-; CHECK-NEXT: addi a2, a3, -1024
-; CHECK-NEXT: slli a3, a3, 2
+; CHECK-NEXT: addi a2, t0, -1024
+; CHECK-NEXT: slli a3, t0, 2
; CHECK-NEXT: add a0, a0, a3
; CHECK-NEXT: .LBB11_6: # %for.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
@@ -726,34 +722,33 @@ for.body: ; preds = %for.body.preheader,
define void @sink_splat_or_scalable(i32* nocapture %a, i32 signext %x) {
; CHECK-LABEL: sink_splat_or_scalable:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: csrr a5, vlenb
-; CHECK-NEXT: srli a7, a5, 1
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: srli a7, a2, 1
; CHECK-NEXT: addi a3, zero, 1024
; CHECK-NEXT: bgeu a3, a7, .LBB12_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a3, zero
+; CHECK-NEXT: mv t0, zero
; CHECK-NEXT: j .LBB12_5
; CHECK-NEXT: .LBB12_2: # %vector.ph
-; CHECK-NEXT: mv a4, zero
+; CHECK-NEXT: mv a5, zero
; CHECK-NEXT: remu a6, a3, a7
-; CHECK-NEXT: sub a3, a3, a6
-; CHECK-NEXT: vsetvli a2, zero, e32, m2, ta, mu
-; CHECK-NEXT: vmv.v.x v26, a1
-; CHECK-NEXT: slli a5, a5, 1
+; CHECK-NEXT: sub t0, a3, a6
+; CHECK-NEXT: slli a4, a2, 1
; CHECK-NEXT: mv a2, a0
; CHECK-NEXT: .LBB12_3: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: vl2re32.v v28, (a2)
-; CHECK-NEXT: vor.vv v28, v28, v26
-; CHECK-NEXT: vs2r.v v28, (a2)
-; CHECK-NEXT: add a4, a4, a7
-; CHECK-NEXT: add a2, a2, a5
-; CHECK-NEXT: bne a4, a3, .LBB12_3
+; CHECK-NEXT: vl2re32.v v26, (a2)
+; CHECK-NEXT: vsetvli a3, zero, e32, m2, ta, mu
+; CHECK-NEXT: vor.vx v26, v26, a1
+; CHECK-NEXT: vs2r.v v26, (a2)
+; CHECK-NEXT: add a5, a5, a7
+; CHECK-NEXT: add a2, a2, a4
+; CHECK-NEXT: bne a5, t0, .LBB12_3
; CHECK-NEXT: # %bb.4: # %middle.block
; CHECK-NEXT: beqz a6, .LBB12_7
; CHECK-NEXT: .LBB12_5: # %for.body.preheader
-; CHECK-NEXT: addi a2, a3, -1024
-; CHECK-NEXT: slli a3, a3, 2
+; CHECK-NEXT: addi a2, t0, -1024
+; CHECK-NEXT: slli a3, t0, 2
; CHECK-NEXT: add a0, a0, a3
; CHECK-NEXT: .LBB12_6: # %for.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
@@ -820,34 +815,33 @@ for.body: ; preds = %for.body.preheader,
define void @sink_splat_xor_scalable(i32* nocapture %a, i32 signext %x) {
; CHECK-LABEL: sink_splat_xor_scalable:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: csrr a5, vlenb
-; CHECK-NEXT: srli a7, a5, 1
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: srli a7, a2, 1
; CHECK-NEXT: addi a3, zero, 1024
; CHECK-NEXT: bgeu a3, a7, .LBB13_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a3, zero
+; CHECK-NEXT: mv t0, zero
; CHECK-NEXT: j .LBB13_5
; CHECK-NEXT: .LBB13_2: # %vector.ph
-; CHECK-NEXT: mv a4, zero
+; CHECK-NEXT: mv a5, zero
; CHECK-NEXT: remu a6, a3, a7
-; CHECK-NEXT: sub a3, a3, a6
-; CHECK-NEXT: vsetvli a2, zero, e32, m2, ta, mu
-; CHECK-NEXT: vmv.v.x v26, a1
-; CHECK-NEXT: slli a5, a5, 1
+; CHECK-NEXT: sub t0, a3, a6
+; CHECK-NEXT: slli a4, a2, 1
; CHECK-NEXT: mv a2, a0
; CHECK-NEXT: .LBB13_3: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: vl2re32.v v28, (a2)
-; CHECK-NEXT: vxor.vv v28, v28, v26
-; CHECK-NEXT: vs2r.v v28, (a2)
-; CHECK-NEXT: add a4, a4, a7
-; CHECK-NEXT: add a2, a2, a5
-; CHECK-NEXT: bne a4, a3, .LBB13_3
+; CHECK-NEXT: vl2re32.v v26, (a2)
+; CHECK-NEXT: vsetvli a3, zero, e32, m2, ta, mu
+; CHECK-NEXT: vxor.vx v26, v26, a1
+; CHECK-NEXT: vs2r.v v26, (a2)
+; CHECK-NEXT: add a5, a5, a7
+; CHECK-NEXT: add a2, a2, a4
+; CHECK-NEXT: bne a5, t0, .LBB13_3
; CHECK-NEXT: # %bb.4: # %middle.block
; CHECK-NEXT: beqz a6, .LBB13_7
; CHECK-NEXT: .LBB13_5: # %for.body.preheader
-; CHECK-NEXT: addi a2, a3, -1024
-; CHECK-NEXT: slli a3, a3, 2
+; CHECK-NEXT: addi a2, t0, -1024
+; CHECK-NEXT: slli a3, t0, 2
; CHECK-NEXT: add a0, a0, a3
; CHECK-NEXT: .LBB13_6: # %for.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
More information about the llvm-commits
mailing list