[llvm] 1e02a29 - [AArch64][SVE] Use more flag-setting instructions
Cullen Rhodes via llvm-commits
llvm-commits at lists.llvm.org
Tue Oct 25 02:02:46 PDT 2022
Author: Cullen Rhodes
Date: 2022-10-25T09:02:21Z
New Revision: 1e02a29e4753ef70d7ce0ad90b7e4f29f1223006
URL: https://github.com/llvm/llvm-project/commit/1e02a29e4753ef70d7ce0ad90b7e4f29f1223006
DIFF: https://github.com/llvm/llvm-project/commit/1e02a29e4753ef70d7ce0ad90b7e4f29f1223006.diff
LOG: [AArch64][SVE] Use more flag-setting instructions
If OP in PTEST(PG, OP(PG, ...)) has a flag-setting variant, change the
opcode so the PTEST becomes redundant. This patch extends the existing
optimization in AArch64InstrInfo::optimizePTestInstr to cover all
flag-setting opcodes.
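Concretely, when the PTEST's governing predicate matches OP's, the pair
collapses into the S-suffixed (flag-setting) form. A minimal sketch of the
rewrite, simplified from the hook in the diff below and reusing its
Pred/Mask naming (hedged, not the verbatim implementation):

    // Sketch only: the real logic is in AArch64InstrInfo::optimizePTestInstr.
    // Given  ptest PG, (OP PG, ...)  where OP has a flag-setting variant:
    auto *PredMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
    if (Mask != PredMask)
      return false; // masks differ, so the flag bits could differ: keep the ptest
    NewOp = convertToFlagSettingOpc(Pred->getOpcode()); // e.g. AND_PPzPP -> ANDS_PPzPP
    OpChanged = true; // the now-redundant PTEST is erased afterwards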
Reviewed By: peterwaller-arm
Differential Revision: https://reviews.llvm.org/D136083
Added:
Modified:
llvm/lib/Target/AArch64/AArch64CondBrTuning.cpp
llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
llvm/lib/Target/AArch64/AArch64InstrInfo.h
llvm/test/CodeGen/AArch64/sve-int-pred-reduce.ll
llvm/test/CodeGen/AArch64/sve-ptest-removal-brk.ll
llvm/test/CodeGen/AArch64/sve-ptest-removal-log.ll
llvm/test/CodeGen/AArch64/sve-ptest-removal-ptrue.ll
llvm/test/CodeGen/AArch64/sve-split-int-pred-reduce.ll
llvm/test/CodeGen/AArch64/sve-vecreduce-fold.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64CondBrTuning.cpp b/llvm/lib/Target/AArch64/AArch64CondBrTuning.cpp
index a307ba14dc8c..d65929eff12f 100644
--- a/llvm/lib/Target/AArch64/AArch64CondBrTuning.cpp
+++ b/llvm/lib/Target/AArch64/AArch64CondBrTuning.cpp
@@ -61,7 +61,8 @@ class AArch64CondBrTuning : public MachineFunctionPass {
private:
MachineInstr *getOperandDef(const MachineOperand &MO);
- MachineInstr *convertToFlagSetting(MachineInstr &MI, bool IsFlagSetting);
+ MachineInstr *convertToFlagSetting(MachineInstr &MI, bool IsFlagSetting,
+ bool Is64Bit);
MachineInstr *convertToCondBr(MachineInstr &MI);
bool tryToTuneBranch(MachineInstr &MI, MachineInstr &DefMI);
};
@@ -84,7 +85,8 @@ MachineInstr *AArch64CondBrTuning::getOperandDef(const MachineOperand &MO) {
}
MachineInstr *AArch64CondBrTuning::convertToFlagSetting(MachineInstr &MI,
- bool IsFlagSetting) {
+ bool IsFlagSetting,
+ bool Is64Bit) {
// If this is already the flag setting version of the instruction (e.g., SUBS)
// just make sure the implicit-def of NZCV isn't marked dead.
if (IsFlagSetting) {
@@ -93,8 +95,7 @@ MachineInstr *AArch64CondBrTuning::convertToFlagSetting(MachineInstr &MI,
MO.setIsDead(false);
return &MI;
}
- bool Is64Bit;
- unsigned NewOpc = TII->convertToFlagSettingOpc(MI.getOpcode(), Is64Bit);
+ unsigned NewOpc = TII->convertToFlagSettingOpc(MI.getOpcode());
Register NewDestReg = MI.getOperand(0).getReg();
if (MRI->hasOneNonDBGUse(MI.getOperand(0).getReg()))
NewDestReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
@@ -198,7 +199,7 @@ bool AArch64CondBrTuning::tryToTuneBranch(MachineInstr &MI,
LLVM_DEBUG(dbgs() << " ");
LLVM_DEBUG(MI.print(dbgs()));
- NewCmp = convertToFlagSetting(DefMI, IsFlagSetting);
+ NewCmp = convertToFlagSetting(DefMI, IsFlagSetting, /*Is64Bit=*/false);
NewBr = convertToCondBr(MI);
break;
}
@@ -253,7 +254,7 @@ bool AArch64CondBrTuning::tryToTuneBranch(MachineInstr &MI,
LLVM_DEBUG(dbgs() << " ");
LLVM_DEBUG(MI.print(dbgs()));
- NewCmp = convertToFlagSetting(DefMI, IsFlagSetting);
+ NewCmp = convertToFlagSetting(DefMI, IsFlagSetting, /*Is64Bit=*/true);
NewBr = convertToCondBr(MI);
break;
}
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index f4d9186489a0..f558cf2c2cbf 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -1333,51 +1333,47 @@ bool AArch64InstrInfo::optimizePTestInstr(
// Fallthrough to simply remove the PTEST.
} else {
- switch (Pred->getOpcode()) {
+ // If OP in PTEST(PG, OP(PG, ...)) has a flag-setting variant change the
+ // opcode so the PTEST becomes redundant.
+ switch (PredOpcode) {
+ case AArch64::AND_PPzPP:
+ case AArch64::BIC_PPzPP:
+ case AArch64::EOR_PPzPP:
+ case AArch64::NAND_PPzPP:
+ case AArch64::NOR_PPzPP:
+ case AArch64::ORN_PPzPP:
+ case AArch64::ORR_PPzPP:
+ case AArch64::BRKA_PPzP:
+ case AArch64::BRKPA_PPzPP:
case AArch64::BRKB_PPzP:
- case AArch64::BRKPB_PPzPP: {
- // Op 0 is chain, 1 is the mask, 2 the previous predicate to
- // propagate, 3 the new predicate.
-
- // Check to see if our mask is the same as the brkpb's. If
- // not the resulting flag bits may be different and we
- // can't remove the ptest.
+ case AArch64::BRKPB_PPzPP:
+ case AArch64::RDFFR_PPz: {
+ // Check to see if our mask is the same. If not the resulting flag bits
+ // may be different and we can't remove the ptest.
auto *PredMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
if (Mask != PredMask)
return false;
-
- // Switch to the new opcode
- NewOp = Pred->getOpcode() == AArch64::BRKB_PPzP ? AArch64::BRKBS_PPzP
- : AArch64::BRKPBS_PPzPP;
- OpChanged = true;
break;
}
case AArch64::BRKN_PPzP: {
+ // BRKN uses an all active implicit mask to set flags unlike the other
+ // flag-setting instructions.
// PTEST(PTRUE_B(31), BRKN(PG, A, B)) -> BRKNS(PG, A, B).
if ((MaskOpcode != AArch64::PTRUE_B) ||
(Mask->getOperand(1).getImm() != 31))
return false;
-
- NewOp = AArch64::BRKNS_PPzP;
- OpChanged = true;
break;
}
- case AArch64::RDFFR_PPz: {
- // rdffr p1.b, PredMask=p0/z <--- Definition of Pred
- // ptest Mask=p0, Pred=p1.b <--- If equal masks, remove this and use
- // `rdffrs p1.b, p0/z` above.
- auto *PredMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
- if (Mask != PredMask)
- return false;
-
- NewOp = AArch64::RDFFRS_PPz;
- OpChanged = true;
+ case AArch64::PTRUE_B:
+ // PTEST(OP=PTRUE_B(A), OP) -> PTRUES_B(A)
break;
- }
default:
// Bail out if we don't recognize the input
return false;
}
+
+ NewOp = convertToFlagSettingOpc(PredOpcode);
+ OpChanged = true;
}
const TargetRegisterInfo *TRI = &getRegisterInfo();
@@ -2388,91 +2384,93 @@ bool AArch64InstrInfo::isPairableLdStInst(const MachineInstr &MI) {
}
}
-unsigned AArch64InstrInfo::convertToFlagSettingOpc(unsigned Opc,
- bool &Is64Bit) {
+unsigned AArch64InstrInfo::convertToFlagSettingOpc(unsigned Opc) {
switch (Opc) {
default:
llvm_unreachable("Opcode has no flag setting equivalent!");
// 32-bit cases:
case AArch64::ADDWri:
- Is64Bit = false;
return AArch64::ADDSWri;
case AArch64::ADDWrr:
- Is64Bit = false;
return AArch64::ADDSWrr;
case AArch64::ADDWrs:
- Is64Bit = false;
return AArch64::ADDSWrs;
case AArch64::ADDWrx:
- Is64Bit = false;
return AArch64::ADDSWrx;
case AArch64::ANDWri:
- Is64Bit = false;
return AArch64::ANDSWri;
case AArch64::ANDWrr:
- Is64Bit = false;
return AArch64::ANDSWrr;
case AArch64::ANDWrs:
- Is64Bit = false;
return AArch64::ANDSWrs;
case AArch64::BICWrr:
- Is64Bit = false;
return AArch64::BICSWrr;
case AArch64::BICWrs:
- Is64Bit = false;
return AArch64::BICSWrs;
case AArch64::SUBWri:
- Is64Bit = false;
return AArch64::SUBSWri;
case AArch64::SUBWrr:
- Is64Bit = false;
return AArch64::SUBSWrr;
case AArch64::SUBWrs:
- Is64Bit = false;
return AArch64::SUBSWrs;
case AArch64::SUBWrx:
- Is64Bit = false;
return AArch64::SUBSWrx;
// 64-bit cases:
case AArch64::ADDXri:
- Is64Bit = true;
return AArch64::ADDSXri;
case AArch64::ADDXrr:
- Is64Bit = true;
return AArch64::ADDSXrr;
case AArch64::ADDXrs:
- Is64Bit = true;
return AArch64::ADDSXrs;
case AArch64::ADDXrx:
- Is64Bit = true;
return AArch64::ADDSXrx;
case AArch64::ANDXri:
- Is64Bit = true;
return AArch64::ANDSXri;
case AArch64::ANDXrr:
- Is64Bit = true;
return AArch64::ANDSXrr;
case AArch64::ANDXrs:
- Is64Bit = true;
return AArch64::ANDSXrs;
case AArch64::BICXrr:
- Is64Bit = true;
return AArch64::BICSXrr;
case AArch64::BICXrs:
- Is64Bit = true;
return AArch64::BICSXrs;
case AArch64::SUBXri:
- Is64Bit = true;
return AArch64::SUBSXri;
case AArch64::SUBXrr:
- Is64Bit = true;
return AArch64::SUBSXrr;
case AArch64::SUBXrs:
- Is64Bit = true;
return AArch64::SUBSXrs;
case AArch64::SUBXrx:
- Is64Bit = true;
return AArch64::SUBSXrx;
+ // SVE instructions:
+ case AArch64::AND_PPzPP:
+ return AArch64::ANDS_PPzPP;
+ case AArch64::BIC_PPzPP:
+ return AArch64::BICS_PPzPP;
+ case AArch64::EOR_PPzPP:
+ return AArch64::EORS_PPzPP;
+ case AArch64::NAND_PPzPP:
+ return AArch64::NANDS_PPzPP;
+ case AArch64::NOR_PPzPP:
+ return AArch64::NORS_PPzPP;
+ case AArch64::ORN_PPzPP:
+ return AArch64::ORNS_PPzPP;
+ case AArch64::ORR_PPzPP:
+ return AArch64::ORRS_PPzPP;
+ case AArch64::BRKA_PPzP:
+ return AArch64::BRKAS_PPzP;
+ case AArch64::BRKPA_PPzPP:
+ return AArch64::BRKPAS_PPzPP;
+ case AArch64::BRKB_PPzP:
+ return AArch64::BRKBS_PPzP;
+ case AArch64::BRKPB_PPzPP:
+ return AArch64::BRKPBS_PPzPP;
+ case AArch64::BRKN_PPzP:
+ return AArch64::BRKNS_PPzP;
+ case AArch64::RDFFR_PPz:
+ return AArch64::RDFFRS_PPz;
+ case AArch64::PTRUE_B:
+ return AArch64::PTRUES_B;
}
}
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/llvm/lib/Target/AArch64/AArch64InstrInfo.h
index c7fd94d1a926..96e16b0d1ee9 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.h
@@ -125,7 +125,7 @@ class AArch64InstrInfo final : public AArch64GenInstrInfo {
/// Return the opcode that set flags when possible. The caller is
/// responsible for ensuring the opc has a flag setting equivalent.
- static unsigned convertToFlagSettingOpc(unsigned Opc, bool &Is64Bit);
+ static unsigned convertToFlagSettingOpc(unsigned Opc);
/// Return true if this is a load/store that can be potentially paired/merged.
bool isCandidateToMergeOrPair(const MachineInstr &MI) const;
diff --git a/llvm/test/CodeGen/AArch64/sve-int-pred-reduce.ll b/llvm/test/CodeGen/AArch64/sve-int-pred-reduce.ll
index fe4b6ca0f066..9cff3b3056e0 100644
--- a/llvm/test/CodeGen/AArch64/sve-int-pred-reduce.ll
+++ b/llvm/test/CodeGen/AArch64/sve-int-pred-reduce.ll
@@ -7,8 +7,7 @@ define i1 @reduce_and_nxv16i1(<vscale x 16 x i1> %vec) {
; CHECK-LABEL: reduce_and_nxv16i1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.b
-; CHECK-NEXT: not p0.b, p1/z, p0.b
-; CHECK-NEXT: ptest p1, p0.b
+; CHECK-NEXT: nots p0.b, p1/z, p0.b
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%res = call i1 @llvm.vector.reduce.and.i1.nxv16i1(<vscale x 16 x i1> %vec)
@@ -19,8 +18,7 @@ define i1 @reduce_and_nxv8i1(<vscale x 8 x i1> %vec) {
; CHECK-LABEL: reduce_and_nxv8i1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.h
-; CHECK-NEXT: not p0.b, p1/z, p0.b
-; CHECK-NEXT: ptest p1, p0.b
+; CHECK-NEXT: nots p0.b, p1/z, p0.b
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%res = call i1 @llvm.vector.reduce.and.i1.nxv8i1(<vscale x 8 x i1> %vec)
@@ -31,8 +29,7 @@ define i1 @reduce_and_nxv4i1(<vscale x 4 x i1> %vec) {
; CHECK-LABEL: reduce_and_nxv4i1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.s
-; CHECK-NEXT: not p0.b, p1/z, p0.b
-; CHECK-NEXT: ptest p1, p0.b
+; CHECK-NEXT: nots p0.b, p1/z, p0.b
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%res = call i1 @llvm.vector.reduce.and.i1.nxv4i1(<vscale x 4 x i1> %vec)
@@ -43,8 +40,7 @@ define i1 @reduce_and_nxv2i1(<vscale x 2 x i1> %vec) {
; CHECK-LABEL: reduce_and_nxv2i1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.d
-; CHECK-NEXT: not p0.b, p1/z, p0.b
-; CHECK-NEXT: ptest p1, p0.b
+; CHECK-NEXT: nots p0.b, p1/z, p0.b
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%res = call i1 @llvm.vector.reduce.and.i1.nxv2i1(<vscale x 2 x i1> %vec)
@@ -185,8 +181,7 @@ define i1 @reduce_smax_nxv16i1(<vscale x 16 x i1> %vec) {
; CHECK-LABEL: reduce_smax_nxv16i1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.b
-; CHECK-NEXT: not p0.b, p1/z, p0.b
-; CHECK-NEXT: ptest p1, p0.b
+; CHECK-NEXT: nots p0.b, p1/z, p0.b
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%res = call i1 @llvm.vector.reduce.smax.i1.nxv16i1(<vscale x 16 x i1> %vec)
@@ -197,8 +192,7 @@ define i1 @reduce_smax_nxv8i1(<vscale x 8 x i1> %vec) {
; CHECK-LABEL: reduce_smax_nxv8i1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.h
-; CHECK-NEXT: not p0.b, p1/z, p0.b
-; CHECK-NEXT: ptest p1, p0.b
+; CHECK-NEXT: nots p0.b, p1/z, p0.b
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%res = call i1 @llvm.vector.reduce.smax.i1.nxv8i1(<vscale x 8 x i1> %vec)
@@ -209,8 +203,7 @@ define i1 @reduce_smax_nxv4i1(<vscale x 4 x i1> %vec) {
; CHECK-LABEL: reduce_smax_nxv4i1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.s
-; CHECK-NEXT: not p0.b, p1/z, p0.b
-; CHECK-NEXT: ptest p1, p0.b
+; CHECK-NEXT: nots p0.b, p1/z, p0.b
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%res = call i1 @llvm.vector.reduce.smax.i1.nxv4i1(<vscale x 4 x i1> %vec)
@@ -221,8 +214,7 @@ define i1 @reduce_smax_nxv2i1(<vscale x 2 x i1> %vec) {
; CHECK-LABEL: reduce_smax_nxv2i1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.d
-; CHECK-NEXT: not p0.b, p1/z, p0.b
-; CHECK-NEXT: ptest p1, p0.b
+; CHECK-NEXT: nots p0.b, p1/z, p0.b
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%res = call i1 @llvm.vector.reduce.smax.i1.nxv2i1(<vscale x 2 x i1> %vec)
@@ -362,8 +354,7 @@ define i1 @reduce_umin_nxv16i1(<vscale x 16 x i1> %vec) {
; CHECK-LABEL: reduce_umin_nxv16i1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.b
-; CHECK-NEXT: not p0.b, p1/z, p0.b
-; CHECK-NEXT: ptest p1, p0.b
+; CHECK-NEXT: nots p0.b, p1/z, p0.b
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%res = call i1 @llvm.vector.reduce.umin.i1.nxv16i1(<vscale x 16 x i1> %vec)
@@ -374,8 +365,7 @@ define i1 @reduce_umin_nxv8i1(<vscale x 8 x i1> %vec) {
; CHECK-LABEL: reduce_umin_nxv8i1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.h
-; CHECK-NEXT: not p0.b, p1/z, p0.b
-; CHECK-NEXT: ptest p1, p0.b
+; CHECK-NEXT: nots p0.b, p1/z, p0.b
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%res = call i1 @llvm.vector.reduce.umin.i1.nxv8i1(<vscale x 8 x i1> %vec)
@@ -386,8 +376,7 @@ define i1 @reduce_umin_nxv4i1(<vscale x 4 x i1> %vec) {
; CHECK-LABEL: reduce_umin_nxv4i1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.s
-; CHECK-NEXT: not p0.b, p1/z, p0.b
-; CHECK-NEXT: ptest p1, p0.b
+; CHECK-NEXT: nots p0.b, p1/z, p0.b
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%res = call i1 @llvm.vector.reduce.umin.i1.nxv4i1(<vscale x 4 x i1> %vec)
@@ -411,8 +400,7 @@ define i1 @reduce_umin_nxv2i1(<vscale x 2 x i1> %vec) {
; CHECK-LABEL: reduce_umin_nxv2i1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.d
-; CHECK-NEXT: not p0.b, p1/z, p0.b
-; CHECK-NEXT: ptest p1, p0.b
+; CHECK-NEXT: nots p0.b, p1/z, p0.b
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%res = call i1 @llvm.vector.reduce.umin.i1.nxv2i1(<vscale x 2 x i1> %vec)
diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-brk.ll b/llvm/test/CodeGen/AArch64/sve-ptest-removal-brk.ll
index c0b238418bd0..108fb987e809 100644
--- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-brk.ll
+++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-brk.ll
@@ -6,8 +6,7 @@
define i32 @brkpa(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
; CHECK-LABEL: brkpa:
; CHECK: // %bb.0:
-; CHECK-NEXT: brkpa p1.b, p0/z, p1.b, p2.b
-; CHECK-NEXT: ptest p0, p1.b
+; CHECK-NEXT: brkpas p0.b, p0/z, p1.b, p2.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.brkpa.z.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a, <vscale x 16 x i1> %b)
@@ -31,8 +30,7 @@ define i32 @brkpb(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a, <vscale x 16 x
define i32 @brka(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a) {
; CHECK-LABEL: brka:
; CHECK: // %bb.0:
-; CHECK-NEXT: brka p1.b, p0/z, p1.b
-; CHECK-NEXT: ptest p0, p1.b
+; CHECK-NEXT: brkas p0.b, p0/z, p1.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.brka.z.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a)
@@ -53,19 +51,6 @@ define i32 @brkb(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a) {
ret i32 %conv
}
-define i32 @brkn(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
-; CHECK-LABEL: brkn:
-; CHECK: // %bb.0:
-; CHECK-NEXT: brkn p2.b, p0/z, p1.b, p2.b
-; CHECK-NEXT: ptest p0, p2.b
-; CHECK-NEXT: cset w0, ne
-; CHECK-NEXT: ret
- %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.brkn.z.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a, <vscale x 16 x i1> %b)
- %2 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %1)
- %conv = zext i1 %2 to i32
- ret i32 %conv
-}
-
define i32 @brkn_all_active(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
; CHECK-LABEL: brkn_all_active:
; CHECK: // %bb.0:
@@ -146,6 +131,19 @@ define i32 @brkn_neg(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a, <vscale x 16
ret i32 %conv
}
+define i32 @brkn_neg2(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
+; CHECK-LABEL: brkn_neg2:
+; CHECK: // %bb.0:
+; CHECK-NEXT: brkn p2.b, p0/z, p1.b, p2.b
+; CHECK-NEXT: ptest p0, p2.b
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+ %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.brkn.z.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a, <vscale x 16 x i1> %b)
+ %2 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %1)
+ %conv = zext i1 %2 to i32
+ ret i32 %conv
+}
+
declare <vscale x 16 x i1> @llvm.aarch64.sve.brkpa.z.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>)
declare <vscale x 16 x i1> @llvm.aarch64.sve.brkpb.z.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>)
declare <vscale x 16 x i1> @llvm.aarch64.sve.brka.z.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>)
diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-log.ll b/llvm/test/CodeGen/AArch64/sve-ptest-removal-log.ll
index d4f6874fc610..da38c812c2ad 100644
--- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-log.ll
+++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-log.ll
@@ -7,8 +7,7 @@
define i1 @and(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
; CHECK-LABEL: and:
; CHECK: // %bb.0:
-; CHECK-NEXT: and p1.b, p0/z, p1.b, p2.b
-; CHECK-NEXT: ptest p0, p1.b
+; CHECK-NEXT: ands p0.b, p0/z, p1.b, p2.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.and.z.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a, <vscale x 16 x i1> %b)
@@ -19,8 +18,7 @@ define i1 @and(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a, <vscale x 16 x i1>
define i1 @bic(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
; CHECK-LABEL: bic:
; CHECK: // %bb.0:
-; CHECK-NEXT: bic p1.b, p0/z, p1.b, p2.b
-; CHECK-NEXT: ptest p0, p1.b
+; CHECK-NEXT: bics p0.b, p0/z, p1.b, p2.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.bic.z.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a, <vscale x 16 x i1> %b)
@@ -31,8 +29,7 @@ define i1 @bic(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a, <vscale x 16 x i1>
define i1 @eor(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
; CHECK-LABEL: eor:
; CHECK: // %bb.0:
-; CHECK-NEXT: eor p1.b, p0/z, p1.b, p2.b
-; CHECK-NEXT: ptest p0, p1.b
+; CHECK-NEXT: eors p0.b, p0/z, p1.b, p2.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.eor.z.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a, <vscale x 16 x i1> %b)
@@ -43,8 +40,7 @@ define i1 @eor(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a, <vscale x 16 x i1>
define i1 @nand(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
; CHECK-LABEL: nand:
; CHECK: // %bb.0:
-; CHECK-NEXT: nand p1.b, p0/z, p1.b, p2.b
-; CHECK-NEXT: ptest p0, p1.b
+; CHECK-NEXT: nands p0.b, p0/z, p1.b, p2.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.nand.z.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a, <vscale x 16 x i1> %b)
@@ -55,8 +51,7 @@ define i1 @nand(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a, <vscale x 16 x i1
define i1 @nor(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
; CHECK-LABEL: nor:
; CHECK: // %bb.0:
-; CHECK-NEXT: nor p1.b, p0/z, p1.b, p2.b
-; CHECK-NEXT: ptest p0, p1.b
+; CHECK-NEXT: nors p0.b, p0/z, p1.b, p2.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.nor.z.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a, <vscale x 16 x i1> %b)
@@ -67,8 +62,7 @@ define i1 @nor(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a, <vscale x 16 x i1>
define i1 @orn(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
; CHECK-LABEL: orn:
; CHECK: // %bb.0:
-; CHECK-NEXT: orn p1.b, p0/z, p1.b, p2.b
-; CHECK-NEXT: ptest p0, p1.b
+; CHECK-NEXT: orns p0.b, p0/z, p1.b, p2.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.orn.z.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a, <vscale x 16 x i1> %b)
@@ -79,8 +73,7 @@ define i1 @orn(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a, <vscale x 16 x i1>
define i1 @orr(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
; CHECK-LABEL: orr:
; CHECK: // %bb.0:
-; CHECK-NEXT: orr p1.b, p0/z, p1.b, p2.b
-; CHECK-NEXT: ptest p0, p1.b
+; CHECK-NEXT: orrs p0.b, p0/z, p1.b, p2.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.orr.z.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a, <vscale x 16 x i1> %b)
diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-ptrue.ll b/llvm/test/CodeGen/AArch64/sve-ptest-removal-ptrue.ll
index f091555c0e73..be879fd61bcf 100644
--- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-ptrue.ll
+++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-ptrue.ll
@@ -4,8 +4,7 @@
define i1 @ptrue() {
; CHECK-LABEL: ptrue:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.b, pow2
-; CHECK-NEXT: ptest p0, p0.b
+; CHECK-NEXT: ptrues p0.b, pow2
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 0)
diff --git a/llvm/test/CodeGen/AArch64/sve-split-int-pred-reduce.ll b/llvm/test/CodeGen/AArch64/sve-split-int-pred-reduce.ll
index cf91ae9c5053..922320e84f83 100644
--- a/llvm/test/CodeGen/AArch64/sve-split-int-pred-reduce.ll
+++ b/llvm/test/CodeGen/AArch64/sve-split-int-pred-reduce.ll
@@ -8,8 +8,7 @@ define i1 @andv_nxv32i1(<vscale x 32 x i1> %a) {
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p2.b
; CHECK-NEXT: and p0.b, p0/z, p0.b, p1.b
-; CHECK-NEXT: not p0.b, p2/z, p0.b
-; CHECK-NEXT: ptest p2, p0.b
+; CHECK-NEXT: nots p0.b, p2/z, p0.b
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%res = call i1 @llvm.vector.reduce.and.nxv32i1(<vscale x 32 x i1> %a)
@@ -29,8 +28,7 @@ define i1 @andv_nxv64i1(<vscale x 64 x i1> %a) {
; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: ptrue p4.b
; CHECK-NEXT: and p0.b, p0/z, p0.b, p1.b
-; CHECK-NEXT: not p0.b, p4/z, p0.b
-; CHECK-NEXT: ptest p4, p0.b
+; CHECK-NEXT: nots p0.b, p4/z, p0.b
; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: addvl sp, sp, #1
@@ -74,8 +72,7 @@ define i1 @smaxv_nxv32i1(<vscale x 32 x i1> %a) {
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p2.b
; CHECK-NEXT: and p0.b, p0/z, p0.b, p1.b
-; CHECK-NEXT: not p0.b, p2/z, p0.b
-; CHECK-NEXT: ptest p2, p0.b
+; CHECK-NEXT: nots p0.b, p2/z, p0.b
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%res = call i1 @llvm.vector.reduce.smax.nxv32i1(<vscale x 32 x i1> %a)
@@ -115,8 +112,7 @@ define i1 @uminv_nxv32i1(<vscale x 32 x i1> %a) {
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p2.b
; CHECK-NEXT: and p0.b, p0/z, p0.b, p1.b
-; CHECK-NEXT: not p0.b, p2/z, p0.b
-; CHECK-NEXT: ptest p2, p0.b
+; CHECK-NEXT: nots p0.b, p2/z, p0.b
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%res = call i1 @llvm.vector.reduce.umin.nxv32i1(<vscale x 32 x i1> %a)
diff --git a/llvm/test/CodeGen/AArch64/sve-vecreduce-fold.ll b/llvm/test/CodeGen/AArch64/sve-vecreduce-fold.ll
index f46c5fa4e2fa..0bdaefdfc2a3 100644
--- a/llvm/test/CodeGen/AArch64/sve-vecreduce-fold.ll
+++ b/llvm/test/CodeGen/AArch64/sve-vecreduce-fold.ll
@@ -53,8 +53,7 @@ define i1 @reduce_and_insert_subvec_into_ones(<vscale x 4 x i1> %in) {
; CHECK-LABEL: reduce_and_insert_subvec_into_ones:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.s
-; CHECK-NEXT: not p0.b, p1/z, p0.b
-; CHECK-NEXT: ptest p1, p0.b
+; CHECK-NEXT: nots p0.b, p1/z, p0.b
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%allones.ins = insertelement <vscale x 16 x i1> poison, i1 1, i32 0
@@ -68,8 +67,7 @@ define i1 @reduce_and_insert_subvec_into_poison(<vscale x 4 x i1> %in) {
; CHECK-LABEL: reduce_and_insert_subvec_into_poison:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.s
-; CHECK-NEXT: not p0.b, p1/z, p0.b
-; CHECK-NEXT: ptest p1, p0.b
+; CHECK-NEXT: nots p0.b, p1/z, p0.b
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%t = call <vscale x 16 x i1> @llvm.vector.insert.nxv16i1.nxv4i1(<vscale x 16 x i1> poison, <vscale x 4 x i1> %in, i64 0)
@@ -86,8 +84,7 @@ define i1 @reduce_and_insert_subvec_into_var(<vscale x 4 x i1> %in, <vscale x 16
; CHECK-NEXT: ptrue p2.b
; CHECK-NEXT: uzp1 p0.h, p0.h, p3.h
; CHECK-NEXT: uzp1 p0.b, p0.b, p1.b
-; CHECK-NEXT: not p0.b, p2/z, p0.b
-; CHECK-NEXT: ptest p2, p0.b
+; CHECK-NEXT: nots p0.b, p2/z, p0.b
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%t = call <vscale x 16 x i1> @llvm.vector.insert.nxv16i1.nxv4i1(<vscale x 16 x i1> %vec, <vscale x 4 x i1> %in, i64 0)
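For any out-of-tree caller of convertToFlagSettingOpc, the migration mirrors
the AArch64CondBrTuning change above: the Is64Bit out-parameter is gone, so
register width is determined at the call site. A hedged sketch of the
caller-side update (the context around the call is illustrative):

    // Before: bool Is64Bit;
    //         unsigned NewOpc = TII->convertToFlagSettingOpc(MI.getOpcode(), Is64Bit);
    // After: the caller already knows the width, e.g. from the opcode class:
    unsigned NewOpc = AArch64InstrInfo::convertToFlagSettingOpc(MI.getOpcode());
    Register ZeroReg = Is64Bit ? AArch64::XZR : AArch64::WZR; // Is64Bit from local context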