[llvm] 44e2c6a - [AArch64][SVE] Use PTRUE instruction instead of WHILELO if the range is appropriate for a predicate constant.
Dinar Temirbulatov via llvm-commits
llvm-commits at lists.llvm.org
Fri Nov 18 08:21:37 PST 2022
Author: Dinar Temirbulatov
Date: 2022-11-18T16:21:10Z
New Revision: 44e2c6a428cca8a50c620c589e77f46f768eb0d7
URL: https://github.com/llvm/llvm-project/commit/44e2c6a428cca8a50c620c589e77f46f768eb0d7
DIFF: https://github.com/llvm/llvm-project/commit/44e2c6a428cca8a50c620c589e77f46f768eb0d7.diff
LOG: [AArch64][SVE] Use PTRUE instruction instead of WHILELO if the range is appropriate for a predicate constant.
When lowering get_active_lane_mask we currently emit a WHILELO instruction,
but for a constant range that matches one of the fixed PTRUE predicate
patterns we can issue a PTRUE instruction instead.
Differential Revision: https://reviews.llvm.org/D137547
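
For illustration, here is a minimal standalone C++ sketch of the guard added
to AArch64ISelLowering.cpp below (not the in-tree code; predPatternForNumElems
and canUsePTrue are hypothetical names). It assumes the usual fixed SVE
predicate patterns, VL1..VL8 plus the powers of two up to VL256:

  #include <cassert>
  #include <cstdint>
  #include <optional>

  // Hypothetical stand-in for LLVM's getSVEPredPatternFromNumElements():
  // PTRUE only encodes the fixed patterns VL1..VL8, VL16, VL32, VL64,
  // VL128 and VL256; any other element count has no pattern.
  static std::optional<unsigned> predPatternForNumElems(unsigned N) {
    switch (N) {
    case 1: case 2: case 3: case 4: case 5: case 6: case 7: case 8:
    case 16: case 32: case 64: case 128: case 256:
      return N;
    default:
      return std::nullopt;
    }
  }

  // Mirrors the new check: a constant whilelo range [Start, End) folds to
  // PTRUE only if a fixed pattern exists for the active element count AND
  // that count fits in the minimum guaranteed vector length (128 bits
  // unless raised, e.g. by a vscale_range attribute).
  static bool canUsePTrue(uint64_t Start, uint64_t End,
                          unsigned MinSVEVectorSizeInBits,
                          unsigned VectorMinNumElements) {
    unsigned ElementSize = 128 / VectorMinNumElements;
    unsigned NumActiveElems = End - Start;
    return predPatternForNumElems(NumActiveElems).has_value() &&
           NumActiveElems <= MinSVEVectorSizeInBits / ElementSize;
  }

  int main() {
    // nxv4i1, [0,3): VL3 pattern, 3 <= 128/32 lanes -> ptrue p0.s, vl3
    assert(canUsePTrue(0, 3, 128, 4));
    // nxv4i1, [0,5): VL5 exists, but 5 > 4 guaranteed lanes -> keep whilelo
    assert(!canUsePTrue(0, 5, 128, 4));
    // nxv16i1, [0,10): no VL10 pattern -> keep whilelo
    assert(!canUsePTrue(0, 10, 128, 16));
    // nxv16i1, [0,256) with vscale_range(16,16) -> ptrue p0.b, vl256
    assert(canUsePTrue(0, 256, 2048, 16));
  }

The asserts mirror the new tests in active_lane_mask.ll: the vl3 and vl256
cases fold to PTRUE, while active element counts of 5 and 10 either exceed
the guaranteed lane count or match no fixed pattern, and so keep WHILELO.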
Added:
Modified:
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/test/CodeGen/AArch64/active_lane_mask.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 8b3487dab38a8..bdaf22a47f7e1 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -4749,6 +4749,22 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
Op.getOperand(1))));
return SDValue();
}
+ case Intrinsic::aarch64_sve_whilelo: {
+ if (isa<ConstantSDNode>(Op.getOperand(1)) &&
+ isa<ConstantSDNode>(Op.getOperand(2))) {
+ unsigned MinSVEVectorSize =
+ std::max(Subtarget->getMinSVEVectorSizeInBits(), 128u);
+ unsigned ElementSize = 128 / Op.getValueType().getVectorMinNumElements();
+ unsigned NumActiveElems =
+ Op.getConstantOperandVal(2) - Op.getConstantOperandVal(1);
+ Optional<unsigned> PredPattern =
+ getSVEPredPatternFromNumElements(NumActiveElems);
+ if ((PredPattern != None) &&
+ NumActiveElems <= (MinSVEVectorSize / ElementSize))
+ return getPTrue(DAG, dl, Op.getValueType(), *PredPattern);
+ }
+ return SDValue();
+ }
case Intrinsic::aarch64_sve_sunpkhi:
return DAG.getNode(AArch64ISD::SUNPKHI, dl, Op.getValueType(),
Op.getOperand(1));
diff --git a/llvm/test/CodeGen/AArch64/active_lane_mask.ll b/llvm/test/CodeGen/AArch64/active_lane_mask.ll
index 1ddd24e6dd386..211361da18010 100644
--- a/llvm/test/CodeGen/AArch64/active_lane_mask.ll
+++ b/llvm/test/CodeGen/AArch64/active_lane_mask.ll
@@ -475,6 +475,58 @@ define <2 x i1> @lane_mask_v2i1_i8(i8 %index, i8 %TC) {
ret <2 x i1> %active.lane.mask
}
+define <vscale x 4 x i1> @lane_mask_nxv4i1_imm3() {
+; CHECK-LABEL: lane_mask_nxv4i1_imm3:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.s, vl3
+; CHECK-NEXT: ret
+entry:
+ %active.lane.mask = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 3)
+ ret <vscale x 4 x i1> %active.lane.mask
+}
+
+define <vscale x 4 x i1> @lane_mask_nxv4i1_imm5() {
+; CHECK-LABEL: lane_mask_nxv4i1_imm5:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov w8, #5
+; CHECK-NEXT: whilelo p0.s, xzr, x8
+; CHECK-NEXT: ret
+entry:
+ %active.lane.mask = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 5)
+ ret <vscale x 4 x i1> %active.lane.mask
+}
+
+define <vscale x 4 x i1> @lane_mask_nxv4i1_imm4() {
+; CHECK-LABEL: lane_mask_nxv4i1_imm4:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.s, vl4
+; CHECK-NEXT: ret
+entry:
+ %active.lane.mask = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 10, i64 14)
+ ret <vscale x 4 x i1> %active.lane.mask
+}
+
+define <vscale x 16 x i1> @lane_mask_nxv16i1_imm10() {
+; CHECK-LABEL: lane_mask_nxv16i1_imm10:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov w8, #10
+; CHECK-NEXT: whilelo p0.b, xzr, x8
+; CHECK-NEXT: ret
+entry:
+ %active.lane.mask = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 0, i64 10)
+ ret <vscale x 16 x i1> %active.lane.mask
+}
+
+define <vscale x 16 x i1> @lane_mask_nxv16i1_imm256() vscale_range(16, 16) {
+; CHECK-LABEL: lane_mask_nxv16i1_imm256:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.b, vl256
+; CHECK-NEXT: ret
+entry:
+ %active.lane.mask = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 0, i64 256)
+ ret <vscale x 16 x i1> %active.lane.mask
+}
+
declare <vscale x 32 x i1> @llvm.get.active.lane.mask.nxv32i1.i32(i32, i32)
declare <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i32(i32, i32)