[llvm] 06c6b78 - [SelectionDAG][AArch64] Constant fold in SelectionDAG::getVScale if VScaleMin==VScaleMax.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Thu Mar 2 12:08:44 PST 2023
Author: Craig Topper
Date: 2023-03-02T12:02:38-08:00
New Revision: 06c6b787b28e5d11a7fc16d2e623482b38b90838
URL: https://github.com/llvm/llvm-project/commit/06c6b787b28e5d11a7fc16d2e623482b38b90838
DIFF: https://github.com/llvm/llvm-project/commit/06c6b787b28e5d11a7fc16d2e623482b38b90838.diff
LOG: [SelectionDAG][AArch64] Constant fold in SelectionDAG::getVScale if VScaleMin==VScaleMax.
Reviewed By: paulwalker-arm
Differential Revision: https://reviews.llvm.org/D145113
Added:
Modified:
llvm/include/llvm/CodeGen/SelectionDAG.h
llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
llvm/test/CodeGen/AArch64/insert-subvector-res-legalization.ll
llvm/test/CodeGen/AArch64/sve-extract-fixed-vector.ll
llvm/test/CodeGen/AArch64/sve-insert-vector.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h
index 1377feaa270d9..3f15eb6e05013 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAG.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAG.h
@@ -1059,12 +1059,8 @@ class SelectionDAG {
}
/// Return a node that represents the runtime scaling 'MulImm * RuntimeVL'.
- SDValue getVScale(const SDLoc &DL, EVT VT, APInt MulImm) {
- assert(MulImm.getSignificantBits() <= VT.getSizeInBits() &&
- "Immediate does not fit VT");
- return getNode(ISD::VSCALE, DL, VT,
- getConstant(MulImm.sextOrTrunc(VT.getSizeInBits()), DL, VT));
- }
+ SDValue getVScale(const SDLoc &DL, EVT VT, APInt MulImm,
+ bool ConstantFold = true);
/// Return a GLOBAL_OFFSET_TABLE node. This does not have a useful SDLoc.
SDValue getGLOBAL_OFFSET_TABLE(EVT VT) {
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 0ada8148bfcf4..90911013a73d9 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -1935,6 +1935,27 @@ SDValue SelectionDAG::getCondCode(ISD::CondCode Cond) {
return SDValue(CondCodeNodes[Cond], 0);
}
+SDValue SelectionDAG::getVScale(const SDLoc &DL, EVT VT, APInt MulImm,
+ bool ConstantFold) {
+ assert(MulImm.getSignificantBits() <= VT.getSizeInBits() &&
+ "Immediate does not fit VT");
+
+ MulImm = MulImm.sextOrTrunc(VT.getSizeInBits());
+
+ if (ConstantFold) {
+ const MachineFunction &MF = getMachineFunction();
+ auto Attr = MF.getFunction().getFnAttribute(Attribute::VScaleRange);
+ if (Attr.isValid()) {
+ unsigned VScaleMin = Attr.getVScaleRangeMin();
+ if (std::optional<unsigned> VScaleMax = Attr.getVScaleRangeMax())
+ if (*VScaleMax == VScaleMin)
+ return getConstant(MulImm * VScaleMin, DL, VT);
+ }
+ }
+
+ return getNode(ISD::VSCALE, DL, VT, getConstant(MulImm, DL, VT));
+}
+
SDValue SelectionDAG::getStepVector(const SDLoc &DL, EVT ResVT) {
APInt One(ResVT.getScalarSizeInBits(), 1);
return getStepVector(DL, ResVT, One);
diff --git a/llvm/test/CodeGen/AArch64/insert-subvector-res-legalization.ll b/llvm/test/CodeGen/AArch64/insert-subvector-res-legalization.ll
index 74a2fd9236989..d731b27e18ce3 100644
--- a/llvm/test/CodeGen/AArch64/insert-subvector-res-legalization.ll
+++ b/llvm/test/CodeGen/AArch64/insert-subvector-res-legalization.ll
@@ -209,18 +209,11 @@ define <vscale x 2 x i32> @vec_scalable_subvec_fixed_idx_nonzero_large_i32(<vsca
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: cntd x8
-; CHECK-NEXT: ld1w { z0.d }, p0/z, [x0]
-; CHECK-NEXT: subs x8, x8, #8
; CHECK-NEXT: ptrue p1.d, vl8
-; CHECK-NEXT: csel x8, xzr, x8, lo
-; CHECK-NEXT: mov w9, #8
-; CHECK-NEXT: cmp x8, #8
-; CHECK-NEXT: csel x8, x8, x9, lo
-; CHECK-NEXT: mov x9, sp
+; CHECK-NEXT: ld1w { z0.d }, p0/z, [x0]
; CHECK-NEXT: st1d { z0.d }, p0, [sp]
; CHECK-NEXT: ld1w { z0.d }, p1/z, [x1]
-; CHECK-NEXT: st1d { z0.d }, p0, [x9, x8, lsl #3]
+; CHECK-NEXT: st1d { z0.d }, p0, [sp]
; CHECK-NEXT: ld1d { z0.d }, p0/z, [sp]
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
diff --git a/llvm/test/CodeGen/AArch64/sve-extract-fixed-vector.ll b/llvm/test/CodeGen/AArch64/sve-extract-fixed-vector.ll
index 96057262a624e..b220cf7315c77 100644
--- a/llvm/test/CodeGen/AArch64/sve-extract-fixed-vector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-extract-fixed-vector.ll
@@ -84,16 +84,11 @@ define <4 x i32> @extract_v4i32_nxv2i32_idx4(<vscale x 2 x i32> %vec) nounwind #
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
-; CHECK-NEXT: cntd x8
-; CHECK-NEXT: mov w9, #4
-; CHECK-NEXT: subs x8, x8, #4
+; CHECK-NEXT: mov x8, #4
+; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: csel x8, xzr, x8, lo
; CHECK-NEXT: st1d { z0.d }, p0, [sp]
-; CHECK-NEXT: cmp x8, #4
; CHECK-NEXT: ptrue p0.d, vl4
-; CHECK-NEXT: csel x8, x8, x9, lo
-; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x9, x8, lsl #3]
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
@@ -154,16 +149,11 @@ define <8 x i16> @extract_v8i16_nxv4i16_idx8(<vscale x 4 x i16> %vec) nounwind #
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
-; CHECK-NEXT: cntw x8
-; CHECK-NEXT: mov w9, #8
-; CHECK-NEXT: subs x8, x8, #8
+; CHECK-NEXT: mov x8, #8
+; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: csel x8, xzr, x8, lo
; CHECK-NEXT: st1w { z0.s }, p0, [sp]
-; CHECK-NEXT: cmp x8, #8
; CHECK-NEXT: ptrue p0.s, vl8
-; CHECK-NEXT: csel x8, x8, x9, lo
-; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x9, x8, lsl #2]
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
@@ -192,16 +182,11 @@ define <8 x i16> @extract_v8i16_nxv2i16_idx8(<vscale x 2 x i16> %vec) nounwind #
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
-; CHECK-NEXT: cntd x8
-; CHECK-NEXT: mov w9, #8
-; CHECK-NEXT: subs x8, x8, #8
+; CHECK-NEXT: mov x8, #8
+; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: csel x8, xzr, x8, lo
; CHECK-NEXT: st1d { z0.d }, p0, [sp]
-; CHECK-NEXT: cmp x8, #8
; CHECK-NEXT: ptrue p0.d, vl8
-; CHECK-NEXT: csel x8, x8, x9, lo
-; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x9, x8, lsl #3]
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
@@ -262,16 +247,11 @@ define <16 x i8> @extract_v16i8_nxv8i8_idx16(<vscale x 8 x i8> %vec) nounwind #1
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
-; CHECK-NEXT: cnth x8
-; CHECK-NEXT: mov w9, #16
-; CHECK-NEXT: subs x8, x8, #16
+; CHECK-NEXT: mov x8, #16
+; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: csel x8, xzr, x8, lo
; CHECK-NEXT: st1h { z0.h }, p0, [sp]
-; CHECK-NEXT: cmp x8, #16
; CHECK-NEXT: ptrue p0.h, vl16
-; CHECK-NEXT: csel x8, x8, x9, lo
-; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x9, x8, lsl #1]
; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
@@ -300,16 +280,11 @@ define <16 x i8> @extract_v16i8_nxv4i8_idx16(<vscale x 4 x i8> %vec) nounwind #1
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
-; CHECK-NEXT: cntw x8
-; CHECK-NEXT: mov w9, #16
-; CHECK-NEXT: subs x8, x8, #16
+; CHECK-NEXT: mov x8, #16
+; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: csel x8, xzr, x8, lo
; CHECK-NEXT: st1w { z0.s }, p0, [sp]
-; CHECK-NEXT: cmp x8, #16
; CHECK-NEXT: ptrue p0.s, vl16
-; CHECK-NEXT: csel x8, x8, x9, lo
-; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x9, x8, lsl #2]
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b
@@ -340,16 +315,9 @@ define <16 x i8> @extract_v16i8_nxv2i8_idx16(<vscale x 2 x i8> %vec) nounwind #1
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
-; CHECK-NEXT: cntd x8
-; CHECK-NEXT: mov w9, #16
-; CHECK-NEXT: subs x8, x8, #16
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: csel x8, xzr, x8, lo
; CHECK-NEXT: st1d { z0.d }, p0, [sp]
-; CHECK-NEXT: cmp x8, #16
-; CHECK-NEXT: csel x8, x8, x9, lo
-; CHECK-NEXT: mov x9, sp
-; CHECK-NEXT: ld1d { z0.d }, p0/z, [x9, x8, lsl #3]
+; CHECK-NEXT: ld1d { z0.d }, p0/z, [sp]
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b
@@ -468,16 +436,9 @@ define <2 x i64> @extract_fixed_v2i64_nxv2i64(<vscale x 2 x i64> %vec) nounwind
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
-; CHECK-NEXT: cntd x8
-; CHECK-NEXT: mov w9, #2
-; CHECK-NEXT: sub x8, x8, #2
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: cmp x8, #2
; CHECK-NEXT: st1d { z0.d }, p0, [sp]
-; CHECK-NEXT: csel x8, x8, x9, lo
-; CHECK-NEXT: mov x9, sp
-; CHECK-NEXT: lsl x8, x8, #3
-; CHECK-NEXT: ldr q0, [x9, x8]
+; CHECK-NEXT: ldr q0, [sp, #16]
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
@@ -490,16 +451,9 @@ define <4 x i64> @extract_fixed_v4i64_nxv2i64(<vscale x 2 x i64> %vec) nounwind
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
-; CHECK-NEXT: cntd x9
-; CHECK-NEXT: mov w10, #4
-; CHECK-NEXT: subs x9, x9, #4
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: csel x9, xzr, x9, lo
; CHECK-NEXT: st1d { z0.d }, p0, [sp]
-; CHECK-NEXT: cmp x9, #4
-; CHECK-NEXT: csel x9, x9, x10, lo
-; CHECK-NEXT: mov x10, sp
-; CHECK-NEXT: ld1d { z0.d }, p0/z, [x10, x9, lsl #3]
+; CHECK-NEXT: ld1d { z0.d }, p0/z, [sp]
; CHECK-NEXT: st1d { z0.d }, p0, [x8]
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
diff --git a/llvm/test/CodeGen/AArch64/sve-insert-vector.ll b/llvm/test/CodeGen/AArch64/sve-insert-vector.ll
index 27da8659f4fb2..6cb491050dc25 100644
--- a/llvm/test/CodeGen/AArch64/sve-insert-vector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-insert-vector.ll
@@ -374,16 +374,9 @@ define <vscale x 2 x i64> @insert_fixed_v2i64_nxv2i64(<vscale x 2 x i64> %vec, <
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
-; CHECK-NEXT: cntd x8
-; CHECK-NEXT: mov w9, #2
-; CHECK-NEXT: sub x8, x8, #2
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: cmp x8, #2
; CHECK-NEXT: st1d { z0.d }, p0, [sp]
-; CHECK-NEXT: csel x8, x8, x9, lo
-; CHECK-NEXT: mov x9, sp
-; CHECK-NEXT: lsl x8, x8, #3
-; CHECK-NEXT: str q1, [x9, x8]
+; CHECK-NEXT: str q1, [sp, #16]
; CHECK-NEXT: ld1d { z0.d }, p0/z, [sp]
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
@@ -397,17 +390,10 @@ define <vscale x 2 x i64> @insert_fixed_v4i64_nxv2i64(<vscale x 2 x i64> %vec, <
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
-; CHECK-NEXT: cntd x8
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: subs x8, x8, #4
; CHECK-NEXT: ld1d { z1.d }, p0/z, [x0]
-; CHECK-NEXT: csel x8, xzr, x8, lo
-; CHECK-NEXT: mov w9, #4
-; CHECK-NEXT: cmp x8, #4
; CHECK-NEXT: st1d { z0.d }, p0, [sp]
-; CHECK-NEXT: csel x8, x8, x9, lo
-; CHECK-NEXT: mov x9, sp
-; CHECK-NEXT: st1d { z1.d }, p0, [x9, x8, lsl #3]
+; CHECK-NEXT: st1d { z1.d }, p0, [sp]
; CHECK-NEXT: ld1d { z0.d }, p0/z, [sp]
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
More information about the llvm-commits mailing list