[llvm] c165b05 - [TargetLowering] Teach DemandedBits about VSCALE
Benjamin Maxwell via llvm-commits
llvm-commits at lists.llvm.org
Wed Dec 14 07:52:07 PST 2022
Author: Benjamin Maxwell
Date: 2022-12-14T15:49:08Z
New Revision: c165b0553a96394b9bbf3984782703cdae99821d
URL: https://github.com/llvm/llvm-project/commit/c165b0553a96394b9bbf3984782703cdae99821d
DIFF: https://github.com/llvm/llvm-project/commit/c165b0553a96394b9bbf3984782703cdae99821d.diff
LOG: [TargetLowering] Teach DemandedBits about VSCALE
This allows DemandedBits to see that the result of VSCALE will be at most
VScaleMax * (some compile-time constant). This relies on the vscale_range()
attribute being present on the function, with a maximum set (clang adds this
by default when targeting AArch64+SVE).

Using this, various redundant operations (zexts, sexts, ands, ors, etc.)
can be eliminated.
Differential Revision: https://reviews.llvm.org/D138508
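For illustration only (not part of the commit), here is a minimal standalone
C++ sketch of the bound computation, with a hypothetical requiredBits() helper
standing in for the Log2_64-based code in SimplifyDemandedBits; the asserts
use the vscale_range(1,16) bounds from the tests below:

    #include <cassert>
    #include <cstdint>

    // How many low bits of MaxVScale * Multiplier can differ from the sign;
    // all higher bits of the VSCALE result are then known (zero if positive,
    // one if negative).
    static unsigned requiredBits(uint64_t MaxVScale, int64_t Multiplier) {
      int64_t UpperBound = static_cast<int64_t>(MaxVScale) * Multiplier;
      if (UpperBound < 0)
        UpperBound = ~UpperBound; // fold the negative case into the positive one
      // Equivalent to LLVM's Log2_64(UpperBound) + 1 for nonzero values.
      return 64 - __builtin_clzll(static_cast<uint64_t>(UpperBound) | 1);
    }

    int main() {
      assert(requiredBits(16, 1) == 5);  // vscale fits in 5 bits: and #0x1f is redundant
      assert(requiredBits(16, 5) == 7);  // 5 * vscale fits in 7 bits: and #0x7f is redundant
      assert(requiredBits(16, -5) == 7); // -5 * vscale: bits 7 and up are all ones
    }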
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
llvm/test/CodeGen/AArch64/vscale-and-sve-cnt-demandedbits.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 202178e8166a1..014c1c8ae7fa4 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -1125,6 +1125,24 @@ bool TargetLowering::SimplifyDemandedBits(
KnownBits Known2;
switch (Op.getOpcode()) {
+ case ISD::VSCALE: {
+ Function const &F = TLO.DAG.getMachineFunction().getFunction();
+ Attribute const &Attr = F.getFnAttribute(Attribute::VScaleRange);
+ if (!Attr.isValid())
+ return false;
+ std::optional<unsigned> MaxVScale = Attr.getVScaleRangeMax();
+ if (!MaxVScale.has_value())
+ return false;
+ int64_t VScaleResultUpperbound =
+ *MaxVScale * Op.getConstantOperandAPInt(0).getSExtValue();
+ bool Negative = VScaleResultUpperbound < 0;
+ if (Negative)
+ VScaleResultUpperbound = ~VScaleResultUpperbound;
+ unsigned RequiredBits = Log2_64(VScaleResultUpperbound) + 1;
+ if (RequiredBits < BitWidth)
+ (Negative ? Known.One : Known.Zero).setHighBits(BitWidth - RequiredBits);
+ return false;
+ }
case ISD::SCALAR_TO_VECTOR: {
if (VT.isScalableVector())
return false;
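As a sanity check on the sign handling above (a quick standalone test, again
not part of the commit): with a negative multiplier every possible VSCALE
result shares the same high bits set to one, which is what lets the orr in
the test diff below be folded away:

    #include <cassert>
    #include <cstdint>

    int main() {
      // vscale_range(1,16) with multiplier -5: results range over [-80, -5],
      // so ~(-80) = 79 needs 7 bits and bits 7..31 of an i32 result are one.
      const uint32_t HighOnes = 0xFFFFFF80u;
      for (int64_t VScale = 1; VScale <= 16; ++VScale) {
        uint32_t Result = static_cast<uint32_t>(VScale * -5);
        assert((Result & HighOnes) == HighOnes);
      }
    }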
diff --git a/llvm/test/CodeGen/AArch64/vscale-and-sve-cnt-demandedbits.ll b/llvm/test/CodeGen/AArch64/vscale-and-sve-cnt-demandedbits.ll
index 895f5da9a1e13..dbdab799c8352 100644
--- a/llvm/test/CodeGen/AArch64/vscale-and-sve-cnt-demandedbits.ll
+++ b/llvm/test/CodeGen/AArch64/vscale-and-sve-cnt-demandedbits.ll
@@ -14,9 +14,8 @@ define i32 @vscale_and_elimination() vscale_range(1,16) {
; CHECK: // %bb.0:
; CHECK-NEXT: rdvl x8, #1
; CHECK-NEXT: lsr x8, x8, #4
-; CHECK-NEXT: and w9, w8, #0x1f
-; CHECK-NEXT: and w8, w8, #0xfffffffc
-; CHECK-NEXT: add w0, w9, w8
+; CHECK-NEXT: and w9, w8, #0x1c
+; CHECK-NEXT: add w0, w8, w9
; CHECK-NEXT: ret
%vscale = call i32 @llvm.vscale.i32()
%and_redundant = and i32 %vscale, 31
@@ -85,8 +84,7 @@ define i64 @vscale_trunc_zext() vscale_range(1,16) {
; CHECK-LABEL: vscale_trunc_zext:
; CHECK: // %bb.0:
; CHECK-NEXT: rdvl x8, #1
-; CHECK-NEXT: lsr x8, x8, #4
-; CHECK-NEXT: and x0, x8, #0xffffffff
+; CHECK-NEXT: lsr x0, x8, #4
; CHECK-NEXT: ret
%vscale = call i32 @llvm.vscale.i32()
%zext = zext i32 %vscale to i64
@@ -97,8 +95,7 @@ define i64 @vscale_trunc_sext() vscale_range(1,16) {
; CHECK-LABEL: vscale_trunc_sext:
; CHECK: // %bb.0:
; CHECK-NEXT: rdvl x8, #1
-; CHECK-NEXT: lsr x8, x8, #4
-; CHECK-NEXT: sxtw x0, w8
+; CHECK-NEXT: lsr x0, x8, #4
; CHECK-NEXT: ret
%vscale = call i32 @llvm.vscale.i32()
%sext = sext i32 %vscale to i64
@@ -200,9 +197,8 @@ define i32 @vscale_with_multiplier() vscale_range(1,16) {
; CHECK-NEXT: mov w9, #5
; CHECK-NEXT: lsr x8, x8, #4
; CHECK-NEXT: mul x8, x8, x9
-; CHECK-NEXT: and w9, w8, #0x7f
-; CHECK-NEXT: and w8, w8, #0x3f
-; CHECK-NEXT: add w0, w9, w8
+; CHECK-NEXT: and w9, w8, #0x3f
+; CHECK-NEXT: add w0, w8, w9
; CHECK-NEXT: ret
%vscale = call i32 @llvm.vscale.i32()
%mul = mul i32 %vscale, 5
@@ -219,9 +215,8 @@ define i32 @vscale_with_negative_multiplier() vscale_range(1,16) {
; CHECK-NEXT: mov x9, #-5
; CHECK-NEXT: lsr x8, x8, #4
; CHECK-NEXT: mul x8, x8, x9
-; CHECK-NEXT: orr w9, w8, #0xffffff80
-; CHECK-NEXT: and w8, w8, #0xffffffc0
-; CHECK-NEXT: add w0, w9, w8
+; CHECK-NEXT: and w9, w8, #0xffffffc0
+; CHECK-NEXT: add w0, w8, w9
; CHECK-NEXT: ret
%vscale = call i32 @llvm.vscale.i32()
%mul = mul i32 %vscale, -5
@@ -231,6 +226,22 @@ define i32 @vscale_with_negative_multiplier() vscale_range(1,16) {
ret i32 %result
}
+define i32 @pow2_vscale_with_negative_multiplier() vscale_range(1,16) {
+; CHECK-LABEL: pow2_vscale_with_negative_multiplier:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cntd x8
+; CHECK-NEXT: neg x8, x8
+; CHECK-NEXT: orr w9, w8, #0xfffffff0
+; CHECK-NEXT: add w0, w8, w9
+; CHECK-NEXT: ret
+ %vscale = call i32 @llvm.vscale.i32()
+ %mul = mul i32 %vscale, -2
+ %or_redundant = or i32 %mul, 4294967264
+ %or_required = or i32 %mul, 4294967280
+ %result = add i32 %or_redundant, %or_required
+ ret i32 %result
+}
+
declare i32 @llvm.vscale.i32()
declare i64 @llvm.aarch64.sve.cntb(i32 %pattern)
declare i64 @llvm.aarch64.sve.cnth(i32 %pattern)