[llvm] c165b05 - [TargetLowering] Teach DemandedBits about VSCALE

Benjamin Maxwell via llvm-commits llvm-commits at lists.llvm.org
Wed Dec 14 07:52:07 PST 2022


Author: Benjamin Maxwell
Date: 2022-12-14T15:49:08Z
New Revision: c165b0553a96394b9bbf3984782703cdae99821d

URL: https://github.com/llvm/llvm-project/commit/c165b0553a96394b9bbf3984782703cdae99821d
DIFF: https://github.com/llvm/llvm-project/commit/c165b0553a96394b9bbf3984782703cdae99821d.diff

LOG: [TargetLowering] Teach DemandedBits about VSCALE

This allows DemandedBits to see that the result of VSCALE will be at most
VScaleMax * some compile-time constant. This relies on the vscale_range()
attribute being present on the function, with a maximum set. (This is done
by default when clang targets AArch64+SVE).

Using this, various redundant operations (zexts, sexts, ands, ors, etc.)
can be eliminated.

Differential Revision: https://reviews.llvm.org/D138508
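
As a rough illustration of the bound this gives (a standalone sketch only,
not the code added by the patch, which appears in the TargetLowering.cpp
hunk below): with vscale_range(1,16) and a VSCALE multiplier of 1 the
result is at most 16, which needs 5 bits, so the upper 27 bits of an i32
vscale value are known zero and a mask such as "and i32 %vscale, 31"
(the "and w9, w8, #0x1f" removed in the test below) cannot change it.

#include <cstdint>
#include <cstdio>

// Bits needed to represent Value: index of the highest set bit plus one,
// i.e. the same quantity as Log2_64(Value) + 1 for Value > 0.
static unsigned requiredBits(uint64_t Value) {
  unsigned Bits = 0;
  for (; Value != 0; Value >>= 1)
    ++Bits;
  return Bits;
}

int main() {
  const int64_t MaxVScale = 16;  // from vscale_range(1,16)
  const int64_t Multiplier = 1;  // the VSCALE node's constant operand
  const unsigned BitWidth = 32;  // an i32 vscale result
  const int64_t UpperBound = MaxVScale * Multiplier; // 16
  const unsigned Needed = requiredBits(UpperBound);  // 5
  // Bits [Needed, BitWidth) are known zero, so an AND of the vscale result
  // with 31 keeps every bit that can possibly be set.
  std::printf("known-zero high bits: %u\n", BitWidth - Needed); // prints 27
  return 0;
}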

Added: 
    

Modified: 
    llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
    llvm/test/CodeGen/AArch64/vscale-and-sve-cnt-demandedbits.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 202178e8166a1..014c1c8ae7fa4 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -1125,6 +1125,24 @@ bool TargetLowering::SimplifyDemandedBits(
 
   KnownBits Known2;
   switch (Op.getOpcode()) {
+  case ISD::VSCALE: {
+    Function const &F = TLO.DAG.getMachineFunction().getFunction();
+    Attribute const &Attr = F.getFnAttribute(Attribute::VScaleRange);
+    if (!Attr.isValid())
+      return false;
+    std::optional<unsigned> MaxVScale = Attr.getVScaleRangeMax();
+    if (!MaxVScale.has_value())
+      return false;
+    int64_t VScaleResultUpperbound =
+        *MaxVScale * Op.getConstantOperandAPInt(0).getSExtValue();
+    bool Negative = VScaleResultUpperbound < 0;
+    if (Negative)
+      VScaleResultUpperbound = ~VScaleResultUpperbound;
+    unsigned RequiredBits = Log2_64(VScaleResultUpperbound) + 1;
+    if (RequiredBits < BitWidth)
+      (Negative ? Known.One : Known.Zero).setHighBits(BitWidth - RequiredBits);
+    return false;
+  }
   case ISD::SCALAR_TO_VECTOR: {
     if (VT.isScalableVector())
       return false;

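For a negative multiplier the same reasoning yields known-one high bits
instead: the bound is the most negative value the node can take, and the
bitwise NOT above turns it into the equivalent "how many low bits can vary"
question. As a quick standalone check (not part of the patch, assuming the
-5 multiplier from the vscale_with_negative_multiplier test below reaches
the VSCALE node as a constant operand): the product lies in [-80, -5],
~(-80) = 79 needs 7 bits, so bits 7..31 are known one and the orr with
#0xffffff80 that disappears in the test diff is a no-op, which the loop
below verifies by brute force.

#include <cstdint>
#include <cstdio>

int main() {
  // For vscale in [1, 16], vscale * -5 lies in [-80, -5]. Verify that ORing
  // in 0xffffff80 (bits 7..31) never changes the 32-bit value, which is the
  // conclusion SimplifyDemandedBits can now draw from the known-one bits.
  for (int64_t VScale = 1; VScale <= 16; ++VScale) {
    const uint32_t Product = static_cast<uint32_t>(VScale * -5);
    if ((Product | 0xffffff80u) != Product) {
      std::printf("changed at vscale = %lld\n", (long long)VScale);
      return 1;
    }
  }
  std::printf("orr with #0xffffff80 is redundant for vscale in [1, 16]\n");
  return 0;
}
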
diff  --git a/llvm/test/CodeGen/AArch64/vscale-and-sve-cnt-demandedbits.ll b/llvm/test/CodeGen/AArch64/vscale-and-sve-cnt-demandedbits.ll
index 895f5da9a1e13..dbdab799c8352 100644
--- a/llvm/test/CodeGen/AArch64/vscale-and-sve-cnt-demandedbits.ll
+++ b/llvm/test/CodeGen/AArch64/vscale-and-sve-cnt-demandedbits.ll
@@ -14,9 +14,8 @@ define i32 @vscale_and_elimination() vscale_range(1,16) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    rdvl x8, #1
 ; CHECK-NEXT:    lsr x8, x8, #4
-; CHECK-NEXT:    and w9, w8, #0x1f
-; CHECK-NEXT:    and w8, w8, #0xfffffffc
-; CHECK-NEXT:    add w0, w9, w8
+; CHECK-NEXT:    and w9, w8, #0x1c
+; CHECK-NEXT:    add w0, w8, w9
 ; CHECK-NEXT:    ret
   %vscale = call i32 @llvm.vscale.i32()
   %and_redundant = and i32 %vscale, 31
@@ -85,8 +84,7 @@ define i64 @vscale_trunc_zext() vscale_range(1,16) {
 ; CHECK-LABEL: vscale_trunc_zext:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    rdvl x8, #1
-; CHECK-NEXT:    lsr x8, x8, #4
-; CHECK-NEXT:    and x0, x8, #0xffffffff
+; CHECK-NEXT:    lsr x0, x8, #4
 ; CHECK-NEXT:    ret
   %vscale = call i32 @llvm.vscale.i32()
   %zext = zext i32 %vscale to i64
@@ -97,8 +95,7 @@ define i64 @vscale_trunc_sext() vscale_range(1,16) {
 ; CHECK-LABEL: vscale_trunc_sext:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    rdvl x8, #1
-; CHECK-NEXT:    lsr x8, x8, #4
-; CHECK-NEXT:    sxtw x0, w8
+; CHECK-NEXT:    lsr x0, x8, #4
 ; CHECK-NEXT:    ret
   %vscale = call i32 @llvm.vscale.i32()
   %sext = sext i32 %vscale to i64
@@ -200,9 +197,8 @@ define i32 @vscale_with_multiplier() vscale_range(1,16) {
 ; CHECK-NEXT:    mov w9, #5
 ; CHECK-NEXT:    lsr x8, x8, #4
 ; CHECK-NEXT:    mul x8, x8, x9
-; CHECK-NEXT:    and w9, w8, #0x7f
-; CHECK-NEXT:    and w8, w8, #0x3f
-; CHECK-NEXT:    add w0, w9, w8
+; CHECK-NEXT:    and w9, w8, #0x3f
+; CHECK-NEXT:    add w0, w8, w9
 ; CHECK-NEXT:    ret
   %vscale = call i32 @llvm.vscale.i32()
   %mul = mul i32 %vscale, 5
@@ -219,9 +215,8 @@ define i32 @vscale_with_negative_multiplier() vscale_range(1,16) {
 ; CHECK-NEXT:    mov x9, #-5
 ; CHECK-NEXT:    lsr x8, x8, #4
 ; CHECK-NEXT:    mul x8, x8, x9
-; CHECK-NEXT:    orr w9, w8, #0xffffff80
-; CHECK-NEXT:    and w8, w8, #0xffffffc0
-; CHECK-NEXT:    add w0, w9, w8
+; CHECK-NEXT:    and w9, w8, #0xffffffc0
+; CHECK-NEXT:    add w0, w8, w9
 ; CHECK-NEXT:    ret
   %vscale = call i32 @llvm.vscale.i32()
   %mul = mul i32 %vscale, -5
@@ -231,6 +226,22 @@ define i32 @vscale_with_negative_multiplier() vscale_range(1,16) {
   ret i32 %result
 }
 
+define i32 @pow2_vscale_with_negative_multiplier() vscale_range(1,16) {
+; CHECK-LABEL: pow2_vscale_with_negative_multiplier:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cntd x8
+; CHECK-NEXT:    neg x8, x8
+; CHECK-NEXT:    orr w9, w8, #0xfffffff0
+; CHECK-NEXT:    add w0, w8, w9
+; CHECK-NEXT:    ret
+  %vscale = call i32 @llvm.vscale.i32()
+  %mul = mul i32 %vscale, -2
+  %or_redundant = or i32 %mul, 4294967264
+  %or_required = or i32 %mul, 4294967280
+  %result = add i32 %or_redundant, %or_required
+  ret i32 %result
+}
+
 declare i32 @llvm.vscale.i32()
 declare i64 @llvm.aarch64.sve.cntb(i32 %pattern)
 declare i64 @llvm.aarch64.sve.cnth(i32 %pattern)

More information about the llvm-commits mailing list