[llvm] 0d88f66 - GlobalISel: ComputeNumSignBits from load range metadata

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Mon Jul 1 06:27:02 PDT 2024


Author: Matt Arsenault
Date: 2024-07-01T15:26:50+02:00
New Revision: 0d88f662ff4db7e78a6c48db79ef62c5228d5f2a

URL: https://github.com/llvm/llvm-project/commit/0d88f662ff4db7e78a6c48db79ef62c5228d5f2a
DIFF: https://github.com/llvm/llvm-project/commit/0d88f662ff4db7e78a6c48db79ef62c5228d5f2a.diff

LOG: GlobalISel: ComputeNumSignBits from load range metadata

GlobalISel is still missing SimplifyDemandedBits-style optimizations,
so one test case differs from the SelectionDAG output because the mask
constant is not trimmed. The other differing case is an optimization
that GlobalISel performs and the DAG does not: it splits the 64-bit shift.

https://reviews.llvm.org/D138082

Added: 
    

Modified: 
    llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
    llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
    llvm/test/CodeGen/AMDGPU/load-range-metadata-sign-bits.ll

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
index 995031f7c00be..d9f6f6540bdc8 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
@@ -184,6 +184,11 @@ class GAnyLoad : public GLoadStore {
   /// Get the definition register of the loaded value.
   Register getDstReg() const { return getOperand(0).getReg(); }
 
+  /// Returns the Ranges that describes the dereference.
+  const MDNode *getRanges() const {
+    return getMMO().getRanges();
+  }
+
   static bool classof(const MachineInstr *MI) {
     switch (MI->getOpcode()) {
     case TargetOpcode::G_LOAD:

diff  --git a/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp b/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
index 4d22b4c2ed2a8..91fc9d764b3b8 100644
--- a/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
@@ -13,11 +13,13 @@
 #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/Analysis/ValueTracking.h"
+#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
 #include "llvm/CodeGen/GlobalISel/Utils.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/TargetLowering.h"
 #include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/IR/ConstantRange.h"
 #include "llvm/IR/Module.h"
 #include "llvm/Target/TargetMachine.h"
 
@@ -624,6 +626,33 @@ unsigned GISelKnownBits::computeNumSignBitsMin(Register Src0, Register Src1,
   return std::min(computeNumSignBits(Src0, DemandedElts, Depth), Src1SignBits);
 }
 
+/// Compute the known number of sign bits with attached range metadata in the
+/// memory operand. If this is an extending load, accounts for the behavior of
+/// the high bits.
+static unsigned computeNumSignBitsFromRangeMetadata(const GAnyLoad *Ld,
+                                                    unsigned TyBits) {
+  const MDNode *Ranges = Ld->getRanges();
+  if (!Ranges)
+    return 1;
+
+  ConstantRange CR = getConstantRangeFromMetadata(*Ranges);
+  if (TyBits > CR.getBitWidth()) {
+    switch (Ld->getOpcode()) {
+    case TargetOpcode::G_SEXTLOAD:
+      CR = CR.signExtend(TyBits);
+      break;
+    case TargetOpcode::G_ZEXTLOAD:
+      CR = CR.zeroExtend(TyBits);
+      break;
+    default:
+      break;
+    }
+  }
+
+  return std::min(CR.getSignedMin().getNumSignBits(),
+                  CR.getSignedMax().getNumSignBits());
+}
+
 unsigned GISelKnownBits::computeNumSignBits(Register R,
                                             const APInt &DemandedElts,
                                             unsigned Depth) {
@@ -675,20 +704,39 @@ unsigned GISelKnownBits::computeNumSignBits(Register R,
     unsigned InRegBits = TyBits - SrcBits + 1;
     return std::max(computeNumSignBits(Src, DemandedElts, Depth + 1), InRegBits);
   }
+  case TargetOpcode::G_LOAD: {
+    GLoad *Ld = cast<GLoad>(&MI);
+    if (DemandedElts != 1 || !getDataLayout().isLittleEndian())
+      break;
+
+    return computeNumSignBitsFromRangeMetadata(Ld, TyBits);
+  }
   case TargetOpcode::G_SEXTLOAD: {
+    GSExtLoad *Ld = cast<GSExtLoad>(&MI);
+
     // FIXME: We need an in-memory type representation.
     if (DstTy.isVector())
       return 1;
 
+    unsigned NumBits = computeNumSignBitsFromRangeMetadata(Ld, TyBits);
+    if (NumBits != 1)
+      return NumBits;
+
     // e.g. i16->i32 = '17' bits known.
     const MachineMemOperand *MMO = *MI.memoperands_begin();
     return TyBits - MMO->getSizeInBits().getValue() + 1;
   }
   case TargetOpcode::G_ZEXTLOAD: {
+    GZExtLoad *Ld = cast<GZExtLoad>(&MI);
+
     // FIXME: We need an in-memory type representation.
     if (DstTy.isVector())
       return 1;
 
+    unsigned NumBits = computeNumSignBitsFromRangeMetadata(Ld, TyBits);
+    if (NumBits != 1)
+      return NumBits;
+
     // e.g. i16->i32 = '16' bits known.
     const MachineMemOperand *MMO = *MI.memoperands_begin();
     return TyBits - MMO->getSizeInBits().getValue();

diff  --git a/llvm/test/CodeGen/AMDGPU/load-range-metadata-sign-bits.ll b/llvm/test/CodeGen/AMDGPU/load-range-metadata-sign-bits.ll
index a4cf4d6ed2c8e..5fc1a87e71a1a 100644
--- a/llvm/test/CodeGen/AMDGPU/load-range-metadata-sign-bits.ll
+++ b/llvm/test/CodeGen/AMDGPU/load-range-metadata-sign-bits.ll
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SDAG %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GISEL %s
 
 define i32 @range_metadata_sext_i8_signed_range_i32(ptr addrspace(1) %ptr) {
 ; GCN-LABEL: range_metadata_sext_i8_signed_range_i32:
@@ -43,13 +44,21 @@ define i32 @range_metadata_sext_lower_range_limited_i32(ptr addrspace(1) %ptr) {
 }
 
 define i32 @range_metadata_sext_i8_neg_neg_range_i32(ptr addrspace(1) %ptr) {
-; GCN-LABEL: range_metadata_sext_i8_neg_neg_range_i32:
-; GCN:       ; %bb.0:
-; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    global_load_dword v0, v[0:1], off glc
-; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    v_and_b32_e32 v0, 63, v0
-; GCN-NEXT:    s_setpc_b64 s[30:31]
+; SDAG-LABEL: range_metadata_sext_i8_neg_neg_range_i32:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT:    global_load_dword v0, v[0:1], off glc
+; SDAG-NEXT:    s_waitcnt vmcnt(0)
+; SDAG-NEXT:    v_and_b32_e32 v0, 63, v0
+; SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL-LABEL: range_metadata_sext_i8_neg_neg_range_i32:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    global_load_dword v0, v[0:1], off glc
+; GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GISEL-NEXT:    v_and_b32_e32 v0, 0x7f, v0
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
   %val = load volatile i32, ptr addrspace(1) %ptr, align 4, !range !3, !noundef !{}
   %shl = shl i32 %val, 25
   %ashr = ashr i32 %shl, 25
@@ -98,14 +107,23 @@ define i32 @range_metadata_i32_neg1_to_1(ptr addrspace(1) %ptr) {
 }
 
 define i64 @range_metadata_sext_i8_signed_range_i64(ptr addrspace(1) %ptr) {
-; GCN-LABEL: range_metadata_sext_i8_signed_range_i64:
-; GCN:       ; %bb.0:
-; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    global_load_dwordx2 v[0:1], v[0:1], off glc
-; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    v_lshlrev_b32_e32 v1, 23, v0
-; GCN-NEXT:    v_ashrrev_i64 v[0:1], 55, v[0:1]
-; GCN-NEXT:    s_setpc_b64 s[30:31]
+; SDAG-LABEL: range_metadata_sext_i8_signed_range_i64:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT:    global_load_dwordx2 v[0:1], v[0:1], off glc
+; SDAG-NEXT:    s_waitcnt vmcnt(0)
+; SDAG-NEXT:    v_lshlrev_b32_e32 v1, 23, v0
+; SDAG-NEXT:    v_ashrrev_i64 v[0:1], 55, v[0:1]
+; SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL-LABEL: range_metadata_sext_i8_signed_range_i64:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    global_load_dwordx2 v[0:1], v[0:1], off glc
+; GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GISEL-NEXT:    v_bfe_i32 v0, v0, 0, 9
+; GISEL-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
   %val = load volatile i64, ptr addrspace(1) %ptr, align 4, !range !7, !noundef !{}
   %shl = shl i64 %val, 55
   %ashr = ashr i64 %shl, 55


        


More information about the llvm-commits mailing list