[llvm] abc99ab - GlobalISel: Implement known bits for min/max

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Thu Aug 27 13:56:27 PDT 2020


Author: Matt Arsenault
Date: 2020-08-27T16:56:17-04:00
New Revision: abc99ab5725636c17fa9c9ced0269f92bf5398cb

URL: https://github.com/llvm/llvm-project/commit/abc99ab5725636c17fa9c9ced0269f92bf5398cb
DIFF: https://github.com/llvm/llvm-project/commit/abc99ab5725636c17fa9c9ced0269f92bf5398cb.diff

LOG: GlobalISel: Implement known bits for min/max

Added: 
    

Modified: 
    llvm/include/llvm/CodeGen/GlobalISel/GISelKnownBits.h
    llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
    llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-and.mir

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/CodeGen/GlobalISel/GISelKnownBits.h b/llvm/include/llvm/CodeGen/GlobalISel/GISelKnownBits.h
index f5662a27debe..f2c19f559a36 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/GISelKnownBits.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/GISelKnownBits.h
@@ -34,6 +34,10 @@ class GISelKnownBits : public GISelChangeObserver {
   /// Cache maintained during a computeKnownBits request.
   SmallDenseMap<Register, KnownBits, 16> ComputeKnownBitsCache;
 
+  void computeKnownBitsMin(Register Src0, Register Src1, KnownBits &Known,
+                           const APInt &DemandedElts,
+                           unsigned Depth = 0);
+
 public:
   GISelKnownBits(MachineFunction &MF, unsigned MaxDepth = 6);
   virtual ~GISelKnownBits() = default;

diff  --git a/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp b/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
index 25ca09e31e2b..2ffbde6a62c4 100644
--- a/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
@@ -94,6 +94,25 @@ dumpResult(const MachineInstr &MI, const KnownBits &Known, unsigned Depth) {
          << "\n";
 }
 
+/// Compute the bits known to be the same in both \p Src0 and \p Src1, i.e. the
+/// known bits of a result that is one of the two (select, min/max).
+void GISelKnownBits::computeKnownBitsMin(Register Src0, Register Src1,
+                                         KnownBits &Known,
+                                         const APInt &DemandedElts,
+                                         unsigned Depth) {
+  // Test src1 first, since we canonicalize simpler expressions to the RHS.
+  computeKnownBitsImpl(Src1, Known, DemandedElts, Depth);
+  // Nothing known for src1 means nothing is common to both; early out.
+  if (Known.isUnknown())
+    return;
+
+  KnownBits Known2;
+  computeKnownBitsImpl(Src0, Known2, DemandedElts, Depth);
+  // Intersect the two; `Known &= Known2` would compute a bitwise AND instead.
+  Known.One &= Known2.One;
+  Known.Zero &= Known2.Zero;
+}
+
 void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known,
                                           const APInt &DemandedElts,
                                           unsigned Depth) {
@@ -284,15 +303,16 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known,
     break;
   }
   case TargetOpcode::G_SELECT: {
-    computeKnownBitsImpl(MI.getOperand(3).getReg(), Known, DemandedElts,
-                         Depth + 1);
-    // If we don't know any bits, early out.
-    if (Known.isUnknown())
-      break;
-    computeKnownBitsImpl(MI.getOperand(2).getReg(), Known2, DemandedElts,
-                         Depth + 1);
-    // Only known if known in both the LHS and RHS.
-    Known &= Known2;
+    computeKnownBitsMin(MI.getOperand(2).getReg(), MI.getOperand(3).getReg(),
+                        Known, DemandedElts, Depth + 1);
+    break;
+  }
+  case TargetOpcode::G_SMIN:
+  case TargetOpcode::G_SMAX:
+  case TargetOpcode::G_UMIN:
+  case TargetOpcode::G_UMAX: {
+    computeKnownBitsMin(MI.getOperand(1).getReg(), MI.getOperand(2).getReg(),
+                        Known, DemandedElts, Depth + 1);
     break;
   }
   case TargetOpcode::G_FCMP:

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-and.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-and.mir
index 194d52e19b82..8623546b2cc5 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-and.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-and.mir
@@ -20,3 +20,163 @@ body:             |
     $vgpr0 = COPY %and
 
 ...
+
+---
+name:  remove_and_255_smin_zextload
+legalized:       true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+    ; CHECK-LABEL: name: remove_and_255_smin_zextload
+    ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+    ; CHECK: %ptr0:_(p1) = COPY $vgpr0_vgpr1
+    ; CHECK: %ptr1:_(p1) = COPY $vgpr2_vgpr3
+    ; CHECK: %load0:_(s32) = G_ZEXTLOAD %ptr0(p1) :: (load 1, addrspace 1)
+    ; CHECK: %load1:_(s32) = G_ZEXTLOAD %ptr1(p1) :: (load 1, addrspace 1)
+    ; CHECK: %smin:_(s32) = G_SMIN %load0, %load1
+    ; CHECK: $vgpr0 = COPY %smin(s32)
+    %ptr0:_(p1) = COPY $vgpr0_vgpr1
+    %ptr1:_(p1) = COPY $vgpr2_vgpr3
+    %load0:_(s32) = G_ZEXTLOAD %ptr0 :: (load 1, addrspace 1, align 1)
+    %load1:_(s32) = G_ZEXTLOAD %ptr1 :: (load 1, addrspace 1, align 1)
+    %smin:_(s32) = G_SMIN %load0, %load1
+    %mask:_(s32) = G_CONSTANT i32 255
+    %and:_(s32) = G_AND %smin, %mask
+    $vgpr0 = COPY %and
+
+...
+
+---
+name:  remove_and_255_smax_zextload
+legalized:       true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+    ; CHECK-LABEL: name: remove_and_255_smax_zextload
+    ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+    ; CHECK: %ptr0:_(p1) = COPY $vgpr0_vgpr1
+    ; CHECK: %ptr1:_(p1) = COPY $vgpr2_vgpr3
+    ; CHECK: %load0:_(s32) = G_ZEXTLOAD %ptr0(p1) :: (load 1, addrspace 1)
+    ; CHECK: %load1:_(s32) = G_ZEXTLOAD %ptr1(p1) :: (load 1, addrspace 1)
+    ; CHECK: %smax:_(s32) = G_SMAX %load0, %load1
+    ; CHECK: $vgpr0 = COPY %smax(s32)
+    %ptr0:_(p1) = COPY $vgpr0_vgpr1
+    %ptr1:_(p1) = COPY $vgpr2_vgpr3
+    %load0:_(s32) = G_ZEXTLOAD %ptr0 :: (load 1, addrspace 1, align 1)
+    %load1:_(s32) = G_ZEXTLOAD %ptr1 :: (load 1, addrspace 1, align 1)
+    %smax:_(s32) = G_SMAX %load0, %load1
+    %mask:_(s32) = G_CONSTANT i32 255
+    %and:_(s32) = G_AND %smax, %mask
+    $vgpr0 = COPY %and
+
+...
+
+---
+name:  remove_and_255_umin_zextload
+legalized:       true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+    ; CHECK-LABEL: name: remove_and_255_umin_zextload
+    ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+    ; CHECK: %ptr0:_(p1) = COPY $vgpr0_vgpr1
+    ; CHECK: %ptr1:_(p1) = COPY $vgpr2_vgpr3
+    ; CHECK: %load0:_(s32) = G_ZEXTLOAD %ptr0(p1) :: (load 1, addrspace 1)
+    ; CHECK: %load1:_(s32) = G_ZEXTLOAD %ptr1(p1) :: (load 1, addrspace 1)
+    ; CHECK: %umin:_(s32) = G_UMIN %load0, %load1
+    ; CHECK: $vgpr0 = COPY %umin(s32)
+    %ptr0:_(p1) = COPY $vgpr0_vgpr1
+    %ptr1:_(p1) = COPY $vgpr2_vgpr3
+    %load0:_(s32) = G_ZEXTLOAD %ptr0 :: (load 1, addrspace 1, align 1)
+    %load1:_(s32) = G_ZEXTLOAD %ptr1 :: (load 1, addrspace 1, align 1)
+    %umin:_(s32) = G_UMIN %load0, %load1
+    %mask:_(s32) = G_CONSTANT i32 255
+    %and:_(s32) = G_AND %umin, %mask
+    $vgpr0 = COPY %and
+
+...
+
+---
+name:  remove_and_255_umax_zextload
+legalized:       true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+    ; CHECK-LABEL: name: remove_and_255_umax_zextload
+    ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+    ; CHECK: %ptr0:_(p1) = COPY $vgpr0_vgpr1
+    ; CHECK: %ptr1:_(p1) = COPY $vgpr2_vgpr3
+    ; CHECK: %load0:_(s32) = G_ZEXTLOAD %ptr0(p1) :: (load 1, addrspace 1)
+    ; CHECK: %load1:_(s32) = G_ZEXTLOAD %ptr1(p1) :: (load 1, addrspace 1)
+    ; CHECK: %umax:_(s32) = G_UMAX %load0, %load1
+    ; CHECK: $vgpr0 = COPY %umax(s32)
+    %ptr0:_(p1) = COPY $vgpr0_vgpr1
+    %ptr1:_(p1) = COPY $vgpr2_vgpr3
+    %load0:_(s32) = G_ZEXTLOAD %ptr0 :: (load 1, addrspace 1, align 1)
+    %load1:_(s32) = G_ZEXTLOAD %ptr1 :: (load 1, addrspace 1, align 1)
+    %umax:_(s32) = G_UMAX %load0, %load1
+    %mask:_(s32) = G_CONSTANT i32 255
+    %and:_(s32) = G_AND %umax, %mask
+    $vgpr0 = COPY %and
+
+...
+
+# Not enough known bits for lhs, so the G_AND must stay. FIXME: CHECKs drop it.
+---
+name:  remove_and_255_smin_fail_lhs
+legalized:       true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+    ; CHECK-LABEL: name: remove_and_255_smin_fail_lhs
+    ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+    ; CHECK: %ptr0:_(p1) = COPY $vgpr0_vgpr1
+    ; CHECK: %ptr1:_(p1) = COPY $vgpr2_vgpr3
+    ; CHECK: %load0:_(s32) = G_LOAD %ptr0(p1) :: (load 4, addrspace 1)
+    ; CHECK: %load1:_(s32) = G_ZEXTLOAD %ptr1(p1) :: (load 1, addrspace 1)
+    ; CHECK: %smin:_(s32) = G_SMIN %load0, %load1
+    ; CHECK: $vgpr0 = COPY %smin(s32)
+    %ptr0:_(p1) = COPY $vgpr0_vgpr1
+    %ptr1:_(p1) = COPY $vgpr2_vgpr3
+    %load0:_(s32) = G_LOAD %ptr0 :: (load 4, addrspace 1, align 4)
+    %load1:_(s32) = G_ZEXTLOAD %ptr1 :: (load 1, addrspace 1, align 1)
+    %smin:_(s32) = G_SMIN %load0, %load1
+    %mask:_(s32) = G_CONSTANT i32 255
+    %and:_(s32) = G_AND %smin, %mask
+    $vgpr0 = COPY %and
+
+...
+
+# Don't have enough known bits for rhs
+---
+name:  remove_and_255_smin_fail_rhs
+legalized:       true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+    ; CHECK-LABEL: name: remove_and_255_smin_fail_rhs
+    ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+    ; CHECK: %ptr0:_(p1) = COPY $vgpr0_vgpr1
+    ; CHECK: %ptr1:_(p1) = COPY $vgpr2_vgpr3
+    ; CHECK: %load0:_(s32) = G_ZEXTLOAD %ptr0(p1) :: (load 1, addrspace 1)
+    ; CHECK: %load1:_(s32) = G_LOAD %ptr1(p1) :: (load 4, addrspace 1)
+    ; CHECK: %smin:_(s32) = G_SMIN %load0, %load1
+    ; CHECK: %mask:_(s32) = G_CONSTANT i32 255
+    ; CHECK: %and:_(s32) = G_AND %smin, %mask
+    ; CHECK: $vgpr0 = COPY %and(s32)
+    %ptr0:_(p1) = COPY $vgpr0_vgpr1
+    %ptr1:_(p1) = COPY $vgpr2_vgpr3
+    %load0:_(s32) = G_ZEXTLOAD %ptr0 :: (load 1, addrspace 1, align 1)
+    %load1:_(s32) = G_LOAD %ptr1 :: (load 4, addrspace 1, align 4)
+    %smin:_(s32) = G_SMIN %load0, %load1
+    %mask:_(s32) = G_CONSTANT i32 255
+    %and:_(s32) = G_AND %smin, %mask
+    $vgpr0 = COPY %and
+
+...


        


More information about the llvm-commits mailing list