[llvm] abc99ab - GlobalISel: Implement known bits for min/max
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Thu Aug 27 13:56:27 PDT 2020
Author: Matt Arsenault
Date: 2020-08-27T16:56:17-04:00
New Revision: abc99ab5725636c17fa9c9ced0269f92bf5398cb
URL: https://github.com/llvm/llvm-project/commit/abc99ab5725636c17fa9c9ced0269f92bf5398cb
DIFF: https://github.com/llvm/llvm-project/commit/abc99ab5725636c17fa9c9ced0269f92bf5398cb.diff
LOG: GlobalISel: Implement known bits for min/max
Added:
Modified:
llvm/include/llvm/CodeGen/GlobalISel/GISelKnownBits.h
llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-and.mir
Removed:
################################################################################
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GISelKnownBits.h b/llvm/include/llvm/CodeGen/GlobalISel/GISelKnownBits.h
index f5662a27debe..f2c19f559a36 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/GISelKnownBits.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/GISelKnownBits.h
@@ -34,6 +34,10 @@ class GISelKnownBits : public GISelChangeObserver {
/// Cache maintained during a computeKnownBits request.
SmallDenseMap<Register, KnownBits, 16> ComputeKnownBitsCache;
+ void computeKnownBitsMin(Register Src0, Register Src1, KnownBits &Known,
+ const APInt &DemandedElts,
+ unsigned Depth = 0);
+
public:
GISelKnownBits(MachineFunction &MF, unsigned MaxDepth = 6);
virtual ~GISelKnownBits() = default;
diff --git a/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp b/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
index 25ca09e31e2b..2ffbde6a62c4 100644
--- a/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
@@ -94,6 +94,25 @@ dumpResult(const MachineInstr &MI, const KnownBits &Known, unsigned Depth) {
<< "\n";
}
+/// Compute the bits known to hold for a value that is always exactly one of
+/// \p Src0 or \p Src1 (e.g. the result of a select or an integer min/max).
+///
+/// \param Src0 first candidate source register.
+/// \param Src1 second candidate source register; it is queried first.
+/// \param [out] Known receives only the bits on which both sources agree.
+/// \param DemandedElts forwarded unchanged to computeKnownBitsImpl.
+/// \param Depth forwarded unchanged to computeKnownBitsImpl for both sources.
+void GISelKnownBits::computeKnownBitsMin(Register Src0, Register Src1,
+                                         KnownBits &Known,
+                                         const APInt &DemandedElts,
+                                         unsigned Depth) {
+  // Test src1 first, since we canonicalize simpler expressions to the RHS.
+  computeKnownBitsImpl(Src1, Known, DemandedElts, Depth);
+
+  // If we don't know any bits, early out.
+  if (Known.isUnknown())
+    return;
+
+  KnownBits Known2;
+  computeKnownBitsImpl(Src0, Known2, DemandedElts, Depth);
+
+  // Only known if known in both the LHS and RHS. Intersect the two masks
+  // directly: KnownBits::operator&= models a bitwise AND of the two values
+  // (it unions the known-zero bits), which would let zeros that are known in
+  // only one source leak into the result.
+  Known.Zero &= Known2.Zero;
+  Known.One &= Known2.One;
+}
+
void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known,
const APInt &DemandedElts,
unsigned Depth) {
@@ -284,15 +303,16 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known,
break;
}
case TargetOpcode::G_SELECT: {
- computeKnownBitsImpl(MI.getOperand(3).getReg(), Known, DemandedElts,
- Depth + 1);
- // If we don't know any bits, early out.
- if (Known.isUnknown())
- break;
- computeKnownBitsImpl(MI.getOperand(2).getReg(), Known2, DemandedElts,
- Depth + 1);
- // Only known if known in both the LHS and RHS.
- Known &= Known2;
+ computeKnownBitsMin(MI.getOperand(2).getReg(), MI.getOperand(3).getReg(),
+ Known, DemandedElts, Depth + 1);
+ break;
+ }
+ case TargetOpcode::G_SMIN:
+ case TargetOpcode::G_SMAX:
+ case TargetOpcode::G_UMIN:
+ case TargetOpcode::G_UMAX: {
+ computeKnownBitsMin(MI.getOperand(1).getReg(), MI.getOperand(2).getReg(),
+ Known, DemandedElts, Depth + 1);
break;
}
case TargetOpcode::G_FCMP:
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-and.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-and.mir
index 194d52e19b82..8623546b2cc5 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-and.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-and.mir
@@ -20,3 +20,163 @@ body: |
$vgpr0 = COPY %and
...
+
+---
+name: remove_and_255_smin_zextload
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+ ; CHECK-LABEL: name: remove_and_255_smin_zextload
+ ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+ ; CHECK: %ptr0:_(p1) = COPY $vgpr0_vgpr1
+ ; CHECK: %ptr1:_(p1) = COPY $vgpr2_vgpr3
+ ; CHECK: %load0:_(s32) = G_ZEXTLOAD %ptr0(p1) :: (load 1, addrspace 1)
+ ; CHECK: %load1:_(s32) = G_ZEXTLOAD %ptr1(p1) :: (load 1, addrspace 1)
+ ; CHECK: %smin:_(s32) = G_SMIN %load0, %load1
+ ; CHECK: $vgpr0 = COPY %smin(s32)
+ %ptr0:_(p1) = COPY $vgpr0_vgpr1
+ %ptr1:_(p1) = COPY $vgpr2_vgpr3
+ %load0:_(s32) = G_ZEXTLOAD %ptr0 :: (load 1, addrspace 1, align 1)
+ %load1:_(s32) = G_ZEXTLOAD %ptr1 :: (load 1, addrspace 1, align 1)
+ %smin:_(s32) = G_SMIN %load0, %load1
+ %mask:_(s32) = G_CONSTANT i32 255
+ %and:_(s32) = G_AND %smin, %mask
+ $vgpr0 = COPY %and
+
+...
+
+---
+name: remove_and_255_smax_zextload
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+ ; CHECK-LABEL: name: remove_and_255_smax_zextload
+ ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+ ; CHECK: %ptr0:_(p1) = COPY $vgpr0_vgpr1
+ ; CHECK: %ptr1:_(p1) = COPY $vgpr2_vgpr3
+ ; CHECK: %load0:_(s32) = G_ZEXTLOAD %ptr0(p1) :: (load 1, addrspace 1)
+ ; CHECK: %load1:_(s32) = G_ZEXTLOAD %ptr1(p1) :: (load 1, addrspace 1)
+ ; CHECK: %smax:_(s32) = G_SMAX %load0, %load1
+ ; CHECK: $vgpr0 = COPY %smax(s32)
+ %ptr0:_(p1) = COPY $vgpr0_vgpr1
+ %ptr1:_(p1) = COPY $vgpr2_vgpr3
+ %load0:_(s32) = G_ZEXTLOAD %ptr0 :: (load 1, addrspace 1, align 1)
+ %load1:_(s32) = G_ZEXTLOAD %ptr1 :: (load 1, addrspace 1, align 1)
+ %smax:_(s32) = G_SMAX %load0, %load1
+ %mask:_(s32) = G_CONSTANT i32 255
+ %and:_(s32) = G_AND %smax, %mask
+ $vgpr0 = COPY %and
+
+...
+
+---
+name: remove_and_255_umin_zextload
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+ ; CHECK-LABEL: name: remove_and_255_umin_zextload
+ ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+ ; CHECK: %ptr0:_(p1) = COPY $vgpr0_vgpr1
+ ; CHECK: %ptr1:_(p1) = COPY $vgpr2_vgpr3
+ ; CHECK: %load0:_(s32) = G_ZEXTLOAD %ptr0(p1) :: (load 1, addrspace 1)
+ ; CHECK: %load1:_(s32) = G_ZEXTLOAD %ptr1(p1) :: (load 1, addrspace 1)
+ ; CHECK: %umin:_(s32) = G_UMIN %load0, %load1
+ ; CHECK: $vgpr0 = COPY %umin(s32)
+ %ptr0:_(p1) = COPY $vgpr0_vgpr1
+ %ptr1:_(p1) = COPY $vgpr2_vgpr3
+ %load0:_(s32) = G_ZEXTLOAD %ptr0 :: (load 1, addrspace 1, align 1)
+ %load1:_(s32) = G_ZEXTLOAD %ptr1 :: (load 1, addrspace 1, align 1)
+ %umin:_(s32) = G_UMIN %load0, %load1
+ %mask:_(s32) = G_CONSTANT i32 255
+ %and:_(s32) = G_AND %umin, %mask
+ $vgpr0 = COPY %and
+
+...
+
+---
+name: remove_and_255_umax_zextload
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+ ; CHECK-LABEL: name: remove_and_255_umax_zextload
+ ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+ ; CHECK: %ptr0:_(p1) = COPY $vgpr0_vgpr1
+ ; CHECK: %ptr1:_(p1) = COPY $vgpr2_vgpr3
+ ; CHECK: %load0:_(s32) = G_ZEXTLOAD %ptr0(p1) :: (load 1, addrspace 1)
+ ; CHECK: %load1:_(s32) = G_ZEXTLOAD %ptr1(p1) :: (load 1, addrspace 1)
+ ; CHECK: %umax:_(s32) = G_UMAX %load0, %load1
+ ; CHECK: $vgpr0 = COPY %umax(s32)
+ %ptr0:_(p1) = COPY $vgpr0_vgpr1
+ %ptr1:_(p1) = COPY $vgpr2_vgpr3
+ %load0:_(s32) = G_ZEXTLOAD %ptr0 :: (load 1, addrspace 1, align 1)
+ %load1:_(s32) = G_ZEXTLOAD %ptr1 :: (load 1, addrspace 1, align 1)
+ %umax:_(s32) = G_UMAX %load0, %load1
+ %mask:_(s32) = G_CONSTANT i32 255
+ %and:_(s32) = G_AND %umax, %mask
+ $vgpr0 = COPY %and
+
+...
+
+# Don't have enough known bits for lhs
+---
+name: remove_and_255_smin_fail_lhs
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+ ; CHECK-LABEL: name: remove_and_255_smin_fail_lhs
+ ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+ ; CHECK: %ptr0:_(p1) = COPY $vgpr0_vgpr1
+ ; CHECK: %ptr1:_(p1) = COPY $vgpr2_vgpr3
+ ; CHECK: %load0:_(s32) = G_LOAD %ptr0(p1) :: (load 4, addrspace 1)
+ ; CHECK: %load1:_(s32) = G_ZEXTLOAD %ptr1(p1) :: (load 1, addrspace 1)
+ ; CHECK: %smin:_(s32) = G_SMIN %load0, %load1
+ ; CHECK: $vgpr0 = COPY %smin(s32)
+ %ptr0:_(p1) = COPY $vgpr0_vgpr1
+ %ptr1:_(p1) = COPY $vgpr2_vgpr3
+ %load0:_(s32) = G_LOAD %ptr0 :: (load 4, addrspace 1, align 4)
+ %load1:_(s32) = G_ZEXTLOAD %ptr1 :: (load 1, addrspace 1, align 1)
+ %smin:_(s32) = G_SMIN %load0, %load1
+ %mask:_(s32) = G_CONSTANT i32 255
+ %and:_(s32) = G_AND %smin, %mask
+ $vgpr0 = COPY %and
+
+...
+
+# Don't have enough known bits for rhs
+---
+name: remove_and_255_smin_fail_rhs
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+ ; CHECK-LABEL: name: remove_and_255_smin_fail_rhs
+ ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+ ; CHECK: %ptr0:_(p1) = COPY $vgpr0_vgpr1
+ ; CHECK: %ptr1:_(p1) = COPY $vgpr2_vgpr3
+ ; CHECK: %load0:_(s32) = G_ZEXTLOAD %ptr0(p1) :: (load 1, addrspace 1)
+ ; CHECK: %load1:_(s32) = G_LOAD %ptr1(p1) :: (load 4, addrspace 1)
+ ; CHECK: %smin:_(s32) = G_SMIN %load0, %load1
+ ; CHECK: %mask:_(s32) = G_CONSTANT i32 255
+ ; CHECK: %and:_(s32) = G_AND %smin, %mask
+ ; CHECK: $vgpr0 = COPY %and(s32)
+ %ptr0:_(p1) = COPY $vgpr0_vgpr1
+ %ptr1:_(p1) = COPY $vgpr2_vgpr3
+ %load0:_(s32) = G_ZEXTLOAD %ptr0 :: (load 1, addrspace 1, align 1)
+ %load1:_(s32) = G_LOAD %ptr1 :: (load 4, addrspace 1, align 4)
+ %smin:_(s32) = G_SMIN %load0, %load1
+ %mask:_(s32) = G_CONSTANT i32 255
+ %and:_(s32) = G_AND %smin, %mask
+ $vgpr0 = COPY %and
+
+...
More information about the llvm-commits mailing list