[llvm] be62b3b - [AArch64][GlobalISel] Add a combine to fold away truncate in: G_ICMP EQ/NE (G_TRUNC(v), 0)

Amara Emerson via llvm-commits llvm-commits at lists.llvm.org
Thu Jan 28 16:29:31 PST 2021


Author: Amara Emerson
Date: 2021-01-28T16:29:14-08:00
New Revision: be62b3ba347d8d9e7ada364ddc9e16d7ddd05b8e

URL: https://github.com/llvm/llvm-project/commit/be62b3ba347d8d9e7ada364ddc9e16d7ddd05b8e
DIFF: https://github.com/llvm/llvm-project/commit/be62b3ba347d8d9e7ada364ddc9e16d7ddd05b8e.diff

LOG: [AArch64][GlobalISel] Add a combine to fold away truncate in: G_ICMP EQ/NE (G_TRUNC(v), 0)

We perform this optimization when we can determine that comparing the wide
value with an eq/ne predicate gives the same result as comparing just the
lower (truncated) bits, which holds whenever the bits removed by the truncate
are all sign bits.

Differential Revision: https://reviews.llvm.org/D95645
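
To see why eq/ne transfers across the truncate, consider a wide value whose
truncated-away bits are all copies of the sign bit (e.g. the result of a
sign-extending load): the wide value is zero exactly when its low bits are
zero. A minimal standalone C++ illustration of the equivalence (not part of
the patch; the sample inputs are arbitrary):

  #include <cassert>
  #include <cstdint>

  int main() {
    for (int32_t Narrow : {0, 1, -1, INT32_MIN, INT32_MAX}) {
      int64_t Wide = Narrow;                      // models G_SEXTLOAD s32 -> s64
      int32_t Trunc = static_cast<int32_t>(Wide); // models G_TRUNC s64 -> s32
      assert((Trunc == 0) == (Wide == 0));        // eq gives the same answer
      assert((Trunc != 0) == (Wide != 0));        // ne gives the same answer
    }
    return 0;
  }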

Added: 
    llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-icmp-redundant-trunc.mir

Modified: 
    llvm/lib/Target/AArch64/AArch64Combine.td
    llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td
index b1e714653f46..980d61dde1d7 100644
--- a/llvm/lib/Target/AArch64/AArch64Combine.td
+++ b/llvm/lib/Target/AArch64/AArch64Combine.td
@@ -17,9 +17,17 @@ def fconstant_to_constant : GICombineRule<
          [{ return matchFConstantToConstant(*${root}, MRI); }]),
   (apply [{ applyFConstantToConstant(*${root}); }])>;
 
+def icmp_redundant_trunc_matchdata : GIDefMatchData<"Register">;
+def icmp_redundant_trunc : GICombineRule<
+  (defs root:$root, icmp_redundant_trunc_matchdata:$matchinfo),
+  (match (wip_match_opcode G_ICMP):$root,
+         [{ return matchICmpRedundantTrunc(*${root}, MRI, Helper.getKnownBits(), ${matchinfo}); }]),
+  (apply [{ applyICmpRedundantTrunc(*${root}, MRI, B, Observer, ${matchinfo}); }])>;
+
 def AArch64PreLegalizerCombinerHelper: GICombinerHelper<
   "AArch64GenPreLegalizerCombinerHelper", [all_combines,
-                                           fconstant_to_constant]> {
+                                           fconstant_to_constant,
+                                           icmp_redundant_trunc]> {
   let DisableRuleOption = "aarch64prelegalizercombiner-disable-rule";
   let StateClass = "AArch64PreLegalizerCombinerHelperState";
   let AdditionalArguments = [];

diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp
index 5f9b64e274b3..a1cc0b42eb5a 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp
@@ -17,9 +17,13 @@
 #include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
 #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
 #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
 #include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/Instructions.h"
 #include "llvm/Support/Debug.h"
 
 #define DEBUG_TYPE "aarch64-prelegalizer-combiner"
@@ -53,6 +57,56 @@ static void applyFConstantToConstant(MachineInstr &MI) {
   MI.eraseFromParent();
 }
 
+/// Try to match a G_ICMP of a G_TRUNC with zero, in which the truncated bits
+/// are sign bits. In this case, we can transform the G_ICMP to directly compare
+/// the wide value with zero.
+static bool matchICmpRedundantTrunc(MachineInstr &MI, MachineRegisterInfo &MRI,
+                                    GISelKnownBits *KB, Register &MatchInfo) {
+  assert(MI.getOpcode() == TargetOpcode::G_ICMP && KB);
+
+  auto Pred = (CmpInst::Predicate)MI.getOperand(1).getPredicate();
+  if (!ICmpInst::isEquality(Pred))
+    return false;
+
+  Register LHS = MI.getOperand(2).getReg();
+  LLT LHSTy = MRI.getType(LHS);
+  if (!LHSTy.isScalar())
+    return false;
+
+  Register RHS = MI.getOperand(3).getReg();
+  Register WideReg;
+
+  if (!mi_match(LHS, MRI, m_GTrunc(m_Reg(WideReg))) ||
+      !mi_match(RHS, MRI, m_SpecificICst(0)))
+    return false;
+
+  LLT WideTy = MRI.getType(WideReg);
+  if (KB->computeNumSignBits(WideReg) <=
+      WideTy.getSizeInBits() - LHSTy.getSizeInBits())
+    return false;
+
+  MatchInfo = WideReg;
+  return true;
+}
+
+static bool applyICmpRedundantTrunc(MachineInstr &MI, MachineRegisterInfo &MRI,
+                                    MachineIRBuilder &Builder,
+                                    GISelChangeObserver &Observer,
+                                    Register &WideReg) {
+  assert(MI.getOpcode() == TargetOpcode::G_ICMP);
+
+  LLT WideTy = MRI.getType(WideReg);
+  // Directly use the wide register as the LHS, and use an equivalently-sized
+  // zero for the RHS.
+  Builder.setInstrAndDebugLoc(MI);
+  auto WideZero = Builder.buildConstant(WideTy, 0);
+  Observer.changingInstr(MI);
+  MI.getOperand(2).setReg(WideReg);
+  MI.getOperand(3).setReg(WideZero.getReg(0));
+  Observer.changedInstr(MI);
+  return true;
+}
+
 class AArch64PreLegalizerCombinerHelperState {
 protected:
   CombinerHelper &Helper;

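Before the new test file, a note on the sign-bit accounting above: the combine
fires only when GISelKnownBits proves strictly more sign bits than the number
of bits the truncate discards. A hedged worked example of that arithmetic
against the tests below, assuming (as the known-bits rules for sign-extending
loads suggest) that the 4-byte G_SEXTLOAD into s64 reports 64 - 32 + 1 = 33
sign bits:

  #include <cassert>

  int main() {
    unsigned NumSignBits = 33; // assumed computeNumSignBits result for the tests
    // icmp_trunc_sextload / _eq: the s64 -> s32 truncate discards 32 bits.
    assert(NumSignBits > 64u - 32u);    // 33 > 32: combine fires
    // icmp_trunc_sextload_extend_mismatch: s64 -> s16 discards 48 bits.
    assert(!(NumSignBits > 64u - 16u)); // 33 > 48 is false: combine rejected
    return 0;
  }
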
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-icmp-redundant-trunc.mir b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-icmp-redundant-trunc.mir
new file mode 100644
index 000000000000..f89d8e31fa48
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-icmp-redundant-trunc.mir
@@ -0,0 +1,107 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple aarch64-apple-ios  -run-pass=aarch64-prelegalizer-combiner %s -o - -verify-machineinstrs | FileCheck %s
+
+# This test checks the combine that removes the G_TRUNC when it can be determined to be redundant.
+---
+name: icmp_trunc_sextload
+tracksRegLiveness: true
+body:             |
+  bb.1:
+    liveins: $x0
+
+    ; CHECK-LABEL: name: icmp_trunc_sextload
+    ; CHECK: liveins: $x0
+    ; CHECK: %v:_(p0) = COPY $x0
+    ; CHECK: %load:_(s64) = G_SEXTLOAD %v(p0) :: (load 4)
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK: %cmp:_(s1) = G_ICMP intpred(ne), %load(s64), [[C]]
+    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT %cmp(s1)
+    ; CHECK: $w0 = COPY [[ANYEXT]](s32)
+    ; CHECK: RET_ReallyLR implicit $w0
+    %v:_(p0) = COPY $x0
+    %load:_(s64) = G_SEXTLOAD %v:_(p0) :: (load 4)
+    %trunc:_(s32) = G_TRUNC %load(s64)
+    %zero:_(s32) = G_CONSTANT i32 0
+    %cmp:_(s1) = G_ICMP intpred(ne), %trunc(s32), %zero
+    %5:_(s32) = G_ANYEXT %cmp
+    $w0 = COPY %5(s32)
+    RET_ReallyLR implicit $w0
+...
+---
+name: icmp_trunc_sextload_eq
+tracksRegLiveness: true
+body:             |
+  bb.1:
+    liveins: $x0
+
+    ; CHECK-LABEL: name: icmp_trunc_sextload_eq
+    ; CHECK: liveins: $x0
+    ; CHECK: %v:_(p0) = COPY $x0
+    ; CHECK: %load:_(s64) = G_SEXTLOAD %v(p0) :: (load 4)
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK: %cmp:_(s1) = G_ICMP intpred(eq), %load(s64), [[C]]
+    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT %cmp(s1)
+    ; CHECK: $w0 = COPY [[ANYEXT]](s32)
+    ; CHECK: RET_ReallyLR implicit $w0
+    %v:_(p0) = COPY $x0
+    %load:_(s64) = G_SEXTLOAD %v:_(p0) :: (load 4)
+    %trunc:_(s32) = G_TRUNC %load(s64)
+    %zero:_(s32) = G_CONSTANT i32 0
+    %cmp:_(s1) = G_ICMP intpred(eq), %trunc(s32), %zero
+    %5:_(s32) = G_ANYEXT %cmp
+    $w0 = COPY %5(s32)
+    RET_ReallyLR implicit $w0
+...
+---
+name: icmp_trunc_sextload_wrongpred
+tracksRegLiveness: true
+body:             |
+  bb.1:
+    liveins: $x0
+
+    ; CHECK-LABEL: name: icmp_trunc_sextload_wrongpred
+    ; CHECK: liveins: $x0
+    ; CHECK: %v:_(p0) = COPY $x0
+    ; CHECK: %load:_(s64) = G_SEXTLOAD %v(p0) :: (load 4)
+    ; CHECK: %trunc:_(s32) = G_TRUNC %load(s64)
+    ; CHECK: %zero:_(s32) = G_CONSTANT i32 0
+    ; CHECK: %cmp:_(s1) = G_ICMP intpred(slt), %trunc(s32), %zero
+    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT %cmp(s1)
+    ; CHECK: $w0 = COPY [[ANYEXT]](s32)
+    ; CHECK: RET_ReallyLR implicit $w0
+    %v:_(p0) = COPY $x0
+    %load:_(s64) = G_SEXTLOAD %v:_(p0) :: (load 4)
+    %trunc:_(s32) = G_TRUNC %load(s64)
+    %zero:_(s32) = G_CONSTANT i32 0
+    %cmp:_(s1) = G_ICMP intpred(slt), %trunc(s32), %zero
+    %5:_(s32) = G_ANYEXT %cmp
+    $w0 = COPY %5(s32)
+    RET_ReallyLR implicit $w0
+...
+---
+name: icmp_trunc_sextload_extend_mismatch
+tracksRegLiveness: true
+body:             |
+  bb.1:
+    liveins: $x0
+
+    ; CHECK-LABEL: name: icmp_trunc_sextload_extend_mismatch
+    ; CHECK: liveins: $x0
+    ; CHECK: %v:_(p0) = COPY $x0
+    ; CHECK: %load:_(s64) = G_SEXTLOAD %v(p0) :: (load 4)
+    ; CHECK: %trunc:_(s16) = G_TRUNC %load(s64)
+    ; CHECK: %zero:_(s16) = G_CONSTANT i16 0
+    ; CHECK: %cmp:_(s1) = G_ICMP intpred(ne), %trunc(s16), %zero
+    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT %cmp(s1)
+    ; CHECK: $w0 = COPY [[ANYEXT]](s32)
+    ; CHECK: RET_ReallyLR implicit $w0
+    %v:_(p0) = COPY $x0
+    %load:_(s64) = G_SEXTLOAD %v:_(p0) :: (load 4)
+    %trunc:_(s16) = G_TRUNC %load(s64)
+    %zero:_(s16) = G_CONSTANT i16 0
+    %cmp:_(s1) = G_ICMP intpred(ne), %trunc(s16), %zero
+    %5:_(s32) = G_ANYEXT %cmp
+    $w0 = COPY %5(s32)
+    RET_ReallyLR implicit $w0
+...
+