[llvm] [X86] matchAddressRecursively - peek through ZEXT&shl&trunc to find indexreg = trunc's src (PR #73616)

Mon Nov 27 23:32:47 PST 2023

https://github.com/yubingex007-a11y created https://github.com/llvm/llvm-project/pull/73616

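When the address index has the following form (SelectionDAG dump, each operand indented under its user):

t69: i64 = zero_extend t68
  t68: i32 = shl nuw nsw t67, Constant:i8<3>
    t67: i32 = truncate t64
      t64: i64 = X86ISD::MUL_IMM t62, Constant:i64<5>
        t62: i64,ch = load<(load (s16) from %ir.<badref>), zext from i16>
             t0, t12, undef:i64

peek through the zero_extend, shl and truncate and use the truncate's source as the index register:
     =>
index: t64, scale = 8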

>From a100d829b5576c8991e6a5137e9f44fe9bf5768a Mon Sep 17 00:00:00 2001
From: Bing1 Yu <bing1.yu at intel.com>
Date: Tue, 28 Nov 2023 15:28:30 +0800
Subject: [PATCH] [X86] matchAddressRecursively - peek through ZEXT&shl&trunc
 to find indexreg = trunc's src index: t69: i64 = zero_extend t68 t68: i32 =
 shl nuw nsw t67, Constant:i8<3>   t67: i32 = truncate t64     t64: i64 =
 X86ISD::MUL_IMM  t62, Constant:i64<5>       t62: i64,ch = load<(load (s16)
 from %ir.<badref>), zext from i16>            t0, t12, undef:i64      =>
 index: t64, scale = 8

---
 llvm/lib/Target/X86/X86ISelDAGToDAG.cpp | 33 +++++++++++++++++++++++++
 llvm/test/CodeGen/X86/zext-shl-trunc.ll | 29 ++++++++++++++++++++++
 2 files changed, 62 insertions(+)
 create mode 100644 llvm/test/CodeGen/X86/zext-shl-trunc.ll

diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index 1a989a3bccfdb45..dfaf30735e87692 100644
--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -2715,6 +2715,39 @@ bool X86DAGToDAGISel::matchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
           !CurDAG->MaskedValueIsZero(ShlSrc, HighZeros & Mask))
         break;
 
+      // index: t69: i64 = zero_extend t68
+      // t68: i32 = shl nuw nsw t67, Constant:i8<3>
+      //   t67: i32 = truncate t64
+      //     t64: i64 = X86ISD::MUL_IMM t62, Constant:i64<5>
+      //       t62: i64,ch = load<(load (s16) from %ir.<badref>), zext from i16>
+      //       t0, t12, undef:i64
+      // =>
+      // index: t64, scale = 8
+
+      SDValue Trunc = Src.getOperand(0);
+      // match: i32 = truncate(i64 mul_imm(i64 zext from load i16,5))
+      if (N.getValueType() == MVT::i64 && Trunc.getOpcode() == ISD::TRUNCATE &&
+          Trunc.getValueType() == MVT::i32 &&
+          Trunc.getOperand(0).getValueType() == MVT::i64) {
+        // check i64 mul_imm(i64 zext from load i16,5)
+        if (Trunc.getOperand(0)->getOpcode() == X86ISD::MUL_IMM) {
+          SDValue Mul = Trunc.getOperand(0);
+          if (auto *Imm = dyn_cast<ConstantSDNode>(Mul.getOperand(1))) {
+            // check: imm < INT32_MAX
+            if (Imm->getZExtValue() < INT32_MAX &&
+                ISD::isZEXTLoad(Mul.getOperand(0).getNode())) {
+              LoadSDNode *LN0 = cast<LoadSDNode>(Mul.getOperand(0).getNode());
+              EVT MemVT = LN0->getMemoryVT();
+              if (MemVT == MVT::i16) {
+                AM.Scale = 1 << ShAmtV;
+                AM.IndexReg =
+                    matchIndexRecursively(Trunc.getOperand(0), AM, Depth + 1);
+                return false;
+              }
+            }
+          }
+        }
+      }
       // zext (shl nuw i8 %x, C1) to i32
       // --> shl (zext i8 %x to i32), (zext C1)
       // zext (and (shl nuw i8 %x, C1), C2) to i32
diff --git a/llvm/test/CodeGen/X86/zext-shl-trunc.ll b/llvm/test/CodeGen/X86/zext-shl-trunc.ll
new file mode 100644
index 000000000000000..52b20ccd1975efc
--- /dev/null
+++ b/llvm/test/CodeGen/X86/zext-shl-trunc.ll
@@ -0,0 +1,29 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s --check-prefixes=X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefixes=X64
+
+%structA = type { %classB, float, i16, i8, i32 }
+%classB = type { double, double, double }
+
+define double @func(double %0, double %1, double %2, double %3, ptr %4, ptr %5, i32 %6, ptr %7, double %8, ptr %9) {
+; X86-LABEL: func:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movzwl (%ecx), %ecx
+; X86-NEXT:    leal (%ecx,%ecx,8), %ecx
+; X86-NEXT:    fldl 16(%eax,%ecx,4)
+; X86-NEXT:    retl
+;
+; X64-LABEL: func:
+; X64:       # %bb.0:
+; X64-NEXT:    movzwl (%rsi), %eax
+; X64-NEXT:    leaq (%rax,%rax,4), %rax
+; X64-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; X64-NEXT:    retq
+  %11 = load i16, ptr %5, align 2
+  %12 = zext i16 %11 to i64
+  %13 = getelementptr inbounds %structA, ptr %4, i64 %12, i32 0, i32 2
+  %14 = load double, ptr %13, align 8
+  ret double %14
+}