[llvm] [X86] Do not directly fold for VINSERTPS (PR #65718)

Phoebe Wang via llvm-commits llvm-commits at lists.llvm.org
Fri Sep 8 00:08:55 PDT 2023


https://github.com/phoebewang updated https://github.com/llvm/llvm-project/pull/65718:

>From cb3682e226ae1ad72121e58f7eccb594c9cd5632 Mon Sep 17 00:00:00 2001
From: Phoebe Wang <phoebe.wang at intel.com>
Date: Fri, 8 Sep 2023 14:33:57 +0800
Subject: [PATCH 1/2] [X86] Do not directly fold for VINSERTPS

We have already customized folding for VINSERTPS in 7e6606f4f1, which does
the folding when the alignment is >= 4 bytes.

We cannot arbitrarily fold it like the others because we need to calculate
the source offset.
---
 llvm/test/CodeGen/X86/avx.ll                | 6 ++++--
 llvm/test/TableGen/x86-fold-tables.inc      | 2 --
 llvm/utils/TableGen/X86ManualFoldTables.def | 2 ++
 3 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/llvm/test/CodeGen/X86/avx.ll b/llvm/test/CodeGen/X86/avx.ll
index dc59186d568cc2c..6a10d855ea95c0f 100644
--- a/llvm/test/CodeGen/X86/avx.ll
+++ b/llvm/test/CodeGen/X86/avx.ll
@@ -184,12 +184,14 @@ define <4 x float> @nofold_insertps(ptr %a, <4 x float> %b) {
 ; X86-LABEL: nofold_insertps:
 ; X86:       ## %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    vinsertps $176, (%eax), %xmm0, %xmm0 ## xmm0 = xmm0[0,1,2],mem[2]
+; X86-NEXT:    vmovups (%eax), %xmm1
+; X86-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[2]
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: nofold_insertps:
 ; X64:       ## %bb.0:
-; X64-NEXT:    vinsertps $176, (%rdi), %xmm0, %xmm0 ## xmm0 = xmm0[0,1,2],mem[2]
+; X64-NEXT:    vmovups (%rdi), %xmm1
+; X64-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[2]
 ; X64-NEXT:    retq
   %1 = load <4 x float>, ptr %a, align 1
   %2 = shufflevector <4 x float> %b, <4 x float> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
diff --git a/llvm/test/TableGen/x86-fold-tables.inc b/llvm/test/TableGen/x86-fold-tables.inc
index 6168fdfd323c3b5..0e6b8b04f3ca758 100644
--- a/llvm/test/TableGen/x86-fold-tables.inc
+++ b/llvm/test/TableGen/x86-fold-tables.inc
@@ -2105,8 +2105,6 @@ static const X86MemoryFoldTableEntry MemoryFoldTable2[] = {
   {X86::VINSERTI64x2Z256rr, X86::VINSERTI64x2Z256rm, 0},
   {X86::VINSERTI64x2Zrr, X86::VINSERTI64x2Zrm, 0},
   {X86::VINSERTI64x4Zrr, X86::VINSERTI64x4Zrm, 0},
-  {X86::VINSERTPSZrr, X86::VINSERTPSZrm, TB_NO_REVERSE},
-  {X86::VINSERTPSrr, X86::VINSERTPSrm, TB_NO_REVERSE},
   {X86::VMAXCPDYrr, X86::VMAXCPDYrm, 0},
   {X86::VMAXCPDZ128rr, X86::VMAXCPDZ128rm, 0},
   {X86::VMAXCPDZ256rr, X86::VMAXCPDZ256rm, 0},
diff --git a/llvm/utils/TableGen/X86ManualFoldTables.def b/llvm/utils/TableGen/X86ManualFoldTables.def
index d949830b0988304..68f1c1177c821e4 100644
--- a/llvm/utils/TableGen/X86ManualFoldTables.def
+++ b/llvm/utils/TableGen/X86ManualFoldTables.def
@@ -43,6 +43,8 @@ NOFOLD(VEXTRACTI32x8Zrrk)
 NOFOLD(VEXTRACTI64x2Z256rrk)
 NOFOLD(VEXTRACTI64x2Zrrk)
 NOFOLD(VEXTRACTI64x4Zrrk)
+NOFOLD(VINSERTPSZrr)
+NOFOLD(VINSERTPSrr)
 NOFOLD(VMOVAPDZ128mrk)
 NOFOLD(VMOVAPDZ256mrk)
 NOFOLD(VMOVAPDZmrk)

>From 4d5bf917ee0b888fb6355c677110a7a3e23d00ac Mon Sep 17 00:00:00 2001
From: Phoebe Wang <phoebe.wang at intel.com>
Date: Fri, 8 Sep 2023 15:08:09 +0800
Subject: [PATCH 2/2] Move to INSERTPSrr together

---
 llvm/utils/TableGen/X86ManualFoldTables.def | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/utils/TableGen/X86ManualFoldTables.def b/llvm/utils/TableGen/X86ManualFoldTables.def
index 68f1c1177c821e4..8e6cb4a7bd87983 100644
--- a/llvm/utils/TableGen/X86ManualFoldTables.def
+++ b/llvm/utils/TableGen/X86ManualFoldTables.def
@@ -43,8 +43,6 @@ NOFOLD(VEXTRACTI32x8Zrrk)
 NOFOLD(VEXTRACTI64x2Z256rrk)
 NOFOLD(VEXTRACTI64x2Zrrk)
 NOFOLD(VEXTRACTI64x4Zrrk)
-NOFOLD(VINSERTPSZrr)
-NOFOLD(VINSERTPSrr)
 NOFOLD(VMOVAPDZ128mrk)
 NOFOLD(VMOVAPDZ256mrk)
 NOFOLD(VMOVAPDZmrk)
@@ -227,6 +225,8 @@ NOFOLD(MMX_MOVQ64rr_REV)
 // =>
 // insertpsrm xmm1, m32, imm
 NOFOLD(INSERTPSrr)
+NOFOLD(VINSERTPSZrr)
+NOFOLD(VINSERTPSrr)
 #undef NOFOLD
 
 #ifndef ENTRY



More information about the llvm-commits mailing list