[llvm] [LegalizeTypes][X86][PowerPC] Use shift by 1 instead of adding a value to itself to double. (PR #86857)
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 27 12:09:27 PDT 2024
https://github.com/topperc created https://github.com/llvm/llvm-project/pull/86857
Using a shift is the correct way to handle an undef index (the two uses of the index in an ADD may each take a different value, so the sum need not be even, while a shift always produces an even result), and it works better with our optimizations that move freeze around.
The X86 code looks like an improvement, but the PowerPC code might be a regression.
Hoping this improves some code for #86850.
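To illustrate the undef point, here is a minimal sketch of the two node constructions (assuming DAG, dl, and Idx are in scope, as in ExpandOp_INSERT_VECTOR_ELT; DoubledAdd/DoubledShl are illustrative names, not from the patch):

  // add Idx, Idx: if Idx is undef, each use of it may be assigned a
  // different value, so the "doubled" index is not guaranteed even.
  SDValue DoubledAdd =
      DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, Idx);

  // shl Idx, 1: Idx is used once, so whatever value an undef Idx takes
  // is shifted left and the low bit is always zero. The single use also
  // cooperates better with combines that move freeze around.
  SDValue DoubledShl =
      DAG.getNode(ISD::SHL, dl, Idx.getValueType(), Idx,
                  DAG.getShiftAmountConstant(1, Idx.getValueType(), dl));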
From 0cf3c679400ee26aefc10348dc84ef2e3d78bfa1 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Wed, 27 Mar 2024 12:04:15 -0700
Subject: [PATCH] [LegalizeTypes][X86][PowerPC] Use shift by 1 instead of
adding a value to itself to double.
Using a shift is the correct way to handle undef and works better with
our optimizations that move freeze around.
The X86 code looks like an improvement, but the PowerPC code might be a regression.
Hoping this improves some code for #86850.
---
.../SelectionDAG/LegalizeTypesGeneric.cpp | 3 +-
.../CodeGen/PowerPC/aix-vec_insert_elt.ll | 20 +++++----
llvm/test/CodeGen/PowerPC/vec_insert_elt.ll | 9 ++--
.../CodeGen/X86/insertelement-var-index.ll | 44 +++++++++----------
4 files changed, 38 insertions(+), 38 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
index a55364ea2c4e5b..73e4b50e316a90 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
@@ -428,7 +428,8 @@ SDValue DAGTypeLegalizer::ExpandOp_INSERT_VECTOR_ELT(SDNode *N) {
std::swap(Lo, Hi);
SDValue Idx = N->getOperand(2);
- Idx = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, Idx);
+ Idx = DAG.getNode(ISD::SHL, dl, Idx.getValueType(), Idx,
+ DAG.getShiftAmountConstant(1, Idx.getValueType(), dl));
NewVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, NewVec, Lo, Idx);
Idx = DAG.getNode(ISD::ADD, dl,
Idx.getValueType(), Idx,
diff --git a/llvm/test/CodeGen/PowerPC/aix-vec_insert_elt.ll b/llvm/test/CodeGen/PowerPC/aix-vec_insert_elt.ll
index aae23265710ce0..cb21efc5a35a85 100644
--- a/llvm/test/CodeGen/PowerPC/aix-vec_insert_elt.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-vec_insert_elt.ll
@@ -165,15 +165,16 @@ define <2 x i64> @testDoubleword(<2 x i64> %a, i64 %b, i64 %idx) {
;
; CHECK-32-LABEL: testDoubleword:
; CHECK-32: # %bb.0: # %entry
-; CHECK-32-NEXT: add 5, 6, 6
; CHECK-32-NEXT: addi 7, 1, -32
+; CHECK-32-NEXT: rlwinm 5, 6, 3, 28, 28
; CHECK-32-NEXT: stxv 34, -32(1)
-; CHECK-32-NEXT: rlwinm 6, 5, 2, 28, 29
-; CHECK-32-NEXT: stwx 3, 7, 6
-; CHECK-32-NEXT: addi 3, 5, 1
-; CHECK-32-NEXT: addi 5, 1, -16
+; CHECK-32-NEXT: stwx 3, 7, 5
+; CHECK-32-NEXT: slwi 3, 6, 1
+; CHECK-32-NEXT: li 5, 1
; CHECK-32-NEXT: lxv 0, -32(1)
-; CHECK-32-NEXT: rlwinm 3, 3, 2, 28, 29
+; CHECK-32-NEXT: rlwimi 5, 3, 0, 0, 30
+; CHECK-32-NEXT: rlwinm 3, 5, 2, 28, 29
+; CHECK-32-NEXT: addi 5, 1, -16
; CHECK-32-NEXT: stxv 0, -16(1)
; CHECK-32-NEXT: stwx 4, 5, 3
; CHECK-32-NEXT: lxv 34, -16(1)
@@ -187,10 +188,11 @@ define <2 x i64> @testDoubleword(<2 x i64> %a, i64 %b, i64 %idx) {
;
; CHECK-32-P10-LABEL: testDoubleword:
; CHECK-32-P10: # %bb.0: # %entry
-; CHECK-32-P10-NEXT: add 5, 6, 6
-; CHECK-32-P10-NEXT: slwi 6, 5, 2
+; CHECK-32-P10-NEXT: slwi 5, 6, 1
+; CHECK-32-P10-NEXT: slwi 6, 6, 3
; CHECK-32-P10-NEXT: vinswlx 2, 6, 3
-; CHECK-32-P10-NEXT: addi 3, 5, 1
+; CHECK-32-P10-NEXT: li 3, 1
+; CHECK-32-P10-NEXT: rlwimi 3, 5, 0, 0, 30
; CHECK-32-P10-NEXT: slwi 3, 3, 2
; CHECK-32-P10-NEXT: vinswlx 2, 3, 4
; CHECK-32-P10-NEXT: blr
diff --git a/llvm/test/CodeGen/PowerPC/vec_insert_elt.ll b/llvm/test/CodeGen/PowerPC/vec_insert_elt.ll
index b98aed8616509e..92dbb7e6c4b0ff 100644
--- a/llvm/test/CodeGen/PowerPC/vec_insert_elt.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_insert_elt.ll
@@ -241,15 +241,16 @@ define <2 x i64> @testDoubleword(<2 x i64> %a, i64 %b, i64 %idx) {
;
; AIX-P8-32-LABEL: testDoubleword:
; AIX-P8-32: # %bb.0: # %entry
-; AIX-P8-32-NEXT: add r6, r6, r6
; AIX-P8-32-NEXT: addi r5, r1, -32
-; AIX-P8-32-NEXT: rlwinm r7, r6, 2, 28, 29
+; AIX-P8-32-NEXT: rlwinm r7, r6, 3, 28, 28
; AIX-P8-32-NEXT: stxvw4x v2, 0, r5
; AIX-P8-32-NEXT: stwx r3, r5, r7
; AIX-P8-32-NEXT: addi r3, r1, -16
; AIX-P8-32-NEXT: lxvw4x vs0, 0, r5
-; AIX-P8-32-NEXT: addi r5, r6, 1
-; AIX-P8-32-NEXT: rlwinm r5, r5, 2, 28, 29
+; AIX-P8-32-NEXT: slwi r5, r6, 1
+; AIX-P8-32-NEXT: li r6, 1
+; AIX-P8-32-NEXT: rlwimi r6, r5, 0, 0, 30
+; AIX-P8-32-NEXT: rlwinm r5, r6, 2, 28, 29
; AIX-P8-32-NEXT: stxvw4x vs0, 0, r3
; AIX-P8-32-NEXT: stwx r4, r3, r5
; AIX-P8-32-NEXT: lxvw4x v2, 0, r3
diff --git a/llvm/test/CodeGen/X86/insertelement-var-index.ll b/llvm/test/CodeGen/X86/insertelement-var-index.ll
index 8ed8495d7a4614..3f4ee5c64a8abb 100644
--- a/llvm/test/CodeGen/X86/insertelement-var-index.ll
+++ b/llvm/test/CodeGen/X86/insertelement-var-index.ll
@@ -1013,14 +1013,13 @@ define <2 x i64> @arg_i64_v2i64(<2 x i64> %v, i64 %x, i32 %y) nounwind {
; X86AVX2-NEXT: movl 12(%ebp), %ecx
; X86AVX2-NEXT: movl 16(%ebp), %edx
; X86AVX2-NEXT: vmovaps %xmm0, (%esp)
-; X86AVX2-NEXT: leal (%edx,%edx), %esi
-; X86AVX2-NEXT: andl $3, %esi
-; X86AVX2-NEXT: movl %eax, (%esp,%esi,4)
+; X86AVX2-NEXT: leal 1(%edx,%edx), %esi
+; X86AVX2-NEXT: andl $1, %edx
+; X86AVX2-NEXT: movl %eax, (%esp,%edx,8)
; X86AVX2-NEXT: vmovaps (%esp), %xmm0
; X86AVX2-NEXT: vmovaps %xmm0, {{[0-9]+}}(%esp)
-; X86AVX2-NEXT: leal 1(%edx,%edx), %eax
-; X86AVX2-NEXT: andl $3, %eax
-; X86AVX2-NEXT: movl %ecx, 16(%esp,%eax,4)
+; X86AVX2-NEXT: andl $3, %esi
+; X86AVX2-NEXT: movl %ecx, 16(%esp,%esi,4)
; X86AVX2-NEXT: vmovaps {{[0-9]+}}(%esp), %xmm0
; X86AVX2-NEXT: leal -4(%ebp), %esp
; X86AVX2-NEXT: popl %esi
@@ -1362,14 +1361,13 @@ define <2 x i64> @load_i64_v2i64(<2 x i64> %v, ptr %p, i32 %y) nounwind {
; X86AVX2-NEXT: movl (%ecx), %edx
; X86AVX2-NEXT: movl 4(%ecx), %ecx
; X86AVX2-NEXT: vmovaps %xmm0, (%esp)
-; X86AVX2-NEXT: leal (%eax,%eax), %esi
-; X86AVX2-NEXT: andl $3, %esi
-; X86AVX2-NEXT: movl %edx, (%esp,%esi,4)
+; X86AVX2-NEXT: leal 1(%eax,%eax), %esi
+; X86AVX2-NEXT: andl $1, %eax
+; X86AVX2-NEXT: movl %edx, (%esp,%eax,8)
; X86AVX2-NEXT: vmovaps (%esp), %xmm0
; X86AVX2-NEXT: vmovaps %xmm0, {{[0-9]+}}(%esp)
-; X86AVX2-NEXT: leal 1(%eax,%eax), %eax
-; X86AVX2-NEXT: andl $3, %eax
-; X86AVX2-NEXT: movl %ecx, 16(%esp,%eax,4)
+; X86AVX2-NEXT: andl $3, %esi
+; X86AVX2-NEXT: movl %ecx, 16(%esp,%esi,4)
; X86AVX2-NEXT: vmovaps {{[0-9]+}}(%esp), %xmm0
; X86AVX2-NEXT: leal -4(%ebp), %esp
; X86AVX2-NEXT: popl %esi
@@ -1746,14 +1744,13 @@ define <4 x i64> @arg_i64_v4i64(<4 x i64> %v, i64 %x, i32 %y) nounwind {
; X86AVX2-NEXT: movl 12(%ebp), %ecx
; X86AVX2-NEXT: movl 16(%ebp), %edx
; X86AVX2-NEXT: vmovaps %ymm0, (%esp)
-; X86AVX2-NEXT: leal (%edx,%edx), %esi
-; X86AVX2-NEXT: andl $7, %esi
-; X86AVX2-NEXT: movl %eax, (%esp,%esi,4)
+; X86AVX2-NEXT: leal 1(%edx,%edx), %esi
+; X86AVX2-NEXT: andl $3, %edx
+; X86AVX2-NEXT: movl %eax, (%esp,%edx,8)
; X86AVX2-NEXT: vmovaps (%esp), %ymm0
; X86AVX2-NEXT: vmovaps %ymm0, {{[0-9]+}}(%esp)
-; X86AVX2-NEXT: leal 1(%edx,%edx), %eax
-; X86AVX2-NEXT: andl $7, %eax
-; X86AVX2-NEXT: movl %ecx, 32(%esp,%eax,4)
+; X86AVX2-NEXT: andl $7, %esi
+; X86AVX2-NEXT: movl %ecx, 32(%esp,%esi,4)
; X86AVX2-NEXT: vmovaps {{[0-9]+}}(%esp), %ymm0
; X86AVX2-NEXT: leal -4(%ebp), %esp
; X86AVX2-NEXT: popl %esi
@@ -2128,14 +2125,13 @@ define <4 x i64> @load_i64_v4i64(<4 x i64> %v, ptr %p, i32 %y) nounwind {
; X86AVX2-NEXT: movl (%ecx), %edx
; X86AVX2-NEXT: movl 4(%ecx), %ecx
; X86AVX2-NEXT: vmovaps %ymm0, (%esp)
-; X86AVX2-NEXT: leal (%eax,%eax), %esi
-; X86AVX2-NEXT: andl $7, %esi
-; X86AVX2-NEXT: movl %edx, (%esp,%esi,4)
+; X86AVX2-NEXT: leal 1(%eax,%eax), %esi
+; X86AVX2-NEXT: andl $3, %eax
+; X86AVX2-NEXT: movl %edx, (%esp,%eax,8)
; X86AVX2-NEXT: vmovaps (%esp), %ymm0
; X86AVX2-NEXT: vmovaps %ymm0, {{[0-9]+}}(%esp)
-; X86AVX2-NEXT: leal 1(%eax,%eax), %eax
-; X86AVX2-NEXT: andl $7, %eax
-; X86AVX2-NEXT: movl %ecx, 32(%esp,%eax,4)
+; X86AVX2-NEXT: andl $7, %esi
+; X86AVX2-NEXT: movl %ecx, 32(%esp,%esi,4)
; X86AVX2-NEXT: vmovaps {{[0-9]+}}(%esp), %ymm0
; X86AVX2-NEXT: leal -4(%ebp), %esp
; X86AVX2-NEXT: popl %esi
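For anyone tracing the X86AVX2 diffs above, the payoff is that the even-ness of the doubled index is now visible to the backend. A rough C++ model of the stack-slot arithmetic in the v2i64 cases (function names are mine, not from the tests):

  #include <cstdint>

  // Old sequence: double the index, mask, then scale by 4 in the
  // addressing mode, e.g. leal (%edx,%edx); andl $3; (%esp,%esi,4).
  uint32_t lowSlotOld(uint32_t i) { return ((2 * i) & 3) * 4; }

  // New sequence: 2*i is known even, so the mask shrinks to $1 and the
  // scale folds to 8 in the addressing mode, e.g. andl $1; (%esp,%edx,8).
  uint32_t lowSlotNew(uint32_t i) { return (i & 1) * 8; }

  // The high half stays at 16 + ((2*i + 1) & 3) * 4 in both versions;
  // the new code just computes 2*i + 1 once with a single leal.

The v4i64 cases are the same shape with masks $7/$3 and a 32-byte spill slot; one leal per function drops out.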