[llvm] af32e51 - [X86] LowerRotate - manually expand rotate by splat constant patterns.
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Sat Jul 22 09:55:08 PDT 2023
Author: Simon Pilgrim
Date: 2023-07-22T17:54:57+01:00
New Revision: af32e51a43fb4343f4c407bf1ee051ff78a57494
URL: https://github.com/llvm/llvm-project/commit/af32e51a43fb4343f4c407bf1ee051ff78a57494
DIFF: https://github.com/llvm/llvm-project/commit/af32e51a43fb4343f4c407bf1ee051ff78a57494.diff
LOG: [X86] LowerRotate - manually expand rotate by splat constant patterns.
Fixes the issue identified on #63980, where the undef rotate amounts (during widening from v2i32 -> v4i32) were being constant folded to 0 when the shift amounts were created during expansion, losing the splat'd shift amounts.
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/vector-fshl-rot-sub128.ll
llvm/test/CodeGen/X86/vector-fshr-rot-sub128.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 71366db71c3d36..ad6f12f6ca6921 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -32489,8 +32489,18 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,
}
// Rotate by an uniform constant - expand back to shifts.
- if (IsCstSplat)
- return SDValue();
+ // TODO: Can't use generic expansion as UNDEF amt elements can be converted
+ // to other values when folded to shift amounts, losing the splat.
+ if (IsCstSplat) {
+ uint64_t RotAmt = CstSplatValue.urem(EltSizeInBits);
+ uint64_t ShlAmt = IsROTL ? RotAmt : (EltSizeInBits - RotAmt);
+ uint64_t SrlAmt = IsROTL ? (EltSizeInBits - RotAmt) : RotAmt;
+ SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, R,
+ DAG.getShiftAmountConstant(ShlAmt, VT, DL));
+ SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, R,
+ DAG.getShiftAmountConstant(SrlAmt, VT, DL));
+ return DAG.getNode(ISD::OR, DL, VT, Shl, Srl);
+ }
// Split 512-bit integers on non 512-bit BWI targets.
if (VT.is512BitVector() && !Subtarget.useBWIRegs())
diff --git a/llvm/test/CodeGen/X86/vector-fshl-rot-sub128.ll b/llvm/test/CodeGen/X86/vector-fshl-rot-sub128.ll
index 6fe03f54123c3c..825ca727b624ea 100644
--- a/llvm/test/CodeGen/X86/vector-fshl-rot-sub128.ll
+++ b/llvm/test/CodeGen/X86/vector-fshl-rot-sub128.ll
@@ -394,34 +394,29 @@ define <2 x i32> @splatconstant_funnnel_v2i32(<2 x i32> %x) nounwind {
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: psrld $28, %xmm1
-; SSE2-NEXT: movdqa %xmm0, %xmm2
-; SSE2-NEXT: pslld $4, %xmm2
-; SSE2-NEXT: por %xmm1, %xmm2
-; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
+; SSE2-NEXT: pslld $4, %xmm0
+; SSE2-NEXT: por %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: splatconstant_funnnel_v2i32:
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: psrld $28, %xmm1
-; SSE41-NEXT: movdqa %xmm0, %xmm2
-; SSE41-NEXT: pslld $4, %xmm2
-; SSE41-NEXT: por %xmm1, %xmm2
-; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm2[0,1,2,3],xmm0[4,5,6,7]
+; SSE41-NEXT: pslld $4, %xmm0
+; SSE41-NEXT: por %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: splatconstant_funnnel_v2i32:
; AVX1: # %bb.0:
; AVX1-NEXT: vpsrld $28, %xmm0, %xmm1
-; AVX1-NEXT: vpslld $4, %xmm0, %xmm2
-; AVX1-NEXT: vpor %xmm1, %xmm2, %xmm1
-; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
+; AVX1-NEXT: vpslld $4, %xmm0, %xmm0
+; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: splatconstant_funnnel_v2i32:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
-; AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT: vpsrld $28, %xmm0, %xmm1
+; AVX2-NEXT: vpslld $4, %xmm0, %xmm0
; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: retq
;
@@ -473,10 +468,8 @@ define <2 x i32> @splatconstant_funnnel_v2i32(<2 x i32> %x) nounwind {
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
; X86-SSE2-NEXT: psrld $28, %xmm1
-; X86-SSE2-NEXT: movdqa %xmm0, %xmm2
-; X86-SSE2-NEXT: pslld $4, %xmm2
-; X86-SSE2-NEXT: por %xmm1, %xmm2
-; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
+; X86-SSE2-NEXT: pslld $4, %xmm0
+; X86-SSE2-NEXT: por %xmm1, %xmm0
; X86-SSE2-NEXT: retl
%res = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> %x, <2 x i32> %x, <2 x i32> <i32 4, i32 4>)
ret <2 x i32> %res
diff --git a/llvm/test/CodeGen/X86/vector-fshr-rot-sub128.ll b/llvm/test/CodeGen/X86/vector-fshr-rot-sub128.ll
index 72a1422d2b9e01..b8c356711921d0 100644
--- a/llvm/test/CodeGen/X86/vector-fshr-rot-sub128.ll
+++ b/llvm/test/CodeGen/X86/vector-fshr-rot-sub128.ll
@@ -408,34 +408,29 @@ define <2 x i32> @splatconstant_funnnel_v2i32(<2 x i32> %x) nounwind {
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: psrld $4, %xmm1
-; SSE2-NEXT: movdqa %xmm0, %xmm2
-; SSE2-NEXT: pslld $28, %xmm2
-; SSE2-NEXT: por %xmm1, %xmm2
-; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
+; SSE2-NEXT: pslld $28, %xmm0
+; SSE2-NEXT: por %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: splatconstant_funnnel_v2i32:
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: psrld $4, %xmm1
-; SSE41-NEXT: movdqa %xmm0, %xmm2
-; SSE41-NEXT: pslld $28, %xmm2
-; SSE41-NEXT: por %xmm1, %xmm2
-; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm2[0,1,2,3],xmm0[4,5,6,7]
+; SSE41-NEXT: pslld $28, %xmm0
+; SSE41-NEXT: por %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: splatconstant_funnnel_v2i32:
; AVX1: # %bb.0:
; AVX1-NEXT: vpsrld $4, %xmm0, %xmm1
-; AVX1-NEXT: vpslld $28, %xmm0, %xmm2
-; AVX1-NEXT: vpor %xmm1, %xmm2, %xmm1
-; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
+; AVX1-NEXT: vpslld $28, %xmm0, %xmm0
+; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: splatconstant_funnnel_v2i32:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
-; AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT: vpsrld $4, %xmm0, %xmm1
+; AVX2-NEXT: vpslld $28, %xmm0, %xmm0
; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: retq
;
@@ -487,10 +482,8 @@ define <2 x i32> @splatconstant_funnnel_v2i32(<2 x i32> %x) nounwind {
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
; X86-SSE2-NEXT: psrld $4, %xmm1
-; X86-SSE2-NEXT: movdqa %xmm0, %xmm2
-; X86-SSE2-NEXT: pslld $28, %xmm2
-; X86-SSE2-NEXT: por %xmm1, %xmm2
-; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
+; X86-SSE2-NEXT: pslld $28, %xmm0
+; X86-SSE2-NEXT: por %xmm1, %xmm0
; X86-SSE2-NEXT: retl
%res = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %x, <2 x i32> %x, <2 x i32> <i32 4, i32 4>)
ret <2 x i32> %res
More information about the llvm-commits
mailing list