[llvm] X86: Remove LowerToHorizontalOp and modify test cases (PR #148477)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Jul 13 08:20:22 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-x86
Author: 黃國庭 (houngkoungting)
<details>
<summary>Changes</summary>
FIX #<!-- -->143000
Remove LowerToHorizontalOp and adjust the affected test cases; all tests pass after the change.
@<!-- -->RKSimon
---
Patch is 205.94 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/148477.diff
6 Files Affected:
- (modified) llvm/lib/Target/X86/X86ISelLowering.cpp (-118)
- (renamed) llvm/test/Transforms/PhaseOrdering/X86/haddsub-2.ll (+182-13)
- (renamed) llvm/test/Transforms/PhaseOrdering/X86/haddsub-shuf.ll (+273-12)
- (renamed) llvm/test/Transforms/PhaseOrdering/X86/haddsub-undef.ll (+398-9)
- (renamed) llvm/test/Transforms/PhaseOrdering/X86/haddsub.ll (+581-9)
- (renamed) llvm/test/Transforms/PhaseOrdering/X86/phaddsub-undef.ll (+68-10)
``````````diff
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index f8f29b9f2cdc7..677ecf8801e2d 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -8569,122 +8569,6 @@ static SDValue getHopForBuildVector(const BuildVectorSDNode *BV,
return DAG.getNode(HOpcode, DL, VT, V0, V1);
}
-/// Lower BUILD_VECTOR to a horizontal add/sub operation if possible.
-static SDValue LowerToHorizontalOp(const BuildVectorSDNode *BV, const SDLoc &DL,
- const X86Subtarget &Subtarget,
- SelectionDAG &DAG) {
- // We need at least 2 non-undef elements to make this worthwhile by default.
- unsigned NumNonUndefs =
- count_if(BV->op_values(), [](SDValue V) { return !V.isUndef(); });
- if (NumNonUndefs < 2)
- return SDValue();
-
- // There are 4 sets of horizontal math operations distinguished by type:
- // int/FP at 128-bit/256-bit. Each type was introduced with a different
- // subtarget feature. Try to match those "native" patterns first.
- MVT VT = BV->getSimpleValueType(0);
- if (((VT == MVT::v4f32 || VT == MVT::v2f64) && Subtarget.hasSSE3()) ||
- ((VT == MVT::v8i16 || VT == MVT::v4i32) && Subtarget.hasSSSE3()) ||
- ((VT == MVT::v8f32 || VT == MVT::v4f64) && Subtarget.hasAVX()) ||
- ((VT == MVT::v16i16 || VT == MVT::v8i32) && Subtarget.hasAVX2())) {
- unsigned HOpcode;
- SDValue V0, V1;
- if (isHopBuildVector(BV, DAG, HOpcode, V0, V1))
- return getHopForBuildVector(BV, DL, DAG, HOpcode, V0, V1);
- }
-
- // Try harder to match 256-bit ops by using extract/concat.
- if (!Subtarget.hasAVX() || !VT.is256BitVector())
- return SDValue();
-
- // Count the number of UNDEF operands in the build_vector in input.
- unsigned NumElts = VT.getVectorNumElements();
- unsigned Half = NumElts / 2;
- unsigned NumUndefsLO = 0;
- unsigned NumUndefsHI = 0;
- for (unsigned i = 0, e = Half; i != e; ++i)
- if (BV->getOperand(i)->isUndef())
- NumUndefsLO++;
-
- for (unsigned i = Half, e = NumElts; i != e; ++i)
- if (BV->getOperand(i)->isUndef())
- NumUndefsHI++;
-
- SDValue InVec0, InVec1;
- if (VT == MVT::v8i32 || VT == MVT::v16i16) {
- SDValue InVec2, InVec3;
- unsigned X86Opcode;
- bool CanFold = true;
-
- if (isHorizontalBinOpPart(BV, ISD::ADD, DL, DAG, 0, Half, InVec0, InVec1) &&
- isHorizontalBinOpPart(BV, ISD::ADD, DL, DAG, Half, NumElts, InVec2,
- InVec3) &&
- ((InVec0.isUndef() || InVec2.isUndef()) || InVec0 == InVec2) &&
- ((InVec1.isUndef() || InVec3.isUndef()) || InVec1 == InVec3))
- X86Opcode = X86ISD::HADD;
- else if (isHorizontalBinOpPart(BV, ISD::SUB, DL, DAG, 0, Half, InVec0,
- InVec1) &&
- isHorizontalBinOpPart(BV, ISD::SUB, DL, DAG, Half, NumElts, InVec2,
- InVec3) &&
- ((InVec0.isUndef() || InVec2.isUndef()) || InVec0 == InVec2) &&
- ((InVec1.isUndef() || InVec3.isUndef()) || InVec1 == InVec3))
- X86Opcode = X86ISD::HSUB;
- else
- CanFold = false;
-
- if (CanFold) {
- // Do not try to expand this build_vector into a pair of horizontal
- // add/sub if we can emit a pair of scalar add/sub.
- if (NumUndefsLO + 1 == Half || NumUndefsHI + 1 == Half)
- return SDValue();
-
- // Convert this build_vector into a pair of horizontal binops followed by
- // a concat vector. We must adjust the outputs from the partial horizontal
- // matching calls above to account for undefined vector halves.
- SDValue V0 = InVec0.isUndef() ? InVec2 : InVec0;
- SDValue V1 = InVec1.isUndef() ? InVec3 : InVec1;
- assert((!V0.isUndef() || !V1.isUndef()) && "Horizontal-op of undefs?");
- bool isUndefLO = NumUndefsLO == Half;
- bool isUndefHI = NumUndefsHI == Half;
- return ExpandHorizontalBinOp(V0, V1, DL, DAG, X86Opcode, false, isUndefLO,
- isUndefHI);
- }
- }
-
- if (VT == MVT::v8f32 || VT == MVT::v4f64 || VT == MVT::v8i32 ||
- VT == MVT::v16i16) {
- unsigned X86Opcode;
- if (isHorizontalBinOpPart(BV, ISD::ADD, DL, DAG, 0, NumElts, InVec0,
- InVec1))
- X86Opcode = X86ISD::HADD;
- else if (isHorizontalBinOpPart(BV, ISD::SUB, DL, DAG, 0, NumElts, InVec0,
- InVec1))
- X86Opcode = X86ISD::HSUB;
- else if (isHorizontalBinOpPart(BV, ISD::FADD, DL, DAG, 0, NumElts, InVec0,
- InVec1))
- X86Opcode = X86ISD::FHADD;
- else if (isHorizontalBinOpPart(BV, ISD::FSUB, DL, DAG, 0, NumElts, InVec0,
- InVec1))
- X86Opcode = X86ISD::FHSUB;
- else
- return SDValue();
-
- // Don't try to expand this build_vector into a pair of horizontal add/sub
- // if we can simply emit a pair of scalar add/sub.
- if (NumUndefsLO + 1 == Half || NumUndefsHI + 1 == Half)
- return SDValue();
-
- // Convert this build_vector into two horizontal add/sub followed by
- // a concat vector.
- bool isUndefLO = NumUndefsLO == Half;
- bool isUndefHI = NumUndefsHI == Half;
- return ExpandHorizontalBinOp(InVec0, InVec1, DL, DAG, X86Opcode, true,
- isUndefLO, isUndefHI);
- }
-
- return SDValue();
-}
-
static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG);
@@ -9270,8 +9154,6 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
if (SDValue AddSub = lowerToAddSubOrFMAddSub(BV, dl, Subtarget, DAG))
return AddSub;
- if (SDValue HorizontalOp = LowerToHorizontalOp(BV, dl, Subtarget, DAG))
- return HorizontalOp;
if (SDValue Broadcast = lowerBuildVectorAsBroadcast(BV, dl, Subtarget, DAG))
return Broadcast;
if (SDValue BitOp = lowerBuildVectorToBitOp(BV, dl, Subtarget, DAG))
diff --git a/llvm/test/CodeGen/X86/haddsub-2.ll b/llvm/test/Transforms/PhaseOrdering/X86/haddsub-2.ll
similarity index 81%
rename from llvm/test/CodeGen/X86/haddsub-2.ll
rename to llvm/test/Transforms/PhaseOrdering/X86/haddsub-2.ll
index bca446fa8fb56..4eb5bdba9edb6 100644
--- a/llvm/test/CodeGen/X86/haddsub-2.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/haddsub-2.ll
@@ -1,38 +1,39 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2,+sse3 | FileCheck %s --check-prefix=SSE --check-prefix=SSE3
-; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2,+sse3,+ssse3 | FileCheck %s --check-prefix=SSE --check-prefix=SSSE3
-; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
-; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -passes="default<O3>" -S %s | FileCheck %s
define <4 x float> @hadd_ps_test1(<4 x float> %A, <4 x float> %B) {
-; SSE-LABEL: hadd_ps_test1:
-; SSE: # %bb.0:
-; SSE-NEXT: haddps %xmm1, %xmm0
-; SSE-NEXT: retq
+; CHECK-LABEL: define <4 x float> @hadd_ps_test1(
+; CHECK-SAME: <4 x float> [[A:%.*]], <4 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; CHECK-NEXT: [[TMP3:%.*]] = fadd <4 x float> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: ret <4 x float> [[TMP3]]
;
-; AVX-LABEL: hadd_ps_test1:
-; AVX: # %bb.0:
-; AVX-NEXT: vhaddps %xmm1, %xmm0, %xmm0
-; AVX-NEXT: retq
+
%vecext = extractelement <4 x float> %A, i32 0
%vecext1 = extractelement <4 x float> %A, i32 1
%add = fadd float %vecext, %vecext1
%vecinit = insertelement <4 x float> undef, float %add, i32 0
+
%vecext2 = extractelement <4 x float> %A, i32 2
%vecext3 = extractelement <4 x float> %A, i32 3
%add4 = fadd float %vecext2, %vecext3
%vecinit5 = insertelement <4 x float> %vecinit, float %add4, i32 1
+
%vecext6 = extractelement <4 x float> %B, i32 0
%vecext7 = extractelement <4 x float> %B, i32 1
%add8 = fadd float %vecext6, %vecext7
%vecinit9 = insertelement <4 x float> %vecinit5, float %add8, i32 2
+
%vecext10 = extractelement <4 x float> %B, i32 2
%vecext11 = extractelement <4 x float> %B, i32 3
%add12 = fadd float %vecext10, %vecext11
%vecinit13 = insertelement <4 x float> %vecinit9, float %add12, i32 3
+
ret <4 x float> %vecinit13
}
+
define <4 x float> @hadd_ps_test2(<4 x float> %A, <4 x float> %B) {
; SSE-LABEL: hadd_ps_test2:
; SSE: # %bb.0:
@@ -43,6 +44,13 @@ define <4 x float> @hadd_ps_test2(<4 x float> %A, <4 x float> %B) {
; AVX: # %bb.0:
; AVX-NEXT: vhaddps %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
+; CHECK-LABEL: define <4 x float> @hadd_ps_test2(
+; CHECK-SAME: <4 x float> [[A:%.*]], <4 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; CHECK-NEXT: [[TMP3:%.*]] = fadd <4 x float> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: ret <4 x float> [[TMP3]]
+;
%vecext = extractelement <4 x float> %A, i32 2
%vecext1 = extractelement <4 x float> %A, i32 3
%add = fadd float %vecext, %vecext1
@@ -72,6 +80,13 @@ define <4 x float> @hsub_ps_test1(<4 x float> %A, <4 x float> %B) {
; AVX: # %bb.0:
; AVX-NEXT: vhsubps %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
+; CHECK-LABEL: define <4 x float> @hsub_ps_test1(
+; CHECK-SAME: <4 x float> [[A:%.*]], <4 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; CHECK-NEXT: [[TMP3:%.*]] = fsub <4 x float> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: ret <4 x float> [[TMP3]]
+;
%vecext = extractelement <4 x float> %A, i32 0
%vecext1 = extractelement <4 x float> %A, i32 1
%sub = fsub float %vecext, %vecext1
@@ -101,6 +116,13 @@ define <4 x float> @hsub_ps_test2(<4 x float> %A, <4 x float> %B) {
; AVX: # %bb.0:
; AVX-NEXT: vhsubps %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
+; CHECK-LABEL: define <4 x float> @hsub_ps_test2(
+; CHECK-SAME: <4 x float> [[A:%.*]], <4 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; CHECK-NEXT: [[TMP3:%.*]] = fsub <4 x float> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: ret <4 x float> [[TMP3]]
+;
%vecext = extractelement <4 x float> %A, i32 2
%vecext1 = extractelement <4 x float> %A, i32 3
%sub = fsub float %vecext, %vecext1
@@ -159,6 +181,13 @@ define <4 x i32> @phadd_d_test1(<4 x i32> %A, <4 x i32> %B) {
; AVX: # %bb.0:
; AVX-NEXT: vphaddd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
+; CHECK-LABEL: define <4 x i32> @phadd_d_test1(
+; CHECK-SAME: <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i32> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: ret <4 x i32> [[TMP3]]
+;
%vecext = extractelement <4 x i32> %A, i32 0
%vecext1 = extractelement <4 x i32> %A, i32 1
%add = add i32 %vecext, %vecext1
@@ -217,6 +246,13 @@ define <4 x i32> @phadd_d_test2(<4 x i32> %A, <4 x i32> %B) {
; AVX: # %bb.0:
; AVX-NEXT: vphaddd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
+; CHECK-LABEL: define <4 x i32> @phadd_d_test2(
+; CHECK-SAME: <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]], <4 x i32> <i32 0, i32 2, i32 5, i32 7>
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]], <4 x i32> <i32 1, i32 3, i32 4, i32 6>
+; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i32> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: ret <4 x i32> [[TMP3]]
+;
%vecext = extractelement <4 x i32> %A, i32 2
%vecext1 = extractelement <4 x i32> %A, i32 3
%add = add i32 %vecext, %vecext1
@@ -275,6 +311,13 @@ define <4 x i32> @phsub_d_test1(<4 x i32> %A, <4 x i32> %B) {
; AVX: # %bb.0:
; AVX-NEXT: vphsubd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
+; CHECK-LABEL: define <4 x i32> @phsub_d_test1(
+; CHECK-SAME: <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; CHECK-NEXT: [[TMP3:%.*]] = sub <4 x i32> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: ret <4 x i32> [[TMP3]]
+;
%vecext = extractelement <4 x i32> %A, i32 0
%vecext1 = extractelement <4 x i32> %A, i32 1
%sub = sub i32 %vecext, %vecext1
@@ -333,6 +376,13 @@ define <4 x i32> @phsub_d_test2(<4 x i32> %A, <4 x i32> %B) {
; AVX: # %bb.0:
; AVX-NEXT: vphsubd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
+; CHECK-LABEL: define <4 x i32> @phsub_d_test2(
+; CHECK-SAME: <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; CHECK-NEXT: [[TMP3:%.*]] = sub <4 x i32> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: ret <4 x i32> [[TMP3]]
+;
%vecext = extractelement <4 x i32> %A, i32 2
%vecext1 = extractelement <4 x i32> %A, i32 3
%sub = sub i32 %vecext, %vecext1
@@ -362,6 +412,13 @@ define <2 x double> @hadd_pd_test1(<2 x double> %A, <2 x double> %B) {
; AVX: # %bb.0:
; AVX-NEXT: vhaddpd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
+; CHECK-LABEL: define <2 x double> @hadd_pd_test1(
+; CHECK-SAME: <2 x double> [[A:%.*]], <2 x double> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[A]], <2 x double> [[B]], <2 x i32> <i32 0, i32 2>
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[A]], <2 x double> [[B]], <2 x i32> <i32 1, i32 3>
+; CHECK-NEXT: [[VECINIT2:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: ret <2 x double> [[VECINIT2]]
+;
%vecext = extractelement <2 x double> %A, i32 0
%vecext1 = extractelement <2 x double> %A, i32 1
%add = fadd double %vecext, %vecext1
@@ -383,6 +440,13 @@ define <2 x double> @hadd_pd_test2(<2 x double> %A, <2 x double> %B) {
; AVX: # %bb.0:
; AVX-NEXT: vhaddpd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
+; CHECK-LABEL: define <2 x double> @hadd_pd_test2(
+; CHECK-SAME: <2 x double> [[A:%.*]], <2 x double> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[A]], <2 x double> [[B]], <2 x i32> <i32 1, i32 3>
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[A]], <2 x double> [[B]], <2 x i32> <i32 0, i32 2>
+; CHECK-NEXT: [[VECINIT2:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: ret <2 x double> [[VECINIT2]]
+;
%vecext = extractelement <2 x double> %A, i32 1
%vecext1 = extractelement <2 x double> %A, i32 0
%add = fadd double %vecext, %vecext1
@@ -404,6 +468,13 @@ define <2 x double> @hsub_pd_test1(<2 x double> %A, <2 x double> %B) {
; AVX: # %bb.0:
; AVX-NEXT: vhsubpd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
+; CHECK-LABEL: define <2 x double> @hsub_pd_test1(
+; CHECK-SAME: <2 x double> [[A:%.*]], <2 x double> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[A]], <2 x double> [[B]], <2 x i32> <i32 0, i32 2>
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[A]], <2 x double> [[B]], <2 x i32> <i32 1, i32 3>
+; CHECK-NEXT: [[VECINIT2:%.*]] = fsub <2 x double> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: ret <2 x double> [[VECINIT2]]
+;
%vecext = extractelement <2 x double> %A, i32 0
%vecext1 = extractelement <2 x double> %A, i32 1
%sub = fsub double %vecext, %vecext1
@@ -425,6 +496,13 @@ define <2 x double> @hsub_pd_test2(<2 x double> %A, <2 x double> %B) {
; AVX: # %bb.0:
; AVX-NEXT: vhsubpd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
+; CHECK-LABEL: define <2 x double> @hsub_pd_test2(
+; CHECK-SAME: <2 x double> [[A:%.*]], <2 x double> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[A]], <2 x double> [[B]], <2 x i32> <i32 0, i32 2>
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[A]], <2 x double> [[B]], <2 x i32> <i32 1, i32 3>
+; CHECK-NEXT: [[VECINIT2:%.*]] = fsub <2 x double> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: ret <2 x double> [[VECINIT2]]
+;
%vecext = extractelement <2 x double> %B, i32 0
%vecext1 = extractelement <2 x double> %B, i32 1
%sub = fsub double %vecext, %vecext1
@@ -456,6 +534,13 @@ define <4 x double> @avx_vhadd_pd_test(<4 x double> %A, <4 x double> %B) {
; AVX2-NEXT: vhaddpd %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX2-NEXT: retq
+; CHECK-LABEL: define <4 x double> @avx_vhadd_pd_test(
+; CHECK-SAME: <4 x double> [[A:%.*]], <4 x double> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; CHECK-NEXT: [[TMP3:%.*]] = fadd <4 x double> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: ret <4 x double> [[TMP3]]
+;
%vecext = extractelement <4 x double> %A, i32 0
%vecext1 = extractelement <4 x double> %A, i32 1
%add = fadd double %vecext, %vecext1
@@ -495,6 +580,13 @@ define <4 x double> @avx_vhsub_pd_test(<4 x double> %A, <4 x double> %B) {
; AVX2-NEXT: vhsubpd %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX2-NEXT: retq
+; CHECK-LABEL: define <4 x double> @avx_vhsub_pd_test(
+; CHECK-SAME: <4 x double> [[A:%.*]], <4 x double> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; CHECK-NEXT: [[TMP3:%.*]] = fsub <4 x double> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: ret <4 x double> [[TMP3]]
+;
%vecext = extractelement <4 x double> %A, i32 0
%vecext1 = extractelement <4 x double> %A, i32 1
%sub = fsub double %vecext, %vecext1
@@ -590,6 +682,13 @@ define <8 x i32> @avx2_vphadd_d_test(<8 x i32> %A, <8 x i32> %B) {
; AVX2-NEXT: vphaddd %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX2-NEXT: retq
+; CHECK-LABEL: define <8 x i32> @avx2_vphadd_d_test(
+; CHECK-SAME: <8 x i32> [[A:%.*]], <8 x i32> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> [[B]], <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> [[B]], <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+; CHECK-NEXT: [[TMP3:%.*]] = add <8 x i32> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: ret <8 x i32> [[TMP3]]
+;
%vecext = extractelement <8 x i32> %A, i32 0
%vecext1 = extractelement <8 x i32> %A, i32 1
...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/148477
More information about the llvm-commits mailing list