[llvm] [X86] Attempt to use VPMADD52L/VPMULUDQ instead of VPMULLQ on slow VPMULLQ targets (or when VPMULLQ is unavailable) (PR #171760)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Dec 10 21:58:48 PST 2025
https://github.com/houngkoungting updated https://github.com/llvm/llvm-project/pull/171760
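
For context on the title: VPMULLQ does a full 64x64->low-64 multiply per lane and is slow (or missing) on a number of targets, while VPMULUDQ only multiplies the low 32 bits of each lane and VPMADD52L only the low 52 bits, so either can stand in for VPMULLQ only when the upper operand bits are known to be zero. A scalar sketch of the VPMULUDQ case (illustrative only, not code from this PR):

  #include <cassert>
  #include <cstdint>

  // One 64-bit lane of VPMULUDQ: multiply only the low 32 bits of each operand.
  static uint64_t pmuludq_lane(uint64_t a, uint64_t b) {
    return (a & 0xffffffffULL) * (b & 0xffffffffULL);
  }

  int main() {
    // If both lanes are known to keep their upper 32 bits clear, the cheap
    // 32x32->64 multiply already equals the full 64x64->low-64 VPMULLQ result.
    uint64_t a = 0x89abcdefULL, b = 0x01234567ULL;
    assert(pmuludq_lane(a, b) == a * b);
    // Once an operand uses more than 32 bits the shortcut no longer holds,
    // which is why the substitution has to be gated on known-bits facts.
    uint64_t c = 0x100000001ULL;
    assert(pmuludq_lane(c, b) != c * b);
    return 0;
  }
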
From 80e303c6e0976d8c2437a806679a54d5919c5917 Mon Sep 17 00:00:00 2001
From: william <we3223 at gmail.com>
Date: Wed, 6 Aug 2025 16:17:48 +0800
Subject: [PATCH 01/17] [DAG] Fold trunc(avg(x,y)) for avgceil/floor u/s nodes
if they have sufficient leading zero/sign bits-1
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 45 +++++++++++++++++++
llvm/test/CodeGen/AArch64/trunc-avg-fold.ll | 43 ++++++++++++++++++
2 files changed, 88 insertions(+)
create mode 100644 llvm/test/CodeGen/AArch64/trunc-avg-fold.ll
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index d70e96938ed9a..9ff256f8090ba 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -16294,6 +16294,51 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
// because targets may prefer a wider type during later combines and invert
// this transform.
switch (N0.getOpcode()) {
+ case ISD::AVGCEILU:
+ case ISD::AVGFLOORU:
+ if (!LegalOperations && N0.hasOneUse() &&
+ TLI.isOperationLegal(N0.getOpcode(), VT)) {
+ SDValue X = N0.getOperand(0);
+ SDValue Y = N0.getOperand(1);
+
+ KnownBits KnownX = DAG.computeKnownBits(X);
+ KnownBits KnownY = DAG.computeKnownBits(Y);
+
+ unsigned SrcBits = X.getScalarValueSizeInBits();
+ unsigned DstBits = VT.getScalarSizeInBits();
+ unsigned NeededLeadingZeros = SrcBits - DstBits + 1;
+
+ if (KnownX.countMinLeadingZeros() >= NeededLeadingZeros &&
+ KnownY.countMinLeadingZeros() >= NeededLeadingZeros) {
+ SDValue Tx = DAG.getNode(ISD::TRUNCATE, DL, VT, X);
+ SDValue Ty = DAG.getNode(ISD::TRUNCATE, DL, VT, Y);
+ return DAG.getNode(N0.getOpcode(), DL, VT, Tx, Ty);
+ }
+ }
+ break;
+
+ case ISD::AVGCEILS:
+ case ISD::AVGFLOORS:
+ if (!LegalOperations && N0.hasOneUse() &&
+ TLI.isOperationLegal(N0.getOpcode(), VT)) {
+ SDValue X = N0.getOperand(0);
+ SDValue Y = N0.getOperand(1);
+
+ unsigned SignBitsX = DAG.ComputeNumSignBits(X);
+ unsigned SignBitsY = DAG.ComputeNumSignBits(Y);
+
+ unsigned SrcBits = X.getScalarValueSizeInBits();
+ unsigned DstBits = VT.getScalarSizeInBits();
+ unsigned NeededSignBits = SrcBits - DstBits + 1;
+
+ if (SignBitsX >= NeededSignBits && SignBitsY >= NeededSignBits) {
+ SDValue Tx = DAG.getNode(ISD::TRUNCATE, DL, VT, X);
+ SDValue Ty = DAG.getNode(ISD::TRUNCATE, DL, VT, Y);
+ return DAG.getNode(N0.getOpcode(), DL, VT, Tx, Ty);
+ }
+ }
+ break;
+
case ISD::ADD:
case ISD::SUB:
case ISD::MUL:
diff --git a/llvm/test/CodeGen/AArch64/trunc-avg-fold.ll b/llvm/test/CodeGen/AArch64/trunc-avg-fold.ll
new file mode 100644
index 0000000000000..175f54d6f9c05
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/trunc-avg-fold.ll
@@ -0,0 +1,43 @@
+; RUN: llc -mtriple=aarch64-- -O2 -mattr=+neon < %s | FileCheck %s
+
+; CHECK-LABEL: test_avgceil_u
+; CHECK: uhadd v0.8b, v0.8b, v1.8b
+define <8 x i8> @test_avgceil_u(<8 x i16> %a, <8 x i16> %b) {
+ %ta = trunc <8 x i16> %a to <8 x i8>
+ %tb = trunc <8 x i16> %b to <8 x i8>
+ %res = call <8 x i8> @llvm.aarch64.neon.uhadd.v8i8(<8 x i8> %ta, <8 x i8> %tb)
+ ret <8 x i8> %res
+}
+
+; CHECK-LABEL: test_avgceil_s
+; CHECK: shadd v0.8b, v0.8b, v1.8b
+define <8 x i8> @test_avgceil_s(<8 x i16> %a, <8 x i16> %b) {
+ %ta = trunc <8 x i16> %a to <8 x i8>
+ %tb = trunc <8 x i16> %b to <8 x i8>
+ %res = call <8 x i8> @llvm.aarch64.neon.shadd.v8i8(<8 x i8> %ta, <8 x i8> %tb)
+ ret <8 x i8> %res
+}
+
+; CHECK-LABEL: test_avgfloor_u
+; CHECK: urhadd v0.8b, v0.8b, v1.8b
+define <8 x i8> @test_avgfloor_u(<8 x i16> %a, <8 x i16> %b) {
+ %ta = trunc <8 x i16> %a to <8 x i8>
+ %tb = trunc <8 x i16> %b to <8 x i8>
+ %res = call <8 x i8> @llvm.aarch64.neon.urhadd.v8i8(<8 x i8> %ta, <8 x i8> %tb)
+ ret <8 x i8> %res
+}
+
+; CHECK-LABEL: test_avgfloor_s
+; CHECK: srhadd v0.8b, v0.8b, v1.8b
+define <8 x i8> @test_avgfloor_s(<8 x i16> %a, <8 x i16> %b) {
+ %ta = trunc <8 x i16> %a to <8 x i8>
+ %tb = trunc <8 x i16> %b to <8 x i8>
+ %res = call <8 x i8> @llvm.aarch64.neon.srhadd.v8i8(<8 x i8> %ta, <8 x i8> %tb)
+ ret <8 x i8> %res
+}
+
+declare <8 x i8> @llvm.aarch64.neon.uhadd.v8i8(<8 x i8>, <8 x i8>)
+declare <8 x i8> @llvm.aarch64.neon.shadd.v8i8(<8 x i8>, <8 x i8>)
+declare <8 x i8> @llvm.aarch64.neon.urhadd.v8i8(<8 x i8>, <8 x i8>)
+declare <8 x i8> @llvm.aarch64.neon.srhadd.v8i8(<8 x i8>, <8 x i8>)
+
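
The unsigned half of the combine in patch 01 rests on a simple invariant: when both wide operands already fit in the narrow type, the halving add can be done in the narrow type and the truncate hoisted through it. Later patches relax the threshold from SrcBits - DstBits + 1 leading zeroes to SrcBits - DstBits; the exhaustive scalar check below (a standalone sketch, not part of the patch) confirms the relaxed bound is still safe for both AVGFLOORU and AVGCEILU:

  #include <cassert>
  #include <cstdint>

  // Scalar models of ISD::AVGFLOORU / ISD::AVGCEILU for 16-bit and 8-bit lanes.
  static uint16_t avgflooru16(uint16_t a, uint16_t b) { return (uint16_t)(((unsigned)a + b) >> 1); }
  static uint16_t avgceilu16(uint16_t a, uint16_t b)  { return (uint16_t)(((unsigned)a + b + 1) >> 1); }
  static uint8_t  avgflooru8(uint8_t a, uint8_t b)    { return (uint8_t)(((unsigned)a + b) >> 1); }
  static uint8_t  avgceilu8(uint8_t a, uint8_t b)     { return (uint8_t)(((unsigned)a + b + 1) >> 1); }

  int main() {
    // Exhaustively check every i16 pair whose upper 8 bits are known zero.
    for (unsigned x = 0; x < 256; ++x)
      for (unsigned y = 0; y < 256; ++y) {
        uint16_t wx = (uint16_t)x, wy = (uint16_t)y;
        assert((uint8_t)avgflooru16(wx, wy) == avgflooru8((uint8_t)wx, (uint8_t)wy));
        assert((uint8_t)avgceilu16(wx, wy)  == avgceilu8((uint8_t)wx, (uint8_t)wy));
      }
    return 0;
  }
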
From 24287f7f08d3bd238761b6e798aee655af931c3d Mon Sep 17 00:00:00 2001
From: william <we3223 at gmail.com>
Date: Fri, 8 Aug 2025 00:04:32 +0800
Subject: [PATCH 02/17] [DAG] Fold trunc(avg(x,y)) for avgceil/floor u/s nodes
if they have sufficient leading zero/sign bits -2
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 16 +---
llvm/test/CodeGen/AArch64/trunc-avg-fold.ll | 96 +++++++++++++++----
2 files changed, 79 insertions(+), 33 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 9ff256f8090ba..0cba06215d3fe 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -16300,37 +16300,28 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
TLI.isOperationLegal(N0.getOpcode(), VT)) {
SDValue X = N0.getOperand(0);
SDValue Y = N0.getOperand(1);
-
- KnownBits KnownX = DAG.computeKnownBits(X);
- KnownBits KnownY = DAG.computeKnownBits(Y);
-
unsigned SrcBits = X.getScalarValueSizeInBits();
unsigned DstBits = VT.getScalarSizeInBits();
- unsigned NeededLeadingZeros = SrcBits - DstBits + 1;
-
- if (KnownX.countMinLeadingZeros() >= NeededLeadingZeros &&
- KnownY.countMinLeadingZeros() >= NeededLeadingZeros) {
+ unsigned MaxBitsX = DAG.ComputeMaxSignificantBits(X);
+ unsigned MaxBitsY = DAG.ComputeMaxSignificantBits(Y);
+ if (MaxBitsX <= DstBits && MaxBitsY <= DstBits) {
SDValue Tx = DAG.getNode(ISD::TRUNCATE, DL, VT, X);
SDValue Ty = DAG.getNode(ISD::TRUNCATE, DL, VT, Y);
return DAG.getNode(N0.getOpcode(), DL, VT, Tx, Ty);
}
}
break;
-
case ISD::AVGCEILS:
case ISD::AVGFLOORS:
if (!LegalOperations && N0.hasOneUse() &&
TLI.isOperationLegal(N0.getOpcode(), VT)) {
SDValue X = N0.getOperand(0);
SDValue Y = N0.getOperand(1);
-
unsigned SignBitsX = DAG.ComputeNumSignBits(X);
unsigned SignBitsY = DAG.ComputeNumSignBits(Y);
-
unsigned SrcBits = X.getScalarValueSizeInBits();
unsigned DstBits = VT.getScalarSizeInBits();
unsigned NeededSignBits = SrcBits - DstBits + 1;
-
if (SignBitsX >= NeededSignBits && SignBitsY >= NeededSignBits) {
SDValue Tx = DAG.getNode(ISD::TRUNCATE, DL, VT, X);
SDValue Ty = DAG.getNode(ISD::TRUNCATE, DL, VT, Y);
@@ -16338,7 +16329,6 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
}
}
break;
-
case ISD::ADD:
case ISD::SUB:
case ISD::MUL:
diff --git a/llvm/test/CodeGen/AArch64/trunc-avg-fold.ll b/llvm/test/CodeGen/AArch64/trunc-avg-fold.ll
index 175f54d6f9c05..db40746776d43 100644
--- a/llvm/test/CodeGen/AArch64/trunc-avg-fold.ll
+++ b/llvm/test/CodeGen/AArch64/trunc-avg-fold.ll
@@ -1,38 +1,91 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=aarch64-- -O2 -mattr=+neon < %s | FileCheck %s
-; CHECK-LABEL: test_avgceil_u
-; CHECK: uhadd v0.8b, v0.8b, v1.8b
+
define <8 x i8> @test_avgceil_u(<8 x i16> %a, <8 x i16> %b) {
- %ta = trunc <8 x i16> %a to <8 x i8>
- %tb = trunc <8 x i16> %b to <8 x i8>
- %res = call <8 x i8> @llvm.aarch64.neon.uhadd.v8i8(<8 x i8> %ta, <8 x i8> %tb)
+; CHECK-LABEL: test_avgceil_u:
+; CHECK: // %bb.0:
+; CHECK-NEXT: xtn v0.8b, v0.8h
+; CHECK-NEXT: xtn v1.8b, v1.8h
+; CHECK-NEXT: uhadd v0.8b, v0.8b, v1.8b
+; CHECK-NEXT: ret
+
+ %mask = insertelement <8 x i16> undef, i16 255, i32 0
+ %mask.splat = shufflevector <8 x i16> %mask, <8 x i16> undef, <8 x i32> zeroinitializer
+ %ta16 = and <8 x i16> %a, %mask.splat
+ %tb16 = and <8 x i16> %b, %mask.splat
+ %ta8 = trunc <8 x i16> %ta16 to <8 x i8>
+ %tb8 = trunc <8 x i16> %tb16 to <8 x i8>
+ %res = call <8 x i8> @llvm.aarch64.neon.uhadd.v8i8(<8 x i8> %ta8, <8 x i8> %tb8)
ret <8 x i8> %res
}
-; CHECK-LABEL: test_avgceil_s
-; CHECK: shadd v0.8b, v0.8b, v1.8b
+
define <8 x i8> @test_avgceil_s(<8 x i16> %a, <8 x i16> %b) {
- %ta = trunc <8 x i16> %a to <8 x i8>
- %tb = trunc <8 x i16> %b to <8 x i8>
- %res = call <8 x i8> @llvm.aarch64.neon.shadd.v8i8(<8 x i8> %ta, <8 x i8> %tb)
+; CHECK-LABEL: test_avgceil_s:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sqxtn v0.8b, v0.8h
+; CHECK-NEXT: sqxtn v1.8b, v1.8h
+; CHECK-NEXT: shadd v0.8b, v0.8b, v1.8b
+; CHECK-NEXT: ret
+
+ %smin = insertelement <8 x i16> undef, i16 -128, i32 0
+ %smax = insertelement <8 x i16> undef, i16 127, i32 0
+ %min = shufflevector <8 x i16> %smin, <8 x i16> undef, <8 x i32> zeroinitializer
+ %max = shufflevector <8 x i16> %smax, <8 x i16> undef, <8 x i32> zeroinitializer
+
+ %ta16 = call <8 x i16> @llvm.smin.v8i16(<8 x i16> %a, <8 x i16> %max)
+ %ta16.clamped = call <8 x i16> @llvm.smax.v8i16(<8 x i16> %ta16, <8 x i16> %min)
+ %tb16 = call <8 x i16> @llvm.smin.v8i16(<8 x i16> %b, <8 x i16> %max)
+ %tb16.clamped = call <8 x i16> @llvm.smax.v8i16(<8 x i16> %tb16, <8 x i16> %min)
+
+ %ta8 = trunc <8 x i16> %ta16.clamped to <8 x i8>
+ %tb8 = trunc <8 x i16> %tb16.clamped to <8 x i8>
+ %res = call <8 x i8> @llvm.aarch64.neon.shadd.v8i8(<8 x i8> %ta8, <8 x i8> %tb8)
ret <8 x i8> %res
}
-; CHECK-LABEL: test_avgfloor_u
-; CHECK: urhadd v0.8b, v0.8b, v1.8b
+
define <8 x i8> @test_avgfloor_u(<8 x i16> %a, <8 x i16> %b) {
- %ta = trunc <8 x i16> %a to <8 x i8>
- %tb = trunc <8 x i16> %b to <8 x i8>
- %res = call <8 x i8> @llvm.aarch64.neon.urhadd.v8i8(<8 x i8> %ta, <8 x i8> %tb)
+; CHECK-LABEL: test_avgfloor_u:
+; CHECK: // %bb.0:
+; CHECK-NEXT: xtn v0.8b, v0.8h
+; CHECK-NEXT: xtn v1.8b, v1.8h
+; CHECK-NEXT: urhadd v0.8b, v0.8b, v1.8b
+; CHECK-NEXT: ret
+
+ %mask = insertelement <8 x i16> undef, i16 255, i32 0
+ %mask.splat = shufflevector <8 x i16> %mask, <8 x i16> undef, <8 x i32> zeroinitializer
+ %ta16 = and <8 x i16> %a, %mask.splat
+ %tb16 = and <8 x i16> %b, %mask.splat
+ %ta8 = trunc <8 x i16> %ta16 to <8 x i8>
+ %tb8 = trunc <8 x i16> %tb16 to <8 x i8>
+ %res = call <8 x i8> @llvm.aarch64.neon.urhadd.v8i8(<8 x i8> %ta8, <8 x i8> %tb8)
ret <8 x i8> %res
}
-; CHECK-LABEL: test_avgfloor_s
-; CHECK: srhadd v0.8b, v0.8b, v1.8b
+
define <8 x i8> @test_avgfloor_s(<8 x i16> %a, <8 x i16> %b) {
- %ta = trunc <8 x i16> %a to <8 x i8>
- %tb = trunc <8 x i16> %b to <8 x i8>
- %res = call <8 x i8> @llvm.aarch64.neon.srhadd.v8i8(<8 x i8> %ta, <8 x i8> %tb)
+; CHECK-LABEL: test_avgfloor_s:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sqxtn v0.8b, v0.8h
+; CHECK-NEXT: sqxtn v1.8b, v1.8h
+; CHECK-NEXT: srhadd v0.8b, v0.8b, v1.8b
+; CHECK-NEXT: ret
+
+ %smin = insertelement <8 x i16> undef, i16 -128, i32 0
+ %smax = insertelement <8 x i16> undef, i16 127, i32 0
+ %min = shufflevector <8 x i16> %smin, <8 x i16> undef, <8 x i32> zeroinitializer
+ %max = shufflevector <8 x i16> %smax, <8 x i16> undef, <8 x i32> zeroinitializer
+
+ %ta16 = call <8 x i16> @llvm.smin.v8i16(<8 x i16> %a, <8 x i16> %max)
+ %ta16.clamped = call <8 x i16> @llvm.smax.v8i16(<8 x i16> %ta16, <8 x i16> %min)
+ %tb16 = call <8 x i16> @llvm.smin.v8i16(<8 x i16> %b, <8 x i16> %max)
+ %tb16.clamped = call <8 x i16> @llvm.smax.v8i16(<8 x i16> %tb16, <8 x i16> %min)
+
+ %ta8 = trunc <8 x i16> %ta16.clamped to <8 x i8>
+ %tb8 = trunc <8 x i16> %tb16.clamped to <8 x i8>
+ %res = call <8 x i8> @llvm.aarch64.neon.srhadd.v8i8(<8 x i8> %ta8, <8 x i8> %tb8)
ret <8 x i8> %res
}
@@ -41,3 +94,6 @@ declare <8 x i8> @llvm.aarch64.neon.shadd.v8i8(<8 x i8>, <8 x i8>)
declare <8 x i8> @llvm.aarch64.neon.urhadd.v8i8(<8 x i8>, <8 x i8>)
declare <8 x i8> @llvm.aarch64.neon.srhadd.v8i8(<8 x i8>, <8 x i8>)
+declare <8 x i16> @llvm.smin.v8i16(<8 x i16>, <8 x i16>)
+declare <8 x i16> @llvm.smax.v8i16(<8 x i16>, <8 x i16>)
+
From c8cc2a98030154d6a95154d8fe8d7461cfb0daf4 Mon Sep 17 00:00:00 2001
From: william <we3223 at gmail.com>
Date: Fri, 8 Aug 2025 10:51:33 +0800
Subject: [PATCH 03/17] [DAG] Fold trunc(avg(x,y)) for avgceil/floor u/s nodes
if they have sufficient leading zero/sign bits -3
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 8 +++++---
llvm/test/CodeGen/AArch64/trunc-avg-fold.ll | 20 +++++++++----------
2 files changed, 15 insertions(+), 13 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 0cba06215d3fe..7aea288c03208 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -16302,9 +16302,10 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
SDValue Y = N0.getOperand(1);
unsigned SrcBits = X.getScalarValueSizeInBits();
unsigned DstBits = VT.getScalarSizeInBits();
- unsigned MaxBitsX = DAG.ComputeMaxSignificantBits(X);
- unsigned MaxBitsY = DAG.ComputeMaxSignificantBits(Y);
- if (MaxBitsX <= DstBits && MaxBitsY <= DstBits) {
+ KnownBits KnownX = DAG.computeKnownBits(X);
+ KnownBits KnownY = DAG.computeKnownBits(Y);
+ if (KnownX.countMinLeadingZeros() >= (SrcBits - DstBits) &&
+ KnownY.countMinLeadingZeros() >= (SrcBits - DstBits)) {
SDValue Tx = DAG.getNode(ISD::TRUNCATE, DL, VT, X);
SDValue Ty = DAG.getNode(ISD::TRUNCATE, DL, VT, Y);
return DAG.getNode(N0.getOpcode(), DL, VT, Tx, Ty);
@@ -16322,6 +16323,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
unsigned SrcBits = X.getScalarValueSizeInBits();
unsigned DstBits = VT.getScalarSizeInBits();
unsigned NeededSignBits = SrcBits - DstBits + 1;
+
if (SignBitsX >= NeededSignBits && SignBitsY >= NeededSignBits) {
SDValue Tx = DAG.getNode(ISD::TRUNCATE, DL, VT, X);
SDValue Ty = DAG.getNode(ISD::TRUNCATE, DL, VT, Y);
diff --git a/llvm/test/CodeGen/AArch64/trunc-avg-fold.ll b/llvm/test/CodeGen/AArch64/trunc-avg-fold.ll
index db40746776d43..ede39e237a9c9 100644
--- a/llvm/test/CodeGen/AArch64/trunc-avg-fold.ll
+++ b/llvm/test/CodeGen/AArch64/trunc-avg-fold.ll
@@ -10,8 +10,8 @@ define <8 x i8> @test_avgceil_u(<8 x i16> %a, <8 x i16> %b) {
; CHECK-NEXT: uhadd v0.8b, v0.8b, v1.8b
; CHECK-NEXT: ret
- %mask = insertelement <8 x i16> undef, i16 255, i32 0
- %mask.splat = shufflevector <8 x i16> %mask, <8 x i16> undef, <8 x i32> zeroinitializer
+ %mask = insertelement <8 x i16> poison, i16 255, i32 0
+ %mask.splat = shufflevector <8 x i16> %mask, <8 x i16> poison, <8 x i32> zeroinitializer
%ta16 = and <8 x i16> %a, %mask.splat
%tb16 = and <8 x i16> %b, %mask.splat
%ta8 = trunc <8 x i16> %ta16 to <8 x i8>
@@ -29,10 +29,10 @@ define <8 x i8> @test_avgceil_s(<8 x i16> %a, <8 x i16> %b) {
; CHECK-NEXT: shadd v0.8b, v0.8b, v1.8b
; CHECK-NEXT: ret
- %smin = insertelement <8 x i16> undef, i16 -128, i32 0
- %smax = insertelement <8 x i16> undef, i16 127, i32 0
- %min = shufflevector <8 x i16> %smin, <8 x i16> undef, <8 x i32> zeroinitializer
- %max = shufflevector <8 x i16> %smax, <8 x i16> undef, <8 x i32> zeroinitializer
+ %smin = insertelement <8 x i16> poison, i16 -128, i32 0
+ %smax = insertelement <8 x i16> poison, i16 127, i32 0
+ %min = shufflevector <8 x i16> %smin, <8 x i16> poison, <8 x i32> zeroinitializer
+ %max = shufflevector <8 x i16> %smax, <8 x i16> poison, <8 x i32> zeroinitializer
%ta16 = call <8 x i16> @llvm.smin.v8i16(<8 x i16> %a, <8 x i16> %max)
%ta16.clamped = call <8 x i16> @llvm.smax.v8i16(<8 x i16> %ta16, <8 x i16> %min)
@@ -73,10 +73,10 @@ define <8 x i8> @test_avgfloor_s(<8 x i16> %a, <8 x i16> %b) {
; CHECK-NEXT: srhadd v0.8b, v0.8b, v1.8b
; CHECK-NEXT: ret
- %smin = insertelement <8 x i16> undef, i16 -128, i32 0
- %smax = insertelement <8 x i16> undef, i16 127, i32 0
- %min = shufflevector <8 x i16> %smin, <8 x i16> undef, <8 x i32> zeroinitializer
- %max = shufflevector <8 x i16> %smax, <8 x i16> undef, <8 x i32> zeroinitializer
+ %smin = insertelement <8 x i16> poison, i16 -128, i32 0
+ %smax = insertelement <8 x i16> poison, i16 127, i32 0
+ %min = shufflevector <8 x i16> %smin, <8 x i16> poison, <8 x i32> zeroinitializer
+ %max = shufflevector <8 x i16> %smax, <8 x i16> poison, <8 x i32> zeroinitializer
%ta16 = call <8 x i16> @llvm.smin.v8i16(<8 x i16> %a, <8 x i16> %max)
%ta16.clamped = call <8 x i16> @llvm.smax.v8i16(<8 x i16> %ta16, <8 x i16> %min)
From 11152562f1255a4fcd60404d1e08ca80bf422090 Mon Sep 17 00:00:00 2001
From: william <we3223 at gmail.com>
Date: Fri, 8 Aug 2025 11:40:46 +0800
Subject: [PATCH 04/17] [DAG] Fold trunc(avg(x,y)) for avgceil/floor u/s nodes
if they have sufficient leading zero/sign bits-4
---
llvm/test/CodeGen/AArch64/trunc-avg-fold.ll | 59 ++++++++-------------
1 file changed, 22 insertions(+), 37 deletions(-)
diff --git a/llvm/test/CodeGen/AArch64/trunc-avg-fold.ll b/llvm/test/CodeGen/AArch64/trunc-avg-fold.ll
index ede39e237a9c9..4d4e828a751bd 100644
--- a/llvm/test/CodeGen/AArch64/trunc-avg-fold.ll
+++ b/llvm/test/CodeGen/AArch64/trunc-avg-fold.ll
@@ -1,7 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=aarch64-- -O2 -mattr=+neon < %s | FileCheck %s
-
define <8 x i8> @test_avgceil_u(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_avgceil_u:
; CHECK: // %bb.0:
@@ -9,7 +8,6 @@ define <8 x i8> @test_avgceil_u(<8 x i16> %a, <8 x i16> %b) {
; CHECK-NEXT: xtn v1.8b, v1.8h
; CHECK-NEXT: uhadd v0.8b, v0.8b, v1.8b
; CHECK-NEXT: ret
-
%mask = insertelement <8 x i16> poison, i16 255, i32 0
%mask.splat = shufflevector <8 x i16> %mask, <8 x i16> poison, <8 x i32> zeroinitializer
%ta16 = and <8 x i16> %a, %mask.splat
@@ -20,7 +18,6 @@ define <8 x i8> @test_avgceil_u(<8 x i16> %a, <8 x i16> %b) {
ret <8 x i8> %res
}
-
define <8 x i8> @test_avgceil_s(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_avgceil_s:
; CHECK: // %bb.0:
@@ -28,72 +25,60 @@ define <8 x i8> @test_avgceil_s(<8 x i16> %a, <8 x i16> %b) {
; CHECK-NEXT: sqxtn v1.8b, v1.8h
; CHECK-NEXT: shadd v0.8b, v0.8b, v1.8b
; CHECK-NEXT: ret
-
- %smin = insertelement <8 x i16> poison, i16 -128, i32 0
- %smax = insertelement <8 x i16> poison, i16 127, i32 0
- %min = shufflevector <8 x i16> %smin, <8 x i16> poison, <8 x i32> zeroinitializer
- %max = shufflevector <8 x i16> %smax, <8 x i16> poison, <8 x i32> zeroinitializer
-
- %ta16 = call <8 x i16> @llvm.smin.v8i16(<8 x i16> %a, <8 x i16> %max)
- %ta16.clamped = call <8 x i16> @llvm.smax.v8i16(<8 x i16> %ta16, <8 x i16> %min)
- %tb16 = call <8 x i16> @llvm.smin.v8i16(<8 x i16> %b, <8 x i16> %max)
- %tb16.clamped = call <8 x i16> @llvm.smax.v8i16(<8 x i16> %tb16, <8 x i16> %min)
-
+ %min = insertelement <8 x i16> poison, i16 -128, i32 0
+ %min.splat = shufflevector <8 x i16> %min, <8 x i16> poison, <8 x i32> zeroinitializer
+ %max = insertelement <8 x i16> poison, i16 127, i32 0
+ %max.splat = shufflevector <8 x i16> %max, <8 x i16> poison, <8 x i32> zeroinitializer
+ %ta16 = call <8 x i16> @llvm.smin.v8i16(<8 x i16> %a, <8 x i16> %max.splat)
+ %ta16.clamped = call <8 x i16> @llvm.smax.v8i16(<8 x i16> %ta16, <8 x i16> %min.splat)
+ %tb16 = call <8 x i16> @llvm.smin.v8i16(<8 x i16> %b, <8 x i16> %max.splat)
+ %tb16.clamped = call <8 x i16> @llvm.smax.v8i16(<8 x i16> %tb16, <8 x i16> %min.splat)
%ta8 = trunc <8 x i16> %ta16.clamped to <8 x i8>
%tb8 = trunc <8 x i16> %tb16.clamped to <8 x i8>
%res = call <8 x i8> @llvm.aarch64.neon.shadd.v8i8(<8 x i8> %ta8, <8 x i8> %tb8)
ret <8 x i8> %res
}
-
define <8 x i8> @test_avgfloor_u(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_avgfloor_u:
; CHECK: // %bb.0:
; CHECK-NEXT: xtn v0.8b, v0.8h
; CHECK-NEXT: xtn v1.8b, v1.8h
-; CHECK-NEXT: urhadd v0.8b, v0.8b, v1.8b
+; CHECK-NEXT: uhadd v0.8b, v0.8b, v1.8b
; CHECK-NEXT: ret
-
- %mask = insertelement <8 x i16> undef, i16 255, i32 0
- %mask.splat = shufflevector <8 x i16> %mask, <8 x i16> undef, <8 x i32> zeroinitializer
+ %mask = insertelement <8 x i16> poison, i16 255, i32 0
+ %mask.splat = shufflevector <8 x i16> %mask, <8 x i16> poison, <8 x i32> zeroinitializer
%ta16 = and <8 x i16> %a, %mask.splat
%tb16 = and <8 x i16> %b, %mask.splat
%ta8 = trunc <8 x i16> %ta16 to <8 x i8>
%tb8 = trunc <8 x i16> %tb16 to <8 x i8>
- %res = call <8 x i8> @llvm.aarch64.neon.urhadd.v8i8(<8 x i8> %ta8, <8 x i8> %tb8)
+ %res = call <8 x i8> @llvm.aarch64.neon.uhadd.v8i8(<8 x i8> %ta8, <8 x i8> %tb8)
ret <8 x i8> %res
}
-
define <8 x i8> @test_avgfloor_s(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_avgfloor_s:
; CHECK: // %bb.0:
; CHECK-NEXT: sqxtn v0.8b, v0.8h
; CHECK-NEXT: sqxtn v1.8b, v1.8h
-; CHECK-NEXT: srhadd v0.8b, v0.8b, v1.8b
+; CHECK-NEXT: shadd v0.8b, v0.8b, v1.8b
; CHECK-NEXT: ret
-
- %smin = insertelement <8 x i16> poison, i16 -128, i32 0
- %smax = insertelement <8 x i16> poison, i16 127, i32 0
- %min = shufflevector <8 x i16> %smin, <8 x i16> poison, <8 x i32> zeroinitializer
- %max = shufflevector <8 x i16> %smax, <8 x i16> poison, <8 x i32> zeroinitializer
-
- %ta16 = call <8 x i16> @llvm.smin.v8i16(<8 x i16> %a, <8 x i16> %max)
- %ta16.clamped = call <8 x i16> @llvm.smax.v8i16(<8 x i16> %ta16, <8 x i16> %min)
- %tb16 = call <8 x i16> @llvm.smin.v8i16(<8 x i16> %b, <8 x i16> %max)
- %tb16.clamped = call <8 x i16> @llvm.smax.v8i16(<8 x i16> %tb16, <8 x i16> %min)
-
+ %min = insertelement <8 x i16> poison, i16 -128, i32 0
+ %min.splat = shufflevector <8 x i16> %min, <8 x i16> poison, <8 x i32> zeroinitializer
+ %max = insertelement <8 x i16> poison, i16 127, i32 0
+ %max.splat = shufflevector <8 x i16> %max, <8 x i16> poison, <8 x i32> zeroinitializer
+ %ta16 = call <8 x i16> @llvm.smin.v8i16(<8 x i16> %a, <8 x i16> %max.splat)
+ %ta16.clamped = call <8 x i16> @llvm.smax.v8i16(<8 x i16> %ta16, <8 x i16> %min.splat)
+ %tb16 = call <8 x i16> @llvm.smin.v8i16(<8 x i16> %b, <8 x i16> %max.splat)
+ %tb16.clamped = call <8 x i16> @llvm.smax.v8i16(<8 x i16> %tb16, <8 x i16> %min.splat)
%ta8 = trunc <8 x i16> %ta16.clamped to <8 x i8>
%tb8 = trunc <8 x i16> %tb16.clamped to <8 x i8>
- %res = call <8 x i8> @llvm.aarch64.neon.srhadd.v8i8(<8 x i8> %ta8, <8 x i8> %tb8)
+ %res = call <8 x i8> @llvm.aarch64.neon.shadd.v8i8(<8 x i8> %ta8, <8 x i8> %tb8)
ret <8 x i8> %res
}
declare <8 x i8> @llvm.aarch64.neon.uhadd.v8i8(<8 x i8>, <8 x i8>)
declare <8 x i8> @llvm.aarch64.neon.shadd.v8i8(<8 x i8>, <8 x i8>)
-declare <8 x i8> @llvm.aarch64.neon.urhadd.v8i8(<8 x i8>, <8 x i8>)
-declare <8 x i8> @llvm.aarch64.neon.srhadd.v8i8(<8 x i8>, <8 x i8>)
-
declare <8 x i16> @llvm.smin.v8i16(<8 x i16>, <8 x i16>)
declare <8 x i16> @llvm.smax.v8i16(<8 x i16>, <8 x i16>)
From 08138a2fde9896a580d11a2b4249eea86d42fefe Mon Sep 17 00:00:00 2001
From: william <we3223 at gmail.com>
Date: Fri, 8 Aug 2025 12:55:44 +0800
Subject: [PATCH 05/17] [DAG] Fold trunc(avg(x,y)) for avgceil/floor u/s nodes
if they have sufficient leading zero/sign bits-5
---
llvm/test/CodeGen/AArch64/trunc-avg-fold.ll | 52 ++++++++++++---------
1 file changed, 30 insertions(+), 22 deletions(-)
diff --git a/llvm/test/CodeGen/AArch64/trunc-avg-fold.ll b/llvm/test/CodeGen/AArch64/trunc-avg-fold.ll
index 4d4e828a751bd..36fddedd78df6 100644
--- a/llvm/test/CodeGen/AArch64/trunc-avg-fold.ll
+++ b/llvm/test/CodeGen/AArch64/trunc-avg-fold.ll
@@ -4,26 +4,31 @@
define <8 x i8> @test_avgceil_u(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_avgceil_u:
; CHECK: // %bb.0:
+; CHECK-NEXT: bic v0.8h, #255, lsl #8
+; CHECK-NEXT: bic v1.8h, #255, lsl #8
+; CHECK-NEXT: uhadd v0.8h, v0.8h, v1.8h
; CHECK-NEXT: xtn v0.8b, v0.8h
-; CHECK-NEXT: xtn v1.8b, v1.8h
-; CHECK-NEXT: uhadd v0.8b, v0.8b, v1.8b
; CHECK-NEXT: ret
%mask = insertelement <8 x i16> poison, i16 255, i32 0
%mask.splat = shufflevector <8 x i16> %mask, <8 x i16> poison, <8 x i32> zeroinitializer
%ta16 = and <8 x i16> %a, %mask.splat
%tb16 = and <8 x i16> %b, %mask.splat
- %ta8 = trunc <8 x i16> %ta16 to <8 x i8>
- %tb8 = trunc <8 x i16> %tb16 to <8 x i8>
- %res = call <8 x i8> @llvm.aarch64.neon.uhadd.v8i8(<8 x i8> %ta8, <8 x i8> %tb8)
+ %avg16 = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> %ta16, <8 x i16> %tb16)
+ %res = trunc <8 x i16> %avg16 to <8 x i8>
ret <8 x i8> %res
}
define <8 x i8> @test_avgceil_s(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_avgceil_s:
; CHECK: // %bb.0:
-; CHECK-NEXT: sqxtn v0.8b, v0.8h
-; CHECK-NEXT: sqxtn v1.8b, v1.8h
-; CHECK-NEXT: shadd v0.8b, v0.8b, v1.8b
+; CHECK-NEXT: movi v2.8h, #127
+; CHECK-NEXT: mvni v3.8h, #127
+; CHECK-NEXT: smin v0.8h, v0.8h, v2.8h
+; CHECK-NEXT: smin v1.8h, v1.8h, v2.8h
+; CHECK-NEXT: smax v0.8h, v0.8h, v3.8h
+; CHECK-NEXT: smax v1.8h, v1.8h, v3.8h
+; CHECK-NEXT: shadd v0.8h, v0.8h, v1.8h
+; CHECK-NEXT: xtn v0.8b, v0.8h
; CHECK-NEXT: ret
%min = insertelement <8 x i16> poison, i16 -128, i32 0
%min.splat = shufflevector <8 x i16> %min, <8 x i16> poison, <8 x i32> zeroinitializer
@@ -33,35 +38,39 @@ define <8 x i8> @test_avgceil_s(<8 x i16> %a, <8 x i16> %b) {
%ta16.clamped = call <8 x i16> @llvm.smax.v8i16(<8 x i16> %ta16, <8 x i16> %min.splat)
%tb16 = call <8 x i16> @llvm.smin.v8i16(<8 x i16> %b, <8 x i16> %max.splat)
%tb16.clamped = call <8 x i16> @llvm.smax.v8i16(<8 x i16> %tb16, <8 x i16> %min.splat)
- %ta8 = trunc <8 x i16> %ta16.clamped to <8 x i8>
- %tb8 = trunc <8 x i16> %tb16.clamped to <8 x i8>
- %res = call <8 x i8> @llvm.aarch64.neon.shadd.v8i8(<8 x i8> %ta8, <8 x i8> %tb8)
+ %avg16 = call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> %ta16.clamped, <8 x i16> %tb16.clamped)
+ %res = trunc <8 x i16> %avg16 to <8 x i8>
ret <8 x i8> %res
}
define <8 x i8> @test_avgfloor_u(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_avgfloor_u:
; CHECK: // %bb.0:
+; CHECK-NEXT: bic v0.8h, #255, lsl #8
+; CHECK-NEXT: bic v1.8h, #255, lsl #8
+; CHECK-NEXT: uhadd v0.8h, v0.8h, v1.8h
; CHECK-NEXT: xtn v0.8b, v0.8h
-; CHECK-NEXT: xtn v1.8b, v1.8h
-; CHECK-NEXT: uhadd v0.8b, v0.8b, v1.8b
; CHECK-NEXT: ret
%mask = insertelement <8 x i16> poison, i16 255, i32 0
%mask.splat = shufflevector <8 x i16> %mask, <8 x i16> poison, <8 x i32> zeroinitializer
%ta16 = and <8 x i16> %a, %mask.splat
%tb16 = and <8 x i16> %b, %mask.splat
- %ta8 = trunc <8 x i16> %ta16 to <8 x i8>
- %tb8 = trunc <8 x i16> %tb16 to <8 x i8>
- %res = call <8 x i8> @llvm.aarch64.neon.uhadd.v8i8(<8 x i8> %ta8, <8 x i8> %tb8)
+ %avg16 = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> %ta16, <8 x i16> %tb16)
+ %res = trunc <8 x i16> %avg16 to <8 x i8>
ret <8 x i8> %res
}
define <8 x i8> @test_avgfloor_s(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_avgfloor_s:
; CHECK: // %bb.0:
-; CHECK-NEXT: sqxtn v0.8b, v0.8h
-; CHECK-NEXT: sqxtn v1.8b, v1.8h
-; CHECK-NEXT: shadd v0.8b, v0.8b, v1.8b
+; CHECK-NEXT: movi v2.8h, #127
+; CHECK-NEXT: mvni v3.8h, #127
+; CHECK-NEXT: smin v0.8h, v0.8h, v2.8h
+; CHECK-NEXT: smin v1.8h, v1.8h, v2.8h
+; CHECK-NEXT: smax v0.8h, v0.8h, v3.8h
+; CHECK-NEXT: smax v1.8h, v1.8h, v3.8h
+; CHECK-NEXT: shadd v0.8h, v0.8h, v1.8h
+; CHECK-NEXT: xtn v0.8b, v0.8h
; CHECK-NEXT: ret
%min = insertelement <8 x i16> poison, i16 -128, i32 0
%min.splat = shufflevector <8 x i16> %min, <8 x i16> poison, <8 x i32> zeroinitializer
@@ -71,9 +80,8 @@ define <8 x i8> @test_avgfloor_s(<8 x i16> %a, <8 x i16> %b) {
%ta16.clamped = call <8 x i16> @llvm.smax.v8i16(<8 x i16> %ta16, <8 x i16> %min.splat)
%tb16 = call <8 x i16> @llvm.smin.v8i16(<8 x i16> %b, <8 x i16> %max.splat)
%tb16.clamped = call <8 x i16> @llvm.smax.v8i16(<8 x i16> %tb16, <8 x i16> %min.splat)
- %ta8 = trunc <8 x i16> %ta16.clamped to <8 x i8>
- %tb8 = trunc <8 x i16> %tb16.clamped to <8 x i8>
- %res = call <8 x i8> @llvm.aarch64.neon.shadd.v8i8(<8 x i8> %ta8, <8 x i8> %tb8)
+ %avg16 = call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> %ta16.clamped, <8 x i16> %tb16.clamped)
+ %res = trunc <8 x i16> %avg16 to <8 x i8>
ret <8 x i8> %res
}
From 728b37db85a9821aec9931af00a8338ae9d7c95e Mon Sep 17 00:00:00 2001
From: william <we3223 at gmail.com>
Date: Fri, 8 Aug 2025 13:05:41 +0800
Subject: [PATCH 06/17] [DAG] Fold trunc(avg(x,y)) for avgceil/floor u/s nodes
if they have sufficient leading zero/sign bits-6
---
llvm/test/CodeGen/AArch64/trunc-avg-fold.ll | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/test/CodeGen/AArch64/trunc-avg-fold.ll b/llvm/test/CodeGen/AArch64/trunc-avg-fold.ll
index 36fddedd78df6..24a1e6f60c078 100644
--- a/llvm/test/CodeGen/AArch64/trunc-avg-fold.ll
+++ b/llvm/test/CodeGen/AArch64/trunc-avg-fold.ll
@@ -85,8 +85,8 @@ define <8 x i8> @test_avgfloor_s(<8 x i16> %a, <8 x i16> %b) {
ret <8 x i8> %res
}
-declare <8 x i8> @llvm.aarch64.neon.uhadd.v8i8(<8 x i8>, <8 x i8>)
-declare <8 x i8> @llvm.aarch64.neon.shadd.v8i8(<8 x i8>, <8 x i8>)
+declare <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16>, <8 x i16>)
+declare <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16>, <8 x i16>)
declare <8 x i16> @llvm.smin.v8i16(<8 x i16>, <8 x i16>)
declare <8 x i16> @llvm.smax.v8i16(<8 x i16>, <8 x i16>)
From 44609a3b749675b758f1030b9401497192491dd4 Mon Sep 17 00:00:00 2001
From: william <we3223 at gmail.com>
Date: Sat, 9 Aug 2025 21:13:30 +0800
Subject: [PATCH 07/17] [DAG] Fold trunc(avg(x,y)) for avgceil/floor u/s nodes
if they have sufficient leading zero/sign bits-7
---
llvm/test/CodeGen/AArch64/trunc-avg-fold.ll | 36 +++++++--------------
1 file changed, 12 insertions(+), 24 deletions(-)
diff --git a/llvm/test/CodeGen/AArch64/trunc-avg-fold.ll b/llvm/test/CodeGen/AArch64/trunc-avg-fold.ll
index 24a1e6f60c078..ca8e713cafc13 100644
--- a/llvm/test/CodeGen/AArch64/trunc-avg-fold.ll
+++ b/llvm/test/CodeGen/AArch64/trunc-avg-fold.ll
@@ -9,10 +9,8 @@ define <8 x i8> @test_avgceil_u(<8 x i16> %a, <8 x i16> %b) {
; CHECK-NEXT: uhadd v0.8h, v0.8h, v1.8h
; CHECK-NEXT: xtn v0.8b, v0.8h
; CHECK-NEXT: ret
- %mask = insertelement <8 x i16> poison, i16 255, i32 0
- %mask.splat = shufflevector <8 x i16> %mask, <8 x i16> poison, <8 x i32> zeroinitializer
- %ta16 = and <8 x i16> %a, %mask.splat
- %tb16 = and <8 x i16> %b, %mask.splat
+ %ta16 = and <8 x i16> %a, splat (i16 255)
+ %tb16 = and <8 x i16> %b, splat (i16 255)
%avg16 = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> %ta16, <8 x i16> %tb16)
%res = trunc <8 x i16> %avg16 to <8 x i8>
ret <8 x i8> %res
@@ -30,14 +28,10 @@ define <8 x i8> @test_avgceil_s(<8 x i16> %a, <8 x i16> %b) {
; CHECK-NEXT: shadd v0.8h, v0.8h, v1.8h
; CHECK-NEXT: xtn v0.8b, v0.8h
; CHECK-NEXT: ret
- %min = insertelement <8 x i16> poison, i16 -128, i32 0
- %min.splat = shufflevector <8 x i16> %min, <8 x i16> poison, <8 x i32> zeroinitializer
- %max = insertelement <8 x i16> poison, i16 127, i32 0
- %max.splat = shufflevector <8 x i16> %max, <8 x i16> poison, <8 x i32> zeroinitializer
- %ta16 = call <8 x i16> @llvm.smin.v8i16(<8 x i16> %a, <8 x i16> %max.splat)
- %ta16.clamped = call <8 x i16> @llvm.smax.v8i16(<8 x i16> %ta16, <8 x i16> %min.splat)
- %tb16 = call <8 x i16> @llvm.smin.v8i16(<8 x i16> %b, <8 x i16> %max.splat)
- %tb16.clamped = call <8 x i16> @llvm.smax.v8i16(<8 x i16> %tb16, <8 x i16> %min.splat)
+ %ta16 = call <8 x i16> @llvm.smin.v8i16(<8 x i16> %a, <8 x i16> splat (i16 127))
+ %ta16.clamped = call <8 x i16> @llvm.smax.v8i16(<8 x i16> %ta16, <8 x i16> splat (i16 -128))
+ %tb16 = call <8 x i16> @llvm.smin.v8i16(<8 x i16> %b, <8 x i16> splat (i16 127))
+ %tb16.clamped = call <8 x i16> @llvm.smax.v8i16(<8 x i16> %tb16, <8 x i16> splat (i16 -128))
%avg16 = call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> %ta16.clamped, <8 x i16> %tb16.clamped)
%res = trunc <8 x i16> %avg16 to <8 x i8>
ret <8 x i8> %res
@@ -51,10 +45,8 @@ define <8 x i8> @test_avgfloor_u(<8 x i16> %a, <8 x i16> %b) {
; CHECK-NEXT: uhadd v0.8h, v0.8h, v1.8h
; CHECK-NEXT: xtn v0.8b, v0.8h
; CHECK-NEXT: ret
- %mask = insertelement <8 x i16> poison, i16 255, i32 0
- %mask.splat = shufflevector <8 x i16> %mask, <8 x i16> poison, <8 x i32> zeroinitializer
- %ta16 = and <8 x i16> %a, %mask.splat
- %tb16 = and <8 x i16> %b, %mask.splat
+ %ta16 = and <8 x i16> %a, splat (i16 255)
+ %tb16 = and <8 x i16> %b, splat (i16 255)
%avg16 = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> %ta16, <8 x i16> %tb16)
%res = trunc <8 x i16> %avg16 to <8 x i8>
ret <8 x i8> %res
@@ -72,14 +64,10 @@ define <8 x i8> @test_avgfloor_s(<8 x i16> %a, <8 x i16> %b) {
; CHECK-NEXT: shadd v0.8h, v0.8h, v1.8h
; CHECK-NEXT: xtn v0.8b, v0.8h
; CHECK-NEXT: ret
- %min = insertelement <8 x i16> poison, i16 -128, i32 0
- %min.splat = shufflevector <8 x i16> %min, <8 x i16> poison, <8 x i32> zeroinitializer
- %max = insertelement <8 x i16> poison, i16 127, i32 0
- %max.splat = shufflevector <8 x i16> %max, <8 x i16> poison, <8 x i32> zeroinitializer
- %ta16 = call <8 x i16> @llvm.smin.v8i16(<8 x i16> %a, <8 x i16> %max.splat)
- %ta16.clamped = call <8 x i16> @llvm.smax.v8i16(<8 x i16> %ta16, <8 x i16> %min.splat)
- %tb16 = call <8 x i16> @llvm.smin.v8i16(<8 x i16> %b, <8 x i16> %max.splat)
- %tb16.clamped = call <8 x i16> @llvm.smax.v8i16(<8 x i16> %tb16, <8 x i16> %min.splat)
+ %ta16 = call <8 x i16> @llvm.smin.v8i16(<8 x i16> %a, <8 x i16> splat (i16 127))
+ %ta16.clamped = call <8 x i16> @llvm.smax.v8i16(<8 x i16> %ta16, <8 x i16> splat (i16 -128))
+ %tb16 = call <8 x i16> @llvm.smin.v8i16(<8 x i16> %b, <8 x i16> splat (i16 127))
+ %tb16.clamped = call <8 x i16> @llvm.smax.v8i16(<8 x i16> %tb16, <8 x i16> splat (i16 -128))
%avg16 = call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> %ta16.clamped, <8 x i16> %tb16.clamped)
%res = trunc <8 x i16> %avg16 to <8 x i8>
ret <8 x i8> %res
From 2d268fc6bd5de28d1dd6adbabc732e475a530014 Mon Sep 17 00:00:00 2001
From: william <we3223 at gmail.com>
Date: Sun, 17 Aug 2025 00:09:15 +0800
Subject: [PATCH 08/17] [DAG] Fold trunc(avg(x,y)) for avgceil/floor u/s nodes
if they have sufficient leading zero/sign bits-8
---
llvm/test/CodeGen/AArch64/trunc-avg-fold.ll | 81 +++++++--------------
1 file changed, 27 insertions(+), 54 deletions(-)
diff --git a/llvm/test/CodeGen/AArch64/trunc-avg-fold.ll b/llvm/test/CodeGen/AArch64/trunc-avg-fold.ll
index ca8e713cafc13..8d9ea6c9d9922 100644
--- a/llvm/test/CodeGen/AArch64/trunc-avg-fold.ll
+++ b/llvm/test/CodeGen/AArch64/trunc-avg-fold.ll
@@ -1,80 +1,53 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=aarch64-- -O2 -mattr=+neon < %s | FileCheck %s
-define <8 x i8> @test_avgceil_u(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-LABEL: test_avgceil_u:
+define <8 x i8> @avgceil_u_i8_to_i16(<8 x i8> %a, <8 x i8> %b) {
+; CHECK-LABEL: avgceil_u_i8_to_i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: bic v0.8h, #255, lsl #8
-; CHECK-NEXT: bic v1.8h, #255, lsl #8
-; CHECK-NEXT: uhadd v0.8h, v0.8h, v1.8h
-; CHECK-NEXT: xtn v0.8b, v0.8h
+; CHECK-NEXT: urhadd v0.8b, v0.8b, v1.8b
; CHECK-NEXT: ret
- %ta16 = and <8 x i16> %a, splat (i16 255)
- %tb16 = and <8 x i16> %b, splat (i16 255)
- %avg16 = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> %ta16, <8 x i16> %tb16)
- %res = trunc <8 x i16> %avg16 to <8 x i8>
- ret <8 x i8> %res
+ %a16 = zext <8 x i8> %a to <8 x i16>
+ %b16 = zext <8 x i8> %b to <8 x i16>
+ %avg16 = call <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16> %a16, <8 x i16> %b16)
+ %r = trunc <8 x i16> %avg16 to <8 x i8>
+ ret <8 x i8> %r
}
-define <8 x i8> @test_avgceil_s(<8 x i16> %a, <8 x i16> %b) {
+
+define <8 x i8> @test_avgceil_s(<8 x i8> %a, <8 x i8> %b) {
; CHECK-LABEL: test_avgceil_s:
; CHECK: // %bb.0:
-; CHECK-NEXT: movi v2.8h, #127
-; CHECK-NEXT: mvni v3.8h, #127
-; CHECK-NEXT: smin v0.8h, v0.8h, v2.8h
-; CHECK-NEXT: smin v1.8h, v1.8h, v2.8h
-; CHECK-NEXT: smax v0.8h, v0.8h, v3.8h
-; CHECK-NEXT: smax v1.8h, v1.8h, v3.8h
-; CHECK-NEXT: shadd v0.8h, v0.8h, v1.8h
-; CHECK-NEXT: xtn v0.8b, v0.8h
+; CHECK-NEXT: srhadd v0.8b, v0.8b, v1.8b
; CHECK-NEXT: ret
- %ta16 = call <8 x i16> @llvm.smin.v8i16(<8 x i16> %a, <8 x i16> splat (i16 127))
- %ta16.clamped = call <8 x i16> @llvm.smax.v8i16(<8 x i16> %ta16, <8 x i16> splat (i16 -128))
- %tb16 = call <8 x i16> @llvm.smin.v8i16(<8 x i16> %b, <8 x i16> splat (i16 127))
- %tb16.clamped = call <8 x i16> @llvm.smax.v8i16(<8 x i16> %tb16, <8 x i16> splat (i16 -128))
- %avg16 = call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> %ta16.clamped, <8 x i16> %tb16.clamped)
- %res = trunc <8 x i16> %avg16 to <8 x i8>
+ %a16 = sext <8 x i8> %a to <8 x i16>
+ %b16 = sext <8 x i8> %b to <8 x i16>
+ %avg16 = call <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16> %a16, <8 x i16> %b16)
+ %res = trunc <8 x i16> %avg16 to <8 x i8>
ret <8 x i8> %res
}
-define <8 x i8> @test_avgfloor_u(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-LABEL: test_avgfloor_u:
+define <8 x i8> @avgfloor_u_from_intrin(<8 x i8> %a, <8 x i8> %b) {
+; CHECK-LABEL: avgfloor_u_from_intrin:
; CHECK: // %bb.0:
-; CHECK-NEXT: bic v0.8h, #255, lsl #8
-; CHECK-NEXT: bic v1.8h, #255, lsl #8
-; CHECK-NEXT: uhadd v0.8h, v0.8h, v1.8h
-; CHECK-NEXT: xtn v0.8b, v0.8h
+; CHECK-NEXT: uhadd v0.8b, v0.8b, v1.8b
; CHECK-NEXT: ret
- %ta16 = and <8 x i16> %a, splat (i16 255)
- %tb16 = and <8 x i16> %b, splat (i16 255)
- %avg16 = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> %ta16, <8 x i16> %tb16)
+ %a16 = zext <8 x i8> %a to <8 x i16>
+ %b16 = zext <8 x i8> %b to <8 x i16>
+ %avg16 = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> %a16, <8 x i16> %b16)
%res = trunc <8 x i16> %avg16 to <8 x i8>
ret <8 x i8> %res
}
-define <8 x i8> @test_avgfloor_s(<8 x i16> %a, <8 x i16> %b) {
+define <8 x i8> @test_avgfloor_s(<8 x i8> %a, <8 x i8> %b) {
; CHECK-LABEL: test_avgfloor_s:
; CHECK: // %bb.0:
-; CHECK-NEXT: movi v2.8h, #127
-; CHECK-NEXT: mvni v3.8h, #127
-; CHECK-NEXT: smin v0.8h, v0.8h, v2.8h
-; CHECK-NEXT: smin v1.8h, v1.8h, v2.8h
-; CHECK-NEXT: smax v0.8h, v0.8h, v3.8h
-; CHECK-NEXT: smax v1.8h, v1.8h, v3.8h
-; CHECK-NEXT: shadd v0.8h, v0.8h, v1.8h
-; CHECK-NEXT: xtn v0.8b, v0.8h
+; CHECK-NEXT: shadd v0.8b, v0.8b, v1.8b
; CHECK-NEXT: ret
- %ta16 = call <8 x i16> @llvm.smin.v8i16(<8 x i16> %a, <8 x i16> splat (i16 127))
- %ta16.clamped = call <8 x i16> @llvm.smax.v8i16(<8 x i16> %ta16, <8 x i16> splat (i16 -128))
- %tb16 = call <8 x i16> @llvm.smin.v8i16(<8 x i16> %b, <8 x i16> splat (i16 127))
- %tb16.clamped = call <8 x i16> @llvm.smax.v8i16(<8 x i16> %tb16, <8 x i16> splat (i16 -128))
- %avg16 = call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> %ta16.clamped, <8 x i16> %tb16.clamped)
- %res = trunc <8 x i16> %avg16 to <8 x i8>
+ %a16 = sext <8 x i8> %a to <8 x i16>
+ %b16 = sext <8 x i8> %b to <8 x i16>
+ %avg16 = call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> %a16, <8 x i16> %b16)
+ %res = trunc <8 x i16> %avg16 to <8 x i8>
ret <8 x i8> %res
}
-declare <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16>, <8 x i16>)
-declare <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16>, <8 x i16>)
-declare <8 x i16> @llvm.smin.v8i16(<8 x i16>, <8 x i16>)
-declare <8 x i16> @llvm.smax.v8i16(<8 x i16>, <8 x i16>)
From 32041fbb0b9696b8ab59feab66354aad96e4b1f7 Mon Sep 17 00:00:00 2001
From: william <we3223 at gmail.com>
Date: Sun, 17 Aug 2025 00:10:04 +0800
Subject: [PATCH 09/17] [DAG] Fold trunc(avg(x,y)) for avgceil/floor u/s nodes
if they have sufficient leading zero/sign bits-9
---
llvm/test/CodeGen/AArch64/trunc-avg-fold.ll | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/test/CodeGen/AArch64/trunc-avg-fold.ll b/llvm/test/CodeGen/AArch64/trunc-avg-fold.ll
index 8d9ea6c9d9922..030e9ea994264 100644
--- a/llvm/test/CodeGen/AArch64/trunc-avg-fold.ll
+++ b/llvm/test/CodeGen/AArch64/trunc-avg-fold.ll
@@ -26,7 +26,7 @@ define <8 x i8> @test_avgceil_s(<8 x i8> %a, <8 x i8> %b) {
ret <8 x i8> %res
}
-define <8 x i8> @avgfloor_u_from_intrin(<8 x i8> %a, <8 x i8> %b) {
+define <8 x i8> @avgfloor_u_i8_to_i16(<8 x i8> %a, <8 x i8> %b) {
; CHECK-LABEL: avgfloor_u_from_intrin:
; CHECK: // %bb.0:
; CHECK-NEXT: uhadd v0.8b, v0.8b, v1.8b
From 4e1af14d3efaed8c47448a158f547bdcd47879b3 Mon Sep 17 00:00:00 2001
From: william <we3223 at gmail.com>
Date: Sun, 17 Aug 2025 23:32:43 +0800
Subject: [PATCH 10/17] Fold trunc(avg(x,y)) for avgceil/floor u/s nodes if
they have sufficient leading zero/sign bits-10
---
llvm/test/CodeGen/AArch64/trunc-avg-fold.ll | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/test/CodeGen/AArch64/trunc-avg-fold.ll b/llvm/test/CodeGen/AArch64/trunc-avg-fold.ll
index 030e9ea994264..54fcae4ba28b7 100644
--- a/llvm/test/CodeGen/AArch64/trunc-avg-fold.ll
+++ b/llvm/test/CodeGen/AArch64/trunc-avg-fold.ll
@@ -27,7 +27,7 @@ define <8 x i8> @test_avgceil_s(<8 x i8> %a, <8 x i8> %b) {
}
define <8 x i8> @avgfloor_u_i8_to_i16(<8 x i8> %a, <8 x i8> %b) {
-; CHECK-LABEL: avgfloor_u_from_intrin:
+; CHECK-LABEL: avgfloor_u_i8_to_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: uhadd v0.8b, v0.8b, v1.8b
; CHECK-NEXT: ret
From c4ea7bdf7df0749e30479967d7643b363df43bf7 Mon Sep 17 00:00:00 2001
From: william <we3223 at gmail.com>
Date: Mon, 18 Aug 2025 21:05:31 +0800
Subject: [PATCH 11/17] [DAG] Fold trunc(avg(x,y)) for avgceil/floor u/s nodes
if they have sufficient leading zero/sign bits-11
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 13 +++++--------
1 file changed, 5 insertions(+), 8 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 7aea288c03208..738aa96b729ec 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -16302,10 +16302,9 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
SDValue Y = N0.getOperand(1);
unsigned SrcBits = X.getScalarValueSizeInBits();
unsigned DstBits = VT.getScalarSizeInBits();
- KnownBits KnownX = DAG.computeKnownBits(X);
- KnownBits KnownY = DAG.computeKnownBits(Y);
- if (KnownX.countMinLeadingZeros() >= (SrcBits - DstBits) &&
- KnownY.countMinLeadingZeros() >= (SrcBits - DstBits)) {
+ APInt UpperBits = APInt::getBitsSetFrom(SrcBits, DstBits);
+ if (DAG.MaskedValueIsZero(X, UpperBits) &&
+ DAG.MaskedValueIsZero(Y, UpperBits)) {
SDValue Tx = DAG.getNode(ISD::TRUNCATE, DL, VT, X);
SDValue Ty = DAG.getNode(ISD::TRUNCATE, DL, VT, Y);
return DAG.getNode(N0.getOpcode(), DL, VT, Tx, Ty);
@@ -16318,13 +16317,11 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
TLI.isOperationLegal(N0.getOpcode(), VT)) {
SDValue X = N0.getOperand(0);
SDValue Y = N0.getOperand(1);
- unsigned SignBitsX = DAG.ComputeNumSignBits(X);
- unsigned SignBitsY = DAG.ComputeNumSignBits(Y);
unsigned SrcBits = X.getScalarValueSizeInBits();
unsigned DstBits = VT.getScalarSizeInBits();
unsigned NeededSignBits = SrcBits - DstBits + 1;
-
- if (SignBitsX >= NeededSignBits && SignBitsY >= NeededSignBits) {
+ if (DAG.ComputeNumSignBits(X) >= NeededSignBits &&
+ DAG.ComputeNumSignBits(Y) >= NeededSignBits) {
SDValue Tx = DAG.getNode(ISD::TRUNCATE, DL, VT, X);
SDValue Ty = DAG.getNode(ISD::TRUNCATE, DL, VT, Y);
return DAG.getNode(N0.getOpcode(), DL, VT, Tx, Ty);
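
Patch 11 keeps the signed-side requirement at SrcBits - DstBits + 1 sign bits, i.e. each i16 operand must already be a sign-extended i8 value. A matching standalone scalar check for AVGFLOORS/AVGCEILS (again only an illustration, not part of the patch):

  #include <cassert>
  #include <cstdint>

  // Scalar models of ISD::AVGFLOORS / ISD::AVGCEILS; floordiv2 avoids shifting
  // negative values so the rounding direction is explicit.
  static int floordiv2(int v)        { return (v - (v & 1)) / 2; }
  static int avgfloors(int a, int b) { return floordiv2(a + b); }
  static int avgceils(int a, int b)  { return floordiv2(a + b + 1); }

  int main() {
    // Every i16 pair with at least 9 sign bits is a sign-extended i8 value, so
    // the average stays inside [-128, 127] and truncating it afterwards loses
    // nothing: trunc(avg(x, y)) == avg(trunc(x), trunc(y)).
    for (int x = -128; x <= 127; ++x)
      for (int y = -128; y <= 127; ++y) {
        int f = avgfloors(x, y), c = avgceils(x, y);
        assert((int8_t)f == f && (int8_t)c == c);
      }
    return 0;
  }
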
From fac54fffd2fc76a4523bb26008e2e2b5a37c0a16 Mon Sep 17 00:00:00 2001
From: william <we3223 at gmail.com>
Date: Wed, 17 Sep 2025 10:23:43 +0800
Subject: [PATCH 12/17] [X86] X86TargetLowering::computeKnownBitsForTargetNode
- add X86ISD::VPMADD52L/H handling-1
---
llvm/lib/Target/X86/X86ISelLowering.cpp | 20 +++
llvm/test/CodeGen/X86/knownbits-vpmadd52.ll | 138 ++++++++++++++++++++
2 files changed, 158 insertions(+)
create mode 100644 llvm/test/CodeGen/X86/knownbits-vpmadd52.ll
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index f81efdc6414aa..b345a57d46863 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -38999,6 +38999,26 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
}
break;
}
+ case X86ISD::VPMADD52L:
+ case X86ISD::VPMADD52H: {
+ assert(Op.getValueType().isVector() &&
+ Op.getValueType().getScalarType() == MVT::i64 &&
+ "Unexpected VPMADD52 type");
+ KnownBits K0 =
+ DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ KnownBits K1 =
+ DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+ KnownBits KAcc =
+ DAG.computeKnownBits(Op.getOperand(2), DemandedElts, Depth + 1);
+ K0 = K0.trunc(52);
+ K1 = K1.trunc(52);
+ KnownBits KnownMul = (Op.getOpcode() == X86ISD::VPMADD52L)
+ ? KnownBits::mul(K0, K1)
+ : KnownBits::mulhu(K0, K1);
+ KnownMul = KnownMul.zext(64);
+ Known = KnownBits::add(KAcc, KnownMul);
+ return;
+ }
}
// Handle target shuffles.
diff --git a/llvm/test/CodeGen/X86/knownbits-vpmadd52.ll b/llvm/test/CodeGen/X86/knownbits-vpmadd52.ll
new file mode 100644
index 0000000000000..0b5be5fc9900b
--- /dev/null
+++ b/llvm/test/CodeGen/X86/knownbits-vpmadd52.ll
@@ -0,0 +1,138 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512ifma,+avx512vl | FileCheck %s --check-prefixes=AVX512VL
+
+
+
+; H path: take the high 52 bits of the product and add them to the accumulator
+; 25-bit = (1<<25)-1 = 33554431
+; 26-bit = (1<<26)-1 = 67108863
+
+declare <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64>, <2 x i64>, <2 x i64>)
+declare <4 x i64> @llvm.x86.avx512.vpmadd52h.uq.256(<4 x i64>, <4 x i64>, <4 x i64>)
+declare <8 x i64> @llvm.x86.avx512.vpmadd52h.uq.512(<8 x i64>, <8 x i64>, <8 x i64>)
+
+define <2 x i64> @kb52h_128_mask25_and1(<2 x i64> %x, <2 x i64> %y) {
+; AVX512VL-LABEL: kb52h_128_mask25_and1:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vmovddup {{.*#+}} xmm0 = [1,1]
+; AVX512VL-NEXT: # xmm0 = mem[0,0]
+; AVX512VL-NEXT: retq
+ %mx = and <2 x i64> %x, <i64 33554431, i64 33554431>
+ %my = and <2 x i64> %y, <i64 33554431, i64 33554431>
+ %r = call <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(
+ <2 x i64> <i64 1, i64 1>, ; acc
+ <2 x i64> %mx, ; x (masked to 25-bit)
+ <2 x i64> %my) ; y (masked to 25-bit)
+ %ret = and <2 x i64> %r, <i64 1, i64 1>
+ ret <2 x i64> %ret
+}
+
+define <4 x i64> @kb52h_256_mask25x26_acc1(<4 x i64> %x, <4 x i64> %y) {
+; AVX512VL-LABEL: kb52h_256_mask25x26_acc1:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vbroadcastsd {{.*#+}} ymm0 = [1,1,1,1]
+; AVX512VL-NEXT: retq
+ %mx = and <4 x i64> %x, <i64 33554431, i64 33554431, i64 33554431, i64 33554431>
+ %my = and <4 x i64> %y, <i64 67108863, i64 67108863, i64 67108863, i64 67108863>
+ %r = call <4 x i64> @llvm.x86.avx512.vpmadd52h.uq.256(
+ <4 x i64> <i64 1, i64 1, i64 1, i64 1>,
+ <4 x i64> %mx,
+ <4 x i64> %my)
+ ret <4 x i64> %r
+}
+
+define <8 x i64> @kb52h_512_mask25_and1(<8 x i64> %x, <8 x i64> %y) {
+; AVX512VL-LABEL: kb52h_512_mask25_and1:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vbroadcastsd {{.*#+}} zmm0 = [1,1,1,1,1,1,1,1]
+; AVX512VL-NEXT: retq
+ %mx = and <8 x i64> %x, <i64 33554431, i64 33554431, i64 33554431, i64 33554431, i64 33554431, i64 33554431, i64 33554431, i64 33554431>
+ %my = and <8 x i64> %y, <i64 33554431, i64 33554431, i64 33554431, i64 33554431, i64 33554431, i64 33554431, i64 33554431, i64 33554431>
+ %r = call <8 x i64> @llvm.x86.avx512.vpmadd52h.uq.512(
+ <8 x i64> <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>,
+ <8 x i64> %mx,
+ <8 x i64> %my)
+ %ret = and <8 x i64> %r, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
+ ret <8 x i64> %ret
+}
+
+
+; 26-bit = 67108863 = (1<<26)-1
+; 50-bit = 1125899906842623 = (1<<50)-1
+
+declare <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64>, <2 x i64>, <2 x i64>)
+declare <4 x i64> @llvm.x86.avx512.vpmadd52l.uq.256(<4 x i64>, <4 x i64>, <4 x i64>)
+declare <8 x i64> @llvm.x86.avx512.vpmadd52l.uq.512(<8 x i64>, <8 x i64>, <8 x i64>)
+
+
+
+define <2 x i64> @kb52l_128_mask26x26_add_intrin(<2 x i64> %x, <2 x i64> %y, <2 x i64> %acc) {
+; AVX512VL-LABEL: kb52l_128_mask26x26_add_intrin:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpbroadcastq {{.*#+}} xmm3 = [67108863,67108863]
+; AVX512VL-NEXT: vpand %xmm3, %xmm0, %xmm0
+; AVX512VL-NEXT: vpand %xmm3, %xmm1, %xmm1
+; AVX512VL-NEXT: vpmadd52luq %xmm1, %xmm0, %xmm2
+; AVX512VL-NEXT: vmovdqa %xmm2, %xmm0
+; AVX512VL-NEXT: retq
+ %xm = and <2 x i64> %x, <i64 67108863, i64 67108863>
+ %ym = and <2 x i64> %y, <i64 67108863, i64 67108863>
+ %r = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %acc, <2 x i64> %xm, <2 x i64> %ym)
+ ret <2 x i64> %r
+}
+
+
+
+define <4 x i64> @kb52l_256_mask50x3_add_intrin(<4 x i64> %x, <4 x i64> %y, <4 x i64> %acc) {
+; AVX512VL-LABEL: kb52l_256_mask50x3_add_intrin:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %ymm0
+; AVX512VL-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm1, %ymm1
+; AVX512VL-NEXT: vpmadd52luq %ymm1, %ymm0, %ymm2
+; AVX512VL-NEXT: vmovdqa %ymm2, %ymm0
+; AVX512VL-NEXT: retq
+ %xm = and <4 x i64> %x, <i64 1125899906842623, i64 1125899906842623, i64 1125899906842623, i64 1125899906842623>
+ %ym = and <4 x i64> %y, <i64 3, i64 3, i64 3, i64 3>
+ %r = call <4 x i64> @llvm.x86.avx512.vpmadd52l.uq.256(<4 x i64> %acc, <4 x i64> %xm, <4 x i64> %ym)
+ ret <4 x i64> %r
+}
+
+
+
+define <8 x i64> @kb52l_512_mask26x26_add_intrin(<8 x i64> %x, <8 x i64> %y, <8 x i64> %acc) {
+; AVX512-NOVL-LABEL: kb52l_512_mask26x26_add_intrin:
+; AVX512-NOVL: vpmadd52luq
+; AVX512-NOVL: retq
+; AVX512VL-LABEL: kb52l_512_mask26x26_add_intrin:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpbroadcastq {{.*#+}} zmm3 = [67108863,67108863,67108863,67108863,67108863,67108863,67108863,67108863]
+; AVX512VL-NEXT: vpandq %zmm3, %zmm0, %zmm0
+; AVX512VL-NEXT: vpandq %zmm3, %zmm1, %zmm1
+; AVX512VL-NEXT: vpmadd52luq %zmm1, %zmm0, %zmm2
+; AVX512VL-NEXT: vmovdqa64 %zmm2, %zmm0
+; AVX512VL-NEXT: retq
+ %xm = and <8 x i64> %x, <i64 67108863, i64 67108863, i64 67108863, i64 67108863, i64 67108863, i64 67108863, i64 67108863, i64 67108863>
+ %ym = and <8 x i64> %y, <i64 67108863, i64 67108863, i64 67108863, i64 67108863, i64 67108863, i64 67108863, i64 67108863, i64 67108863>
+ %r = call <8 x i64> @llvm.x86.avx512.vpmadd52l.uq.512(<8 x i64> %acc, <8 x i64> %xm, <8 x i64> %ym)
+ ret <8 x i64> %r
+}
+
+
+
+
+define <2 x i64> @kb52l_128_neg_27x27_plain(<2 x i64> %x, <2 x i64> %y, <2 x i64> %acc) {
+; AVX512VL-LABEL: kb52l_128_neg_27x27_plain:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpbroadcastq {{.*#+}} xmm3 = [67108864,67108864]
+; AVX512VL-NEXT: vpand %xmm3, %xmm0, %xmm0
+; AVX512VL-NEXT: vpand %xmm3, %xmm1, %xmm1
+; AVX512VL-NEXT: vpmuldq %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: vpaddq %xmm2, %xmm0, %xmm0
+; AVX512VL-NEXT: retq
+ %xm = and <2 x i64> %x, <i64 67108864, i64 67108864> ; 1<<26
+ %ym = and <2 x i64> %y, <i64 67108864, i64 67108864>
+ %mul = mul <2 x i64> %xm, %ym
+ %res = add <2 x i64> %mul, %acc
+ ret <2 x i64> %res
+}
+
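
Patch 12's computeKnownBitsForTargetNode case treats VPMADD52L/H as consuming only the low 52 bits of the multiplicands: both operands' known bits are truncated to 52, combined with KnownBits::mul (low half) or KnownBits::mulhu (high half), zero-extended back to 64 bits and added to the accumulator's known bits. A scalar model of one lane (helper names invented here) shows why the kb52h tests collapse to the bare accumulator:

  #include <cassert>
  #include <cstdint>

  // Scalar model of one 64-bit lane of VPMADD52L/VPMADD52H: a 52x52 multiply
  // whose low or high 52 bits are added to the accumulator.
  static uint64_t madd52lo(uint64_t acc, uint64_t x, uint64_t y) {
    unsigned __int128 p = (unsigned __int128)(x & ((1ULL << 52) - 1)) *
                          (y & ((1ULL << 52) - 1));
    return acc + (uint64_t)(p & ((1ULL << 52) - 1));   // product bits [51:0]
  }
  static uint64_t madd52hi(uint64_t acc, uint64_t x, uint64_t y) {
    unsigned __int128 p = (unsigned __int128)(x & ((1ULL << 52) - 1)) *
                          (y & ((1ULL << 52) - 1));
    return acc + (uint64_t)(p >> 52);                  // product bits [103:52]
  }

  int main() {
    // With both multiplicands masked to 25 bits the product is below 2^50, so
    // the high-52 half is all zeroes and the H form returns the accumulator
    // unchanged -- which is why kb52h_128_mask25_and1 folds to [1,1].
    uint64_t x = 0x1ffffffULL, y = 0x1abcdefULL;       // 25-bit values
    assert(madd52hi(1, x, y) == 1);
    // The L form adds the low 52 product bits, the quantity the new KnownBits
    // case models with KnownBits::mul on the 52-bit-truncated operands.
    assert(madd52lo(0, x, y) == x * y);                // product also fits in 52 bits
    return 0;
  }
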
From c5100dcee32919cd250088ece985123e6bf231ab Mon Sep 17 00:00:00 2001
From: william <we3223 at gmail.com>
Date: Wed, 17 Sep 2025 10:39:09 +0800
Subject: [PATCH 13/17] Remove unintended changes to DAGCombiner.cpp
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 34 -------------------
1 file changed, 34 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 8f4e84a34a8bd..4b20b756f8a15 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -16354,40 +16354,6 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
// because targets may prefer a wider type during later combines and invert
// this transform.
switch (N0.getOpcode()) {
- case ISD::AVGCEILU:
- case ISD::AVGFLOORU:
- if (!LegalOperations && N0.hasOneUse() &&
- TLI.isOperationLegal(N0.getOpcode(), VT)) {
- SDValue X = N0.getOperand(0);
- SDValue Y = N0.getOperand(1);
- unsigned SrcBits = X.getScalarValueSizeInBits();
- unsigned DstBits = VT.getScalarSizeInBits();
- APInt UpperBits = APInt::getBitsSetFrom(SrcBits, DstBits);
- if (DAG.MaskedValueIsZero(X, UpperBits) &&
- DAG.MaskedValueIsZero(Y, UpperBits)) {
- SDValue Tx = DAG.getNode(ISD::TRUNCATE, DL, VT, X);
- SDValue Ty = DAG.getNode(ISD::TRUNCATE, DL, VT, Y);
- return DAG.getNode(N0.getOpcode(), DL, VT, Tx, Ty);
- }
- }
- break;
- case ISD::AVGCEILS:
- case ISD::AVGFLOORS:
- if (!LegalOperations && N0.hasOneUse() &&
- TLI.isOperationLegal(N0.getOpcode(), VT)) {
- SDValue X = N0.getOperand(0);
- SDValue Y = N0.getOperand(1);
- unsigned SrcBits = X.getScalarValueSizeInBits();
- unsigned DstBits = VT.getScalarSizeInBits();
- unsigned NeededSignBits = SrcBits - DstBits + 1;
- if (DAG.ComputeNumSignBits(X) >= NeededSignBits &&
- DAG.ComputeNumSignBits(Y) >= NeededSignBits) {
- SDValue Tx = DAG.getNode(ISD::TRUNCATE, DL, VT, X);
- SDValue Ty = DAG.getNode(ISD::TRUNCATE, DL, VT, Y);
- return DAG.getNode(N0.getOpcode(), DL, VT, Tx, Ty);
- }
- }
- break;
case ISD::ADD:
case ISD::SUB:
case ISD::MUL:
From 27f0f4295c972e3b5611f13352c79d24c04a8bcf Mon Sep 17 00:00:00 2001
From: william <we3223 at gmail.com>
Date: Fri, 19 Sep 2025 00:19:05 +0800
Subject: [PATCH 14/17] update test case
---
llvm/test/CodeGen/X86/knownbits-vpmadd52.ll | 137 ++++++++------------
1 file changed, 52 insertions(+), 85 deletions(-)
diff --git a/llvm/test/CodeGen/X86/knownbits-vpmadd52.ll b/llvm/test/CodeGen/X86/knownbits-vpmadd52.ll
index 0b5be5fc9900b..b3f7fe205a958 100644
--- a/llvm/test/CodeGen/X86/knownbits-vpmadd52.ll
+++ b/llvm/test/CodeGen/X86/knownbits-vpmadd52.ll
@@ -1,15 +1,12 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512ifma,+avx512vl | FileCheck %s --check-prefixes=AVX512VL
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512ifma,+avx512vl | FileCheck %s --check-prefixes=AVX512VL
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avxifma | FileCheck %s --check-prefixes=AVXIFMA
-
-; H path: take the high 52 bits of the product and add them to the accumulator
-; 25-bit = (1<<25)-1 = 33554431
-; 26-bit = (1<<26)-1 = 67108863
+; High-52 path
declare <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64>, <2 x i64>, <2 x i64>)
declare <4 x i64> @llvm.x86.avx512.vpmadd52h.uq.256(<4 x i64>, <4 x i64>, <4 x i64>)
-declare <8 x i64> @llvm.x86.avx512.vpmadd52h.uq.512(<8 x i64>, <8 x i64>, <8 x i64>)
define <2 x i64> @kb52h_128_mask25_and1(<2 x i64> %x, <2 x i64> %y) {
; AVX512VL-LABEL: kb52h_128_mask25_and1:
@@ -17,13 +14,19 @@ define <2 x i64> @kb52h_128_mask25_and1(<2 x i64> %x, <2 x i64> %y) {
; AVX512VL-NEXT: vmovddup {{.*#+}} xmm0 = [1,1]
; AVX512VL-NEXT: # xmm0 = mem[0,0]
; AVX512VL-NEXT: retq
- %mx = and <2 x i64> %x, <i64 33554431, i64 33554431>
- %my = and <2 x i64> %y, <i64 33554431, i64 33554431>
+;
+; AVXIFMA-LABEL: kb52h_128_mask25_and1:
+; AVXIFMA: # %bb.0:
+; AVXIFMA-NEXT: vmovddup {{.*#+}} xmm0 = [1,1]
+; AVXIFMA-NEXT: # xmm0 = mem[0,0]
+; AVXIFMA-NEXT: retq
+ %mx = and <2 x i64> %x, splat (i64 33554431) ; (1<<25)-1
+ %my = and <2 x i64> %y, splat (i64 33554431) ; (1<<25)-1
%r = call <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(
- <2 x i64> <i64 1, i64 1>, ; acc
- <2 x i64> %mx, ; x (masked to 25-bit)
- <2 x i64> %my) ; y (masked to 25-bit)
- %ret = and <2 x i64> %r, <i64 1, i64 1>
+ <2 x i64> splat (i64 1),
+ <2 x i64> %mx,
+ <2 x i64> %my)
+ %ret = and <2 x i64> %r, splat (i64 1)
ret <2 x i64> %ret
}
@@ -32,39 +35,23 @@ define <4 x i64> @kb52h_256_mask25x26_acc1(<4 x i64> %x, <4 x i64> %y) {
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vbroadcastsd {{.*#+}} ymm0 = [1,1,1,1]
; AVX512VL-NEXT: retq
- %mx = and <4 x i64> %x, <i64 33554431, i64 33554431, i64 33554431, i64 33554431>
- %my = and <4 x i64> %y, <i64 67108863, i64 67108863, i64 67108863, i64 67108863>
+;
+; AVXIFMA-LABEL: kb52h_256_mask25x26_acc1:
+; AVXIFMA: # %bb.0:
+; AVXIFMA-NEXT: vbroadcastsd {{.*#+}} ymm0 = [1,1,1,1]
+; AVXIFMA-NEXT: retq
+ %mx = and <4 x i64> %x, splat (i64 33554431) ; (1<<25)-1
+ %my = and <4 x i64> %y, splat (i64 67108863) ; (1<<26)-1
%r = call <4 x i64> @llvm.x86.avx512.vpmadd52h.uq.256(
- <4 x i64> <i64 1, i64 1, i64 1, i64 1>,
- <4 x i64> %mx,
- <4 x i64> %my)
+ <4 x i64> splat (i64 1),
+ <4 x i64> %mx, <4 x i64> %my)
ret <4 x i64> %r
}
-define <8 x i64> @kb52h_512_mask25_and1(<8 x i64> %x, <8 x i64> %y) {
-; AVX512VL-LABEL: kb52h_512_mask25_and1:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vbroadcastsd {{.*#+}} zmm0 = [1,1,1,1,1,1,1,1]
-; AVX512VL-NEXT: retq
- %mx = and <8 x i64> %x, <i64 33554431, i64 33554431, i64 33554431, i64 33554431, i64 33554431, i64 33554431, i64 33554431, i64 33554431>
- %my = and <8 x i64> %y, <i64 33554431, i64 33554431, i64 33554431, i64 33554431, i64 33554431, i64 33554431, i64 33554431, i64 33554431>
- %r = call <8 x i64> @llvm.x86.avx512.vpmadd52h.uq.512(
- <8 x i64> <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>,
- <8 x i64> %mx,
- <8 x i64> %my)
- %ret = and <8 x i64> %r, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
- ret <8 x i64> %ret
-}
-
-
-; 26-bit = 67108863 = (1<<26)-1
-; 50-bit = 1125899906842623 = (1<<50)-1
+; Low-52 path
declare <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64>, <2 x i64>, <2 x i64>)
declare <4 x i64> @llvm.x86.avx512.vpmadd52l.uq.256(<4 x i64>, <4 x i64>, <4 x i64>)
-declare <8 x i64> @llvm.x86.avx512.vpmadd52l.uq.512(<8 x i64>, <8 x i64>, <8 x i64>)
-
-
define <2 x i64> @kb52l_128_mask26x26_add_intrin(<2 x i64> %x, <2 x i64> %y, <2 x i64> %acc) {
; AVX512VL-LABEL: kb52l_128_mask26x26_add_intrin:
@@ -75,14 +62,22 @@ define <2 x i64> @kb52l_128_mask26x26_add_intrin(<2 x i64> %x, <2 x i64> %y, <2
; AVX512VL-NEXT: vpmadd52luq %xmm1, %xmm0, %xmm2
; AVX512VL-NEXT: vmovdqa %xmm2, %xmm0
; AVX512VL-NEXT: retq
- %xm = and <2 x i64> %x, <i64 67108863, i64 67108863>
- %ym = and <2 x i64> %y, <i64 67108863, i64 67108863>
- %r = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %acc, <2 x i64> %xm, <2 x i64> %ym)
+;
+; AVXIFMA-LABEL: kb52l_128_mask26x26_add_intrin:
+; AVXIFMA: # %bb.0:
+; AVXIFMA-NEXT: vpbroadcastq {{.*#+}} xmm3 = [67108863,67108863]
+; AVXIFMA-NEXT: vpand %xmm3, %xmm0, %xmm0
+; AVXIFMA-NEXT: vpand %xmm3, %xmm1, %xmm1
+; AVXIFMA-NEXT: {vex} vpmadd52luq %xmm1, %xmm0, %xmm2
+; AVXIFMA-NEXT: vmovdqa %xmm2, %xmm0
+; AVXIFMA-NEXT: retq
+ %xm = and <2 x i64> %x, splat (i64 67108863) ; (1<<26)-1
+ %ym = and <2 x i64> %y, splat (i64 67108863) ; (1<<26)-1
+ %r = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(
+ <2 x i64> %acc, <2 x i64> %xm, <2 x i64> %ym)
ret <2 x i64> %r
}
-
-
define <4 x i64> @kb52l_256_mask50x3_add_intrin(<4 x i64> %x, <4 x i64> %y, <4 x i64> %acc) {
; AVX512VL-LABEL: kb52l_256_mask50x3_add_intrin:
; AVX512VL: # %bb.0:
@@ -91,48 +86,20 @@ define <4 x i64> @kb52l_256_mask50x3_add_intrin(<4 x i64> %x, <4 x i64> %y, <4 x
; AVX512VL-NEXT: vpmadd52luq %ymm1, %ymm0, %ymm2
; AVX512VL-NEXT: vmovdqa %ymm2, %ymm0
; AVX512VL-NEXT: retq
- %xm = and <4 x i64> %x, <i64 1125899906842623, i64 1125899906842623, i64 1125899906842623, i64 1125899906842623>
- %ym = and <4 x i64> %y, <i64 3, i64 3, i64 3, i64 3>
- %r = call <4 x i64> @llvm.x86.avx512.vpmadd52l.uq.256(<4 x i64> %acc, <4 x i64> %xm, <4 x i64> %ym)
+;
+; AVXIFMA-LABEL: kb52l_256_mask50x3_add_intrin:
+; AVXIFMA: # %bb.0:
+; AVXIFMA-NEXT: vpbroadcastq {{.*#+}} ymm3 = [1125899906842623,1125899906842623,1125899906842623,1125899906842623]
+; AVXIFMA-NEXT: vpand %ymm3, %ymm0, %ymm0
+; AVXIFMA-NEXT: vpbroadcastq {{.*#+}} ymm3 = [3,3,3,3]
+; AVXIFMA-NEXT: vpand %ymm3, %ymm1, %ymm1
+; AVXIFMA-NEXT: {vex} vpmadd52luq %ymm1, %ymm0, %ymm2
+; AVXIFMA-NEXT: vmovdqa %ymm2, %ymm0
+; AVXIFMA-NEXT: retq
+ %xm = and <4 x i64> %x, splat (i64 1125899906842623) ; (1<<50)-1
+ %ym = and <4 x i64> %y, splat (i64 3)
+ %r = call <4 x i64> @llvm.x86.avx512.vpmadd52l.uq.256(
+ <4 x i64> %acc, <4 x i64> %xm, <4 x i64> %ym)
ret <4 x i64> %r
}
-
-
-define <8 x i64> @kb52l_512_mask26x26_add_intrin(<8 x i64> %x, <8 x i64> %y, <8 x i64> %acc) {
-; AVX512-NOVL-LABEL: kb52l_512_mask26x26_add_intrin:
-; AVX512-NOVL: vpmadd52luq
-; AVX512-NOVL: retq
-; AVX512VL-LABEL: kb52l_512_mask26x26_add_intrin:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpbroadcastq {{.*#+}} zmm3 = [67108863,67108863,67108863,67108863,67108863,67108863,67108863,67108863]
-; AVX512VL-NEXT: vpandq %zmm3, %zmm0, %zmm0
-; AVX512VL-NEXT: vpandq %zmm3, %zmm1, %zmm1
-; AVX512VL-NEXT: vpmadd52luq %zmm1, %zmm0, %zmm2
-; AVX512VL-NEXT: vmovdqa64 %zmm2, %zmm0
-; AVX512VL-NEXT: retq
- %xm = and <8 x i64> %x, <i64 67108863, i64 67108863, i64 67108863, i64 67108863, i64 67108863, i64 67108863, i64 67108863, i64 67108863>
- %ym = and <8 x i64> %y, <i64 67108863, i64 67108863, i64 67108863, i64 67108863, i64 67108863, i64 67108863, i64 67108863, i64 67108863>
- %r = call <8 x i64> @llvm.x86.avx512.vpmadd52l.uq.512(<8 x i64> %acc, <8 x i64> %xm, <8 x i64> %ym)
- ret <8 x i64> %r
-}
-
-
-
-
-define <2 x i64> @kb52l_128_neg_27x27_plain(<2 x i64> %x, <2 x i64> %y, <2 x i64> %acc) {
-; AVX512VL-LABEL: kb52l_128_neg_27x27_plain:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpbroadcastq {{.*#+}} xmm3 = [67108864,67108864]
-; AVX512VL-NEXT: vpand %xmm3, %xmm0, %xmm0
-; AVX512VL-NEXT: vpand %xmm3, %xmm1, %xmm1
-; AVX512VL-NEXT: vpmuldq %xmm1, %xmm0, %xmm0
-; AVX512VL-NEXT: vpaddq %xmm2, %xmm0, %xmm0
-; AVX512VL-NEXT: retq
- %xm = and <2 x i64> %x, <i64 67108864, i64 67108864> ; 1<<26
- %ym = and <2 x i64> %y, <i64 67108864, i64 67108864>
- %mul = mul <2 x i64> %xm, %ym
- %res = add <2 x i64> %mul, %acc
- ret <2 x i64> %res
-}
-
>From efeb7402d3a899e2a420cdf8057408e331080834 Mon Sep 17 00:00:00 2001
From: william <we3223 at gmail.com>
Date: Mon, 22 Sep 2025 11:23:18 +0800
Subject: [PATCH 15/17] update test case: knownbits-vpmadd52.ll
---
llvm/test/CodeGen/X86/knownbits-vpmadd52.ll | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/llvm/test/CodeGen/X86/knownbits-vpmadd52.ll b/llvm/test/CodeGen/X86/knownbits-vpmadd52.ll
index b3f7fe205a958..0e322fec2c7d9 100644
--- a/llvm/test/CodeGen/X86/knownbits-vpmadd52.ll
+++ b/llvm/test/CodeGen/X86/knownbits-vpmadd52.ll
@@ -8,6 +8,7 @@
declare <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64>, <2 x i64>, <2 x i64>)
declare <4 x i64> @llvm.x86.avx512.vpmadd52h.uq.256(<4 x i64>, <4 x i64>, <4 x i64>)
+; High-52, 25x25 masked inputs, accumulator = 1; expected to constant fold.
define <2 x i64> @kb52h_128_mask25_and1(<2 x i64> %x, <2 x i64> %y) {
; AVX512VL-LABEL: kb52h_128_mask25_and1:
; AVX512VL: # %bb.0:
@@ -30,6 +31,7 @@ define <2 x i64> @kb52h_128_mask25_and1(<2 x i64> %x, <2 x i64> %y) {
ret <2 x i64> %ret
}
+; High-52, 25x26 masked inputs, accumulator = 1; expected to constant fold.
define <4 x i64> @kb52h_256_mask25x26_acc1(<4 x i64> %x, <4 x i64> %y) {
; AVX512VL-LABEL: kb52h_256_mask25x26_acc1:
; AVX512VL: # %bb.0:
@@ -53,6 +55,7 @@ define <4 x i64> @kb52h_256_mask25x26_acc1(<4 x i64> %x, <4 x i64> %y) {
declare <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64>, <2 x i64>, <2 x i64>)
declare <4 x i64> @llvm.x86.avx512.vpmadd52l.uq.256(<4 x i64>, <4 x i64>, <4 x i64>)
+; Low-52, 26x26 masked inputs, add with accumulator.
define <2 x i64> @kb52l_128_mask26x26_add_intrin(<2 x i64> %x, <2 x i64> %y, <2 x i64> %acc) {
; AVX512VL-LABEL: kb52l_128_mask26x26_add_intrin:
; AVX512VL: # %bb.0:
@@ -78,6 +81,7 @@ define <2 x i64> @kb52l_128_mask26x26_add_intrin(<2 x i64> %x, <2 x i64> %y, <2
ret <2 x i64> %r
}
+; Low-52, 50-bit × 2-bit masked inputs, add with accumulator.
define <4 x i64> @kb52l_256_mask50x3_add_intrin(<4 x i64> %x, <4 x i64> %y, <4 x i64> %acc) {
; AVX512VL-LABEL: kb52l_256_mask50x3_add_intrin:
; AVX512VL: # %bb.0:
@@ -97,7 +101,7 @@ define <4 x i64> @kb52l_256_mask50x3_add_intrin(<4 x i64> %x, <4 x i64> %y, <4 x
; AVXIFMA-NEXT: vmovdqa %ymm2, %ymm0
; AVXIFMA-NEXT: retq
%xm = and <4 x i64> %x, splat (i64 1125899906842623) ; (1<<50)-1
- %ym = and <4 x i64> %y, splat (i64 3)
+ %ym = and <4 x i64> %y, splat (i64 3) ; (1<<2)-1
%r = call <4 x i64> @llvm.x86.avx512.vpmadd52l.uq.256(
<4 x i64> %acc, <4 x i64> %xm, <4 x i64> %ym)
ret <4 x i64> %r
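
As a quick aside on why the two "High-52 ... expected to constant fold" tests above can fold to the accumulator splat: with the inputs masked to at most 25/26 bits, the full product needs at most 51 bits, so the slice VPMADD52H adds (bits 52..103 of the 104-bit intermediate product) is known zero and each lane is just the accumulator. A minimal scalar sketch of one lane, written as standalone C++ (the helper name is invented for this illustration and it assumes a compiler with __int128, e.g. clang or gcc); it is not code from the patch:

#include <cassert>
#include <cstdint>

// Scalar model of one 64-bit lane of VPMADD52H: add the high 52 bits of the
// 104-bit product of the low 52 bits of each operand to the accumulator.
static uint64_t vpmadd52h_lane(uint64_t acc, uint64_t a, uint64_t b) {
  const uint64_t M52 = (1ULL << 52) - 1;
  unsigned __int128 prod = (unsigned __int128)(a & M52) * (b & M52);
  return acc + (uint64_t)(prod >> 52);
}

int main() {
  const uint64_t M25 = (1ULL << 25) - 1, M26 = (1ULL << 26) - 1;
  const uint64_t xs[] = {0, 12345, M25};
  const uint64_t ys[] = {0, 777, M26};
  // 25-bit x 26-bit products stay below 2^51, so the high-52 slice is zero
  // and every lane result equals the accumulator (1 in the tests above).
  for (uint64_t x : xs)
    for (uint64_t y : ys)
      assert(vpmadd52h_lane(1, x, y) == 1);
  return 0;
}
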
>From 59cef1930b7987c2b5b8c9eafd8cafb6972b5e0c Mon Sep 17 00:00:00 2001
From: william <we3223 at gmail.com>
Date: Mon, 8 Dec 2025 16:20:28 +0800
Subject: [PATCH 16/17] Attempt to use VPMADD52L/VPMULUDQ instead of VPMULLQ on
slow VPMULLQ targets (or when VPMULLQ is unavailable)-1
---
llvm/lib/Target/X86/X86.td | 30 +++----
llvm/lib/Target/X86/X86ISelLowering.cpp | 35 ++++++++
llvm/test/CodeGen/X86/slow-pmullq.ll | 110 ++++++++++++++++++++++++
3 files changed, 160 insertions(+), 15 deletions(-)
create mode 100644 llvm/test/CodeGen/X86/slow-pmullq.ll
diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td
index 8f29a64d58194..136799292944d 100644
--- a/llvm/lib/Target/X86/X86.td
+++ b/llvm/lib/Target/X86/X86.td
@@ -485,6 +485,9 @@ def TuningSlowSHLD : SubtargetFeature<"slow-shld", "IsSHLDSlow", "true",
def TuningSlowPMULLD : SubtargetFeature<"slow-pmulld", "IsPMULLDSlow", "true",
"PMULLD instruction is slow (compared to PMULLW/PMULHW and PMULUDQ)">;
+def TuningSlowPMULLQ : SubtargetFeature<"slow-pmullq", "HasSlowPMULLQ", "true",
+ "PMULLQ instruction is slow">;
+
def TuningSlowPMADDWD : SubtargetFeature<"slow-pmaddwd", "IsPMADDWDSlow",
"true",
"PMADDWD is slower than PMULLD">;
@@ -1049,14 +1052,11 @@ def ProcessorFeatures {
FeatureVBMI,
FeatureIFMA,
FeatureSHA];
- list<SubtargetFeature> CNLTuning = [TuningFastGather,
- TuningMacroFusion,
- TuningSlow3OpsLEA,
- TuningSlowDivide64,
+ list<SubtargetFeature> CNLTuning = [TuningFastGather, TuningMacroFusion,
+ TuningSlow3OpsLEA, TuningSlowDivide64,
TuningFastScalarFSQRT,
TuningFastVectorFSQRT,
- TuningFastSHLDRotate,
- TuningFast15ByteNOP,
+ TuningFastSHLDRotate, TuningFast15ByteNOP,
TuningFastVariableCrossLaneShuffle,
TuningFastVariablePerLaneShuffle,
TuningPrefer256Bit,
@@ -1065,7 +1065,8 @@ def ProcessorFeatures {
TuningNoDomainDelayMov,
TuningNoDomainDelayShuffle,
TuningNoDomainDelayBlend,
- TuningFastImmVectorShift];
+ TuningFastImmVectorShift,
+ TuningSlowPMULLQ];
list<SubtargetFeature> CNLFeatures =
!listconcat(SKLFeatures, CNLAdditionalFeatures);
@@ -1079,13 +1080,10 @@ def ProcessorFeatures {
FeatureGFNI,
FeatureRDPID,
FeatureFSRM];
- list<SubtargetFeature> ICLTuning = [TuningFastGather,
- TuningMacroFusion,
- TuningSlowDivide64,
- TuningFastScalarFSQRT,
+ list<SubtargetFeature> ICLTuning = [TuningFastGather, TuningMacroFusion,
+ TuningSlowDivide64, TuningFastScalarFSQRT,
TuningFastVectorFSQRT,
- TuningFastSHLDRotate,
- TuningFast15ByteNOP,
+ TuningFastSHLDRotate, TuningFast15ByteNOP,
TuningFastVariableCrossLaneShuffle,
TuningFastVariablePerLaneShuffle,
TuningPrefer256Bit,
@@ -1094,7 +1092,8 @@ def ProcessorFeatures {
TuningNoDomainDelayMov,
TuningNoDomainDelayShuffle,
TuningNoDomainDelayBlend,
- TuningFastImmVectorShift];
+ TuningFastImmVectorShift,
+ TuningSlowPMULLQ];
list<SubtargetFeature> ICLFeatures =
!listconcat(CNLFeatures, ICLAdditionalFeatures);
@@ -1291,7 +1290,8 @@ def ProcessorFeatures {
FeatureWAITPKG];
list<SubtargetFeature> ADLAdditionalTuning = [TuningPERMFalseDeps,
TuningPreferMovmskOverVTest,
- TuningFastImmVectorShift];
+ TuningFastImmVectorShift,
+ TuningSlowPMULLQ];
list<SubtargetFeature> ADLRemoveTuning = [TuningPOPCNTFalseDeps];
list<SubtargetFeature> ADLTuning =
!listremove(!listconcat(SKLTuning, ADLAdditionalTuning), ADLRemoveTuning);
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index d0ae75b2e6785..87e66570c9fc3 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -49872,6 +49872,41 @@ static SDValue combineMul(SDNode *N, SelectionDAG &DAG,
if (SDValue V = combineMulToPMULDQ(N, DL, DAG, Subtarget))
return V;
+ // ==============================================================
+ // Optimize VPMULLQ on slow targets
+ // ==============================================================
+ if (VT.getScalarType() == MVT::i64 && Subtarget.hasSlowPMULLQ()) {
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+
+ KnownBits Known0 = DAG.computeKnownBits(Op0);
+ KnownBits Known1 = DAG.computeKnownBits(Op1);
+ unsigned Count0 = Known0.countMinLeadingZeros();
+ unsigned Count1 = Known1.countMinLeadingZeros();
+
+ // Optimization 1: Use VPMULUDQ (32-bit multiply).
+    // If the upper 32 bits of both operands are known zero, use PMULUDQ,
+    // which is generally the fastest option and is widely supported.
+ if (Count0 >= 32 && Count1 >= 32) {
+ return DAG.getNode(X86ISD::PMULUDQ, DL, VT, Op0, Op1);
+ }
+
+ // Optimization 2: Use VPMADD52L (52-bit multiply-add).
+    // On targets with slow VPMULLQ (e.g., Ice Lake), VPMADD52L is
+    // significantly faster (lower latency and better throughput).
+    // VPMADD52L computes C + (low 52 bits of A * B), so we can use it for a
+    // plain multiply by setting the accumulator (C) to zero, provided both
+    // operands fit within 52 bits (top 12 bits known zero).
+ if (Subtarget.hasAVX512() && Subtarget.hasIFMA()) {
+ if (Count0 >= 12 && Count1 >= 12) {
+ SDValue Zero = getZeroVector(VT.getSimpleVT(), Subtarget, DAG, DL);
+ return DAG.getNode(X86ISD::VPMADD52L, DL, VT, Op0, Op1, Zero);
+ }
+ }
+
+ // Fallback: If no optimization applies, LLVM will proceed to select
+ // the standard VPMULLQ instruction.
+ }
if (DCI.isBeforeLegalize() && VT.isVector())
return reduceVMULWidth(N, DL, DAG, Subtarget);
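
To make the two replacements above concrete, here is a small scalar model of the per-lane arithmetic they rely on. This is only an illustrative sketch in standalone C++ (helper names invented here, __int128 assumed available), not code from the patch: the first assertion mirrors the Count0/Count1 >= 32 case, and the second is the property the VPMADD52L-with-zero-accumulator path depends on.

#include <cassert>
#include <cstdint>

// One 64-bit lane of PMULUDQ: multiply the low 32 bits of each operand into
// a full 64-bit product.
static uint64_t pmuludq_lane(uint64_t a, uint64_t b) {
  return (a & 0xffffffffULL) * (b & 0xffffffffULL);
}

// One 64-bit lane of VPMADD52L: add the low 52 bits of the 104-bit product
// of the low 52 bits of each operand to the accumulator.
static uint64_t vpmadd52l_lane(uint64_t acc, uint64_t a, uint64_t b) {
  const uint64_t M52 = (1ULL << 52) - 1;
  unsigned __int128 prod = (unsigned __int128)(a & M52) * (b & M52);
  return acc + (uint64_t)(prod & M52);
}

int main() {
  // Both operands have their upper 32 bits clear: the plain 64-bit multiply
  // and PMULUDQ agree.
  uint64_t a = 0x12345678, b = 0x9abcdef0;
  assert(a * b == pmuludq_lane(a, b));

  // With a zero accumulator, VPMADD52L reproduces the multiply whenever the
  // full product stays within 52 bits.
  uint64_t c = 1ULL << 20, d = (1ULL << 30) + 3;
  assert(c * d == vpmadd52l_lane(0, c, d));
  return 0;
}
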
diff --git a/llvm/test/CodeGen/X86/slow-pmullq.ll b/llvm/test/CodeGen/X86/slow-pmullq.ll
new file mode 100644
index 0000000000000..8685139327131
--- /dev/null
+++ b/llvm/test/CodeGen/X86/slow-pmullq.ll
@@ -0,0 +1,110 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=cannonlake | FileCheck %s --check-prefix=CNL
+
+; ============================================================================
+; Case 1: 52-bit Optimization (vpmadd52luq)
+; ============================================================================
+
+define <8 x i64> @test_mul_52bit_fits(<8 x i64> %a, <8 x i64> %b) {
+; CNL-LABEL: test_mul_52bit_fits:
+; CNL: # %bb.0:
+; CNL-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CNL-NEXT: vpmadd52luq %zmm1, %zmm0, %zmm2
+; CNL-NEXT: vmovdqa64 %zmm2, %zmm0
+; CNL-NEXT: retq
+ %a_masked = and <8 x i64> %a,
+ <i64 4503599627370495,
+ i64 4503599627370495,
+ i64 4503599627370495,
+ i64 4503599627370495,
+ i64 4503599627370495,
+ i64 4503599627370495,
+ i64 4503599627370495,
+ i64 4503599627370495>
+
+ %b_masked = and <8 x i64> %b,
+ <i64 4503599627370495,
+ i64 4503599627370495,
+ i64 4503599627370495,
+ i64 4503599627370495,
+ i64 4503599627370495,
+ i64 4503599627370495,
+ i64 4503599627370495,
+ i64 4503599627370495>
+
+ %res = mul <8 x i64> %a_masked, %b_masked
+ ret <8 x i64> %res
+}
+
+; ============================================================================
+; Case 2: 32-bit Optimization (vpmuludq)
+; ============================================================================
+
+define <8 x i64> @test_mul_32bit_fits(<8 x i64> %a, <8 x i64> %b) {
+; CNL-LABEL: test_mul_32bit_fits:
+; CNL: # %bb.0:
+; CNL-NEXT: vpmuludq %zmm1, %zmm0, %zmm0
+; CNL-NEXT: retq
+ %a_masked = and <8 x i64> %a,
+ <i64 4294967295,
+ i64 4294967295,
+ i64 4294967295,
+ i64 4294967295,
+ i64 4294967295,
+ i64 4294967295,
+ i64 4294967295,
+ i64 4294967295>
+
+ %b_masked = and <8 x i64> %b,
+ <i64 4294967295,
+ i64 4294967295,
+ i64 4294967295,
+ i64 4294967295,
+ i64 4294967295,
+ i64 4294967295,
+ i64 4294967295,
+ i64 4294967295>
+
+ %res = mul <8 x i64> %a_masked, %b_masked
+ ret <8 x i64> %res
+}
+
+; ============================================================================
+; Case 3: No Optimization (Full 64-bit)
+; ============================================================================
+
+define <8 x i64> @test_mul_full_64bit(<8 x i64> %a, <8 x i64> %b) {
+; CNL-LABEL: test_mul_full_64bit:
+; CNL: # %bb.0:
+; CNL-NEXT: vpmullq %zmm1, %zmm0, %zmm0
+; CNL-NEXT: retq
+ %res = mul <8 x i64> %a, %b
+ ret <8 x i64> %res
+}
+
+; ============================================================================
+; Case 4: Vector Width Variety (Check 256-bit / YMM)
+; ============================================================================
+
+define <4 x i64> @test_mul_52bit_ymm(<4 x i64> %a, <4 x i64> %b) {
+; CNL-LABEL: test_mul_52bit_ymm:
+; CNL: # %bb.0:
+; CNL-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CNL-NEXT: vpmadd52luq %ymm1, %ymm0, %ymm2
+; CNL-NEXT: vmovdqa %ymm2, %ymm0
+; CNL-NEXT: retq
+ %a_masked = and <4 x i64> %a,
+ <i64 4503599627370495,
+ i64 4503599627370495,
+ i64 4503599627370495,
+ i64 4503599627370495>
+
+ %b_masked = and <4 x i64> %b,
+ <i64 4503599627370495,
+ i64 4503599627370495,
+ i64 4503599627370495,
+ i64 4503599627370495>
+
+ %res = mul <4 x i64> %a_masked, %b_masked
+ ret <4 x i64> %res
+}
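
As a counterpart to Case 3 above, a small scalar sketch of why unconstrained 64-bit operands have to stay on VPMULLQ: both narrower forms discard high product bits. Again standalone C++ with invented helper names (the same lane models as in the earlier sketch), not part of the patch:

#include <cassert>
#include <cstdint>

static uint64_t pmuludq_lane(uint64_t a, uint64_t b) {
  return (a & 0xffffffffULL) * (b & 0xffffffffULL);
}

static uint64_t vpmadd52l_lane(uint64_t acc, uint64_t a, uint64_t b) {
  const uint64_t M52 = (1ULL << 52) - 1;
  unsigned __int128 prod = (unsigned __int128)(a & M52) * (b & M52);
  return acc + (uint64_t)(prod & M52);
}

int main() {
  // A product that genuinely needs 64 bits: 2^33 * 2^30 = 2^63.
  uint64_t a = 1ULL << 33, b = 1ULL << 30;
  assert(a * b != pmuludq_lane(a, b));      // operand bits above 32 are dropped
  assert(a * b != vpmadd52l_lane(0, a, b)); // product bits above 52 are dropped
  return 0;
}
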
>From d4654173ddf50edef72eecd3504cf55a66b63bb0 Mon Sep 17 00:00:00 2001
From: william <we3223 at gmail.com>
Date: Thu, 11 Dec 2025 12:39:27 +0800
Subject: [PATCH 17/17] Attempt to use VPMADD52L/VPMULUDQ instead of VPMULLQ on
slow VPMULLQ targets (or when VPMULLQ is unavailable)-2
---
llvm/lib/Target/X86/X86ISelLowering.cpp | 5135 ++++++++++++-----------
1 file changed, 2599 insertions(+), 2536 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 87e66570c9fc3..9bff28696da19 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -189,10 +189,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// We don't accept any truncstore of integer registers.
setTruncStoreAction(MVT::i64, MVT::i32, Expand);
setTruncStoreAction(MVT::i64, MVT::i16, Expand);
- setTruncStoreAction(MVT::i64, MVT::i8 , Expand);
+ setTruncStoreAction(MVT::i64, MVT::i8, Expand);
setTruncStoreAction(MVT::i32, MVT::i16, Expand);
- setTruncStoreAction(MVT::i32, MVT::i8 , Expand);
- setTruncStoreAction(MVT::i16, MVT::i8, Expand);
+ setTruncStoreAction(MVT::i32, MVT::i8, Expand);
+ setTruncStoreAction(MVT::i16, MVT::i8, Expand);
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
@@ -204,106 +204,106 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// Integer absolute.
if (Subtarget.canUseCMOV()) {
- setOperationAction(ISD::ABS , MVT::i16 , Custom);
- setOperationAction(ISD::ABS , MVT::i32 , Custom);
+ setOperationAction(ISD::ABS, MVT::i16, Custom);
+ setOperationAction(ISD::ABS, MVT::i32, Custom);
if (Subtarget.is64Bit())
- setOperationAction(ISD::ABS , MVT::i64 , Custom);
+ setOperationAction(ISD::ABS, MVT::i64, Custom);
}
// Absolute difference.
for (auto Op : {ISD::ABDS, ISD::ABDU}) {
- setOperationAction(Op , MVT::i8 , Custom);
- setOperationAction(Op , MVT::i16 , Custom);
- setOperationAction(Op , MVT::i32 , Custom);
+ setOperationAction(Op, MVT::i8, Custom);
+ setOperationAction(Op, MVT::i16, Custom);
+ setOperationAction(Op, MVT::i32, Custom);
if (Subtarget.is64Bit())
- setOperationAction(Op , MVT::i64 , Custom);
+ setOperationAction(Op, MVT::i64, Custom);
}
// Signed saturation subtraction.
- setOperationAction(ISD::SSUBSAT , MVT::i8 , Custom);
- setOperationAction(ISD::SSUBSAT , MVT::i16 , Custom);
- setOperationAction(ISD::SSUBSAT , MVT::i32 , Custom);
+ setOperationAction(ISD::SSUBSAT, MVT::i8, Custom);
+ setOperationAction(ISD::SSUBSAT, MVT::i16, Custom);
+ setOperationAction(ISD::SSUBSAT, MVT::i32, Custom);
if (Subtarget.is64Bit())
- setOperationAction(ISD::SSUBSAT , MVT::i64 , Custom);
+ setOperationAction(ISD::SSUBSAT, MVT::i64, Custom);
// Funnel shifts.
for (auto ShiftOp : {ISD::FSHL, ISD::FSHR}) {
// For slow shld targets we only lower for code size.
LegalizeAction ShiftDoubleAction = Subtarget.isSHLDSlow() ? Custom : Legal;
- setOperationAction(ShiftOp , MVT::i8 , Custom);
- setOperationAction(ShiftOp , MVT::i16 , Custom);
- setOperationAction(ShiftOp , MVT::i32 , ShiftDoubleAction);
+ setOperationAction(ShiftOp, MVT::i8, Custom);
+ setOperationAction(ShiftOp, MVT::i16, Custom);
+ setOperationAction(ShiftOp, MVT::i32, ShiftDoubleAction);
if (Subtarget.is64Bit())
- setOperationAction(ShiftOp , MVT::i64 , ShiftDoubleAction);
+ setOperationAction(ShiftOp, MVT::i64, ShiftDoubleAction);
}
if (!Subtarget.useSoftFloat()) {
// Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
// operation.
- setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i8, Promote);
- setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i16, Promote);
// We have an algorithm for SSE2, and we turn this into a 64-bit
// FILD or VCVTUSI2SS/SD for other targets.
- setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Custom);
// We have an algorithm for SSE2->double, and we turn this into a
// 64-bit FILD followed by conditional FADD for other targets.
- setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Custom);
// Promote i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
// this operation.
- setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i8, Promote);
// SSE has no i16 to fp conversion, only i32. We promote in the handler
// to allow f80 to use i16 and f64 to use i16 with sse1 only
- setOperationAction(ISD::SINT_TO_FP, MVT::i16, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i16, Custom);
setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i16, Custom);
// f32 and f64 cases are Legal with SSE1/SSE2, f80 case is not
- setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Custom);
// In 32-bit mode these are custom lowered. In 64-bit mode F32 and F64
// are Legal, f80 is custom lowered.
- setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i64, Custom);
// Promote i8 FP_TO_SINT to larger FP_TO_SINTS's, as X86 doesn't have
// this operation.
- setOperationAction(ISD::FP_TO_SINT, MVT::i8, Promote);
+ setOperationAction(ISD::FP_TO_SINT, MVT::i8, Promote);
// FIXME: This doesn't generate invalid exception when it should. PR44019.
- setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i8, Promote);
- setOperationAction(ISD::FP_TO_SINT, MVT::i16, Custom);
+ setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i8, Promote);
+ setOperationAction(ISD::FP_TO_SINT, MVT::i16, Custom);
setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i16, Custom);
- setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
+ setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
// In 32-bit mode these are custom lowered. In 64-bit mode F32 and F64
// are Legal, f80 is custom lowered.
- setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
+ setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i64, Custom);
// Handle FP_TO_UINT by promoting the destination to a larger signed
// conversion.
- setOperationAction(ISD::FP_TO_UINT, MVT::i8, Promote);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i8, Promote);
// FIXME: This doesn't generate invalid exception when it should. PR44019.
- setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i8, Promote);
- setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote);
+ setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i8, Promote);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote);
// FIXME: This doesn't generate invalid exception when it should. PR44019.
setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i16, Promote);
- setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
- setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i64, Custom);
- setOperationAction(ISD::LRINT, MVT::f32, Custom);
- setOperationAction(ISD::LRINT, MVT::f64, Custom);
- setOperationAction(ISD::LLRINT, MVT::f32, Custom);
- setOperationAction(ISD::LLRINT, MVT::f64, Custom);
+ setOperationAction(ISD::LRINT, MVT::f32, Custom);
+ setOperationAction(ISD::LRINT, MVT::f64, Custom);
+ setOperationAction(ISD::LLRINT, MVT::f32, Custom);
+ setOperationAction(ISD::LLRINT, MVT::f64, Custom);
if (!Subtarget.is64Bit()) {
- setOperationAction(ISD::LRINT, MVT::i64, Custom);
+ setOperationAction(ISD::LRINT, MVT::i64, Custom);
setOperationAction(ISD::LLRINT, MVT::i64, Custom);
}
}
@@ -311,7 +311,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
if (Subtarget.hasSSE2()) {
// Custom lowering for saturating float to int conversions.
// We handle promotion to larger result types manually.
- for (MVT VT : { MVT::i8, MVT::i16, MVT::i32 }) {
+ for (MVT VT : {MVT::i8, MVT::i16, MVT::i32}) {
setOperationAction(ISD::FP_TO_UINT_SAT, VT, Custom);
setOperationAction(ISD::FP_TO_SINT_SAT, VT, Custom);
}
@@ -344,17 +344,17 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// TODO: when we have SSE, these could be more efficient, by using movd/movq.
if (!Subtarget.hasSSE2()) {
- setOperationAction(ISD::BITCAST , MVT::f32 , Expand);
- setOperationAction(ISD::BITCAST , MVT::i32 , Expand);
+ setOperationAction(ISD::BITCAST, MVT::f32, Expand);
+ setOperationAction(ISD::BITCAST, MVT::i32, Expand);
setOperationAction(ISD::FCANONICALIZE, MVT::f32, Custom);
setOperationAction(ISD::FCANONICALIZE, MVT::f64, Custom);
if (Subtarget.is64Bit()) {
- setOperationAction(ISD::BITCAST , MVT::f64 , Expand);
+ setOperationAction(ISD::BITCAST, MVT::f64, Expand);
// Without SSE, i64->f64 goes through memory.
- setOperationAction(ISD::BITCAST , MVT::i64 , Expand);
+ setOperationAction(ISD::BITCAST, MVT::i64, Expand);
}
} else if (!Subtarget.is64Bit())
- setOperationAction(ISD::BITCAST , MVT::i64 , Custom);
+ setOperationAction(ISD::BITCAST, MVT::i64, Custom);
// Scalar integer divide and remainder are lowered to use operations that
// produce two results, to match the available instructions. This exposes
@@ -366,7 +366,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// (low) operations are left as Legal, as there are single-result
// instructions for this in x86. Using the two-result multiply instructions
// when both high and low results are needed must be arranged by dagcombine.
- for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
+ for (auto VT : {MVT::i8, MVT::i16, MVT::i32, MVT::i64}) {
setOperationAction(ISD::MULHS, VT, Expand);
setOperationAction(ISD::MULHU, VT, Expand);
setOperationAction(ISD::SDIV, VT, Expand);
@@ -375,47 +375,47 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::UREM, VT, Expand);
}
- setOperationAction(ISD::BR_JT , MVT::Other, Expand);
- setOperationAction(ISD::BRCOND , MVT::Other, Custom);
- for (auto VT : { MVT::f32, MVT::f64, MVT::f80, MVT::f128,
- MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
- setOperationAction(ISD::BR_CC, VT, Expand);
+ setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+ setOperationAction(ISD::BRCOND, MVT::Other, Custom);
+ for (auto VT : {MVT::f32, MVT::f64, MVT::f80, MVT::f128, MVT::i8, MVT::i16,
+ MVT::i32, MVT::i64}) {
+ setOperationAction(ISD::BR_CC, VT, Expand);
setOperationAction(ISD::SELECT_CC, VT, Expand);
}
if (Subtarget.is64Bit())
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16 , Legal);
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Legal);
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Legal);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Legal);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
- setOperationAction(ISD::FREM , MVT::f32 , Expand);
- setOperationAction(ISD::FREM , MVT::f64 , Expand);
- setOperationAction(ISD::FREM , MVT::f80 , Expand);
- setOperationAction(ISD::FREM , MVT::f128 , Expand);
+ setOperationAction(ISD::FREM, MVT::f32, Expand);
+ setOperationAction(ISD::FREM, MVT::f64, Expand);
+ setOperationAction(ISD::FREM, MVT::f80, Expand);
+ setOperationAction(ISD::FREM, MVT::f128, Expand);
if (!Subtarget.useSoftFloat() && Subtarget.hasX87()) {
- setOperationAction(ISD::GET_ROUNDING , MVT::i32 , Custom);
- setOperationAction(ISD::SET_ROUNDING , MVT::Other, Custom);
- setOperationAction(ISD::GET_FPENV_MEM , MVT::Other, Custom);
- setOperationAction(ISD::SET_FPENV_MEM , MVT::Other, Custom);
- setOperationAction(ISD::RESET_FPENV , MVT::Other, Custom);
+ setOperationAction(ISD::GET_ROUNDING, MVT::i32, Custom);
+ setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom);
+ setOperationAction(ISD::GET_FPENV_MEM, MVT::Other, Custom);
+ setOperationAction(ISD::SET_FPENV_MEM, MVT::Other, Custom);
+ setOperationAction(ISD::RESET_FPENV, MVT::Other, Custom);
}
// Promote the i8 variants and force them on up to i32 which has a shorter
// encoding.
- setOperationPromotedToType(ISD::CTTZ , MVT::i8 , MVT::i32);
- setOperationPromotedToType(ISD::CTTZ_ZERO_UNDEF, MVT::i8 , MVT::i32);
+ setOperationPromotedToType(ISD::CTTZ, MVT::i8, MVT::i32);
+ setOperationPromotedToType(ISD::CTTZ_ZERO_UNDEF, MVT::i8, MVT::i32);
// Promoted i16. tzcntw has a false dependency on Intel CPUs. For BSF, we emit
// a REP prefix to encode it as TZCNT for modern CPUs so it makes sense to
// promote that too.
- setOperationPromotedToType(ISD::CTTZ , MVT::i16 , MVT::i32);
- setOperationPromotedToType(ISD::CTTZ_ZERO_UNDEF, MVT::i16 , MVT::i32);
+ setOperationPromotedToType(ISD::CTTZ, MVT::i16, MVT::i32);
+ setOperationPromotedToType(ISD::CTTZ_ZERO_UNDEF, MVT::i16, MVT::i32);
if (!Subtarget.hasBMI()) {
- setOperationAction(ISD::CTTZ , MVT::i32 , Custom);
- setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32 , Legal);
+ setOperationAction(ISD::CTTZ, MVT::i32, Custom);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Legal);
if (Subtarget.is64Bit()) {
- setOperationAction(ISD::CTTZ , MVT::i64 , Custom);
+ setOperationAction(ISD::CTTZ, MVT::i64, Custom);
setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Legal);
}
}
@@ -423,13 +423,13 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
if (Subtarget.hasLZCNT()) {
// When promoting the i8 variants, force them to i32 for a shorter
// encoding.
- setOperationPromotedToType(ISD::CTLZ , MVT::i8 , MVT::i32);
- setOperationPromotedToType(ISD::CTLZ_ZERO_UNDEF, MVT::i8 , MVT::i32);
+ setOperationPromotedToType(ISD::CTLZ, MVT::i8, MVT::i32);
+ setOperationPromotedToType(ISD::CTLZ_ZERO_UNDEF, MVT::i8, MVT::i32);
} else {
for (auto VT : {MVT::i8, MVT::i16, MVT::i32, MVT::i64}) {
if (VT == MVT::i64 && !Subtarget.is64Bit())
continue;
- setOperationAction(ISD::CTLZ , VT, Custom);
+ setOperationAction(ISD::CTLZ, VT, Custom);
setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Custom);
}
}
@@ -474,36 +474,36 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// on the dest that popcntl hasn't had since Cannon Lake.
setOperationPromotedToType(ISD::CTPOP, MVT::i16, MVT::i32);
} else {
- setOperationAction(ISD::CTPOP , MVT::i8 , Custom);
- setOperationAction(ISD::CTPOP , MVT::i16 , Custom);
- setOperationAction(ISD::CTPOP , MVT::i32 , Custom);
- setOperationAction(ISD::CTPOP , MVT::i64 , Custom);
+ setOperationAction(ISD::CTPOP, MVT::i8, Custom);
+ setOperationAction(ISD::CTPOP, MVT::i16, Custom);
+ setOperationAction(ISD::CTPOP, MVT::i32, Custom);
+ setOperationAction(ISD::CTPOP, MVT::i64, Custom);
}
- setOperationAction(ISD::READCYCLECOUNTER , MVT::i64 , Custom);
+ setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom);
if (!Subtarget.hasMOVBE())
- setOperationAction(ISD::BSWAP , MVT::i16 , Expand);
+ setOperationAction(ISD::BSWAP, MVT::i16, Expand);
// X86 wants to expand cmov itself.
- for (auto VT : { MVT::f32, MVT::f64, MVT::f80, MVT::f128 }) {
+ for (auto VT : {MVT::f32, MVT::f64, MVT::f80, MVT::f128}) {
setOperationAction(ISD::SELECT, VT, Custom);
setOperationAction(ISD::SETCC, VT, Custom);
setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
}
- for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
+ for (auto VT : {MVT::i8, MVT::i16, MVT::i32, MVT::i64}) {
if (VT == MVT::i64 && !Subtarget.is64Bit())
continue;
setOperationAction(ISD::SELECT, VT, Custom);
- setOperationAction(ISD::SETCC, VT, Custom);
+ setOperationAction(ISD::SETCC, VT, Custom);
}
// Custom action for SELECT MMX and expand action for SELECT_CC MMX
setOperationAction(ISD::SELECT, MVT::x86mmx, Custom);
setOperationAction(ISD::SELECT_CC, MVT::x86mmx, Expand);
- setOperationAction(ISD::EH_RETURN , MVT::Other, Custom);
+ setOperationAction(ISD::EH_RETURN, MVT::Other, Custom);
// NOTE: EH_SJLJ_SETJMP/_LONGJMP are not recommended, since
// LLVM/Clang supports zero-cost DWARF and SEH exception handling.
setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
@@ -511,19 +511,19 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::EH_SJLJ_SETUP_DISPATCH, MVT::Other, Custom);
// Darwin ABI issue.
- for (auto VT : { MVT::i32, MVT::i64 }) {
+ for (auto VT : {MVT::i32, MVT::i64}) {
if (VT == MVT::i64 && !Subtarget.is64Bit())
continue;
- setOperationAction(ISD::ConstantPool , VT, Custom);
- setOperationAction(ISD::JumpTable , VT, Custom);
- setOperationAction(ISD::GlobalAddress , VT, Custom);
+ setOperationAction(ISD::ConstantPool, VT, Custom);
+ setOperationAction(ISD::JumpTable, VT, Custom);
+ setOperationAction(ISD::GlobalAddress, VT, Custom);
setOperationAction(ISD::GlobalTLSAddress, VT, Custom);
- setOperationAction(ISD::ExternalSymbol , VT, Custom);
- setOperationAction(ISD::BlockAddress , VT, Custom);
+ setOperationAction(ISD::ExternalSymbol, VT, Custom);
+ setOperationAction(ISD::BlockAddress, VT, Custom);
}
// 64-bit shl, sra, srl (iff 32-bit x86)
- for (auto VT : { MVT::i32, MVT::i64 }) {
+ for (auto VT : {MVT::i32, MVT::i64}) {
if (VT == MVT::i64 && !Subtarget.is64Bit())
continue;
setOperationAction(ISD::SHL_PARTS, VT, Custom);
@@ -532,12 +532,12 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
}
if (Subtarget.hasSSEPrefetch())
- setOperationAction(ISD::PREFETCH , MVT::Other, Custom);
+ setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
- setOperationAction(ISD::ATOMIC_FENCE , MVT::Other, Custom);
+ setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
// Expand certain atomics
- for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
+ for (auto VT : {MVT::i8, MVT::i16, MVT::i32, MVT::i64}) {
setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Custom);
setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom);
setOperationAction(ISD::ATOMIC_LOAD_ADD, VT, Custom);
@@ -581,14 +581,14 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::UBSANTRAP, MVT::Other, Legal);
// VASTART needs to be custom lowered to use the VarArgsFrameIndex
- setOperationAction(ISD::VASTART , MVT::Other, Custom);
- setOperationAction(ISD::VAEND , MVT::Other, Expand);
+ setOperationAction(ISD::VASTART, MVT::Other, Custom);
+ setOperationAction(ISD::VAEND, MVT::Other, Expand);
bool Is64Bit = Subtarget.is64Bit();
- setOperationAction(ISD::VAARG, MVT::Other, Is64Bit ? Custom : Expand);
+ setOperationAction(ISD::VAARG, MVT::Other, Is64Bit ? Custom : Expand);
setOperationAction(ISD::VACOPY, MVT::Other, Is64Bit ? Custom : Expand);
- setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
- setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
+ setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
+ setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
setOperationAction(ISD::DYNAMIC_STACKALLOC, PtrVT, Custom);
@@ -598,7 +598,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Legal);
- auto setF16Action = [&] (MVT VT, LegalizeAction Action) {
+ auto setF16Action = [&](MVT VT, LegalizeAction Action) {
setOperationAction(ISD::FABS, VT, Action);
setOperationAction(ISD::FNEG, VT, Action);
setOperationAction(ISD::FCOPYSIGN, VT, Expand);
@@ -654,7 +654,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// non-optsize case.
setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
- for (auto VT : { MVT::f32, MVT::f64 }) {
+ for (auto VT : {MVT::f32, MVT::f64}) {
// Use ANDPD to simulate FABS.
setOperationAction(ISD::FABS, VT, Custom);
@@ -669,8 +669,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FSUB, VT, Custom);
// We don't support sin/cos/fmod
- setOperationAction(ISD::FSIN , VT, Expand);
- setOperationAction(ISD::FCOS , VT, Expand);
+ setOperationAction(ISD::FSIN, VT, Expand);
+ setOperationAction(ISD::FCOS, VT, Expand);
setOperationAction(ISD::FSINCOS, VT, Expand);
}
@@ -733,10 +733,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
addRegisterClass(MVT::f64, &X86::RFP64RegClass);
// Use ANDPS to simulate FABS.
- setOperationAction(ISD::FABS , MVT::f32, Custom);
+ setOperationAction(ISD::FABS, MVT::f32, Custom);
// Use XORP to simulate FNEG.
- setOperationAction(ISD::FNEG , MVT::f32, Custom);
+ setOperationAction(ISD::FNEG, MVT::f32, Custom);
if (UseX87)
setOperationAction(ISD::UNDEF, MVT::f64, Expand);
@@ -747,8 +747,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
// We don't support sin/cos/fmod
- setOperationAction(ISD::FSIN , MVT::f32, Expand);
- setOperationAction(ISD::FCOS , MVT::f32, Expand);
+ setOperationAction(ISD::FSIN, MVT::f32, Expand);
+ setOperationAction(ISD::FCOS, MVT::f32, Expand);
setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
if (UseX87) {
@@ -763,13 +763,13 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
addRegisterClass(MVT::f64, &X86::RFP64RegClass);
addRegisterClass(MVT::f32, &X86::RFP32RegClass);
- for (auto VT : { MVT::f32, MVT::f64 }) {
- setOperationAction(ISD::UNDEF, VT, Expand);
+ for (auto VT : {MVT::f32, MVT::f64}) {
+ setOperationAction(ISD::UNDEF, VT, Expand);
setOperationAction(ISD::FCOPYSIGN, VT, Expand);
// Always expand sin/cos functions even though x87 has an instruction.
- setOperationAction(ISD::FSIN , VT, Expand);
- setOperationAction(ISD::FCOS , VT, Expand);
+ setOperationAction(ISD::FSIN, VT, Expand);
+ setOperationAction(ISD::FCOS, VT, Expand);
setOperationAction(ISD::FSINCOS, VT, Expand);
}
}
@@ -781,7 +781,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
addLegalFPImmediate(APFloat(+1.0f)); // FLD1
addLegalFPImmediate(APFloat(-0.0f)); // FLD0/FCHS
addLegalFPImmediate(APFloat(-1.0f)); // FLD1/FCHS
- } else // SSE immediates.
+ } else // SSE immediates.
addLegalFPImmediate(APFloat(+0.0f)); // xorps
}
// Expand FP64 immediates into loads from the stack, save special cases.
@@ -791,7 +791,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
addLegalFPImmediate(APFloat(+1.0)); // FLD1
addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
- } else // SSE immediates.
+ } else // SSE immediates.
addLegalFPImmediate(APFloat(+0.0)); // xorpd
}
// Support fp16 0 immediate.
@@ -799,18 +799,18 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
addLegalFPImmediate(APFloat::getZero(APFloat::IEEEhalf()));
// Handle constrained floating-point operations of scalar.
- setOperationAction(ISD::STRICT_FADD, MVT::f32, Legal);
- setOperationAction(ISD::STRICT_FADD, MVT::f64, Legal);
- setOperationAction(ISD::STRICT_FSUB, MVT::f32, Legal);
- setOperationAction(ISD::STRICT_FSUB, MVT::f64, Legal);
- setOperationAction(ISD::STRICT_FMUL, MVT::f32, Legal);
- setOperationAction(ISD::STRICT_FMUL, MVT::f64, Legal);
- setOperationAction(ISD::STRICT_FDIV, MVT::f32, Legal);
- setOperationAction(ISD::STRICT_FDIV, MVT::f64, Legal);
- setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal);
- setOperationAction(ISD::STRICT_FP_ROUND, MVT::f64, Legal);
- setOperationAction(ISD::STRICT_FSQRT, MVT::f32, Legal);
- setOperationAction(ISD::STRICT_FSQRT, MVT::f64, Legal);
+ setOperationAction(ISD::STRICT_FADD, MVT::f32, Legal);
+ setOperationAction(ISD::STRICT_FADD, MVT::f64, Legal);
+ setOperationAction(ISD::STRICT_FSUB, MVT::f32, Legal);
+ setOperationAction(ISD::STRICT_FSUB, MVT::f64, Legal);
+ setOperationAction(ISD::STRICT_FMUL, MVT::f32, Legal);
+ setOperationAction(ISD::STRICT_FMUL, MVT::f64, Legal);
+ setOperationAction(ISD::STRICT_FDIV, MVT::f32, Legal);
+ setOperationAction(ISD::STRICT_FDIV, MVT::f64, Legal);
+ setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal);
+ setOperationAction(ISD::STRICT_FP_ROUND, MVT::f64, Legal);
+ setOperationAction(ISD::STRICT_FSQRT, MVT::f32, Legal);
+ setOperationAction(ISD::STRICT_FSQRT, MVT::f64, Legal);
// We don't support FMA.
setOperationAction(ISD::FMA, MVT::f64, Expand);
@@ -819,21 +819,21 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// f80 always uses X87.
if (UseX87) {
addRegisterClass(MVT::f80, &X86::RFP80RegClass);
- setOperationAction(ISD::UNDEF, MVT::f80, Expand);
+ setOperationAction(ISD::UNDEF, MVT::f80, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand);
{
APFloat TmpFlt = APFloat::getZero(APFloat::x87DoubleExtended());
- addLegalFPImmediate(TmpFlt); // FLD0
+ addLegalFPImmediate(TmpFlt); // FLD0
TmpFlt.changeSign();
- addLegalFPImmediate(TmpFlt); // FLD0/FCHS
+ addLegalFPImmediate(TmpFlt); // FLD0/FCHS
bool ignored;
APFloat TmpFlt2(+1.0);
- TmpFlt2.convert(APFloat::x87DoubleExtended(), APFloat::rmNearestTiesToEven,
- &ignored);
- addLegalFPImmediate(TmpFlt2); // FLD1
+ TmpFlt2.convert(APFloat::x87DoubleExtended(),
+ APFloat::rmNearestTiesToEven, &ignored);
+ addLegalFPImmediate(TmpFlt2); // FLD1
TmpFlt2.changeSign();
- addLegalFPImmediate(TmpFlt2); // FLD1/FCHS
+ addLegalFPImmediate(TmpFlt2); // FLD1/FCHS
}
// Always expand sin/cos functions even though x87 has an instruction.
@@ -852,9 +852,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// clang-format on
setOperationAction(ISD::FFLOOR, MVT::f80, Expand);
- setOperationAction(ISD::FCEIL, MVT::f80, Expand);
+ setOperationAction(ISD::FCEIL, MVT::f80, Expand);
setOperationAction(ISD::FTRUNC, MVT::f80, Expand);
- setOperationAction(ISD::FRINT, MVT::f80, Expand);
+ setOperationAction(ISD::FRINT, MVT::f80, Expand);
setOperationAction(ISD::FNEARBYINT, MVT::f80, Expand);
setOperationAction(ISD::FROUNDEVEN, MVT::f80, Expand);
setOperationAction(ISD::FMA, MVT::f80, Expand);
@@ -864,12 +864,12 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::LLRINT, MVT::f80, Custom);
// Handle constrained floating-point operations of scalar.
- setOperationAction(ISD::STRICT_FADD , MVT::f80, Legal);
- setOperationAction(ISD::STRICT_FSUB , MVT::f80, Legal);
- setOperationAction(ISD::STRICT_FMUL , MVT::f80, Legal);
- setOperationAction(ISD::STRICT_FDIV , MVT::f80, Legal);
- setOperationAction(ISD::STRICT_FSQRT , MVT::f80, Legal);
- setOperationAction(ISD::FCANONICALIZE , MVT::f80, Custom);
+ setOperationAction(ISD::STRICT_FADD, MVT::f80, Legal);
+ setOperationAction(ISD::STRICT_FSUB, MVT::f80, Legal);
+ setOperationAction(ISD::STRICT_FMUL, MVT::f80, Legal);
+ setOperationAction(ISD::STRICT_FDIV, MVT::f80, Legal);
+ setOperationAction(ISD::STRICT_FSQRT, MVT::f80, Legal);
+ setOperationAction(ISD::FCANONICALIZE, MVT::f80, Custom);
if (isTypeLegal(MVT::f16)) {
setOperationAction(ISD::FP_EXTEND, MVT::f80, Custom);
setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f80, Custom);
@@ -888,16 +888,16 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
addLegalFPImmediate(APFloat::getZero(APFloat::IEEEquad())); // xorps
- setOperationAction(ISD::FADD, MVT::f128, LibCall);
+ setOperationAction(ISD::FADD, MVT::f128, LibCall);
setOperationAction(ISD::STRICT_FADD, MVT::f128, LibCall);
- setOperationAction(ISD::FSUB, MVT::f128, LibCall);
+ setOperationAction(ISD::FSUB, MVT::f128, LibCall);
setOperationAction(ISD::STRICT_FSUB, MVT::f128, LibCall);
- setOperationAction(ISD::FDIV, MVT::f128, LibCall);
+ setOperationAction(ISD::FDIV, MVT::f128, LibCall);
setOperationAction(ISD::STRICT_FDIV, MVT::f128, LibCall);
- setOperationAction(ISD::FMUL, MVT::f128, LibCall);
+ setOperationAction(ISD::FMUL, MVT::f128, LibCall);
setOperationAction(ISD::STRICT_FMUL, MVT::f128, LibCall);
- setOperationAction(ISD::FMA, MVT::f128, LibCall);
- setOperationAction(ISD::STRICT_FMA, MVT::f128, LibCall);
+ setOperationAction(ISD::FMA, MVT::f128, LibCall);
+ setOperationAction(ISD::STRICT_FMA, MVT::f128, LibCall);
setOperationAction(ISD::FABS, MVT::f128, Custom);
setOperationAction(ISD::FNEG, MVT::f128, Custom);
@@ -913,10 +913,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::STRICT_FTAN, MVT::f128, LibCall);
// clang-format on
// No STRICT_FSINCOS
- setOperationAction(ISD::FSQRT, MVT::f128, LibCall);
+ setOperationAction(ISD::FSQRT, MVT::f128, LibCall);
setOperationAction(ISD::STRICT_FSQRT, MVT::f128, LibCall);
- setOperationAction(ISD::FP_EXTEND, MVT::f128, Custom);
+ setOperationAction(ISD::FP_EXTEND, MVT::f128, Custom);
setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f128, Custom);
// We need to custom handle any FP_ROUND with an f128 input, but
// LegalizeDAG uses the result type to know when to run a custom handler.
@@ -946,10 +946,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
}
// Always use a library call for pow.
- setOperationAction(ISD::FPOW , MVT::f32 , Expand);
- setOperationAction(ISD::FPOW , MVT::f64 , Expand);
- setOperationAction(ISD::FPOW , MVT::f80 , Expand);
- setOperationAction(ISD::FPOW , MVT::f128 , Expand);
+ setOperationAction(ISD::FPOW, MVT::f32, Expand);
+ setOperationAction(ISD::FPOW, MVT::f64, Expand);
+ setOperationAction(ISD::FPOW, MVT::f80, Expand);
+ setOperationAction(ISD::FPOW, MVT::f128, Expand);
setOperationAction(ISD::FLOG, MVT::f80, Expand);
setOperationAction(ISD::FLOG2, MVT::f80, Expand);
@@ -961,9 +961,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FMAXNUM, MVT::f80, Expand);
// Some FP actions are always expanded for vector types.
- for (auto VT : { MVT::v8f16, MVT::v16f16, MVT::v32f16,
- MVT::v4f32, MVT::v8f32, MVT::v16f32,
- MVT::v2f64, MVT::v4f64, MVT::v8f64 }) {
+ for (auto VT : {MVT::v8f16, MVT::v16f16, MVT::v32f16, MVT::v4f32, MVT::v8f32,
+ MVT::v16f32, MVT::v2f64, MVT::v4f64, MVT::v8f64}) {
// clang-format off
setOperationAction(ISD::FSIN, VT, Expand);
setOperationAction(ISD::FSINCOS, VT, Expand);
@@ -989,11 +988,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::UDIV, VT, Expand);
setOperationAction(ISD::SREM, VT, Expand);
setOperationAction(ISD::UREM, VT, Expand);
- setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT,Expand);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand);
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
- setOperationAction(ISD::EXTRACT_SUBVECTOR, VT,Expand);
- setOperationAction(ISD::INSERT_SUBVECTOR, VT,Expand);
- setOperationAction(ISD::FMA, VT, Expand);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Expand);
+ setOperationAction(ISD::INSERT_SUBVECTOR, VT, Expand);
+ setOperationAction(ISD::FMA, VT, Expand);
setOperationAction(ISD::FFLOOR, VT, Expand);
setOperationAction(ISD::FCEIL, VT, Expand);
setOperationAction(ISD::FTRUNC, VT, Expand);
@@ -1017,7 +1016,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FP_TO_SINT, VT, Expand);
setOperationAction(ISD::UINT_TO_FP, VT, Expand);
setOperationAction(ISD::SINT_TO_FP, VT, Expand);
- setOperationAction(ISD::SIGN_EXTEND_INREG, VT,Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
setOperationAction(ISD::TRUNCATE, VT, Expand);
setOperationAction(ISD::SIGN_EXTEND, VT, Expand);
setOperationAction(ISD::ZERO_EXTEND, VT, Expand);
@@ -1055,30 +1054,30 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
addRegisterClass(MVT::v4f32, Subtarget.hasVLX() ? &X86::VR128XRegClass
: &X86::VR128RegClass);
- setOperationAction(ISD::FMAXIMUM, MVT::f32, Custom);
- setOperationAction(ISD::FMINIMUM, MVT::f32, Custom);
- setOperationAction(ISD::FMAXIMUMNUM, MVT::f32, Custom);
- setOperationAction(ISD::FMINIMUMNUM, MVT::f32, Custom);
-
- setOperationAction(ISD::FNEG, MVT::v4f32, Custom);
- setOperationAction(ISD::FABS, MVT::v4f32, Custom);
- setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Custom);
- setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
- setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom);
- setOperationAction(ISD::VSELECT, MVT::v4f32, Custom);
+ setOperationAction(ISD::FMAXIMUM, MVT::f32, Custom);
+ setOperationAction(ISD::FMINIMUM, MVT::f32, Custom);
+ setOperationAction(ISD::FMAXIMUMNUM, MVT::f32, Custom);
+ setOperationAction(ISD::FMINIMUMNUM, MVT::f32, Custom);
+
+ setOperationAction(ISD::FNEG, MVT::v4f32, Custom);
+ setOperationAction(ISD::FABS, MVT::v4f32, Custom);
+ setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Custom);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom);
+ setOperationAction(ISD::VSELECT, MVT::v4f32, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
- setOperationAction(ISD::SELECT, MVT::v4f32, Custom);
+ setOperationAction(ISD::SELECT, MVT::v4f32, Custom);
setOperationAction(ISD::FCANONICALIZE, MVT::v4f32, Custom);
- setOperationAction(ISD::LOAD, MVT::v2f32, Custom);
- setOperationAction(ISD::STORE, MVT::v2f32, Custom);
+ setOperationAction(ISD::LOAD, MVT::v2f32, Custom);
+ setOperationAction(ISD::STORE, MVT::v2f32, Custom);
setOperationAction(ISD::FCANONICALIZE, MVT::v2f32, Custom);
- setOperationAction(ISD::STRICT_FADD, MVT::v4f32, Legal);
- setOperationAction(ISD::STRICT_FSUB, MVT::v4f32, Legal);
- setOperationAction(ISD::STRICT_FMUL, MVT::v4f32, Legal);
- setOperationAction(ISD::STRICT_FDIV, MVT::v4f32, Legal);
- setOperationAction(ISD::STRICT_FSQRT, MVT::v4f32, Legal);
+ setOperationAction(ISD::STRICT_FADD, MVT::v4f32, Legal);
+ setOperationAction(ISD::STRICT_FSUB, MVT::v4f32, Legal);
+ setOperationAction(ISD::STRICT_FMUL, MVT::v4f32, Legal);
+ setOperationAction(ISD::STRICT_FDIV, MVT::v4f32, Legal);
+ setOperationAction(ISD::STRICT_FSQRT, MVT::v4f32, Legal);
}
if (!Subtarget.useSoftFloat() && Subtarget.hasSSE2()) {
@@ -1098,74 +1097,74 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
addRegisterClass(MVT::v2i64, Subtarget.hasVLX() ? &X86::VR128XRegClass
: &X86::VR128RegClass);
- for (auto VT : { MVT::f64, MVT::v4f32, MVT::v2f64 }) {
+ for (auto VT : {MVT::f64, MVT::v4f32, MVT::v2f64}) {
setOperationAction(ISD::FMAXIMUM, VT, Custom);
setOperationAction(ISD::FMINIMUM, VT, Custom);
setOperationAction(ISD::FMAXIMUMNUM, VT, Custom);
setOperationAction(ISD::FMINIMUMNUM, VT, Custom);
}
- for (auto VT : { MVT::v2i8, MVT::v4i8, MVT::v8i8,
- MVT::v2i16, MVT::v4i16, MVT::v2i32 }) {
+ for (auto VT : {MVT::v2i8, MVT::v4i8, MVT::v8i8, MVT::v2i16, MVT::v4i16,
+ MVT::v2i32}) {
setOperationAction(ISD::SDIV, VT, Custom);
setOperationAction(ISD::SREM, VT, Custom);
setOperationAction(ISD::UDIV, VT, Custom);
setOperationAction(ISD::UREM, VT, Custom);
}
- setOperationAction(ISD::MUL, MVT::v2i8, Custom);
- setOperationAction(ISD::MUL, MVT::v4i8, Custom);
- setOperationAction(ISD::MUL, MVT::v8i8, Custom);
-
- setOperationAction(ISD::MUL, MVT::v16i8, Custom);
- setOperationAction(ISD::MUL, MVT::v4i32, Custom);
- setOperationAction(ISD::MUL, MVT::v2i64, Custom);
- setOperationAction(ISD::MULHU, MVT::v4i32, Custom);
- setOperationAction(ISD::MULHS, MVT::v4i32, Custom);
- setOperationAction(ISD::MULHU, MVT::v16i8, Custom);
- setOperationAction(ISD::MULHS, MVT::v16i8, Custom);
- setOperationAction(ISD::MULHU, MVT::v8i16, Legal);
- setOperationAction(ISD::MULHS, MVT::v8i16, Legal);
- setOperationAction(ISD::MUL, MVT::v8i16, Legal);
- setOperationAction(ISD::AVGCEILU, MVT::v16i8, Legal);
- setOperationAction(ISD::AVGCEILU, MVT::v8i16, Legal);
-
- setOperationAction(ISD::SMULO, MVT::v16i8, Custom);
- setOperationAction(ISD::UMULO, MVT::v16i8, Custom);
- setOperationAction(ISD::UMULO, MVT::v2i32, Custom);
-
- setOperationAction(ISD::FNEG, MVT::v2f64, Custom);
+ setOperationAction(ISD::MUL, MVT::v2i8, Custom);
+ setOperationAction(ISD::MUL, MVT::v4i8, Custom);
+ setOperationAction(ISD::MUL, MVT::v8i8, Custom);
+
+ setOperationAction(ISD::MUL, MVT::v16i8, Custom);
+ setOperationAction(ISD::MUL, MVT::v4i32, Custom);
+ setOperationAction(ISD::MUL, MVT::v2i64, Custom);
+ setOperationAction(ISD::MULHU, MVT::v4i32, Custom);
+ setOperationAction(ISD::MULHS, MVT::v4i32, Custom);
+ setOperationAction(ISD::MULHU, MVT::v16i8, Custom);
+ setOperationAction(ISD::MULHS, MVT::v16i8, Custom);
+ setOperationAction(ISD::MULHU, MVT::v8i16, Legal);
+ setOperationAction(ISD::MULHS, MVT::v8i16, Legal);
+ setOperationAction(ISD::MUL, MVT::v8i16, Legal);
+ setOperationAction(ISD::AVGCEILU, MVT::v16i8, Legal);
+ setOperationAction(ISD::AVGCEILU, MVT::v8i16, Legal);
+
+ setOperationAction(ISD::SMULO, MVT::v16i8, Custom);
+ setOperationAction(ISD::UMULO, MVT::v16i8, Custom);
+ setOperationAction(ISD::UMULO, MVT::v2i32, Custom);
+
+ setOperationAction(ISD::FNEG, MVT::v2f64, Custom);
setOperationAction(ISD::FCANONICALIZE, MVT::v2f64, Custom);
- setOperationAction(ISD::FABS, MVT::v2f64, Custom);
- setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Custom);
+ setOperationAction(ISD::FABS, MVT::v2f64, Custom);
+ setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Custom);
setOperationAction(ISD::LRINT, MVT::v4f32, Custom);
setOperationAction(ISD::LRINT, MVT::v2i32, Custom);
- for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
+ for (auto VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) {
setOperationAction(ISD::SMAX, VT, VT == MVT::v8i16 ? Legal : Custom);
setOperationAction(ISD::SMIN, VT, VT == MVT::v8i16 ? Legal : Custom);
setOperationAction(ISD::UMAX, VT, VT == MVT::v16i8 ? Legal : Custom);
setOperationAction(ISD::UMIN, VT, VT == MVT::v16i8 ? Legal : Custom);
}
- setOperationAction(ISD::UADDSAT, MVT::v16i8, Legal);
- setOperationAction(ISD::SADDSAT, MVT::v16i8, Legal);
- setOperationAction(ISD::USUBSAT, MVT::v16i8, Legal);
- setOperationAction(ISD::SSUBSAT, MVT::v16i8, Legal);
- setOperationAction(ISD::UADDSAT, MVT::v8i16, Legal);
- setOperationAction(ISD::SADDSAT, MVT::v8i16, Legal);
- setOperationAction(ISD::USUBSAT, MVT::v8i16, Legal);
- setOperationAction(ISD::SSUBSAT, MVT::v8i16, Legal);
- setOperationAction(ISD::USUBSAT, MVT::v4i32, Custom);
- setOperationAction(ISD::USUBSAT, MVT::v2i64, Custom);
+ setOperationAction(ISD::UADDSAT, MVT::v16i8, Legal);
+ setOperationAction(ISD::SADDSAT, MVT::v16i8, Legal);
+ setOperationAction(ISD::USUBSAT, MVT::v16i8, Legal);
+ setOperationAction(ISD::SSUBSAT, MVT::v16i8, Legal);
+ setOperationAction(ISD::UADDSAT, MVT::v8i16, Legal);
+ setOperationAction(ISD::SADDSAT, MVT::v8i16, Legal);
+ setOperationAction(ISD::USUBSAT, MVT::v8i16, Legal);
+ setOperationAction(ISD::SSUBSAT, MVT::v8i16, Legal);
+ setOperationAction(ISD::USUBSAT, MVT::v4i32, Custom);
+ setOperationAction(ISD::USUBSAT, MVT::v2i64, Custom);
- setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom);
- setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
- setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
- setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
- for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
+ for (auto VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) {
setOperationAction(ISD::SETCC, VT, Custom);
setOperationAction(ISD::CTPOP, VT, Custom);
setOperationAction(ISD::ABS, VT, Custom);
@@ -1178,30 +1177,30 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setCondCodeAction(ISD::SETLE, VT, Custom);
}
- setOperationAction(ISD::SETCC, MVT::v2f64, Custom);
- setOperationAction(ISD::SETCC, MVT::v4f32, Custom);
- setOperationAction(ISD::STRICT_FSETCC, MVT::v2f64, Custom);
- setOperationAction(ISD::STRICT_FSETCC, MVT::v4f32, Custom);
+ setOperationAction(ISD::SETCC, MVT::v2f64, Custom);
+ setOperationAction(ISD::SETCC, MVT::v4f32, Custom);
+ setOperationAction(ISD::STRICT_FSETCC, MVT::v2f64, Custom);
+ setOperationAction(ISD::STRICT_FSETCC, MVT::v4f32, Custom);
setOperationAction(ISD::STRICT_FSETCCS, MVT::v2f64, Custom);
setOperationAction(ISD::STRICT_FSETCCS, MVT::v4f32, Custom);
- for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
- setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
- setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
- setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
- setOperationAction(ISD::VSELECT, VT, Custom);
+ for (auto VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32}) {
+ setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
+ setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
+ setOperationAction(ISD::VSELECT, VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
}
- for (auto VT : { MVT::v8f16, MVT::v2f64, MVT::v2i64 }) {
- setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
- setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
- setOperationAction(ISD::VSELECT, VT, Custom);
+ for (auto VT : {MVT::v8f16, MVT::v2f64, MVT::v2i64}) {
+ setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
+ setOperationAction(ISD::VSELECT, VT, Custom);
if (VT == MVT::v2i64 && !Subtarget.is64Bit())
continue;
- setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
}
setF16Action(MVT::v8f16, Expand);
@@ -1214,67 +1213,67 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FCOPYSIGN, MVT::v8f16, Custom);
// Custom lower v2i64 and v2f64 selects.
- setOperationAction(ISD::SELECT, MVT::v2f64, Custom);
- setOperationAction(ISD::SELECT, MVT::v2i64, Custom);
- setOperationAction(ISD::SELECT, MVT::v4i32, Custom);
- setOperationAction(ISD::SELECT, MVT::v8i16, Custom);
- setOperationAction(ISD::SELECT, MVT::v8f16, Custom);
- setOperationAction(ISD::SELECT, MVT::v16i8, Custom);
-
- setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Custom);
- setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Custom);
- setOperationAction(ISD::FP_TO_SINT, MVT::v2i32, Custom);
- setOperationAction(ISD::FP_TO_UINT, MVT::v2i32, Custom);
- setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4i32, Custom);
- setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2i32, Custom);
+ setOperationAction(ISD::SELECT, MVT::v2f64, Custom);
+ setOperationAction(ISD::SELECT, MVT::v2i64, Custom);
+ setOperationAction(ISD::SELECT, MVT::v4i32, Custom);
+ setOperationAction(ISD::SELECT, MVT::v8i16, Custom);
+ setOperationAction(ISD::SELECT, MVT::v8f16, Custom);
+ setOperationAction(ISD::SELECT, MVT::v16i8, Custom);
+
+ setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Custom);
+ setOperationAction(ISD::FP_TO_SINT, MVT::v2i32, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v2i32, Custom);
+ setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4i32, Custom);
+ setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2i32, Custom);
// Custom legalize these to avoid over promotion or custom promotion.
for (auto VT : {MVT::v2i8, MVT::v4i8, MVT::v8i8, MVT::v2i16, MVT::v4i16}) {
- setOperationAction(ISD::FP_TO_SINT, VT, Custom);
- setOperationAction(ISD::FP_TO_UINT, VT, Custom);
+ setOperationAction(ISD::FP_TO_SINT, VT, Custom);
+ setOperationAction(ISD::FP_TO_UINT, VT, Custom);
setOperationAction(ISD::STRICT_FP_TO_SINT, VT, Custom);
setOperationAction(ISD::STRICT_FP_TO_UINT, VT, Custom);
}
- setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Custom);
- setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i32, Custom);
- setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom);
- setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i32, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Custom);
+ setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i32, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom);
+ setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i32, Custom);
- setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Custom);
- setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i32, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Custom);
+ setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i32, Custom);
- setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Custom);
- setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i32, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Custom);
+ setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i32, Custom);
// Fast v2f32 UINT_TO_FP( v2i32 ) custom conversion.
- setOperationAction(ISD::SINT_TO_FP, MVT::v2f32, Custom);
- setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2f32, Custom);
- setOperationAction(ISD::UINT_TO_FP, MVT::v2f32, Custom);
- setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2f32, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v2f32, Custom);
+ setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2f32, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v2f32, Custom);
+ setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2f32, Custom);
- setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom);
- setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v2f32, Custom);
- setOperationAction(ISD::FP_ROUND, MVT::v2f32, Custom);
- setOperationAction(ISD::STRICT_FP_ROUND, MVT::v2f32, Custom);
+ setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom);
+ setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v2f32, Custom);
+ setOperationAction(ISD::FP_ROUND, MVT::v2f32, Custom);
+ setOperationAction(ISD::STRICT_FP_ROUND, MVT::v2f32, Custom);
// We want to legalize this to an f64 load rather than an i64 load on
// 64-bit targets and two 32-bit loads on a 32-bit target. Similar for
// store.
- setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
- setOperationAction(ISD::LOAD, MVT::v4i16, Custom);
- setOperationAction(ISD::LOAD, MVT::v8i8, Custom);
- setOperationAction(ISD::STORE, MVT::v2i32, Custom);
- setOperationAction(ISD::STORE, MVT::v4i16, Custom);
- setOperationAction(ISD::STORE, MVT::v8i8, Custom);
+ setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
+ setOperationAction(ISD::LOAD, MVT::v4i16, Custom);
+ setOperationAction(ISD::LOAD, MVT::v8i8, Custom);
+ setOperationAction(ISD::STORE, MVT::v2i32, Custom);
+ setOperationAction(ISD::STORE, MVT::v4i16, Custom);
+ setOperationAction(ISD::STORE, MVT::v8i8, Custom);
// Add 32-bit vector stores to help vectorization opportunities.
- setOperationAction(ISD::STORE, MVT::v2i16, Custom);
- setOperationAction(ISD::STORE, MVT::v4i8, Custom);
+ setOperationAction(ISD::STORE, MVT::v2i16, Custom);
+ setOperationAction(ISD::STORE, MVT::v4i8, Custom);
- setOperationAction(ISD::BITCAST, MVT::v2i32, Custom);
- setOperationAction(ISD::BITCAST, MVT::v4i16, Custom);
- setOperationAction(ISD::BITCAST, MVT::v8i8, Custom);
+ setOperationAction(ISD::BITCAST, MVT::v2i32, Custom);
+ setOperationAction(ISD::BITCAST, MVT::v4i16, Custom);
+ setOperationAction(ISD::BITCAST, MVT::v8i8, Custom);
if (!Subtarget.hasAVX512())
setOperationAction(ISD::BITCAST, MVT::v16i1, Custom);
@@ -1284,41 +1283,42 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::SIGN_EXTEND, MVT::v4i64, Custom);
- setOperationAction(ISD::TRUNCATE, MVT::v2i8, Custom);
- setOperationAction(ISD::TRUNCATE, MVT::v2i16, Custom);
- setOperationAction(ISD::TRUNCATE, MVT::v2i32, Custom);
- setOperationAction(ISD::TRUNCATE, MVT::v2i64, Custom);
- setOperationAction(ISD::TRUNCATE, MVT::v4i8, Custom);
- setOperationAction(ISD::TRUNCATE, MVT::v4i16, Custom);
- setOperationAction(ISD::TRUNCATE, MVT::v4i32, Custom);
- setOperationAction(ISD::TRUNCATE, MVT::v4i64, Custom);
- setOperationAction(ISD::TRUNCATE, MVT::v8i8, Custom);
- setOperationAction(ISD::TRUNCATE, MVT::v8i16, Custom);
- setOperationAction(ISD::TRUNCATE, MVT::v8i32, Custom);
- setOperationAction(ISD::TRUNCATE, MVT::v8i64, Custom);
- setOperationAction(ISD::TRUNCATE, MVT::v16i8, Custom);
- setOperationAction(ISD::TRUNCATE, MVT::v16i16, Custom);
- setOperationAction(ISD::TRUNCATE, MVT::v16i32, Custom);
- setOperationAction(ISD::TRUNCATE, MVT::v16i64, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v2i8, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v2i16, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v2i32, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v2i64, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v4i8, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v4i16, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v4i32, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v4i64, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v8i8, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v8i16, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v8i32, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v8i64, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v16i8, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v16i16, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v16i32, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v16i64, Custom);
// In the customized shift lowering, the legal v4i32/v2i64 cases
// in AVX2 will be recognized.
- for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
- setOperationAction(ISD::SRL, VT, Custom);
- setOperationAction(ISD::SHL, VT, Custom);
- setOperationAction(ISD::SRA, VT, Custom);
- if (VT == MVT::v2i64) continue;
- setOperationAction(ISD::ROTL, VT, Custom);
- setOperationAction(ISD::ROTR, VT, Custom);
- setOperationAction(ISD::FSHL, VT, Custom);
- setOperationAction(ISD::FSHR, VT, Custom);
+ for (auto VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) {
+ setOperationAction(ISD::SRL, VT, Custom);
+ setOperationAction(ISD::SHL, VT, Custom);
+ setOperationAction(ISD::SRA, VT, Custom);
+ if (VT == MVT::v2i64)
+ continue;
+ setOperationAction(ISD::ROTL, VT, Custom);
+ setOperationAction(ISD::ROTR, VT, Custom);
+ setOperationAction(ISD::FSHL, VT, Custom);
+ setOperationAction(ISD::FSHR, VT, Custom);
}
- setOperationAction(ISD::STRICT_FSQRT, MVT::v2f64, Legal);
- setOperationAction(ISD::STRICT_FADD, MVT::v2f64, Legal);
- setOperationAction(ISD::STRICT_FSUB, MVT::v2f64, Legal);
- setOperationAction(ISD::STRICT_FMUL, MVT::v2f64, Legal);
- setOperationAction(ISD::STRICT_FDIV, MVT::v2f64, Legal);
+ setOperationAction(ISD::STRICT_FSQRT, MVT::v2f64, Legal);
+ setOperationAction(ISD::STRICT_FADD, MVT::v2f64, Legal);
+ setOperationAction(ISD::STRICT_FSUB, MVT::v2f64, Legal);
+ setOperationAction(ISD::STRICT_FMUL, MVT::v2f64, Legal);
+ setOperationAction(ISD::STRICT_FDIV, MVT::v2f64, Legal);
}
if (!Subtarget.useSoftFloat() && Subtarget.hasGFNI()) {
@@ -1333,73 +1333,73 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
}
if (!Subtarget.useSoftFloat() && Subtarget.hasSSSE3()) {
- setOperationAction(ISD::ABS, MVT::v16i8, Legal);
- setOperationAction(ISD::ABS, MVT::v8i16, Legal);
- setOperationAction(ISD::ABS, MVT::v4i32, Legal);
+ setOperationAction(ISD::ABS, MVT::v16i8, Legal);
+ setOperationAction(ISD::ABS, MVT::v8i16, Legal);
+ setOperationAction(ISD::ABS, MVT::v4i32, Legal);
for (auto VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) {
- setOperationAction(ISD::BITREVERSE, VT, Custom);
- setOperationAction(ISD::CTLZ, VT, Custom);
+ setOperationAction(ISD::BITREVERSE, VT, Custom);
+ setOperationAction(ISD::CTLZ, VT, Custom);
}
// These might be better off as horizontal vector ops.
- setOperationAction(ISD::ADD, MVT::i16, Custom);
- setOperationAction(ISD::ADD, MVT::i32, Custom);
- setOperationAction(ISD::SUB, MVT::i16, Custom);
- setOperationAction(ISD::SUB, MVT::i32, Custom);
+ setOperationAction(ISD::ADD, MVT::i16, Custom);
+ setOperationAction(ISD::ADD, MVT::i32, Custom);
+ setOperationAction(ISD::SUB, MVT::i16, Custom);
+ setOperationAction(ISD::SUB, MVT::i32, Custom);
}
if (!Subtarget.useSoftFloat() && Subtarget.hasSSE41()) {
for (MVT RoundedTy : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64}) {
- setOperationAction(ISD::FFLOOR, RoundedTy, Legal);
- setOperationAction(ISD::STRICT_FFLOOR, RoundedTy, Legal);
- setOperationAction(ISD::FCEIL, RoundedTy, Legal);
- setOperationAction(ISD::STRICT_FCEIL, RoundedTy, Legal);
- setOperationAction(ISD::FTRUNC, RoundedTy, Legal);
- setOperationAction(ISD::STRICT_FTRUNC, RoundedTy, Legal);
- setOperationAction(ISD::FRINT, RoundedTy, Legal);
- setOperationAction(ISD::STRICT_FRINT, RoundedTy, Legal);
- setOperationAction(ISD::FNEARBYINT, RoundedTy, Legal);
- setOperationAction(ISD::STRICT_FNEARBYINT, RoundedTy, Legal);
- setOperationAction(ISD::FROUNDEVEN, RoundedTy, Legal);
- setOperationAction(ISD::STRICT_FROUNDEVEN, RoundedTy, Legal);
-
- setOperationAction(ISD::FROUND, RoundedTy, Custom);
- }
-
- setOperationAction(ISD::SMAX, MVT::v16i8, Legal);
- setOperationAction(ISD::SMAX, MVT::v4i32, Legal);
- setOperationAction(ISD::UMAX, MVT::v8i16, Legal);
- setOperationAction(ISD::UMAX, MVT::v4i32, Legal);
- setOperationAction(ISD::SMIN, MVT::v16i8, Legal);
- setOperationAction(ISD::SMIN, MVT::v4i32, Legal);
- setOperationAction(ISD::UMIN, MVT::v8i16, Legal);
- setOperationAction(ISD::UMIN, MVT::v4i32, Legal);
-
- setOperationAction(ISD::UADDSAT, MVT::v4i32, Custom);
- setOperationAction(ISD::SADDSAT, MVT::v2i64, Custom);
- setOperationAction(ISD::SSUBSAT, MVT::v2i64, Custom);
+ setOperationAction(ISD::FFLOOR, RoundedTy, Legal);
+ setOperationAction(ISD::STRICT_FFLOOR, RoundedTy, Legal);
+ setOperationAction(ISD::FCEIL, RoundedTy, Legal);
+ setOperationAction(ISD::STRICT_FCEIL, RoundedTy, Legal);
+ setOperationAction(ISD::FTRUNC, RoundedTy, Legal);
+ setOperationAction(ISD::STRICT_FTRUNC, RoundedTy, Legal);
+ setOperationAction(ISD::FRINT, RoundedTy, Legal);
+ setOperationAction(ISD::STRICT_FRINT, RoundedTy, Legal);
+ setOperationAction(ISD::FNEARBYINT, RoundedTy, Legal);
+ setOperationAction(ISD::STRICT_FNEARBYINT, RoundedTy, Legal);
+ setOperationAction(ISD::FROUNDEVEN, RoundedTy, Legal);
+ setOperationAction(ISD::STRICT_FROUNDEVEN, RoundedTy, Legal);
+
+ setOperationAction(ISD::FROUND, RoundedTy, Custom);
+ }
+
+ setOperationAction(ISD::SMAX, MVT::v16i8, Legal);
+ setOperationAction(ISD::SMAX, MVT::v4i32, Legal);
+ setOperationAction(ISD::UMAX, MVT::v8i16, Legal);
+ setOperationAction(ISD::UMAX, MVT::v4i32, Legal);
+ setOperationAction(ISD::SMIN, MVT::v16i8, Legal);
+ setOperationAction(ISD::SMIN, MVT::v4i32, Legal);
+ setOperationAction(ISD::UMIN, MVT::v8i16, Legal);
+ setOperationAction(ISD::UMIN, MVT::v4i32, Legal);
+
+ setOperationAction(ISD::UADDSAT, MVT::v4i32, Custom);
+ setOperationAction(ISD::SADDSAT, MVT::v2i64, Custom);
+ setOperationAction(ISD::SSUBSAT, MVT::v2i64, Custom);
// FIXME: Do we need to handle scalar-to-vector here?
- setOperationAction(ISD::MUL, MVT::v4i32, Legal);
- setOperationAction(ISD::SMULO, MVT::v2i32, Custom);
+ setOperationAction(ISD::MUL, MVT::v4i32, Legal);
+ setOperationAction(ISD::SMULO, MVT::v2i32, Custom);
// We directly match byte blends in the backend as they match the VSELECT
// condition form.
- setOperationAction(ISD::VSELECT, MVT::v16i8, Legal);
+ setOperationAction(ISD::VSELECT, MVT::v16i8, Legal);
// SSE41 brings specific instructions for doing vector sign extend even in
// cases where we don't have SRA.
- for (auto VT : { MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
+ for (auto VT : {MVT::v8i16, MVT::v4i32, MVT::v2i64}) {
setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Legal);
setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Legal);
}
// SSE41 also has vector sign/zero extending loads, PMOV[SZ]X
- for (auto LoadExtOp : { ISD::SEXTLOAD, ISD::ZEXTLOAD }) {
- setLoadExtAction(LoadExtOp, MVT::v8i16, MVT::v8i8, Legal);
- setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i8, Legal);
- setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i8, Legal);
+ for (auto LoadExtOp : {ISD::SEXTLOAD, ISD::ZEXTLOAD}) {
+ setLoadExtAction(LoadExtOp, MVT::v8i16, MVT::v8i8, Legal);
+ setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i8, Legal);
+ setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i8, Legal);
setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i16, Legal);
setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i16, Legal);
setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i32, Legal);
@@ -1408,73 +1408,73 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
if (Subtarget.is64Bit() && !Subtarget.hasAVX512()) {
// We need to scalarize v4i64->v4f32 uint_to_fp using cvtsi2ss, but we can
// do the pre and post work in the vector domain.
- setOperationAction(ISD::UINT_TO_FP, MVT::v4i64, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v4i64, Custom);
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i64, Custom);
// We need to mark SINT_TO_FP as Custom even though we want to expand it
// so that DAG combine doesn't try to turn it into uint_to_fp.
- setOperationAction(ISD::SINT_TO_FP, MVT::v4i64, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v4i64, Custom);
setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i64, Custom);
}
}
if (!Subtarget.useSoftFloat() && Subtarget.hasSSE42()) {
- setOperationAction(ISD::UADDSAT, MVT::v2i64, Custom);
+ setOperationAction(ISD::UADDSAT, MVT::v2i64, Custom);
}
if (!Subtarget.useSoftFloat() && Subtarget.hasXOP()) {
- for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
- MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
+ for (auto VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v32i8,
+ MVT::v16i16, MVT::v8i32, MVT::v4i64}) {
setOperationAction(ISD::ROTL, VT, Custom);
setOperationAction(ISD::ROTR, VT, Custom);
}
// XOP can efficiently perform BITREVERSE with VPPERM.
- for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 })
+ for (auto VT : {MVT::i8, MVT::i16, MVT::i32, MVT::i64})
setOperationAction(ISD::BITREVERSE, VT, Custom);
}
if (!Subtarget.useSoftFloat() && Subtarget.hasAVX()) {
bool HasInt256 = Subtarget.hasInt256();
- addRegisterClass(MVT::v32i8, Subtarget.hasVLX() ? &X86::VR256XRegClass
- : &X86::VR256RegClass);
+ addRegisterClass(MVT::v32i8, Subtarget.hasVLX() ? &X86::VR256XRegClass
+ : &X86::VR256RegClass);
addRegisterClass(MVT::v16i16, Subtarget.hasVLX() ? &X86::VR256XRegClass
: &X86::VR256RegClass);
addRegisterClass(MVT::v16f16, Subtarget.hasVLX() ? &X86::VR256XRegClass
: &X86::VR256RegClass);
- addRegisterClass(MVT::v8i32, Subtarget.hasVLX() ? &X86::VR256XRegClass
- : &X86::VR256RegClass);
- addRegisterClass(MVT::v8f32, Subtarget.hasVLX() ? &X86::VR256XRegClass
- : &X86::VR256RegClass);
- addRegisterClass(MVT::v4i64, Subtarget.hasVLX() ? &X86::VR256XRegClass
- : &X86::VR256RegClass);
- addRegisterClass(MVT::v4f64, Subtarget.hasVLX() ? &X86::VR256XRegClass
- : &X86::VR256RegClass);
-
- for (auto VT : { MVT::v8f32, MVT::v4f64 }) {
- setOperationAction(ISD::FFLOOR, VT, Legal);
- setOperationAction(ISD::STRICT_FFLOOR, VT, Legal);
- setOperationAction(ISD::FCEIL, VT, Legal);
- setOperationAction(ISD::STRICT_FCEIL, VT, Legal);
- setOperationAction(ISD::FTRUNC, VT, Legal);
- setOperationAction(ISD::STRICT_FTRUNC, VT, Legal);
- setOperationAction(ISD::FRINT, VT, Legal);
- setOperationAction(ISD::STRICT_FRINT, VT, Legal);
- setOperationAction(ISD::FNEARBYINT, VT, Legal);
+ addRegisterClass(MVT::v8i32, Subtarget.hasVLX() ? &X86::VR256XRegClass
+ : &X86::VR256RegClass);
+ addRegisterClass(MVT::v8f32, Subtarget.hasVLX() ? &X86::VR256XRegClass
+ : &X86::VR256RegClass);
+ addRegisterClass(MVT::v4i64, Subtarget.hasVLX() ? &X86::VR256XRegClass
+ : &X86::VR256RegClass);
+ addRegisterClass(MVT::v4f64, Subtarget.hasVLX() ? &X86::VR256XRegClass
+ : &X86::VR256RegClass);
+
+ for (auto VT : {MVT::v8f32, MVT::v4f64}) {
+ setOperationAction(ISD::FFLOOR, VT, Legal);
+ setOperationAction(ISD::STRICT_FFLOOR, VT, Legal);
+ setOperationAction(ISD::FCEIL, VT, Legal);
+ setOperationAction(ISD::STRICT_FCEIL, VT, Legal);
+ setOperationAction(ISD::FTRUNC, VT, Legal);
+ setOperationAction(ISD::STRICT_FTRUNC, VT, Legal);
+ setOperationAction(ISD::FRINT, VT, Legal);
+ setOperationAction(ISD::STRICT_FRINT, VT, Legal);
+ setOperationAction(ISD::FNEARBYINT, VT, Legal);
setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal);
- setOperationAction(ISD::FROUNDEVEN, VT, Legal);
+ setOperationAction(ISD::FROUNDEVEN, VT, Legal);
setOperationAction(ISD::STRICT_FROUNDEVEN, VT, Legal);
- setOperationAction(ISD::FROUND, VT, Custom);
+ setOperationAction(ISD::FROUND, VT, Custom);
- setOperationAction(ISD::FNEG, VT, Custom);
- setOperationAction(ISD::FABS, VT, Custom);
- setOperationAction(ISD::FCOPYSIGN, VT, Custom);
+ setOperationAction(ISD::FNEG, VT, Custom);
+ setOperationAction(ISD::FABS, VT, Custom);
+ setOperationAction(ISD::FCOPYSIGN, VT, Custom);
- setOperationAction(ISD::FMAXIMUM, VT, Custom);
- setOperationAction(ISD::FMINIMUM, VT, Custom);
- setOperationAction(ISD::FMAXIMUMNUM, VT, Custom);
- setOperationAction(ISD::FMINIMUMNUM, VT, Custom);
+ setOperationAction(ISD::FMAXIMUM, VT, Custom);
+ setOperationAction(ISD::FMINIMUM, VT, Custom);
+ setOperationAction(ISD::FMAXIMUMNUM, VT, Custom);
+ setOperationAction(ISD::FMINIMUMNUM, VT, Custom);
setOperationAction(ISD::FCANONICALIZE, VT, Custom);
}
@@ -1483,81 +1483,82 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// (fp_to_int:v8i16 (v8f32 ..)) requires the result type to be promoted
// even though v8i16 is a legal type.
- setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v8i16, MVT::v8i32);
- setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v8i16, MVT::v8i32);
+ setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v8i16, MVT::v8i32);
+ setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v8i16, MVT::v8i32);
setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::v8i16, MVT::v8i32);
setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::v8i16, MVT::v8i32);
- setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Custom);
- setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, Custom);
- setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v8i32, Custom);
-
- setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Custom);
- setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v8i32, Custom);
- setOperationAction(ISD::FP_EXTEND, MVT::v8f32, Expand);
- setOperationAction(ISD::FP_ROUND, MVT::v8f16, Expand);
- setOperationAction(ISD::FP_EXTEND, MVT::v4f64, Custom);
- setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v4f64, Custom);
-
- setOperationAction(ISD::STRICT_FP_ROUND, MVT::v4f32, Legal);
- setOperationAction(ISD::STRICT_FADD, MVT::v8f32, Legal);
- setOperationAction(ISD::STRICT_FADD, MVT::v4f64, Legal);
- setOperationAction(ISD::STRICT_FSUB, MVT::v8f32, Legal);
- setOperationAction(ISD::STRICT_FSUB, MVT::v4f64, Legal);
- setOperationAction(ISD::STRICT_FMUL, MVT::v8f32, Legal);
- setOperationAction(ISD::STRICT_FMUL, MVT::v4f64, Legal);
- setOperationAction(ISD::STRICT_FDIV, MVT::v8f32, Legal);
- setOperationAction(ISD::STRICT_FDIV, MVT::v4f64, Legal);
- setOperationAction(ISD::STRICT_FSQRT, MVT::v8f32, Legal);
- setOperationAction(ISD::STRICT_FSQRT, MVT::v4f64, Legal);
+ setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, Custom);
+ setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v8i32, Custom);
+
+ setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Custom);
+ setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v8i32, Custom);
+ setOperationAction(ISD::FP_EXTEND, MVT::v8f32, Expand);
+ setOperationAction(ISD::FP_ROUND, MVT::v8f16, Expand);
+ setOperationAction(ISD::FP_EXTEND, MVT::v4f64, Custom);
+ setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v4f64, Custom);
+
+ setOperationAction(ISD::STRICT_FP_ROUND, MVT::v4f32, Legal);
+ setOperationAction(ISD::STRICT_FADD, MVT::v8f32, Legal);
+ setOperationAction(ISD::STRICT_FADD, MVT::v4f64, Legal);
+ setOperationAction(ISD::STRICT_FSUB, MVT::v8f32, Legal);
+ setOperationAction(ISD::STRICT_FSUB, MVT::v4f64, Legal);
+ setOperationAction(ISD::STRICT_FMUL, MVT::v8f32, Legal);
+ setOperationAction(ISD::STRICT_FMUL, MVT::v4f64, Legal);
+ setOperationAction(ISD::STRICT_FDIV, MVT::v8f32, Legal);
+ setOperationAction(ISD::STRICT_FDIV, MVT::v4f64, Legal);
+ setOperationAction(ISD::STRICT_FSQRT, MVT::v8f32, Legal);
+ setOperationAction(ISD::STRICT_FSQRT, MVT::v4f64, Legal);
if (!Subtarget.hasAVX512())
setOperationAction(ISD::BITCAST, MVT::v32i1, Custom);
// In the customized shift lowering, the legal v8i32/v4i64 cases
// in AVX2 will be recognized.
- for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
- setOperationAction(ISD::SRL, VT, Custom);
- setOperationAction(ISD::SHL, VT, Custom);
- setOperationAction(ISD::SRA, VT, Custom);
- setOperationAction(ISD::ABDS, VT, Custom);
- setOperationAction(ISD::ABDU, VT, Custom);
- if (VT == MVT::v4i64) continue;
- setOperationAction(ISD::ROTL, VT, Custom);
- setOperationAction(ISD::ROTR, VT, Custom);
- setOperationAction(ISD::FSHL, VT, Custom);
- setOperationAction(ISD::FSHR, VT, Custom);
+ for (auto VT : {MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64}) {
+ setOperationAction(ISD::SRL, VT, Custom);
+ setOperationAction(ISD::SHL, VT, Custom);
+ setOperationAction(ISD::SRA, VT, Custom);
+ setOperationAction(ISD::ABDS, VT, Custom);
+ setOperationAction(ISD::ABDU, VT, Custom);
+ if (VT == MVT::v4i64)
+ continue;
+ setOperationAction(ISD::ROTL, VT, Custom);
+ setOperationAction(ISD::ROTR, VT, Custom);
+ setOperationAction(ISD::FSHL, VT, Custom);
+ setOperationAction(ISD::FSHR, VT, Custom);
}
// These types need custom splitting if their input is a 128-bit vector.
- setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom);
- setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom);
- setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom);
- setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom);
-
- setOperationAction(ISD::SELECT, MVT::v4f64, Custom);
- setOperationAction(ISD::SELECT, MVT::v4i64, Custom);
- setOperationAction(ISD::SELECT, MVT::v8i32, Custom);
- setOperationAction(ISD::SELECT, MVT::v16i16, Custom);
- setOperationAction(ISD::SELECT, MVT::v16f16, Custom);
- setOperationAction(ISD::SELECT, MVT::v32i8, Custom);
- setOperationAction(ISD::SELECT, MVT::v8f32, Custom);
-
- for (auto VT : { MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
- setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
- setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
- setOperationAction(ISD::ANY_EXTEND, VT, Custom);
- }
-
- setOperationAction(ISD::TRUNCATE, MVT::v32i8, Custom);
- setOperationAction(ISD::TRUNCATE, MVT::v32i16, Custom);
- setOperationAction(ISD::TRUNCATE, MVT::v32i32, Custom);
- setOperationAction(ISD::TRUNCATE, MVT::v32i64, Custom);
-
- for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
- setOperationAction(ISD::SETCC, VT, Custom);
- setOperationAction(ISD::CTPOP, VT, Custom);
- setOperationAction(ISD::CTLZ, VT, Custom);
- setOperationAction(ISD::BITREVERSE, VT, Custom);
+ setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom);
+ setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom);
+ setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom);
+ setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom);
+
+ setOperationAction(ISD::SELECT, MVT::v4f64, Custom);
+ setOperationAction(ISD::SELECT, MVT::v4i64, Custom);
+ setOperationAction(ISD::SELECT, MVT::v8i32, Custom);
+ setOperationAction(ISD::SELECT, MVT::v16i16, Custom);
+ setOperationAction(ISD::SELECT, MVT::v16f16, Custom);
+ setOperationAction(ISD::SELECT, MVT::v32i8, Custom);
+ setOperationAction(ISD::SELECT, MVT::v8f32, Custom);
+
+ for (auto VT : {MVT::v16i16, MVT::v8i32, MVT::v4i64}) {
+ setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
+ setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
+ setOperationAction(ISD::ANY_EXTEND, VT, Custom);
+ }
+
+ setOperationAction(ISD::TRUNCATE, MVT::v32i8, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v32i16, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v32i32, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v32i64, Custom);
+
+ for (auto VT : {MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64}) {
+ setOperationAction(ISD::SETCC, VT, Custom);
+ setOperationAction(ISD::CTPOP, VT, Custom);
+ setOperationAction(ISD::CTLZ, VT, Custom);
+ setOperationAction(ISD::BITREVERSE, VT, Custom);
// The condition codes aren't legal in SSE/AVX and under AVX512 we use
// setcc all the way to isel and prefer SETGT in some isel patterns.
@@ -1565,64 +1566,64 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setCondCodeAction(ISD::SETLE, VT, Custom);
}
- setOperationAction(ISD::SETCC, MVT::v4f64, Custom);
- setOperationAction(ISD::SETCC, MVT::v8f32, Custom);
- setOperationAction(ISD::STRICT_FSETCC, MVT::v4f64, Custom);
- setOperationAction(ISD::STRICT_FSETCC, MVT::v8f32, Custom);
+ setOperationAction(ISD::SETCC, MVT::v4f64, Custom);
+ setOperationAction(ISD::SETCC, MVT::v8f32, Custom);
+ setOperationAction(ISD::STRICT_FSETCC, MVT::v4f64, Custom);
+ setOperationAction(ISD::STRICT_FSETCC, MVT::v8f32, Custom);
setOperationAction(ISD::STRICT_FSETCCS, MVT::v4f64, Custom);
setOperationAction(ISD::STRICT_FSETCCS, MVT::v8f32, Custom);
if (Subtarget.hasAnyFMA()) {
- for (auto VT : { MVT::f32, MVT::f64, MVT::v4f32, MVT::v8f32,
- MVT::v2f64, MVT::v4f64 }) {
+ for (auto VT : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v8f32, MVT::v2f64,
+ MVT::v4f64}) {
setOperationAction(ISD::FMA, VT, Legal);
setOperationAction(ISD::STRICT_FMA, VT, Legal);
}
}
- for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
+ for (auto VT : {MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64}) {
setOperationAction(ISD::ADD, VT, HasInt256 ? Legal : Custom);
setOperationAction(ISD::SUB, VT, HasInt256 ? Legal : Custom);
}
- setOperationAction(ISD::MUL, MVT::v4i64, Custom);
- setOperationAction(ISD::MUL, MVT::v8i32, HasInt256 ? Legal : Custom);
- setOperationAction(ISD::MUL, MVT::v16i16, HasInt256 ? Legal : Custom);
- setOperationAction(ISD::MUL, MVT::v32i8, Custom);
-
- setOperationAction(ISD::MULHU, MVT::v8i32, Custom);
- setOperationAction(ISD::MULHS, MVT::v8i32, Custom);
- setOperationAction(ISD::MULHU, MVT::v16i16, HasInt256 ? Legal : Custom);
- setOperationAction(ISD::MULHS, MVT::v16i16, HasInt256 ? Legal : Custom);
- setOperationAction(ISD::MULHU, MVT::v32i8, Custom);
- setOperationAction(ISD::MULHS, MVT::v32i8, Custom);
- setOperationAction(ISD::AVGCEILU, MVT::v16i16, HasInt256 ? Legal : Custom);
- setOperationAction(ISD::AVGCEILU, MVT::v32i8, HasInt256 ? Legal : Custom);
-
- setOperationAction(ISD::SMULO, MVT::v32i8, Custom);
- setOperationAction(ISD::UMULO, MVT::v32i8, Custom);
-
- setOperationAction(ISD::ABS, MVT::v4i64, Custom);
- setOperationAction(ISD::SMAX, MVT::v4i64, Custom);
- setOperationAction(ISD::UMAX, MVT::v4i64, Custom);
- setOperationAction(ISD::SMIN, MVT::v4i64, Custom);
- setOperationAction(ISD::UMIN, MVT::v4i64, Custom);
-
- setOperationAction(ISD::UADDSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
- setOperationAction(ISD::SADDSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
- setOperationAction(ISD::USUBSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
- setOperationAction(ISD::SSUBSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
- setOperationAction(ISD::UADDSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
- setOperationAction(ISD::SADDSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
- setOperationAction(ISD::USUBSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
- setOperationAction(ISD::SSUBSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
- setOperationAction(ISD::UADDSAT, MVT::v8i32, Custom);
- setOperationAction(ISD::USUBSAT, MVT::v8i32, Custom);
- setOperationAction(ISD::UADDSAT, MVT::v4i64, Custom);
- setOperationAction(ISD::USUBSAT, MVT::v4i64, Custom);
-
- for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32 }) {
- setOperationAction(ISD::ABS, VT, HasInt256 ? Legal : Custom);
+ setOperationAction(ISD::MUL, MVT::v4i64, Custom);
+ setOperationAction(ISD::MUL, MVT::v8i32, HasInt256 ? Legal : Custom);
+ setOperationAction(ISD::MUL, MVT::v16i16, HasInt256 ? Legal : Custom);
+ setOperationAction(ISD::MUL, MVT::v32i8, Custom);
+
+ setOperationAction(ISD::MULHU, MVT::v8i32, Custom);
+ setOperationAction(ISD::MULHS, MVT::v8i32, Custom);
+ setOperationAction(ISD::MULHU, MVT::v16i16, HasInt256 ? Legal : Custom);
+ setOperationAction(ISD::MULHS, MVT::v16i16, HasInt256 ? Legal : Custom);
+ setOperationAction(ISD::MULHU, MVT::v32i8, Custom);
+ setOperationAction(ISD::MULHS, MVT::v32i8, Custom);
+ setOperationAction(ISD::AVGCEILU, MVT::v16i16, HasInt256 ? Legal : Custom);
+ setOperationAction(ISD::AVGCEILU, MVT::v32i8, HasInt256 ? Legal : Custom);
+
+ setOperationAction(ISD::SMULO, MVT::v32i8, Custom);
+ setOperationAction(ISD::UMULO, MVT::v32i8, Custom);
+
+ setOperationAction(ISD::ABS, MVT::v4i64, Custom);
+ setOperationAction(ISD::SMAX, MVT::v4i64, Custom);
+ setOperationAction(ISD::UMAX, MVT::v4i64, Custom);
+ setOperationAction(ISD::SMIN, MVT::v4i64, Custom);
+ setOperationAction(ISD::UMIN, MVT::v4i64, Custom);
+
+ setOperationAction(ISD::UADDSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
+ setOperationAction(ISD::SADDSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
+ setOperationAction(ISD::USUBSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
+ setOperationAction(ISD::SSUBSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
+ setOperationAction(ISD::UADDSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
+ setOperationAction(ISD::SADDSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
+ setOperationAction(ISD::USUBSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
+ setOperationAction(ISD::SSUBSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
+ setOperationAction(ISD::UADDSAT, MVT::v8i32, Custom);
+ setOperationAction(ISD::USUBSAT, MVT::v8i32, Custom);
+ setOperationAction(ISD::UADDSAT, MVT::v4i64, Custom);
+ setOperationAction(ISD::USUBSAT, MVT::v4i64, Custom);
+
+ for (auto VT : {MVT::v32i8, MVT::v16i16, MVT::v8i32}) {
+ setOperationAction(ISD::ABS, VT, HasInt256 ? Legal : Custom);
setOperationAction(ISD::SMAX, VT, HasInt256 ? Legal : Custom);
setOperationAction(ISD::UMAX, VT, HasInt256 ? Legal : Custom);
setOperationAction(ISD::SMIN, VT, HasInt256 ? Legal : Custom);
@@ -1641,41 +1642,41 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v8i32, Custom);
// AVX2 also has wider vector sign/zero extending loads, VPMOV[SZ]X
- for (auto LoadExtOp : { ISD::SEXTLOAD, ISD::ZEXTLOAD }) {
+ for (auto LoadExtOp : {ISD::SEXTLOAD, ISD::ZEXTLOAD}) {
setLoadExtAction(LoadExtOp, MVT::v16i16, MVT::v16i8, Legal);
- setLoadExtAction(LoadExtOp, MVT::v8i32, MVT::v8i8, Legal);
- setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i8, Legal);
- setLoadExtAction(LoadExtOp, MVT::v8i32, MVT::v8i16, Legal);
- setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i16, Legal);
- setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i32, Legal);
+ setLoadExtAction(LoadExtOp, MVT::v8i32, MVT::v8i8, Legal);
+ setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i8, Legal);
+ setLoadExtAction(LoadExtOp, MVT::v8i32, MVT::v8i16, Legal);
+ setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i16, Legal);
+ setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i32, Legal);
}
}
- for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
- MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 }) {
- setOperationAction(ISD::MLOAD, VT, Subtarget.hasVLX() ? Legal : Custom);
+ for (auto VT : {MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64, MVT::v4f32,
+ MVT::v8f32, MVT::v2f64, MVT::v4f64}) {
+ setOperationAction(ISD::MLOAD, VT, Subtarget.hasVLX() ? Legal : Custom);
setOperationAction(ISD::MSTORE, VT, Legal);
}
// Extract subvector is special because the value type
// (result) is 128-bit but the source is 256-bit wide.
- for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
- MVT::v8f16, MVT::v4f32, MVT::v2f64 }) {
+ for (auto VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v8f16,
+ MVT::v4f32, MVT::v2f64}) {
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
}
// Custom lower several nodes for 256-bit types.
- for (MVT VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64,
- MVT::v16f16, MVT::v8f32, MVT::v4f64 }) {
- setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
- setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
- setOperationAction(ISD::VSELECT, VT, Custom);
- setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
+ for (MVT VT : {MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64, MVT::v16f16,
+ MVT::v8f32, MVT::v4f64}) {
+ setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
+ setOperationAction(ISD::VSELECT, VT, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
- setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
- setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal);
- setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
- setOperationAction(ISD::STORE, VT, Custom);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
+ setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal);
+ setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
+ setOperationAction(ISD::STORE, VT, Custom);
}
setF16Action(MVT::v16f16, Expand);
setOperationAction(ISD::FNEG, MVT::v16f16, Custom);
@@ -1693,21 +1694,21 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::MGATHER, MVT::v2f32, Custom);
setOperationAction(ISD::MGATHER, MVT::v2i32, Custom);
- for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
- MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 })
- setOperationAction(ISD::MGATHER, VT, Custom);
+ for (auto VT : {MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
+ MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64})
+ setOperationAction(ISD::MGATHER, VT, Custom);
}
}
if (!Subtarget.useSoftFloat() && !Subtarget.hasFP16() &&
Subtarget.hasF16C()) {
- for (MVT VT : { MVT::f16, MVT::v2f16, MVT::v4f16, MVT::v8f16 }) {
- setOperationAction(ISD::FP_ROUND, VT, Custom);
- setOperationAction(ISD::STRICT_FP_ROUND, VT, Custom);
+ for (MVT VT : {MVT::f16, MVT::v2f16, MVT::v4f16, MVT::v8f16}) {
+ setOperationAction(ISD::FP_ROUND, VT, Custom);
+ setOperationAction(ISD::STRICT_FP_ROUND, VT, Custom);
}
- for (MVT VT : { MVT::f32, MVT::v2f32, MVT::v4f32, MVT::v8f32 }) {
- setOperationAction(ISD::FP_EXTEND, VT, Custom);
- setOperationAction(ISD::STRICT_FP_EXTEND, VT, Custom);
+ for (MVT VT : {MVT::f32, MVT::v2f32, MVT::v4f32, MVT::v8f32}) {
+ setOperationAction(ISD::FP_EXTEND, VT, Custom);
+ setOperationAction(ISD::STRICT_FP_EXTEND, VT, Custom);
}
for (unsigned Opc : {ISD::FADD, ISD::FSUB, ISD::FMUL, ISD::FDIV}) {
setOperationPromotedToType(Opc, MVT::v8f16, MVT::v8f32);
@@ -1721,28 +1722,28 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// available with AVX512. 512-bit vectors are in a separate block controlled
// by useAVX512Regs.
if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) {
- addRegisterClass(MVT::v1i1, &X86::VK1RegClass);
- addRegisterClass(MVT::v2i1, &X86::VK2RegClass);
- addRegisterClass(MVT::v4i1, &X86::VK4RegClass);
- addRegisterClass(MVT::v8i1, &X86::VK8RegClass);
- addRegisterClass(MVT::v16i1, &X86::VK16RegClass);
+ addRegisterClass(MVT::v1i1, &X86::VK1RegClass);
+ addRegisterClass(MVT::v2i1, &X86::VK2RegClass);
+ addRegisterClass(MVT::v4i1, &X86::VK4RegClass);
+ addRegisterClass(MVT::v8i1, &X86::VK8RegClass);
+ addRegisterClass(MVT::v16i1, &X86::VK16RegClass);
- setOperationAction(ISD::SELECT, MVT::v1i1, Custom);
+ setOperationAction(ISD::SELECT, MVT::v1i1, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v1i1, Custom);
- setOperationAction(ISD::BUILD_VECTOR, MVT::v1i1, Custom);
-
- setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v8i1, MVT::v8i32);
- setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v8i1, MVT::v8i32);
- setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v4i1, MVT::v4i32);
- setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v4i1, MVT::v4i32);
- setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::v8i1, MVT::v8i32);
- setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::v8i1, MVT::v8i32);
- setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::v4i1, MVT::v4i32);
- setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::v4i1, MVT::v4i32);
- setOperationAction(ISD::FP_TO_SINT, MVT::v2i1, Custom);
- setOperationAction(ISD::FP_TO_UINT, MVT::v2i1, Custom);
- setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2i1, Custom);
- setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i1, Custom);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v1i1, Custom);
+
+ setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v8i1, MVT::v8i32);
+ setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v8i1, MVT::v8i32);
+ setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v4i1, MVT::v4i32);
+ setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v4i1, MVT::v4i32);
+ setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::v8i1, MVT::v8i32);
+ setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::v8i1, MVT::v8i32);
+ setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::v4i1, MVT::v4i32);
+ setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::v4i1, MVT::v4i32);
+ setOperationAction(ISD::FP_TO_SINT, MVT::v2i1, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v2i1, Custom);
+ setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2i1, Custom);
+ setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i1, Custom);
setOperationAction(ISD::FCANONICALIZE, MVT::v8f16, Custom);
setOperationAction(ISD::FCANONICALIZE, MVT::v16f16, Custom);
setOperationAction(ISD::FCANONICALIZE, MVT::v32f16, Custom);
@@ -1761,29 +1762,29 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
}
// Extends of v16i1/v8i1/v4i1/v2i1 to 128-bit vectors.
- for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
+ for (auto VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) {
setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
- setOperationAction(ISD::ANY_EXTEND, VT, Custom);
+ setOperationAction(ISD::ANY_EXTEND, VT, Custom);
}
- for (auto VT : { MVT::v1i1, MVT::v2i1, MVT::v4i1, MVT::v8i1, MVT::v16i1 })
- setOperationAction(ISD::VSELECT, VT, Expand);
+ for (auto VT : {MVT::v1i1, MVT::v2i1, MVT::v4i1, MVT::v8i1, MVT::v16i1})
+ setOperationAction(ISD::VSELECT, VT, Expand);
- for (auto VT : { MVT::v2i1, MVT::v4i1, MVT::v8i1, MVT::v16i1 }) {
- setOperationAction(ISD::SETCC, VT, Custom);
- setOperationAction(ISD::SELECT, VT, Custom);
- setOperationAction(ISD::TRUNCATE, VT, Custom);
+ for (auto VT : {MVT::v2i1, MVT::v4i1, MVT::v8i1, MVT::v16i1}) {
+ setOperationAction(ISD::SETCC, VT, Custom);
+ setOperationAction(ISD::SELECT, VT, Custom);
+ setOperationAction(ISD::TRUNCATE, VT, Custom);
- setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
- setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
+ setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
- setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
}
- for (auto VT : { MVT::v1i1, MVT::v2i1, MVT::v4i1, MVT::v8i1 })
+ for (auto VT : {MVT::v1i1, MVT::v2i1, MVT::v4i1, MVT::v8i1})
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
}
if (Subtarget.hasDQI() && Subtarget.hasVLX()) {
@@ -1801,30 +1802,30 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
addRegisterClass(MVT::v16i32, &X86::VR512RegClass);
addRegisterClass(MVT::v16f32, &X86::VR512RegClass);
- addRegisterClass(MVT::v8i64, &X86::VR512RegClass);
- addRegisterClass(MVT::v8f64, &X86::VR512RegClass);
+ addRegisterClass(MVT::v8i64, &X86::VR512RegClass);
+ addRegisterClass(MVT::v8f64, &X86::VR512RegClass);
addRegisterClass(MVT::v32i16, &X86::VR512RegClass);
addRegisterClass(MVT::v32f16, &X86::VR512RegClass);
- addRegisterClass(MVT::v64i8, &X86::VR512RegClass);
+ addRegisterClass(MVT::v64i8, &X86::VR512RegClass);
for (auto ExtType : {ISD::ZEXTLOAD, ISD::SEXTLOAD}) {
- setLoadExtAction(ExtType, MVT::v16i32, MVT::v16i8, Legal);
+ setLoadExtAction(ExtType, MVT::v16i32, MVT::v16i8, Legal);
setLoadExtAction(ExtType, MVT::v16i32, MVT::v16i16, Legal);
- setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i8, Legal);
- setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i16, Legal);
- setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i32, Legal);
+ setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i8, Legal);
+ setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i16, Legal);
+ setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i32, Legal);
if (HasBWI)
setLoadExtAction(ExtType, MVT::v32i16, MVT::v32i8, Legal);
}
- for (MVT VT : { MVT::v16f32, MVT::v8f64 }) {
+ for (MVT VT : {MVT::v16f32, MVT::v8f64}) {
setOperationAction(ISD::FMAXIMUM, VT, Custom);
setOperationAction(ISD::FMINIMUM, VT, Custom);
setOperationAction(ISD::FMAXIMUMNUM, VT, Custom);
setOperationAction(ISD::FMINIMUMNUM, VT, Custom);
- setOperationAction(ISD::FNEG, VT, Custom);
- setOperationAction(ISD::FABS, VT, Custom);
- setOperationAction(ISD::FMA, VT, Legal);
+ setOperationAction(ISD::FNEG, VT, Custom);
+ setOperationAction(ISD::FABS, VT, Custom);
+ setOperationAction(ISD::FMA, VT, Legal);
setOperationAction(ISD::STRICT_FMA, VT, Legal);
setOperationAction(ISD::FCOPYSIGN, VT, Custom);
setOperationAction(ISD::FCANONICALIZE, VT, Custom);
@@ -1836,93 +1837,93 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
if (Subtarget.hasDQI())
setOperationAction(ISD::LLRINT, MVT::v8f64, Legal);
- for (MVT VT : { MVT::v16i1, MVT::v16i8 }) {
- setOperationPromotedToType(ISD::FP_TO_SINT , VT, MVT::v16i32);
- setOperationPromotedToType(ISD::FP_TO_UINT , VT, MVT::v16i32);
+ for (MVT VT : {MVT::v16i1, MVT::v16i8}) {
+ setOperationPromotedToType(ISD::FP_TO_SINT, VT, MVT::v16i32);
+ setOperationPromotedToType(ISD::FP_TO_UINT, VT, MVT::v16i32);
setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, VT, MVT::v16i32);
setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, VT, MVT::v16i32);
}
- for (MVT VT : { MVT::v16i16, MVT::v16i32 }) {
- setOperationAction(ISD::FP_TO_SINT, VT, Custom);
- setOperationAction(ISD::FP_TO_UINT, VT, Custom);
+ for (MVT VT : {MVT::v16i16, MVT::v16i32}) {
+ setOperationAction(ISD::FP_TO_SINT, VT, Custom);
+ setOperationAction(ISD::FP_TO_UINT, VT, Custom);
setOperationAction(ISD::STRICT_FP_TO_SINT, VT, Custom);
setOperationAction(ISD::STRICT_FP_TO_UINT, VT, Custom);
}
- setOperationAction(ISD::SINT_TO_FP, MVT::v16i32, Custom);
- setOperationAction(ISD::UINT_TO_FP, MVT::v16i32, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v16i32, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v16i32, Custom);
setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v16i32, Custom);
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v16i32, Custom);
- setOperationAction(ISD::FP_EXTEND, MVT::v8f64, Custom);
- setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v8f64, Custom);
-
- setOperationAction(ISD::STRICT_FADD, MVT::v16f32, Legal);
- setOperationAction(ISD::STRICT_FADD, MVT::v8f64, Legal);
- setOperationAction(ISD::STRICT_FSUB, MVT::v16f32, Legal);
- setOperationAction(ISD::STRICT_FSUB, MVT::v8f64, Legal);
- setOperationAction(ISD::STRICT_FMUL, MVT::v16f32, Legal);
- setOperationAction(ISD::STRICT_FMUL, MVT::v8f64, Legal);
- setOperationAction(ISD::STRICT_FDIV, MVT::v16f32, Legal);
- setOperationAction(ISD::STRICT_FDIV, MVT::v8f64, Legal);
- setOperationAction(ISD::STRICT_FSQRT, MVT::v16f32, Legal);
- setOperationAction(ISD::STRICT_FSQRT, MVT::v8f64, Legal);
- setOperationAction(ISD::STRICT_FP_ROUND, MVT::v8f32, Legal);
-
- setTruncStoreAction(MVT::v8i64, MVT::v8i8, Legal);
- setTruncStoreAction(MVT::v8i64, MVT::v8i16, Legal);
- setTruncStoreAction(MVT::v8i64, MVT::v8i32, Legal);
- setTruncStoreAction(MVT::v16i32, MVT::v16i8, Legal);
- setTruncStoreAction(MVT::v16i32, MVT::v16i16, Legal);
+ setOperationAction(ISD::FP_EXTEND, MVT::v8f64, Custom);
+ setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v8f64, Custom);
+
+ setOperationAction(ISD::STRICT_FADD, MVT::v16f32, Legal);
+ setOperationAction(ISD::STRICT_FADD, MVT::v8f64, Legal);
+ setOperationAction(ISD::STRICT_FSUB, MVT::v16f32, Legal);
+ setOperationAction(ISD::STRICT_FSUB, MVT::v8f64, Legal);
+ setOperationAction(ISD::STRICT_FMUL, MVT::v16f32, Legal);
+ setOperationAction(ISD::STRICT_FMUL, MVT::v8f64, Legal);
+ setOperationAction(ISD::STRICT_FDIV, MVT::v16f32, Legal);
+ setOperationAction(ISD::STRICT_FDIV, MVT::v8f64, Legal);
+ setOperationAction(ISD::STRICT_FSQRT, MVT::v16f32, Legal);
+ setOperationAction(ISD::STRICT_FSQRT, MVT::v8f64, Legal);
+ setOperationAction(ISD::STRICT_FP_ROUND, MVT::v8f32, Legal);
+
+ setTruncStoreAction(MVT::v8i64, MVT::v8i8, Legal);
+ setTruncStoreAction(MVT::v8i64, MVT::v8i16, Legal);
+ setTruncStoreAction(MVT::v8i64, MVT::v8i32, Legal);
+ setTruncStoreAction(MVT::v16i32, MVT::v16i8, Legal);
+ setTruncStoreAction(MVT::v16i32, MVT::v16i16, Legal);
if (HasBWI)
- setTruncStoreAction(MVT::v32i16, MVT::v32i8, Legal);
+ setTruncStoreAction(MVT::v32i16, MVT::v32i8, Legal);
// With 512-bit vectors and no VLX, we prefer to widen MLOAD/MSTORE
// to 512-bit rather than use the AVX2 instructions so that we can use
// k-masks.
if (!Subtarget.hasVLX()) {
for (auto VT : {MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
- MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64}) {
- setOperationAction(ISD::MLOAD, VT, Custom);
+ MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64}) {
+ setOperationAction(ISD::MLOAD, VT, Custom);
setOperationAction(ISD::MSTORE, VT, Custom);
}
}
- setOperationAction(ISD::TRUNCATE, MVT::v8i32, Legal);
- setOperationAction(ISD::TRUNCATE, MVT::v16i16, Legal);
- setOperationAction(ISD::TRUNCATE, MVT::v32i8, HasBWI ? Legal : Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v8i32, Legal);
+ setOperationAction(ISD::TRUNCATE, MVT::v16i16, Legal);
+ setOperationAction(ISD::TRUNCATE, MVT::v32i8, HasBWI ? Legal : Custom);
setOperationAction(ISD::ZERO_EXTEND, MVT::v32i16, Custom);
setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom);
- setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom);
- setOperationAction(ISD::ANY_EXTEND, MVT::v32i16, Custom);
- setOperationAction(ISD::ANY_EXTEND, MVT::v16i32, Custom);
- setOperationAction(ISD::ANY_EXTEND, MVT::v8i64, Custom);
+ setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom);
+ setOperationAction(ISD::ANY_EXTEND, MVT::v32i16, Custom);
+ setOperationAction(ISD::ANY_EXTEND, MVT::v16i32, Custom);
+ setOperationAction(ISD::ANY_EXTEND, MVT::v8i64, Custom);
setOperationAction(ISD::SIGN_EXTEND, MVT::v32i16, Custom);
setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom);
- setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom);
+ setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom);
if (HasBWI) {
// Extends from v64i1 masks to 512-bit vectors.
- setOperationAction(ISD::SIGN_EXTEND, MVT::v64i8, Custom);
- setOperationAction(ISD::ZERO_EXTEND, MVT::v64i8, Custom);
- setOperationAction(ISD::ANY_EXTEND, MVT::v64i8, Custom);
- }
-
- for (auto VT : { MVT::v16f32, MVT::v8f64 }) {
- setOperationAction(ISD::FFLOOR, VT, Legal);
- setOperationAction(ISD::STRICT_FFLOOR, VT, Legal);
- setOperationAction(ISD::FCEIL, VT, Legal);
- setOperationAction(ISD::STRICT_FCEIL, VT, Legal);
- setOperationAction(ISD::FTRUNC, VT, Legal);
- setOperationAction(ISD::STRICT_FTRUNC, VT, Legal);
- setOperationAction(ISD::FRINT, VT, Legal);
- setOperationAction(ISD::STRICT_FRINT, VT, Legal);
- setOperationAction(ISD::FNEARBYINT, VT, Legal);
+ setOperationAction(ISD::SIGN_EXTEND, MVT::v64i8, Custom);
+ setOperationAction(ISD::ZERO_EXTEND, MVT::v64i8, Custom);
+ setOperationAction(ISD::ANY_EXTEND, MVT::v64i8, Custom);
+ }
+
+ for (auto VT : {MVT::v16f32, MVT::v8f64}) {
+ setOperationAction(ISD::FFLOOR, VT, Legal);
+ setOperationAction(ISD::STRICT_FFLOOR, VT, Legal);
+ setOperationAction(ISD::FCEIL, VT, Legal);
+ setOperationAction(ISD::STRICT_FCEIL, VT, Legal);
+ setOperationAction(ISD::FTRUNC, VT, Legal);
+ setOperationAction(ISD::STRICT_FTRUNC, VT, Legal);
+ setOperationAction(ISD::FRINT, VT, Legal);
+ setOperationAction(ISD::STRICT_FRINT, VT, Legal);
+ setOperationAction(ISD::FNEARBYINT, VT, Legal);
setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal);
- setOperationAction(ISD::FROUNDEVEN, VT, Legal);
+ setOperationAction(ISD::FROUNDEVEN, VT, Legal);
setOperationAction(ISD::STRICT_FROUNDEVEN, VT, Legal);
- setOperationAction(ISD::FROUND, VT, Custom);
+ setOperationAction(ISD::FROUND, VT, Custom);
}
for (auto VT : {MVT::v32i16, MVT::v16i32, MVT::v8i64}) {
@@ -1932,36 +1933,36 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::ADD, MVT::v32i16, HasBWI ? Legal : Custom);
setOperationAction(ISD::SUB, MVT::v32i16, HasBWI ? Legal : Custom);
- setOperationAction(ISD::ADD, MVT::v64i8, HasBWI ? Legal : Custom);
- setOperationAction(ISD::SUB, MVT::v64i8, HasBWI ? Legal : Custom);
+ setOperationAction(ISD::ADD, MVT::v64i8, HasBWI ? Legal : Custom);
+ setOperationAction(ISD::SUB, MVT::v64i8, HasBWI ? Legal : Custom);
- setOperationAction(ISD::MUL, MVT::v8i64, Custom);
+ setOperationAction(ISD::MUL, MVT::v8i64, Custom);
setOperationAction(ISD::MUL, MVT::v16i32, Legal);
setOperationAction(ISD::MUL, MVT::v32i16, HasBWI ? Legal : Custom);
- setOperationAction(ISD::MUL, MVT::v64i8, Custom);
+ setOperationAction(ISD::MUL, MVT::v64i8, Custom);
setOperationAction(ISD::MULHU, MVT::v16i32, Custom);
setOperationAction(ISD::MULHS, MVT::v16i32, Custom);
setOperationAction(ISD::MULHS, MVT::v32i16, HasBWI ? Legal : Custom);
setOperationAction(ISD::MULHU, MVT::v32i16, HasBWI ? Legal : Custom);
- setOperationAction(ISD::MULHS, MVT::v64i8, Custom);
- setOperationAction(ISD::MULHU, MVT::v64i8, Custom);
+ setOperationAction(ISD::MULHS, MVT::v64i8, Custom);
+ setOperationAction(ISD::MULHU, MVT::v64i8, Custom);
setOperationAction(ISD::AVGCEILU, MVT::v32i16, HasBWI ? Legal : Custom);
- setOperationAction(ISD::AVGCEILU, MVT::v64i8, HasBWI ? Legal : Custom);
+ setOperationAction(ISD::AVGCEILU, MVT::v64i8, HasBWI ? Legal : Custom);
setOperationAction(ISD::SMULO, MVT::v64i8, Custom);
setOperationAction(ISD::UMULO, MVT::v64i8, Custom);
- for (auto VT : { MVT::v64i8, MVT::v32i16, MVT::v16i32, MVT::v8i64 }) {
- setOperationAction(ISD::SRL, VT, Custom);
- setOperationAction(ISD::SHL, VT, Custom);
- setOperationAction(ISD::SRA, VT, Custom);
- setOperationAction(ISD::ROTL, VT, Custom);
- setOperationAction(ISD::ROTR, VT, Custom);
- setOperationAction(ISD::SETCC, VT, Custom);
- setOperationAction(ISD::ABDS, VT, Custom);
- setOperationAction(ISD::ABDU, VT, Custom);
- setOperationAction(ISD::BITREVERSE, VT, Custom);
+ for (auto VT : {MVT::v64i8, MVT::v32i16, MVT::v16i32, MVT::v8i64}) {
+ setOperationAction(ISD::SRL, VT, Custom);
+ setOperationAction(ISD::SHL, VT, Custom);
+ setOperationAction(ISD::SRA, VT, Custom);
+ setOperationAction(ISD::ROTL, VT, Custom);
+ setOperationAction(ISD::ROTR, VT, Custom);
+ setOperationAction(ISD::SETCC, VT, Custom);
+ setOperationAction(ISD::ABDS, VT, Custom);
+ setOperationAction(ISD::ABDU, VT, Custom);
+ setOperationAction(ISD::BITREVERSE, VT, Custom);
// The condition codes aren't legal in SSE/AVX and under AVX512 we use
// setcc all the way to isel and prefer SETGT in some isel patterns.
@@ -1969,82 +1970,83 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setCondCodeAction(ISD::SETLE, VT, Custom);
}
- setOperationAction(ISD::SETCC, MVT::v8f64, Custom);
- setOperationAction(ISD::SETCC, MVT::v16f32, Custom);
- setOperationAction(ISD::STRICT_FSETCC, MVT::v8f64, Custom);
- setOperationAction(ISD::STRICT_FSETCC, MVT::v16f32, Custom);
+ setOperationAction(ISD::SETCC, MVT::v8f64, Custom);
+ setOperationAction(ISD::SETCC, MVT::v16f32, Custom);
+ setOperationAction(ISD::STRICT_FSETCC, MVT::v8f64, Custom);
+ setOperationAction(ISD::STRICT_FSETCC, MVT::v16f32, Custom);
setOperationAction(ISD::STRICT_FSETCCS, MVT::v8f64, Custom);
setOperationAction(ISD::STRICT_FSETCCS, MVT::v16f32, Custom);
- for (auto VT : { MVT::v16i32, MVT::v8i64 }) {
- setOperationAction(ISD::SMAX, VT, Legal);
- setOperationAction(ISD::UMAX, VT, Legal);
- setOperationAction(ISD::SMIN, VT, Legal);
- setOperationAction(ISD::UMIN, VT, Legal);
- setOperationAction(ISD::ABS, VT, Legal);
- setOperationAction(ISD::CTPOP, VT, Custom);
- }
-
- for (auto VT : { MVT::v64i8, MVT::v32i16 }) {
- setOperationAction(ISD::ABS, VT, HasBWI ? Legal : Custom);
- setOperationAction(ISD::CTPOP, VT, Subtarget.hasBITALG() ? Legal : Custom);
- setOperationAction(ISD::CTLZ, VT, Custom);
- setOperationAction(ISD::SMAX, VT, HasBWI ? Legal : Custom);
- setOperationAction(ISD::UMAX, VT, HasBWI ? Legal : Custom);
- setOperationAction(ISD::SMIN, VT, HasBWI ? Legal : Custom);
- setOperationAction(ISD::UMIN, VT, HasBWI ? Legal : Custom);
+ for (auto VT : {MVT::v16i32, MVT::v8i64}) {
+ setOperationAction(ISD::SMAX, VT, Legal);
+ setOperationAction(ISD::UMAX, VT, Legal);
+ setOperationAction(ISD::SMIN, VT, Legal);
+ setOperationAction(ISD::UMIN, VT, Legal);
+ setOperationAction(ISD::ABS, VT, Legal);
+ setOperationAction(ISD::CTPOP, VT, Custom);
+ }
+
+ for (auto VT : {MVT::v64i8, MVT::v32i16}) {
+ setOperationAction(ISD::ABS, VT, HasBWI ? Legal : Custom);
+ setOperationAction(ISD::CTPOP, VT,
+ Subtarget.hasBITALG() ? Legal : Custom);
+ setOperationAction(ISD::CTLZ, VT, Custom);
+ setOperationAction(ISD::SMAX, VT, HasBWI ? Legal : Custom);
+ setOperationAction(ISD::UMAX, VT, HasBWI ? Legal : Custom);
+ setOperationAction(ISD::SMIN, VT, HasBWI ? Legal : Custom);
+ setOperationAction(ISD::UMIN, VT, HasBWI ? Legal : Custom);
setOperationAction(ISD::UADDSAT, VT, HasBWI ? Legal : Custom);
setOperationAction(ISD::SADDSAT, VT, HasBWI ? Legal : Custom);
setOperationAction(ISD::USUBSAT, VT, HasBWI ? Legal : Custom);
setOperationAction(ISD::SSUBSAT, VT, HasBWI ? Legal : Custom);
}
- setOperationAction(ISD::FSHL, MVT::v64i8, Custom);
- setOperationAction(ISD::FSHR, MVT::v64i8, Custom);
- setOperationAction(ISD::FSHL, MVT::v32i16, Custom);
- setOperationAction(ISD::FSHR, MVT::v32i16, Custom);
- setOperationAction(ISD::FSHL, MVT::v16i32, Custom);
- setOperationAction(ISD::FSHR, MVT::v16i32, Custom);
+ setOperationAction(ISD::FSHL, MVT::v64i8, Custom);
+ setOperationAction(ISD::FSHR, MVT::v64i8, Custom);
+ setOperationAction(ISD::FSHL, MVT::v32i16, Custom);
+ setOperationAction(ISD::FSHR, MVT::v32i16, Custom);
+ setOperationAction(ISD::FSHL, MVT::v16i32, Custom);
+ setOperationAction(ISD::FSHR, MVT::v16i32, Custom);
if (Subtarget.hasDQI() || Subtarget.hasFP16())
for (auto Opc : {ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::STRICT_SINT_TO_FP,
ISD::STRICT_UINT_TO_FP, ISD::FP_TO_SINT, ISD::FP_TO_UINT,
ISD::STRICT_FP_TO_SINT, ISD::STRICT_FP_TO_UINT})
- setOperationAction(Opc, MVT::v8i64, Custom);
+ setOperationAction(Opc, MVT::v8i64, Custom);
if (Subtarget.hasDQI())
- setOperationAction(ISD::MUL, MVT::v8i64, Legal);
+ setOperationAction(ISD::MUL, MVT::v8i64, Legal);
if (Subtarget.hasCDI()) {
// NonVLX sub-targets extend 128/256 vectors to use the 512 version.
- for (auto VT : { MVT::v16i32, MVT::v8i64} ) {
- setOperationAction(ISD::CTLZ, VT, Legal);
+ for (auto VT : {MVT::v16i32, MVT::v8i64}) {
+ setOperationAction(ISD::CTLZ, VT, Legal);
}
} // Subtarget.hasCDI()
if (Subtarget.hasVPOPCNTDQ()) {
- for (auto VT : { MVT::v16i32, MVT::v8i64 })
+ for (auto VT : {MVT::v16i32, MVT::v8i64})
setOperationAction(ISD::CTPOP, VT, Legal);
}
// Extract subvector is special because the value type
// (result) is 256-bit but the source is 512-bit wide.
// 128-bit was made Legal under AVX1.
- for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64,
- MVT::v16f16, MVT::v8f32, MVT::v4f64 })
+ for (auto VT : {MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64,
+ MVT::v16f16, MVT::v8f32, MVT::v4f64})
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
- for (auto VT : { MVT::v64i8, MVT::v32i16, MVT::v16i32, MVT::v8i64,
- MVT::v32f16, MVT::v16f32, MVT::v8f64 }) {
- setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
- setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal);
- setOperationAction(ISD::SELECT, VT, Custom);
- setOperationAction(ISD::VSELECT, VT, Custom);
- setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
+ for (auto VT : {MVT::v64i8, MVT::v32i16, MVT::v16i32, MVT::v8i64,
+ MVT::v32f16, MVT::v16f32, MVT::v8f64}) {
+ setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
+ setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal);
+ setOperationAction(ISD::SELECT, VT, Custom);
+ setOperationAction(ISD::VSELECT, VT, Custom);
+ setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
- setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
- setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
- setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
}
setF16Action(MVT::v32f16, Expand);
setOperationAction(ISD::FP_ROUND, MVT::v16f16, Custom);
@@ -2055,20 +2057,20 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationPromotedToType(Opc, MVT::v32f16, MVT::v32f32);
setOperationAction(ISD::SETCC, MVT::v32f16, Custom);
- for (auto VT : { MVT::v16i32, MVT::v8i64, MVT::v16f32, MVT::v8f64 }) {
- setOperationAction(ISD::MLOAD, VT, Legal);
- setOperationAction(ISD::MSTORE, VT, Legal);
- setOperationAction(ISD::MGATHER, VT, Custom);
- setOperationAction(ISD::MSCATTER, VT, Custom);
+ for (auto VT : {MVT::v16i32, MVT::v8i64, MVT::v16f32, MVT::v8f64}) {
+ setOperationAction(ISD::MLOAD, VT, Legal);
+ setOperationAction(ISD::MSTORE, VT, Legal);
+ setOperationAction(ISD::MGATHER, VT, Custom);
+ setOperationAction(ISD::MSCATTER, VT, Custom);
}
if (HasBWI) {
- for (auto VT : { MVT::v64i8, MVT::v32i16 }) {
- setOperationAction(ISD::MLOAD, VT, Legal);
- setOperationAction(ISD::MSTORE, VT, Legal);
+ for (auto VT : {MVT::v64i8, MVT::v32i16}) {
+ setOperationAction(ISD::MLOAD, VT, Legal);
+ setOperationAction(ISD::MSTORE, VT, Legal);
}
} else {
setOperationAction(ISD::STORE, MVT::v32i16, Custom);
- setOperationAction(ISD::STORE, MVT::v64i8, Custom);
+ setOperationAction(ISD::STORE, MVT::v64i8, Custom);
}
if (Subtarget.hasVBMI2()) {
@@ -2084,7 +2086,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FNEG, MVT::v32f16, Custom);
setOperationAction(ISD::FABS, MVT::v32f16, Custom);
setOperationAction(ISD::FCOPYSIGN, MVT::v32f16, Custom);
- }// useAVX512Regs
+ } // useAVX512Regs
if (!Subtarget.useSoftFloat() && Subtarget.hasVBMI2()) {
for (auto VT : {MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v16i16, MVT::v8i32,
@@ -2105,9 +2107,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// These operations are handled on non-VLX by artificially widening in
// isel patterns.
- setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v8i32, Custom);
- setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4i32, Custom);
- setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i32, Custom);
+ setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v8i32, Custom);
+ setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4i32, Custom);
+ setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i32, Custom);
if (Subtarget.hasDQI()) {
// Fast v2f32 SINT_TO_FP( v2i64 ) custom conversion.
@@ -2116,31 +2118,31 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
isOperationCustom(ISD::STRICT_UINT_TO_FP, MVT::v2f32) &&
"Unexpected operation action!");
// v2i64 FP_TO_S/UINT(v2f32) custom conversion.
- setOperationAction(ISD::FP_TO_SINT, MVT::v2f32, Custom);
- setOperationAction(ISD::FP_TO_UINT, MVT::v2f32, Custom);
+ setOperationAction(ISD::FP_TO_SINT, MVT::v2f32, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v2f32, Custom);
setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2f32, Custom);
setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2f32, Custom);
}
- for (auto VT : { MVT::v2i64, MVT::v4i64 }) {
+ for (auto VT : {MVT::v2i64, MVT::v4i64}) {
setOperationAction(ISD::SMAX, VT, Legal);
setOperationAction(ISD::UMAX, VT, Legal);
setOperationAction(ISD::SMIN, VT, Legal);
setOperationAction(ISD::UMIN, VT, Legal);
- setOperationAction(ISD::ABS, VT, Legal);
+ setOperationAction(ISD::ABS, VT, Legal);
}
- for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 }) {
- setOperationAction(ISD::ROTL, VT, Custom);
- setOperationAction(ISD::ROTR, VT, Custom);
+ for (auto VT : {MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64}) {
+ setOperationAction(ISD::ROTL, VT, Custom);
+ setOperationAction(ISD::ROTR, VT, Custom);
}
// Custom legalize 2x32 to get a little better code.
setOperationAction(ISD::MSCATTER, MVT::v2f32, Custom);
setOperationAction(ISD::MSCATTER, MVT::v2i32, Custom);
- for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
- MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 })
+ for (auto VT : {MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64, MVT::v4f32,
+ MVT::v8f32, MVT::v2f64, MVT::v4f64})
setOperationAction(ISD::MSCATTER, VT, Custom);
if (Subtarget.hasDQI()) {
@@ -2163,13 +2165,13 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Custom);
setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Custom);
}
- for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 }) {
- setOperationAction(ISD::CTLZ, VT, Legal);
+ for (auto VT : {MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64}) {
+ setOperationAction(ISD::CTLZ, VT, Legal);
}
} // Subtarget.hasCDI()
if (Subtarget.hasVPOPCNTDQ()) {
- for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 })
+ for (auto VT : {MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64})
setOperationAction(ISD::CTPOP, VT, Legal);
}
@@ -2206,33 +2208,33 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// This block controls legalization of v32i1/v64i1 which are available with
// AVX512BW.
if (!Subtarget.useSoftFloat() && Subtarget.hasBWI()) {
- addRegisterClass(MVT::v32i1, &X86::VK32RegClass);
- addRegisterClass(MVT::v64i1, &X86::VK64RegClass);
+ addRegisterClass(MVT::v32i1, &X86::VK32RegClass);
+ addRegisterClass(MVT::v64i1, &X86::VK64RegClass);
- for (auto VT : { MVT::v32i1, MVT::v64i1 }) {
- setOperationAction(ISD::VSELECT, VT, Expand);
- setOperationAction(ISD::TRUNCATE, VT, Custom);
- setOperationAction(ISD::SETCC, VT, Custom);
+ for (auto VT : {MVT::v32i1, MVT::v64i1}) {
+ setOperationAction(ISD::VSELECT, VT, Expand);
+ setOperationAction(ISD::TRUNCATE, VT, Custom);
+ setOperationAction(ISD::SETCC, VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
- setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
- setOperationAction(ISD::SELECT, VT, Custom);
- setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
- setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
- setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
- setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
+ setOperationAction(ISD::SELECT, VT, Custom);
+ setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
+ setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
}
- for (auto VT : { MVT::v16i1, MVT::v32i1 })
+ for (auto VT : {MVT::v16i1, MVT::v32i1})
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
// Extends from v32i1 masks to 256-bit vectors.
- setOperationAction(ISD::SIGN_EXTEND, MVT::v32i8, Custom);
- setOperationAction(ISD::ZERO_EXTEND, MVT::v32i8, Custom);
- setOperationAction(ISD::ANY_EXTEND, MVT::v32i8, Custom);
+ setOperationAction(ISD::SIGN_EXTEND, MVT::v32i8, Custom);
+ setOperationAction(ISD::ZERO_EXTEND, MVT::v32i8, Custom);
+ setOperationAction(ISD::ANY_EXTEND, MVT::v32i8, Custom);
for (auto VT : {MVT::v32i8, MVT::v16i8, MVT::v16i16, MVT::v8i16,
MVT::v16f16, MVT::v8f16}) {
- setOperationAction(ISD::MLOAD, VT, Subtarget.hasVLX() ? Legal : Custom);
+ setOperationAction(ISD::MLOAD, VT, Subtarget.hasVLX() ? Legal : Custom);
setOperationAction(ISD::MSTORE, VT, Subtarget.hasVLX() ? Legal : Custom);
}
@@ -2241,119 +2243,119 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// TODO: Custom widen in lowering on non-VLX and drop the isel patterns?
if (Subtarget.hasBITALG()) {
- for (auto VT : { MVT::v16i8, MVT::v32i8, MVT::v8i16, MVT::v16i16 })
+ for (auto VT : {MVT::v16i8, MVT::v32i8, MVT::v8i16, MVT::v16i16})
setOperationAction(ISD::CTPOP, VT, Legal);
}
}
if (!Subtarget.useSoftFloat() && Subtarget.hasFP16()) {
- auto setGroup = [&] (MVT VT) {
- setOperationAction(ISD::FADD, VT, Legal);
- setOperationAction(ISD::STRICT_FADD, VT, Legal);
- setOperationAction(ISD::FSUB, VT, Legal);
- setOperationAction(ISD::STRICT_FSUB, VT, Legal);
- setOperationAction(ISD::FMUL, VT, Legal);
- setOperationAction(ISD::STRICT_FMUL, VT, Legal);
- setOperationAction(ISD::FDIV, VT, Legal);
- setOperationAction(ISD::STRICT_FDIV, VT, Legal);
- setOperationAction(ISD::FSQRT, VT, Legal);
- setOperationAction(ISD::STRICT_FSQRT, VT, Legal);
-
- setOperationAction(ISD::FFLOOR, VT, Legal);
- setOperationAction(ISD::STRICT_FFLOOR, VT, Legal);
- setOperationAction(ISD::FCEIL, VT, Legal);
- setOperationAction(ISD::STRICT_FCEIL, VT, Legal);
- setOperationAction(ISD::FTRUNC, VT, Legal);
- setOperationAction(ISD::STRICT_FTRUNC, VT, Legal);
- setOperationAction(ISD::FRINT, VT, Legal);
- setOperationAction(ISD::STRICT_FRINT, VT, Legal);
- setOperationAction(ISD::FNEARBYINT, VT, Legal);
- setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal);
+ auto setGroup = [&](MVT VT) {
+ setOperationAction(ISD::FADD, VT, Legal);
+ setOperationAction(ISD::STRICT_FADD, VT, Legal);
+ setOperationAction(ISD::FSUB, VT, Legal);
+ setOperationAction(ISD::STRICT_FSUB, VT, Legal);
+ setOperationAction(ISD::FMUL, VT, Legal);
+ setOperationAction(ISD::STRICT_FMUL, VT, Legal);
+ setOperationAction(ISD::FDIV, VT, Legal);
+ setOperationAction(ISD::STRICT_FDIV, VT, Legal);
+ setOperationAction(ISD::FSQRT, VT, Legal);
+ setOperationAction(ISD::STRICT_FSQRT, VT, Legal);
+
+ setOperationAction(ISD::FFLOOR, VT, Legal);
+ setOperationAction(ISD::STRICT_FFLOOR, VT, Legal);
+ setOperationAction(ISD::FCEIL, VT, Legal);
+ setOperationAction(ISD::STRICT_FCEIL, VT, Legal);
+ setOperationAction(ISD::FTRUNC, VT, Legal);
+ setOperationAction(ISD::STRICT_FTRUNC, VT, Legal);
+ setOperationAction(ISD::FRINT, VT, Legal);
+ setOperationAction(ISD::STRICT_FRINT, VT, Legal);
+ setOperationAction(ISD::FNEARBYINT, VT, Legal);
+ setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal);
setOperationAction(ISD::FROUNDEVEN, VT, Legal);
setOperationAction(ISD::STRICT_FROUNDEVEN, VT, Legal);
- setOperationAction(ISD::FROUND, VT, Custom);
+ setOperationAction(ISD::FROUND, VT, Custom);
- setOperationAction(ISD::LOAD, VT, Legal);
- setOperationAction(ISD::STORE, VT, Legal);
+ setOperationAction(ISD::LOAD, VT, Legal);
+ setOperationAction(ISD::STORE, VT, Legal);
- setOperationAction(ISD::FMA, VT, Legal);
- setOperationAction(ISD::STRICT_FMA, VT, Legal);
- setOperationAction(ISD::VSELECT, VT, Legal);
- setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
- setOperationAction(ISD::SELECT, VT, Custom);
+ setOperationAction(ISD::FMA, VT, Legal);
+ setOperationAction(ISD::STRICT_FMA, VT, Legal);
+ setOperationAction(ISD::VSELECT, VT, Legal);
+ setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
+ setOperationAction(ISD::SELECT, VT, Custom);
- setOperationAction(ISD::FNEG, VT, Custom);
- setOperationAction(ISD::FABS, VT, Custom);
- setOperationAction(ISD::FCOPYSIGN, VT, Custom);
+ setOperationAction(ISD::FNEG, VT, Custom);
+ setOperationAction(ISD::FABS, VT, Custom);
+ setOperationAction(ISD::FCOPYSIGN, VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
- setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
- setOperationAction(ISD::SETCC, VT, Custom);
- setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
- setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
+ setOperationAction(ISD::SETCC, VT, Custom);
+ setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
+ setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
};
// AVX512_FP16 scalar operations
setGroup(MVT::f16);
- setOperationAction(ISD::FREM, MVT::f16, Promote);
- setOperationAction(ISD::STRICT_FREM, MVT::f16, Promote);
- setOperationAction(ISD::SELECT_CC, MVT::f16, Expand);
- setOperationAction(ISD::BR_CC, MVT::f16, Expand);
- setOperationAction(ISD::STRICT_FROUND, MVT::f16, Promote);
- setOperationAction(ISD::FROUNDEVEN, MVT::f16, Legal);
- setOperationAction(ISD::STRICT_FROUNDEVEN, MVT::f16, Legal);
- setOperationAction(ISD::FP_ROUND, MVT::f16, Custom);
- setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Custom);
- setOperationAction(ISD::FMAXIMUM, MVT::f16, Custom);
- setOperationAction(ISD::FMINIMUM, MVT::f16, Custom);
- setOperationAction(ISD::FMAXIMUMNUM, MVT::f16, Custom);
- setOperationAction(ISD::FMINIMUMNUM, MVT::f16, Custom);
- setOperationAction(ISD::FP_EXTEND, MVT::f32, Legal);
- setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Legal);
- setOperationAction(ISD::LRINT, MVT::f16, Legal);
- setOperationAction(ISD::LLRINT, MVT::f16, Legal);
+ setOperationAction(ISD::FREM, MVT::f16, Promote);
+ setOperationAction(ISD::STRICT_FREM, MVT::f16, Promote);
+ setOperationAction(ISD::SELECT_CC, MVT::f16, Expand);
+ setOperationAction(ISD::BR_CC, MVT::f16, Expand);
+ setOperationAction(ISD::STRICT_FROUND, MVT::f16, Promote);
+ setOperationAction(ISD::FROUNDEVEN, MVT::f16, Legal);
+ setOperationAction(ISD::STRICT_FROUNDEVEN, MVT::f16, Legal);
+ setOperationAction(ISD::FP_ROUND, MVT::f16, Custom);
+ setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Custom);
+ setOperationAction(ISD::FMAXIMUM, MVT::f16, Custom);
+ setOperationAction(ISD::FMINIMUM, MVT::f16, Custom);
+ setOperationAction(ISD::FMAXIMUMNUM, MVT::f16, Custom);
+ setOperationAction(ISD::FMINIMUMNUM, MVT::f16, Custom);
+ setOperationAction(ISD::FP_EXTEND, MVT::f32, Legal);
+ setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Legal);
+ setOperationAction(ISD::LRINT, MVT::f16, Legal);
+ setOperationAction(ISD::LLRINT, MVT::f16, Legal);
setCondCodeAction(ISD::SETOEQ, MVT::f16, Expand);
setCondCodeAction(ISD::SETUNE, MVT::f16, Expand);
if (Subtarget.useAVX512Regs()) {
setGroup(MVT::v32f16);
- setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v32f16, Custom);
- setOperationAction(ISD::SINT_TO_FP, MVT::v32i16, Legal);
- setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v32i16, Legal);
- setOperationAction(ISD::UINT_TO_FP, MVT::v32i16, Legal);
- setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v32i16, Legal);
- setOperationAction(ISD::FP_ROUND, MVT::v16f16, Legal);
- setOperationAction(ISD::STRICT_FP_ROUND, MVT::v16f16, Legal);
- setOperationAction(ISD::FP_EXTEND, MVT::v16f32, Custom);
- setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v16f32, Legal);
- setOperationAction(ISD::FP_EXTEND, MVT::v8f64, Custom);
- setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v8f64, Legal);
- setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v32f16, Custom);
-
- setOperationAction(ISD::FP_TO_SINT, MVT::v32i16, Custom);
- setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v32i16, Custom);
- setOperationAction(ISD::FP_TO_UINT, MVT::v32i16, Custom);
- setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v32i16, Custom);
- setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v32i8, MVT::v32i16);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v32f16, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v32i16, Legal);
+ setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v32i16, Legal);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v32i16, Legal);
+ setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v32i16, Legal);
+ setOperationAction(ISD::FP_ROUND, MVT::v16f16, Legal);
+ setOperationAction(ISD::STRICT_FP_ROUND, MVT::v16f16, Legal);
+ setOperationAction(ISD::FP_EXTEND, MVT::v16f32, Custom);
+ setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v16f32, Legal);
+ setOperationAction(ISD::FP_EXTEND, MVT::v8f64, Custom);
+ setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v8f64, Legal);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v32f16, Custom);
+
+ setOperationAction(ISD::FP_TO_SINT, MVT::v32i16, Custom);
+ setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v32i16, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v32i16, Custom);
+ setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v32i16, Custom);
+ setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v32i8, MVT::v32i16);
setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::v32i8,
MVT::v32i16);
- setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v32i8, MVT::v32i16);
+ setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v32i8, MVT::v32i16);
setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::v32i8,
MVT::v32i16);
- setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v32i1, MVT::v32i16);
+ setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v32i1, MVT::v32i16);
setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::v32i1,
MVT::v32i16);
- setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v32i1, MVT::v32i16);
+ setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v32i1, MVT::v32i16);
setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::v32i1,
MVT::v32i16);
- setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v16f16, Legal);
- setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32f16, Legal);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v32f16, Custom);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v16f16, Legal);
+ setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32f16, Legal);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v32f16, Custom);
- setLoadExtAction(ISD::EXTLOAD, MVT::v8f64, MVT::v8f16, Legal);
+ setLoadExtAction(ISD::EXTLOAD, MVT::v8f64, MVT::v8f16, Legal);
setLoadExtAction(ISD::EXTLOAD, MVT::v16f32, MVT::v16f16, Legal);
setOperationAction(ISD::FMINIMUM, MVT::v32f16, Custom);
@@ -2364,40 +2366,40 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::LLRINT, MVT::v8f16, Legal);
}
- setOperationAction(ISD::FP_TO_SINT, MVT::v8i16, Custom);
- setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v8i16, Custom);
- setOperationAction(ISD::FP_TO_UINT, MVT::v8i16, Custom);
- setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v8i16, Custom);
+ setOperationAction(ISD::FP_TO_SINT, MVT::v8i16, Custom);
+ setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v8i16, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v8i16, Custom);
+ setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v8i16, Custom);
if (Subtarget.hasVLX()) {
setGroup(MVT::v8f16);
setGroup(MVT::v16f16);
- setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8f16, Legal);
- setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16f16, Custom);
- setOperationAction(ISD::SINT_TO_FP, MVT::v16i16, Legal);
- setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v16i16, Legal);
- setOperationAction(ISD::SINT_TO_FP, MVT::v8i16, Legal);
- setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v8i16, Legal);
- setOperationAction(ISD::UINT_TO_FP, MVT::v16i16, Legal);
- setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v16i16, Legal);
- setOperationAction(ISD::UINT_TO_FP, MVT::v8i16, Legal);
- setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v8i16, Legal);
-
- setOperationAction(ISD::FP_ROUND, MVT::v8f16, Legal);
- setOperationAction(ISD::STRICT_FP_ROUND, MVT::v8f16, Legal);
- setOperationAction(ISD::FP_EXTEND, MVT::v8f32, Custom);
- setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v8f32, Legal);
- setOperationAction(ISD::FP_EXTEND, MVT::v4f64, Custom);
- setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v4f64, Legal);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8f16, Legal);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16f16, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v16i16, Legal);
+ setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v16i16, Legal);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v8i16, Legal);
+ setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v8i16, Legal);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v16i16, Legal);
+ setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v16i16, Legal);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v8i16, Legal);
+ setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v8i16, Legal);
+
+ setOperationAction(ISD::FP_ROUND, MVT::v8f16, Legal);
+ setOperationAction(ISD::STRICT_FP_ROUND, MVT::v8f16, Legal);
+ setOperationAction(ISD::FP_EXTEND, MVT::v8f32, Custom);
+ setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v8f32, Legal);
+ setOperationAction(ISD::FP_EXTEND, MVT::v4f64, Custom);
+ setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v4f64, Legal);
// INSERT_VECTOR_ELT v8f16 extended to VECTOR_SHUFFLE
- setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8f16, Custom);
- setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16f16, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8f16, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16f16, Custom);
- setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v8f16, Legal);
- setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v16f16, Legal);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v16f16, Custom);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v8f16, Legal);
+ setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v16f16, Legal);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v16f16, Custom);
setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f16, Legal);
setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f16, Legal);
@@ -2405,7 +2407,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setLoadExtAction(ISD::EXTLOAD, MVT::v4f32, MVT::v4f16, Legal);
// Need to custom widen these to prevent scalarization.
- setOperationAction(ISD::LOAD, MVT::v4f16, Custom);
+ setOperationAction(ISD::LOAD, MVT::v4f16, Custom);
setOperationAction(ISD::STORE, MVT::v4f16, Custom);
setOperationAction(ISD::FMINIMUM, MVT::v8f16, Custom);
@@ -2498,52 +2500,52 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
}
if (!Subtarget.useSoftFloat() && Subtarget.hasVLX()) {
- setTruncStoreAction(MVT::v4i64, MVT::v4i8, Legal);
+ setTruncStoreAction(MVT::v4i64, MVT::v4i8, Legal);
setTruncStoreAction(MVT::v4i64, MVT::v4i16, Legal);
setTruncStoreAction(MVT::v4i64, MVT::v4i32, Legal);
- setTruncStoreAction(MVT::v8i32, MVT::v8i8, Legal);
+ setTruncStoreAction(MVT::v8i32, MVT::v8i8, Legal);
setTruncStoreAction(MVT::v8i32, MVT::v8i16, Legal);
- setTruncStoreAction(MVT::v2i64, MVT::v2i8, Legal);
+ setTruncStoreAction(MVT::v2i64, MVT::v2i8, Legal);
setTruncStoreAction(MVT::v2i64, MVT::v2i16, Legal);
setTruncStoreAction(MVT::v2i64, MVT::v2i32, Legal);
- setTruncStoreAction(MVT::v4i32, MVT::v4i8, Legal);
+ setTruncStoreAction(MVT::v4i32, MVT::v4i8, Legal);
setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal);
if (Subtarget.hasBWI()) {
- setTruncStoreAction(MVT::v16i16, MVT::v16i8, Legal);
- setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal);
+ setTruncStoreAction(MVT::v16i16, MVT::v16i8, Legal);
+ setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal);
}
if (Subtarget.hasFP16()) {
// vcvttph2[u]dq v4f16 -> v4i32/64, v2f16 -> v2i32/64
- setOperationAction(ISD::FP_TO_SINT, MVT::v2f16, Custom);
+ setOperationAction(ISD::FP_TO_SINT, MVT::v2f16, Custom);
setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2f16, Custom);
- setOperationAction(ISD::FP_TO_UINT, MVT::v2f16, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v2f16, Custom);
setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2f16, Custom);
- setOperationAction(ISD::FP_TO_SINT, MVT::v4f16, Custom);
+ setOperationAction(ISD::FP_TO_SINT, MVT::v4f16, Custom);
setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4f16, Custom);
- setOperationAction(ISD::FP_TO_UINT, MVT::v4f16, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v4f16, Custom);
setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4f16, Custom);
// vcvt[u]dq2ph v4i32/64 -> v4f16, v2i32/64 -> v2f16
- setOperationAction(ISD::SINT_TO_FP, MVT::v2f16, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v2f16, Custom);
setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2f16, Custom);
- setOperationAction(ISD::UINT_TO_FP, MVT::v2f16, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v2f16, Custom);
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2f16, Custom);
- setOperationAction(ISD::SINT_TO_FP, MVT::v4f16, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v4f16, Custom);
setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4f16, Custom);
- setOperationAction(ISD::UINT_TO_FP, MVT::v4f16, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v4f16, Custom);
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4f16, Custom);
// vcvtps2phx v4f32 -> v4f16, v2f32 -> v2f16
- setOperationAction(ISD::FP_ROUND, MVT::v2f16, Custom);
- setOperationAction(ISD::STRICT_FP_ROUND, MVT::v2f16, Custom);
- setOperationAction(ISD::FP_ROUND, MVT::v4f16, Custom);
- setOperationAction(ISD::STRICT_FP_ROUND, MVT::v4f16, Custom);
+ setOperationAction(ISD::FP_ROUND, MVT::v2f16, Custom);
+ setOperationAction(ISD::STRICT_FP_ROUND, MVT::v2f16, Custom);
+ setOperationAction(ISD::FP_ROUND, MVT::v4f16, Custom);
+ setOperationAction(ISD::STRICT_FP_ROUND, MVT::v4f16, Custom);
// vcvtph2psx v4f16 -> v4f32, v2f16 -> v2f32
- setOperationAction(ISD::FP_EXTEND, MVT::v2f16, Custom);
- setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v2f16, Custom);
- setOperationAction(ISD::FP_EXTEND, MVT::v4f16, Custom);
- setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v4f16, Custom);
+ setOperationAction(ISD::FP_EXTEND, MVT::v2f16, Custom);
+ setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v2f16, Custom);
+ setOperationAction(ISD::FP_EXTEND, MVT::v4f16, Custom);
+ setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v4f16, Custom);
}
}
@@ -2565,7 +2567,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// FIXME: We really should do custom legalization for addition and
// subtraction on x86-32 once PR3203 is fixed. We really can't do much better
// than generic legalization for 64-bit multiplication-with-overflow, though.
- for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
+ for (auto VT : {MVT::i8, MVT::i16, MVT::i32, MVT::i64}) {
if (VT == MVT::i64 && !Subtarget.is64Bit())
continue;
// Add/Sub/Mul with overflow operations are custom lowered.
@@ -2852,8 +2854,9 @@ static bool isLogicOp(unsigned Opcode) {
}
static bool isTargetShuffle(unsigned Opcode) {
- switch(Opcode) {
- default: return false;
+ switch (Opcode) {
+ default:
+ return false;
case X86ISD::BLENDI:
case X86ISD::PSHUFB:
case X86ISD::PSHUFD:
@@ -2894,7 +2897,8 @@ static bool isTargetShuffle(unsigned Opcode) {
static bool isTargetShuffleVariableMask(unsigned Opcode) {
switch (Opcode) {
- default: return false;
+ default:
+ return false;
// Target Shuffles.
case X86ISD::PSHUFB:
case X86ISD::VPERMILPV:
@@ -2920,9 +2924,8 @@ SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const {
if (ReturnAddrIndex == 0) {
// Set up a frame object for the return address.
unsigned SlotSize = RegInfo->getSlotSize();
- ReturnAddrIndex = MF.getFrameInfo().CreateFixedObject(SlotSize,
- -(int64_t)SlotSize,
- false);
+ ReturnAddrIndex = MF.getFrameInfo().CreateFixedObject(
+ SlotSize, -(int64_t)SlotSize, false);
FuncInfo->setRAIndex(ReturnAddrIndex);
}
@@ -2980,7 +2983,7 @@ static bool isX86CCSigned(X86::CondCode X86CC) {
static X86::CondCode TranslateIntegerX86CC(ISD::CondCode SetCCOpcode) {
switch (SetCCOpcode) {
- // clang-format off
+ // clang-format off
default: llvm_unreachable("Invalid integer condition!");
case ISD::SETEQ: return X86::COND_E;
case ISD::SETGT: return X86::COND_G;
@@ -2992,7 +2995,7 @@ static X86::CondCode TranslateIntegerX86CC(ISD::CondCode SetCCOpcode) {
case ISD::SETUGT: return X86::COND_A;
case ISD::SETULE: return X86::COND_BE;
case ISD::SETUGE: return X86::COND_AE;
- // clang-format on
+ // clang-format on
}
}
@@ -3030,14 +3033,14 @@ static X86::CondCode TranslateX86CC(ISD::CondCode SetCCOpcode, const SDLoc &DL,
// First determine if it is required or is profitable to flip the operands.
// If LHS is a foldable load, but RHS is not, flip the condition.
- if (ISD::isNON_EXTLoad(LHS.getNode()) &&
- !ISD::isNON_EXTLoad(RHS.getNode())) {
+ if (ISD::isNON_EXTLoad(LHS.getNode()) && !ISD::isNON_EXTLoad(RHS.getNode())) {
SetCCOpcode = getSetCCSwappedOperands(SetCCOpcode);
std::swap(LHS, RHS);
}
switch (SetCCOpcode) {
- default: break;
+ default:
+ break;
case ISD::SETOLT:
case ISD::SETOLE:
case ISD::SETUGT:
@@ -3053,7 +3056,7 @@ static X86::CondCode TranslateX86CC(ISD::CondCode SetCCOpcode, const SDLoc &DL,
// 1 | 0 | 0 | X == Y
// 1 | 1 | 1 | unordered
switch (SetCCOpcode) {
- // clang-format off
+ // clang-format off
default: llvm_unreachable("Condcode should be pre-legalized away");
case ISD::SETUEQ:
case ISD::SETEQ: return X86::COND_E;
@@ -3075,7 +3078,7 @@ static X86::CondCode TranslateX86CC(ISD::CondCode SetCCOpcode, const SDLoc &DL,
case ISD::SETO: return X86::COND_NP;
case ISD::SETOEQ:
case ISD::SETUNE: return X86::COND_INVALID;
- // clang-format on
+ // clang-format on
}
}
@@ -3110,7 +3113,7 @@ bool X86TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.flags = MachineMemOperand::MONone;
Info.offset = 0;
- const IntrinsicData* IntrData = getIntrinsicWithChain(Intrinsic);
+ const IntrinsicData *IntrData = getIntrinsicWithChain(Intrinsic);
if (!IntrData) {
switch (Intrinsic) {
case Intrinsic::x86_aesenc128kl:
@@ -3203,7 +3206,7 @@ bool X86TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
case TRUNCATE_TO_MEM_VI32: {
Info.opc = ISD::INTRINSIC_VOID;
Info.ptrVal = I.getArgOperand(0);
- MVT VT = MVT::getVT(I.getArgOperand(1)->getType());
+ MVT VT = MVT::getVT(I.getArgOperand(1)->getType());
MVT ScalarVT = MVT::INVALID_SIMPLE_VALUE_TYPE;
if (IntrData->Type == TRUNCATE_TO_MEM_VI8)
ScalarVT = MVT::i8;
@@ -3223,8 +3226,8 @@ bool X86TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.ptrVal = nullptr;
MVT DataVT = MVT::getVT(I.getType());
MVT IndexVT = MVT::getVT(I.getArgOperand(2)->getType());
- unsigned NumElts = std::min(DataVT.getVectorNumElements(),
- IndexVT.getVectorNumElements());
+ unsigned NumElts =
+ std::min(DataVT.getVectorNumElements(), IndexVT.getVectorNumElements());
Info.memVT = MVT::getVectorVT(DataVT.getVectorElementType(), NumElts);
Info.align = Align(1);
Info.flags |= MachineMemOperand::MOLoad;
@@ -3235,8 +3238,8 @@ bool X86TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.ptrVal = nullptr;
MVT DataVT = MVT::getVT(I.getArgOperand(3)->getType());
MVT IndexVT = MVT::getVT(I.getArgOperand(2)->getType());
- unsigned NumElts = std::min(DataVT.getVectorNumElements(),
- IndexVT.getVectorNumElements());
+ unsigned NumElts =
+ std::min(DataVT.getVectorNumElements(), IndexVT.getVectorNumElements());
Info.memVT = MVT::getVectorVT(DataVT.getVectorElementType(), NumElts);
Info.align = Align(1);
Info.flags |= MachineMemOperand::MOStore;
@@ -3395,8 +3398,9 @@ bool X86TargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
// Mask vectors support all subregister combinations and operations that
// extract half of vector.
if (ResVT.getVectorElementType() == MVT::i1)
- return Index == 0 || ((ResVT.getSizeInBits() == SrcVT.getSizeInBits()*2) &&
- (Index == ResVT.getVectorNumElements()));
+ return Index == 0 ||
+ ((ResVT.getSizeInBits() == SrcVT.getSizeInBits() * 2) &&
+ (Index == ResVT.getVectorNumElements()));
return (Index % ResVT.getVectorNumElements()) == 0;
}
@@ -3456,9 +3460,9 @@ bool X86TargetLowering::isScalarFPTypeInSSEReg(EVT VT) const {
(VT == MVT::f32 && Subtarget.hasSSE1()) || VT == MVT::f16;
}
-bool X86TargetLowering::isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT,
- const SelectionDAG &DAG,
- const MachineMemOperand &MMO) const {
+bool X86TargetLowering::isLoadBitCastBeneficial(
+ EVT LoadVT, EVT BitcastVT, const SelectionDAG &DAG,
+ const MachineMemOperand &MMO) const {
if (!Subtarget.hasAVX512() && !LoadVT.isVector() && BitcastVT.isVector() &&
BitcastVT.getVectorElementType() == MVT::i1)
return false;
@@ -3467,8 +3471,8 @@ bool X86TargetLowering::isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT,
return false;
// If both types are legal vectors, it's always ok to convert them.
- if (LoadVT.isVector() && BitcastVT.isVector() &&
- isTypeLegal(LoadVT) && isTypeLegal(BitcastVT))
+ if (LoadVT.isVector() && BitcastVT.isVector() && isTypeLegal(LoadVT) &&
+ isTypeLegal(BitcastVT))
return true;
// If we have a large vector type (even if illegal), don't bitcast to large
@@ -3498,9 +3502,7 @@ bool X86TargetLowering::canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
return true;
}
-bool X86TargetLowering::isCtlzFast() const {
- return Subtarget.hasFastLZCNT();
-}
+bool X86TargetLowering::isCtlzFast() const { return Subtarget.hasFastLZCNT(); }
bool X86TargetLowering::isMaskAndCmp0FoldingBeneficial(
const Instruction &AndI) const {
@@ -3929,8 +3931,7 @@ static bool canWidenShuffleElements(ArrayRef<int> Mask,
return true;
}
-static bool canWidenShuffleElements(ArrayRef<int> Mask,
- const APInt &Zeroable,
+static bool canWidenShuffleElements(ArrayRef<int> Mask, const APInt &Zeroable,
bool V2IsZero,
SmallVectorImpl<int> &WidenedMask) {
// Create an alternative mask with info about zeroable elements.
@@ -4014,7 +4015,7 @@ bool X86::isZeroNode(SDValue Elt) {
static SDValue getConstVector(ArrayRef<int> Values, MVT VT, SelectionDAG &DAG,
const SDLoc &dl, bool IsMask = false) {
- SmallVector<SDValue, 32> Ops;
+ SmallVector<SDValue, 32> Ops;
bool Split = false;
MVT ConstVecVT = VT;
@@ -4028,12 +4029,12 @@ static SDValue getConstVector(ArrayRef<int> Values, MVT VT, SelectionDAG &DAG,
MVT EltVT = ConstVecVT.getVectorElementType();
for (unsigned i = 0; i < NumElts; ++i) {
bool IsUndef = Values[i] < 0 && IsMask;
- SDValue OpNode = IsUndef ? DAG.getUNDEF(EltVT) :
- DAG.getConstant(Values[i], dl, EltVT);
+ SDValue OpNode =
+ IsUndef ? DAG.getUNDEF(EltVT) : DAG.getConstant(Values[i], dl, EltVT);
Ops.push_back(OpNode);
if (Split)
- Ops.push_back(IsUndef ? DAG.getUNDEF(EltVT) :
- DAG.getConstant(0, dl, EltVT));
+ Ops.push_back(IsUndef ? DAG.getUNDEF(EltVT)
+ : DAG.getConstant(0, dl, EltVT));
}
SDValue ConstsNode = DAG.getBuildVector(ConstVecVT, dl, Ops);
if (Split)
@@ -4041,8 +4042,8 @@ static SDValue getConstVector(ArrayRef<int> Values, MVT VT, SelectionDAG &DAG,
return ConstsNode;
}
-static SDValue getConstVector(ArrayRef<APInt> Bits, const APInt &Undefs,
- MVT VT, SelectionDAG &DAG, const SDLoc &dl) {
+static SDValue getConstVector(ArrayRef<APInt> Bits, const APInt &Undefs, MVT VT,
+ SelectionDAG &DAG, const SDLoc &dl) {
assert(Bits.size() == Undefs.getBitWidth() &&
"Unequal constant and undef arrays");
SmallVector<SDValue, 32> Ops;
@@ -4077,8 +4078,8 @@ static SDValue getConstVector(ArrayRef<APInt> Bits, const APInt &Undefs,
return DAG.getBitcast(VT, ConstsNode);
}
-static SDValue getConstVector(ArrayRef<APInt> Bits, MVT VT,
- SelectionDAG &DAG, const SDLoc &dl) {
+static SDValue getConstVector(ArrayRef<APInt> Bits, MVT VT, SelectionDAG &DAG,
+ const SDLoc &dl) {
APInt Undefs = APInt::getZero(Bits.size());
return getConstVector(Bits, Undefs, VT, DAG, dl);
}
@@ -4615,8 +4616,7 @@ static SDValue insert1BitVector(SDValue Op, SelectionDAG &DAG,
if (IdxVal == 0 && ISD::isBuildVectorAllZeros(Vec.getNode())) {
// May need to promote to a legal type.
Op = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
- DAG.getConstant(0, dl, WideOpVT),
- SubVec, Idx);
+ DAG.getConstant(0, dl, WideOpVT), SubVec, Idx);
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, Op, ZeroIdx);
}
@@ -4631,20 +4631,18 @@ static SDValue insert1BitVector(SDValue Op, SelectionDAG &DAG,
if (IdxVal == 0) {
// Zero lower bits of the Vec
SDValue ShiftBits = DAG.getTargetConstant(SubVecNumElems, dl, MVT::i8);
- Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, Undef, Vec,
- ZeroIdx);
+ Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, Undef, Vec, ZeroIdx);
Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec, ShiftBits);
Vec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, Vec, ShiftBits);
// Merge them together, SubVec should be zero extended.
SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
- DAG.getConstant(0, dl, WideOpVT),
- SubVec, ZeroIdx);
+ DAG.getConstant(0, dl, WideOpVT), SubVec, ZeroIdx);
Op = DAG.getNode(ISD::OR, dl, WideOpVT, Vec, SubVec);
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, Op, ZeroIdx);
}
- SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
- Undef, SubVec, ZeroIdx);
+ SubVec =
+ DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, Undef, SubVec, ZeroIdx);
if (Vec.isUndef()) {
assert(IdxVal != 0 && "Unexpected index");
@@ -4682,12 +4680,11 @@ static SDValue insert1BitVector(SDValue Op, SelectionDAG &DAG,
// isel to optimize when bits are known zero.
Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVecVT, Vec, ZeroIdx);
Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
- DAG.getConstant(0, dl, WideOpVT),
- Vec, ZeroIdx);
+ DAG.getConstant(0, dl, WideOpVT), Vec, ZeroIdx);
} else {
// Otherwise use explicit shifts to zero the bits.
- Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
- Undef, Vec, ZeroIdx);
+ Vec =
+ DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, Undef, Vec, ZeroIdx);
NumElems = WideOpVT.getVectorNumElements();
SDValue ShiftBits = DAG.getTargetConstant(NumElems - IdxVal, dl, MVT::i8);
Vec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, Vec, ShiftBits);
@@ -4740,9 +4737,9 @@ static SDValue insert1BitVector(SDValue Op, SelectionDAG &DAG,
// Isolate the bits after the last inserted bit.
unsigned HighShift = IdxVal + SubVecNumElems;
SDValue High = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec,
- DAG.getTargetConstant(HighShift, dl, MVT::i8));
+ DAG.getTargetConstant(HighShift, dl, MVT::i8));
High = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, High,
- DAG.getTargetConstant(HighShift, dl, MVT::i8));
+ DAG.getTargetConstant(HighShift, dl, MVT::i8));
// Now OR all 3 pieces together.
Vec = DAG.getNode(ISD::OR, dl, WideOpVT, Low, High);
@@ -4823,8 +4820,8 @@ static SDValue getBitSelect(const SDLoc &DL, MVT VT, SDValue LHS, SDValue RHS,
return DAG.getNode(ISD::OR, DL, VT, LHS, RHS);
}
-void llvm::createUnpackShuffleMask(EVT VT, SmallVectorImpl<int> &Mask,
- bool Lo, bool Unary) {
+void llvm::createUnpackShuffleMask(EVT VT, SmallVectorImpl<int> &Mask, bool Lo,
+ bool Unary) {
assert(VT.getScalarType().isSimple() && (VT.getSizeInBits() % 128) == 0 &&
"Illegal vector type to unpack");
assert(Mask.empty() && "Expected an empty shuffle mask vector");
@@ -4961,13 +4958,12 @@ static SDValue getPack(SelectionDAG &DAG, const X86Subtarget &Subtarget,
/// This produces a shuffle where the low element of V2 is swizzled into the
/// zero/undef vector, landing at element Idx.
/// This produces a shuffle mask like 4,1,2,3 (idx=0) or 0,1,2,4 (idx=3).
-static SDValue getShuffleVectorZeroOrUndef(SDValue V2, int Idx,
- bool IsZero,
+static SDValue getShuffleVectorZeroOrUndef(SDValue V2, int Idx, bool IsZero,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
MVT VT = V2.getSimpleValueType();
- SDValue V1 = IsZero
- ? getZeroVector(VT, Subtarget, DAG, SDLoc(V2)) : DAG.getUNDEF(VT);
+ SDValue V1 =
+ IsZero ? getZeroVector(VT, Subtarget, DAG, SDLoc(V2)) : DAG.getUNDEF(VT);
int NumElems = VT.getVectorNumElements();
SmallVector<int, 16> MaskVec(NumElems);
for (int i = 0; i != NumElems; ++i)
@@ -5886,9 +5882,9 @@ static bool getTargetShuffleMask(SDValue N, bool AllowSentinelZero,
/// zero. Many x86 shuffles can zero lanes cheaply and we often want to handle
/// as many lanes with this technique as possible to simplify the remaining
/// shuffle.
-static void computeZeroableShuffleElements(ArrayRef<int> Mask,
- SDValue V1, SDValue V2,
- APInt &KnownUndef, APInt &KnownZero) {
+static void computeZeroableShuffleElements(ArrayRef<int> Mask, SDValue V1,
+ SDValue V2, APInt &KnownUndef,
+ APInt &KnownZero) {
int Size = Mask.size();
KnownUndef = KnownZero = APInt::getZero(Size);
@@ -6074,7 +6070,7 @@ static bool getTargetShuffleAndZeroables(SDValue N, SmallVectorImpl<int> &Mask,
static void resolveTargetShuffleFromZeroables(SmallVectorImpl<int> &Mask,
const APInt &KnownUndef,
const APInt &KnownZero,
- bool ResolveKnownZeros= true) {
+ bool ResolveKnownZeros = true) {
unsigned NumElts = Mask.size();
assert(KnownUndef.getBitWidth() == NumElts &&
KnownZero.getBitWidth() == NumElts && "Shuffle mask size mismatch");
@@ -7044,8 +7040,8 @@ static SDValue LowerBuildVectorv4x32(SDValue Op, const SDLoc &DL,
MVT EltVT = VT.getVectorElementType();
// Create a new build vector with the first 2 elements followed by undef
// padding, bitcast to v2f64, duplicate, and bitcast back.
- SDValue Ops[4] = { Op.getOperand(0), Op.getOperand(1),
- DAG.getUNDEF(EltVT), DAG.getUNDEF(EltVT) };
+ SDValue Ops[4] = {Op.getOperand(0), Op.getOperand(1), DAG.getUNDEF(EltVT),
+ DAG.getUNDEF(EltVT)};
SDValue NewBV = DAG.getBitcast(MVT::v2f64, DAG.getBuildVector(VT, DL, Ops));
SDValue Dup = DAG.getNode(X86ISD::MOVDDUP, DL, MVT::v2f64, NewBV);
return DAG.getBitcast(VT, Dup);
@@ -7093,7 +7089,7 @@ static SDValue LowerBuildVectorv4x32(SDValue Op, const SDLoc &DL,
for (EltIdx = 0; EltIdx < 4; ++EltIdx) {
if (Zeroable[EltIdx]) {
// The zero vector will be on the right hand side.
- Mask[EltIdx] = EltIdx+4;
+ Mask[EltIdx] = EltIdx + 4;
continue;
}
@@ -7307,7 +7303,7 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts,
APInt ZeroMask = APInt::getZero(NumElems);
APInt UndefMask = APInt::getZero(NumElems);
- SmallVector<LoadSDNode*, 8> Loads(NumElems, nullptr);
+ SmallVector<LoadSDNode *, 8> Loads(NumElems, nullptr);
SmallVector<int64_t, 8> ByteOffsets(NumElems, 0);
// For each element in the initializer, see if we've found a load, zero or an
@@ -7402,8 +7398,7 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts,
auto CreateLoad = [&DAG, &DL, &Loads](EVT VT, LoadSDNode *LDBase) {
auto MMOFlags = LDBase->getMemOperand()->getFlags();
- assert(LDBase->isSimple() &&
- "Cannot merge volatile or atomic loads.");
+ assert(LDBase->isSimple() && "Cannot merge volatile or atomic loads.");
SDValue NewLd =
DAG.getLoad(VT, DL, LDBase->getChain(), LDBase->getBasePtr(),
LDBase->getPointerInfo(), LDBase->getBaseAlign(), MMOFlags);
@@ -7491,7 +7486,7 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts,
VecVT = MVT::v4f32;
if (TLI.isTypeLegal(VecVT)) {
SDVTList Tys = DAG.getVTList(VecVT, MVT::Other);
- SDValue Ops[] = { LDBase->getChain(), LDBase->getBasePtr() };
+ SDValue Ops[] = {LDBase->getChain(), LDBase->getBasePtr()};
SDValue ResNode = DAG.getMemIntrinsicNode(
X86ISD::VZEXT_LOAD, DL, Tys, Ops, VecSVT, LDBase->getPointerInfo(),
LDBase->getBaseAlign(), MachineMemOperand::MOLoad);
@@ -8110,9 +8105,9 @@ static SDValue LowerBUILD_VECTORvXi1(SDValue Op, const SDLoc &dl,
return DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v64i1, Select, Select);
} else {
MVT ImmVT = MVT::getIntegerVT(std::max((unsigned)VT.getSizeInBits(), 8U));
- SDValue Select = DAG.getSelect(dl, ImmVT, Cond,
- DAG.getAllOnesConstant(dl, ImmVT),
- DAG.getConstant(0, dl, ImmVT));
+ SDValue Select =
+ DAG.getSelect(dl, ImmVT, Cond, DAG.getAllOnesConstant(dl, ImmVT),
+ DAG.getConstant(0, dl, ImmVT));
MVT VecVT = VT.getSizeInBits() >= 8 ? VT : MVT::v8i1;
Select = DAG.getBitcast(VecVT, Select);
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, Select,
@@ -8224,10 +8219,10 @@ static bool isHorizontalBinOpPart(const BuildVectorSDNode *N, unsigned Opcode,
// Try to match the following pattern:
// (BINOP (extract_vector_elt A, I), (extract_vector_elt A, I+1))
CanFold = (Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
- Op1.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
- Op0.getOperand(0) == Op1.getOperand(0) &&
- isa<ConstantSDNode>(Op0.getOperand(1)) &&
- isa<ConstantSDNode>(Op1.getOperand(1)));
+ Op1.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ Op0.getOperand(0) == Op1.getOperand(0) &&
+ isa<ConstantSDNode>(Op0.getOperand(1)) &&
+ isa<ConstantSDNode>(Op1.getOperand(1)));
if (!CanFold)
break;
@@ -8307,9 +8302,9 @@ static SDValue ExpandHorizontalBinOp(const SDValue &V0, const SDValue &V1,
unsigned NumElts = VT.getVectorNumElements();
SDValue V0_LO = extract128BitVector(V0, 0, DAG, DL);
- SDValue V0_HI = extract128BitVector(V0, NumElts/2, DAG, DL);
+ SDValue V0_HI = extract128BitVector(V0, NumElts / 2, DAG, DL);
SDValue V1_LO = extract128BitVector(V1, 0, DAG, DL);
- SDValue V1_HI = extract128BitVector(V1, NumElts/2, DAG, DL);
+ SDValue V1_HI = extract128BitVector(V1, NumElts / 2, DAG, DL);
MVT NewVT = V0_LO.getSimpleValueType();
SDValue LO = DAG.getUNDEF(NewVT);
@@ -8419,8 +8414,8 @@ static bool isAddSubOrSubAdd(const BuildVectorSDNode *BV,
// Ensure we have found an opcode for both parities and that they are
// different. Don't try to fold this build_vector into an ADDSUB/SUBADD if the
// inputs are undef.
- if (!Opc[0] || !Opc[1] || Opc[0] == Opc[1] ||
- InVec0.isUndef() || InVec1.isUndef())
+ if (!Opc[0] || !Opc[1] || Opc[0] == Opc[1] || InVec0.isUndef() ||
+ InVec1.isUndef())
return false;
IsSubAdd = Opc[0] == ISD::FADD;
@@ -8433,7 +8428,8 @@ static bool isAddSubOrSubAdd(const BuildVectorSDNode *BV,
/// Returns true if it is possible to fold MUL and an idiom that has already been
/// recognized as ADDSUB/SUBADD(\p Opnd0, \p Opnd1) into
/// FMADDSUB/FMSUBADD(x, y, \p Opnd1). If (and only if) true is returned, the
-/// operands of FMADDSUB/FMSUBADD are written to parameters \p Opnd0, \p Opnd1, \p Opnd2.
+/// operands of FMADDSUB/FMSUBADD are written to parameters \p Opnd0, \p Opnd1,
+/// \p Opnd2.
///
/// Prior to calling this function it should be known that there is some
/// SDNode that potentially can be replaced with an X86ISD::ADDSUB operation
@@ -8515,8 +8511,8 @@ static SDValue lowerToAddSubOrFMAddSub(const BuildVectorSDNode *BV,
if (VT.is512BitVector()) {
SmallVector<int> Mask;
for (int I = 0, E = VT.getVectorNumElements(); I != E; I += 2) {
- Mask.push_back(I);
- Mask.push_back(I + E + 1);
+ Mask.push_back(I);
+ Mask.push_back(I + E + 1);
}
SDValue Sub = DAG.getNode(ISD::FSUB, DL, VT, Opnd0, Opnd1);
SDValue Add = DAG.getNode(ISD::FADD, DL, VT, Opnd0, Opnd1);
@@ -8557,13 +8553,13 @@ static bool isHopBuildVector(const BuildVectorSDNode *BV, SelectionDAG &DAG,
if (HOpcode == ISD::DELETED_NODE) {
GenericOpcode = Op.getOpcode();
switch (GenericOpcode) {
- // clang-format off
+ // clang-format off
case ISD::ADD: HOpcode = X86ISD::HADD; break;
case ISD::SUB: HOpcode = X86ISD::HSUB; break;
case ISD::FADD: HOpcode = X86ISD::FHADD; break;
case ISD::FSUB: HOpcode = X86ISD::FHSUB; break;
default: return false;
- // clang-format on
+ // clang-format on
}
}
@@ -8593,8 +8589,7 @@ static bool isHopBuildVector(const BuildVectorSDNode *BV, SelectionDAG &DAG,
// op (extract_vector_elt A, I), (extract_vector_elt A, I+1)
unsigned ExtIndex0 = Op0.getConstantOperandVal(1);
unsigned ExtIndex1 = Op1.getConstantOperandVal(1);
- unsigned ExpectedIndex = i * NumEltsIn128Bits +
- (j % NumEltsIn64Bits) * 2;
+ unsigned ExpectedIndex = i * NumEltsIn128Bits + (j % NumEltsIn64Bits) * 2;
if (ExpectedIndex == ExtIndex0 && ExtIndex1 == ExtIndex0 + 1)
continue;
@@ -9294,8 +9289,8 @@ LowerBUILD_VECTORAsVariablePermute(SDValue V, const SDLoc &DL,
return createVariablePermute(VT, SrcVec, IndicesVec, DL, DAG, Subtarget);
}
-SDValue
-X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
+SDValue X86TargetLowering::LowerBUILD_VECTOR(SDValue Op,
+ SelectionDAG &DAG) const {
SDLoc dl(Op);
MVT VT = Op.getSimpleValueType();
@@ -9521,14 +9516,13 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
}
// Is it a vector logical left shift?
- if (NumElems == 2 && Idx == 1 &&
- X86::isZeroNode(Op.getOperand(0)) &&
+ if (NumElems == 2 && Idx == 1 && X86::isZeroNode(Op.getOperand(0)) &&
!X86::isZeroNode(Op.getOperand(1))) {
unsigned NumBits = VT.getSizeInBits();
- return getVShift(true, VT,
- DAG.getNode(ISD::SCALAR_TO_VECTOR, dl,
- VT, Op.getOperand(1)),
- NumBits/2, DAG, *this, dl);
+ return getVShift(
+ true, VT,
+ DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(1)),
+ NumBits / 2, DAG, *this, dl);
}
if (IsAllConstants) // Otherwise, it's better to do a constpool load.
@@ -9541,7 +9535,8 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
// place.
if (EVTBits == 32) {
Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item);
- return getShuffleVectorZeroOrUndef(Item, Idx, NumZero > 0, Subtarget, DAG);
+ return getShuffleVectorZeroOrUndef(Item, Idx, NumZero > 0, Subtarget,
+ DAG);
}
}
@@ -9580,8 +9575,8 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
// build_vector and broadcast it.
// TODO: We could probably generalize this more.
if (Subtarget.hasAVX2() && EVTBits == 32 && Values.size() == 2) {
- SDValue Ops[4] = { Op.getOperand(0), Op.getOperand(1),
- DAG.getUNDEF(EltVT), DAG.getUNDEF(EltVT) };
+ SDValue Ops[4] = {Op.getOperand(0), Op.getOperand(1), DAG.getUNDEF(EltVT),
+ DAG.getUNDEF(EltVT)};
auto CanSplat = [](SDValue Op, unsigned NumElems, ArrayRef<SDValue> Ops) {
// Make sure all the even/odd operands match.
for (unsigned i = 2; i != NumElems; ++i)
@@ -9597,8 +9592,8 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
DAG.getBuildVector(NarrowVT, dl, Ops));
// Broadcast from v2i64/v2f64 and cast to final VT.
MVT BcastVT = MVT::getVectorVT(WideEltVT, NumElems / 2);
- return DAG.getBitcast(VT, DAG.getNode(X86ISD::VBROADCAST, dl, BcastVT,
- NewBV));
+ return DAG.getBitcast(
+ VT, DAG.getNode(X86ISD::VBROADCAST, dl, BcastVT, NewBV));
}
}
@@ -9611,7 +9606,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
SDValue Lower =
DAG.getBuildVector(HVT, dl, Op->ops().slice(0, NumElems / 2));
SDValue Upper = DAG.getBuildVector(
- HVT, dl, Op->ops().slice(NumElems / 2, NumElems /2));
+ HVT, dl, Op->ops().slice(NumElems / 2, NumElems / 2));
// Recreate the wider vector with the lower and upper part.
return concatSubVectors(Lower, Upper, DAG, dl);
@@ -9622,8 +9617,8 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
if (NumNonZero == 1) {
// One half is zero or undef.
unsigned Idx = NonZeroMask.countr_zero();
- SDValue V2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT,
- Op.getOperand(Idx));
+ SDValue V2 =
+ DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(Idx));
return getShuffleVectorZeroOrUndef(V2, Idx, true, Subtarget, DAG);
}
return SDValue();
@@ -9658,30 +9653,28 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
for (unsigned i = 0; i < 2; ++i) {
switch (NonZeroMask.extractBitsAsZExtValue(2, i * 2)) {
- default: llvm_unreachable("Unexpected NonZero count");
- case 0:
- Ops[i] = Ops[i*2]; // Must be a zero vector.
- break;
- case 1:
- Ops[i] = getMOVL(DAG, dl, VT, Ops[i*2+1], Ops[i*2]);
- break;
- case 2:
- Ops[i] = getMOVL(DAG, dl, VT, Ops[i*2], Ops[i*2+1]);
- break;
- case 3:
- Ops[i] = getUnpackl(DAG, dl, VT, Ops[i*2], Ops[i*2+1]);
- break;
+ default:
+ llvm_unreachable("Unexpected NonZero count");
+ case 0:
+ Ops[i] = Ops[i * 2]; // Must be a zero vector.
+ break;
+ case 1:
+ Ops[i] = getMOVL(DAG, dl, VT, Ops[i * 2 + 1], Ops[i * 2]);
+ break;
+ case 2:
+ Ops[i] = getMOVL(DAG, dl, VT, Ops[i * 2], Ops[i * 2 + 1]);
+ break;
+ case 3:
+ Ops[i] = getUnpackl(DAG, dl, VT, Ops[i * 2], Ops[i * 2 + 1]);
+ break;
}
}
bool Reverse1 = NonZeroMask.extractBitsAsZExtValue(2, 0) == 2;
bool Reverse2 = NonZeroMask.extractBitsAsZExtValue(2, 2) == 2;
- int MaskVec[] = {
- Reverse1 ? 1 : 0,
- Reverse1 ? 0 : 1,
- static_cast<int>(Reverse2 ? NumElems+1 : NumElems),
- static_cast<int>(Reverse2 ? NumElems : NumElems+1)
- };
+ int MaskVec[] = {Reverse1 ? 1 : 0, Reverse1 ? 0 : 1,
+ static_cast<int>(Reverse2 ? NumElems + 1 : NumElems),
+ static_cast<int>(Reverse2 ? NumElems : NumElems + 1)};
return DAG.getVectorShuffle(VT, dl, Ops[0], Ops[1], MaskVec);
}
@@ -9700,7 +9693,8 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
Result = DAG.getUNDEF(VT);
for (unsigned i = 1; i < NumElems; ++i) {
- if (Op.getOperand(i).isUndef()) continue;
+ if (Op.getOperand(i).isUndef())
+ continue;
Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Result,
Op.getOperand(i), DAG.getVectorIdxConstant(i, dl));
}
@@ -9725,14 +9719,14 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
for (unsigned Scale = 1; Scale < NumElems; Scale *= 2) {
// Generate scaled UNPCKL shuffle mask.
SmallVector<int, 16> Mask;
- for(unsigned i = 0; i != Scale; ++i)
+ for (unsigned i = 0; i != Scale; ++i)
Mask.push_back(i);
for (unsigned i = 0; i != Scale; ++i)
- Mask.push_back(NumElems+i);
+ Mask.push_back(NumElems + i);
Mask.append(NumElems - Mask.size(), SM_SentinelUndef);
for (unsigned i = 0, e = NumElems / (2 * Scale); i != e; ++i)
- Ops[i] = DAG.getVectorShuffle(VT, dl, Ops[2*i], Ops[(2*i)+1], Mask);
+ Ops[i] = DAG.getVectorShuffle(VT, dl, Ops[2 * i], Ops[(2 * i) + 1], Mask);
}
return Ops[0];
}
@@ -9758,15 +9752,14 @@ static SDValue LowerAVXCONCAT_VECTORS(SDValue Op, const SDLoc &dl,
if (SubVec.isUndef())
continue;
if (ISD::isFreezeUndef(SubVec.getNode())) {
- // If the freeze(undef) has multiple uses then we must fold to zero.
- if (SubVec.hasOneUse()) {
- ++NumFreezeUndef;
- } else {
- ++NumZero;
- Undefs.insert(SubVec);
- }
- }
- else if (ISD::isBuildVectorAllZeros(SubVec.getNode()))
+ // If the freeze(undef) has multiple uses then we must fold to zero.
+ if (SubVec.hasOneUse()) {
+ ++NumFreezeUndef;
+ } else {
+ ++NumZero;
+ Undefs.insert(SubVec);
+ }
+ } else if (ISD::isBuildVectorAllZeros(SubVec.getNode()))
++NumZero;
else {
assert(i < sizeof(NonZeros) * CHAR_BIT); // Ensure the shift is in range.
@@ -9780,9 +9773,9 @@ static SDValue LowerAVXCONCAT_VECTORS(SDValue Op, const SDLoc &dl,
MVT HalfVT = ResVT.getHalfNumVectorElementsVT();
ArrayRef<SDUse> Ops = Op->ops();
SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfVT,
- Ops.slice(0, NumOperands/2));
+ Ops.slice(0, NumOperands / 2));
SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfVT,
- Ops.slice(NumOperands/2));
+ Ops.slice(NumOperands / 2));
return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResVT, Lo, Hi);
}
@@ -9815,7 +9808,7 @@ static SDValue LowerAVXCONCAT_VECTORS(SDValue Op, const SDLoc &dl,
// TODO: Merge this with LowerAVXCONCAT_VECTORS?
static SDValue LowerCONCAT_VECTORSvXi1(SDValue Op, const SDLoc &dl,
const X86Subtarget &Subtarget,
- SelectionDAG & DAG) {
+ SelectionDAG &DAG) {
MVT ResVT = Op.getSimpleValueType();
unsigned NumOperands = Op.getNumOperands();
assert(NumOperands > 1 && isPowerOf2_32(NumOperands) &&
@@ -9886,8 +9879,7 @@ static SDValue LowerCONCAT_VECTORSvXi1(SDValue Op, const SDLoc &dl,
DAG.getVectorIdxConstant(NumElems / 2, dl));
}
-static SDValue LowerCONCAT_VECTORS(SDValue Op,
- const X86Subtarget &Subtarget,
+static SDValue LowerCONCAT_VECTORS(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
SDLoc DL(Op);
MVT VT = Op.getSimpleValueType();
@@ -10109,8 +10101,8 @@ static bool isRepeatedShuffleMask(unsigned LaneSizeInBits, MVT VT,
// Ok, handle the in-lane shuffles by detecting if and when they repeat.
// Adjust second vector indices to start at LaneSize instead of Size.
- int LocalM = Mask[i] < Size ? Mask[i] % LaneSize
- : Mask[i] % LaneSize + LaneSize;
+ int LocalM =
+ Mask[i] < Size ? Mask[i] % LaneSize : Mask[i] % LaneSize + LaneSize;
if (RepeatedMask[i % LaneSize] < 0)
// This is the first non-undef entry in this slot of a 128-bit lane.
RepeatedMask[i % LaneSize] = LocalM;
@@ -10128,8 +10120,7 @@ is128BitLaneRepeatedShuffleMask(MVT VT, ArrayRef<int> Mask,
return isRepeatedShuffleMask(128, VT, Mask, RepeatedMask);
}
-static bool
-is128BitLaneRepeatedShuffleMask(MVT VT, ArrayRef<int> Mask) {
+static bool is128BitLaneRepeatedShuffleMask(MVT VT, ArrayRef<int> Mask) {
SmallVector<int, 32> RepeatedMask;
return isRepeatedShuffleMask(128, VT, Mask, RepeatedMask);
}
@@ -10428,8 +10419,8 @@ static SDValue getSHUFPDImmForMask(ArrayRef<int> Mask, const SDLoc &DL,
//
// The function looks for a sub-mask that the nonzero elements are in
// increasing order. If such sub-mask exist. The function returns true.
-static bool isNonZeroElementsInOrder(const APInt &Zeroable,
- ArrayRef<int> Mask, const EVT &VectorType,
+static bool isNonZeroElementsInOrder(const APInt &Zeroable, ArrayRef<int> Mask,
+ const EVT &VectorType,
bool &IsZeroSideLeft) {
int NextElement = -1;
// Check if the Mask's nonzero elements are in increasing order.
@@ -11209,7 +11200,7 @@ static bool matchShuffleAsBlend(MVT VT, SDValue V1, SDValue V2,
if (M == SM_SentinelUndef)
continue;
if (M == Elt || (0 <= M && M < NumElts &&
- IsElementEquivalent(NumElts, V1, V1, M, Elt))) {
+ IsElementEquivalent(NumElts, V1, V1, M, Elt))) {
Mask[Elt] = Elt;
LaneV1InUse = true;
continue;
@@ -11342,8 +11333,7 @@ static SDValue lowerShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1,
// If we have VPTERNLOG, we can use that as a bit blend.
if (Subtarget.hasVLX())
- if (SDValue BitBlend =
- lowerShuffleAsBitBlend(DL, VT, V1, V2, Mask, DAG))
+ if (SDValue BitBlend = lowerShuffleAsBitBlend(DL, VT, V1, V2, Mask, DAG))
return BitBlend;
// Scale the blend by the number of bytes per element.
@@ -11651,9 +11641,11 @@ static SDValue lowerShuffleAsPermuteAndUnpack(const SDLoc &DL, MVT VT,
/// Helper to form a PALIGNR-based rotate+permute, merging 2 inputs and then
/// permuting the elements of the result in place.
-static SDValue lowerShuffleAsByteRotateAndPermute(
- const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
- const X86Subtarget &Subtarget, SelectionDAG &DAG) {
+static SDValue lowerShuffleAsByteRotateAndPermute(const SDLoc &DL, MVT VT,
+ SDValue V1, SDValue V2,
+ ArrayRef<int> Mask,
+ const X86Subtarget &Subtarget,
+ SelectionDAG &DAG) {
if ((VT.is128BitVector() && !Subtarget.hasSSSE3()) ||
(VT.is256BitVector() && !Subtarget.hasAVX2()) ||
(VT.is512BitVector() && !Subtarget.hasBWI()))
@@ -11851,9 +11843,9 @@ static SDValue lowerShuffleAsDecomposedShuffleMerge(
// If either input vector provides only a single element which is repeated
// multiple times, unpacking from both input vectors would generate worse
// code. e.g. for
- // t5: v16i8 = vector_shuffle<16,0,16,1,16,2,16,3,16,4,16,5,16,6,16,7> t2, t4
- // it is better to process t4 first to create a vector of t4[0], then unpack
- // that vector with t2.
+ // t5: v16i8 = vector_shuffle<16,0,16,1,16,2,16,3,16,4,16,5,16,6,16,7> t2,
+ // t4 it is better to process t4 first to create a vector of t4[0], then
+ // unpack that vector with t2.
if (!V1Zero && !V2Zero && !isSingleElementRepeatedMask(V1Mask) &&
!isSingleElementRepeatedMask(V2Mask))
if (SDValue UnpackPerm =
@@ -11865,8 +11857,8 @@ static SDValue lowerShuffleAsDecomposedShuffleMerge(
return RotatePerm;
// Unpack/rotate failed - try again with variable blends.
- if (SDValue BlendPerm = lowerShuffleAsBlendAndPermute(DL, VT, V1, V2, Mask,
- DAG))
+ if (SDValue BlendPerm =
+ lowerShuffleAsBlendAndPermute(DL, VT, V1, V2, Mask, DAG))
return BlendPerm;
if (VT.getScalarSizeInBits() >= 32)
@@ -11980,7 +11972,7 @@ static int matchShuffleAsElementRotate(SDValue &V1, SDValue &V2,
SDValue Lo, Hi;
for (int i = 0; i < NumElts; ++i) {
int M = Mask[i];
- assert((M == SM_SentinelUndef || (0 <= M && M < (2*NumElts))) &&
+ assert((M == SM_SentinelUndef || (0 <= M && M < (2 * NumElts))) &&
"Unexpected mask index.");
if (M < 0)
continue;
@@ -12102,8 +12094,7 @@ static SDValue lowerShuffleAsByteRotate(const SDLoc &DL, MVT VT, SDValue V1,
"Rotate-based lowering only supports 128-bit lowering!");
assert(Mask.size() <= 16 &&
"Can shuffle at most 16 bytes in a 128-bit vector!");
- assert(ByteVT == MVT::v16i8 &&
- "SSE2 rotate lowering only needed for v16i8!");
+ assert(ByteVT == MVT::v16i8 && "SSE2 rotate lowering only needed for v16i8!");
// Default SSE2 implementation
int LoByteShift = 16 - ByteRotation;
@@ -12138,8 +12129,9 @@ static SDValue lowerShuffleAsVALIGN(const SDLoc &DL, MVT VT, SDValue V1,
"Only 32-bit and 64-bit elements are supported!");
// 128/256-bit vectors are only supported with VLX.
- assert((Subtarget.hasVLX() || (!VT.is128BitVector() && !VT.is256BitVector()))
- && "VLX required for 128/256-bit vectors");
+ assert(
+ (Subtarget.hasVLX() || (!VT.is128BitVector() && !VT.is256BitVector())) &&
+ "VLX required for 128/256-bit vectors");
SDValue Lo = V1, Hi = V2;
int Rotation = matchShuffleAsElementRotate(Lo, Hi, Mask);
@@ -12691,8 +12683,7 @@ static SDValue lowerShuffleAsSpecificExtension(const SDLoc &DL, MVT VT,
/// are both incredibly common and often quite performance sensitive.
static SDValue lowerShuffleAsZeroOrAnyExtend(
const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
- const APInt &Zeroable, const X86Subtarget &Subtarget,
- SelectionDAG &DAG) {
+ const APInt &Zeroable, const X86Subtarget &Subtarget, SelectionDAG &DAG) {
int Bits = VT.getSizeInBits();
int NumLanes = Bits / 128;
int NumElements = VT.getVectorNumElements();
@@ -12818,7 +12809,8 @@ static SDValue getScalarValueForVectorElement(SDValue V, int Idx,
// If the bitcasts shift the element size, we can't extract an equivalent
// element from it.
MVT NewVT = V.getSimpleValueType();
- if (!NewVT.isVector() || NewVT.getScalarSizeInBits() != VT.getScalarSizeInBits())
+ if (!NewVT.isVector() ||
+ NewVT.getScalarSizeInBits() != VT.getScalarSizeInBits())
return SDValue();
if (V.getOpcode() == ISD::BUILD_VECTOR ||
@@ -12842,7 +12834,7 @@ static bool isShuffleFoldableLoad(SDValue V) {
ISD::isNON_EXTLoad(peekThroughOneUseBitcasts(V).getNode());
}
-template<typename T>
+template <typename T>
static bool isSoftF16(T VT, const X86Subtarget &Subtarget) {
T EltVT = VT.getScalarType();
return (EltVT == MVT::bf16 && !Subtarget.hasAVX10_2()) ||
@@ -12855,8 +12847,7 @@ static bool isSoftF16(T VT, const X86Subtarget &Subtarget) {
/// across all subtarget feature sets.
static SDValue lowerShuffleAsElementInsertion(
const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
- const APInt &Zeroable, const X86Subtarget &Subtarget,
- SelectionDAG &DAG) {
+ const APInt &Zeroable, const X86Subtarget &Subtarget, SelectionDAG &DAG) {
MVT ExtVT = VT;
MVT EltVT = VT.getVectorElementType();
unsigned NumElts = VT.getVectorNumElements();
@@ -12889,8 +12880,8 @@ static SDValue lowerShuffleAsElementInsertion(
// all the smarts here sunk into that routine. However, the current
// lowering of BUILD_VECTOR makes that nearly impossible until the old
// vector shuffle lowering is dead.
- SDValue V2S = getScalarValueForVectorElement(V2, Mask[V2Index] - Mask.size(),
- DAG);
+ SDValue V2S =
+ getScalarValueForVectorElement(V2, Mask[V2Index] - Mask.size(), DAG);
if (V2S && DAG.getTargetLoweringInfo().isTypeLegal(V2S.getValueType())) {
// We need to zext the scalar if it is smaller than an i32.
V2S = DAG.getBitcast(EltVT, V2S);
@@ -13093,8 +13084,8 @@ static SDValue lowerShuffleOfExtractsAsVperm(const SDLoc &DL, SDValue N0,
// Check that both sources are extracts of the same source vector.
if (N0.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
N1.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
- N0.getOperand(0) != N1.getOperand(0) ||
- !N0.hasOneUse() || !N1.hasOneUse())
+ N0.getOperand(0) != N1.getOperand(0) || !N0.hasOneUse() ||
+ !N1.hasOneUse())
return SDValue();
SDValue WideVec = N0.getOperand(0);
@@ -13124,8 +13115,8 @@ static SDValue lowerShuffleOfExtractsAsVperm(const SDLoc &DL, SDValue N0,
NewMask.append(NumElts, -1);
// shuf (extract X, 0), (extract X, 4), M --> extract (shuf X, undef, M'), 0
- SDValue Shuf = DAG.getVectorShuffle(WideVT, DL, WideVec, DAG.getUNDEF(WideVT),
- NewMask);
+ SDValue Shuf =
+ DAG.getVectorShuffle(WideVT, DL, WideVec, DAG.getUNDEF(WideVT), NewMask);
// This is free: ymm -> xmm.
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Shuf,
DAG.getVectorIdxConstant(0, DL));
@@ -13324,8 +13315,8 @@ static SDValue lowerShuffleAsBroadcast(const SDLoc &DL, MVT VT, SDValue V1,
if (!V.getValueType().isVector()) {
assert(V.getScalarValueSizeInBits() == NumEltBits &&
"Unexpected scalar size");
- MVT BroadcastVT = MVT::getVectorVT(V.getSimpleValueType(),
- VT.getVectorNumElements());
+ MVT BroadcastVT =
+ MVT::getVectorVT(V.getSimpleValueType(), VT.getVectorNumElements());
return DAG.getBitcast(VT, DAG.getNode(Opcode, DL, BroadcastVT, V));
}
@@ -13350,8 +13341,8 @@ static SDValue lowerShuffleAsBroadcast(const SDLoc &DL, MVT VT, SDValue V1,
// elements are zeroable.
static bool matchShuffleAsInsertPS(SDValue &V1, SDValue &V2,
unsigned &InsertPSMask,
- const APInt &Zeroable,
- ArrayRef<int> Mask, SelectionDAG &DAG) {
+ const APInt &Zeroable, ArrayRef<int> Mask,
+ SelectionDAG &DAG) {
assert(V1.getSimpleValueType().is128BitVector() && "Bad operand type!");
assert(V2.getSimpleValueType().is128BitVector() && "Bad operand type!");
assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!");
@@ -13803,8 +13794,8 @@ static SDValue lowerV4F32Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
// when the V2 input is targeting element 0 of the mask -- that is the fast
// case here.
if (NumV2Elements == 1 && Mask[0] >= 4)
- if (SDValue V = lowerShuffleAsElementInsertion(
- DL, MVT::v4f32, V1, V2, Mask, Zeroable, Subtarget, DAG))
+ if (SDValue V = lowerShuffleAsElementInsertion(DL, MVT::v4f32, V1, V2, Mask,
+ Zeroable, Subtarget, DAG))
return V;
if (Subtarget.hasSSE41()) {
@@ -13813,8 +13804,8 @@ static SDValue lowerV4F32Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
return V;
if (!isSingleSHUFPSMask(Mask))
- if (SDValue BlendPerm = lowerShuffleAsBlendAndPermute(DL, MVT::v4f32, V1,
- V2, Mask, DAG))
+ if (SDValue BlendPerm =
+ lowerShuffleAsBlendAndPermute(DL, MVT::v4f32, V1, V2, Mask, DAG))
return BlendPerm;
}
@@ -13906,8 +13897,8 @@ static SDValue lowerV4I32Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
// There are special ways we can lower some single-element blends.
if (NumV2Elements == 1)
- if (SDValue V = lowerShuffleAsElementInsertion(
- DL, MVT::v4i32, V1, V2, Mask, Zeroable, Subtarget, DAG))
+ if (SDValue V = lowerShuffleAsElementInsertion(DL, MVT::v4i32, V1, V2, Mask,
+ Zeroable, Subtarget, DAG))
return V;
// We have different paths for blend lowering, but they all must use the
@@ -14037,7 +14028,7 @@ static SDValue lowerV8I16GeneralSingleInputShuffle(
};
if ((NumHToL + NumHToH) == 0 || (NumLToL + NumLToH) == 0) {
- int PSHUFDMask[4] = { -1, -1, -1, -1 };
+ int PSHUFDMask[4] = {-1, -1, -1, -1};
SmallVector<std::pair<int, int>, 4> DWordPairs;
int DOffset = ((NumHToL + NumHToH) == 0 ? 0 : 2);
@@ -14141,7 +14132,8 @@ static SDValue lowerV8I16GeneralSingleInputShuffle(
int OneInput = ThreeAInputs ? BToAInputs[0] : AToAInputs[0];
int TripleInputSum = 0 + 1 + 2 + 3 + (4 * TripleInputOffset);
int TripleNonInputIdx =
- TripleInputSum - std::accumulate(TripleInputs.begin(), TripleInputs.end(), 0);
+ TripleInputSum -
+ std::accumulate(TripleInputs.begin(), TripleInputs.end(), 0);
TripleDWord = TripleNonInputIdx / 2;
// We use xor with one to compute the adjacent DWord to whichever one the
@@ -14219,9 +14211,9 @@ static SDValue lowerV8I16GeneralSingleInputShuffle(
// Adjust the mask to match the new locations of A and B.
for (int &M : Mask)
- if (M >= 0 && M/2 == ADWord)
+ if (M >= 0 && M / 2 == ADWord)
M = 2 * BDWord + M % 2;
- else if (M >= 0 && M/2 == BDWord)
+ else if (M >= 0 && M / 2 == BDWord)
M = 2 * ADWord + M % 2;
// Recurse back into this routine to re-compute state now that this isn't
@@ -14249,33 +14241,33 @@ static SDValue lowerV8I16GeneralSingleInputShuffle(
[&PSHUFDMask](ArrayRef<int> InPlaceInputs, ArrayRef<int> IncomingInputs,
MutableArrayRef<int> SourceHalfMask,
MutableArrayRef<int> HalfMask, int HalfOffset) {
- if (InPlaceInputs.empty())
- return;
- if (InPlaceInputs.size() == 1) {
- SourceHalfMask[InPlaceInputs[0] - HalfOffset] =
- InPlaceInputs[0] - HalfOffset;
- PSHUFDMask[InPlaceInputs[0] / 2] = InPlaceInputs[0] / 2;
- return;
- }
- if (IncomingInputs.empty()) {
- // Just fix all of the in place inputs.
- for (int Input : InPlaceInputs) {
- SourceHalfMask[Input - HalfOffset] = Input - HalfOffset;
- PSHUFDMask[Input / 2] = Input / 2;
- }
- return;
- }
+ if (InPlaceInputs.empty())
+ return;
+ if (InPlaceInputs.size() == 1) {
+ SourceHalfMask[InPlaceInputs[0] - HalfOffset] =
+ InPlaceInputs[0] - HalfOffset;
+ PSHUFDMask[InPlaceInputs[0] / 2] = InPlaceInputs[0] / 2;
+ return;
+ }
+ if (IncomingInputs.empty()) {
+ // Just fix all of the in place inputs.
+ for (int Input : InPlaceInputs) {
+ SourceHalfMask[Input - HalfOffset] = Input - HalfOffset;
+ PSHUFDMask[Input / 2] = Input / 2;
+ }
+ return;
+ }
- assert(InPlaceInputs.size() == 2 && "Cannot handle 3 or 4 inputs!");
- SourceHalfMask[InPlaceInputs[0] - HalfOffset] =
- InPlaceInputs[0] - HalfOffset;
- // Put the second input next to the first so that they are packed into
- // a dword. We find the adjacent index by toggling the low bit.
- int AdjIndex = InPlaceInputs[0] ^ 1;
- SourceHalfMask[AdjIndex - HalfOffset] = InPlaceInputs[1] - HalfOffset;
- llvm::replace(HalfMask, InPlaceInputs[1], AdjIndex);
- PSHUFDMask[AdjIndex / 2] = AdjIndex / 2;
- };
+ assert(InPlaceInputs.size() == 2 && "Cannot handle 3 or 4 inputs!");
+ SourceHalfMask[InPlaceInputs[0] - HalfOffset] =
+ InPlaceInputs[0] - HalfOffset;
+ // Put the second input next to the first so that they are packed into
+ // a dword. We find the adjacent index by toggling the low bit.
+ int AdjIndex = InPlaceInputs[0] ^ 1;
+ SourceHalfMask[AdjIndex - HalfOffset] = InPlaceInputs[1] - HalfOffset;
+ llvm::replace(HalfMask, InPlaceInputs[1], AdjIndex);
+ PSHUFDMask[AdjIndex / 2] = AdjIndex / 2;
+ };
fixInPlaceInputs(LToLInputs, HToLInputs, PSHUFLMask, LoMask, 0);
fixInPlaceInputs(HToHInputs, LToHInputs, PSHUFHMask, HiMask, 4);
@@ -14284,10 +14276,12 @@ static SDValue lowerV8I16GeneralSingleInputShuffle(
// FIXME: This operation could almost certainly be simplified dramatically to
// look more like the 3-1 fixing operation.
auto moveInputsToRightHalf = [&PSHUFDMask](
- MutableArrayRef<int> IncomingInputs, ArrayRef<int> ExistingInputs,
- MutableArrayRef<int> SourceHalfMask, MutableArrayRef<int> HalfMask,
- MutableArrayRef<int> FinalSourceHalfMask, int SourceOffset,
- int DestOffset) {
+ MutableArrayRef<int> IncomingInputs,
+ ArrayRef<int> ExistingInputs,
+ MutableArrayRef<int> SourceHalfMask,
+ MutableArrayRef<int> HalfMask,
+ MutableArrayRef<int> FinalSourceHalfMask,
+ int SourceOffset, int DestOffset) {
auto isWordClobbered = [](ArrayRef<int> SourceHalfMask, int Word) {
return SourceHalfMask[Word] >= 0 && SourceHalfMask[Word] != Word;
};
@@ -14483,9 +14477,11 @@ static SDValue lowerV8I16GeneralSingleInputShuffle(
/// Helper to form a PSHUFB-based shuffle+blend, opportunistically avoiding the
/// blend if only one input is used.
-static SDValue lowerShuffleAsBlendOfPSHUFBs(
- const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
- const APInt &Zeroable, SelectionDAG &DAG, bool &V1InUse, bool &V2InUse) {
+static SDValue lowerShuffleAsBlendOfPSHUFBs(const SDLoc &DL, MVT VT, SDValue V1,
+ SDValue V2, ArrayRef<int> Mask,
+ const APInt &Zeroable,
+ SelectionDAG &DAG, bool &V1InUse,
+ bool &V2InUse) {
assert(!is128BitLaneCrossingShuffleMask(VT, Mask) &&
"Lane crossing shuffle masks not supported");
@@ -14580,8 +14576,8 @@ static SDValue lowerV8I16Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
return Broadcast;
// Try to use bit rotation instructions.
- if (SDValue Rotate = lowerShuffleAsBitRotate(DL, MVT::v8i16, V1, Mask,
- Subtarget, DAG))
+ if (SDValue Rotate =
+ lowerShuffleAsBitRotate(DL, MVT::v8i16, V1, Mask, Subtarget, DAG))
return Rotate;
// Use dedicated unpack instructions for masks that match their pattern.
@@ -14616,14 +14612,14 @@ static SDValue lowerV8I16Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
// See if we can use SSE4A Extraction / Insertion.
if (Subtarget.hasSSE4A())
- if (SDValue V = lowerShuffleWithSSE4A(DL, MVT::v8i16, V1, V2, Mask,
- Zeroable, DAG))
+ if (SDValue V =
+ lowerShuffleWithSSE4A(DL, MVT::v8i16, V1, V2, Mask, Zeroable, DAG))
return V;
// There are special ways we can lower some single-element blends.
if (NumV2Inputs == 1)
- if (SDValue V = lowerShuffleAsElementInsertion(
- DL, MVT::v8i16, V1, V2, Mask, Zeroable, Subtarget, DAG))
+ if (SDValue V = lowerShuffleAsElementInsertion(DL, MVT::v8i16, V1, V2, Mask,
+ Zeroable, Subtarget, DAG))
return V;
// We have different paths for blend lowering, but they all must use the
@@ -14739,8 +14735,8 @@ static SDValue lowerV8I16Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
// can both shuffle and set up the inefficient blend.
if (!IsBlendSupported && Subtarget.hasSSSE3()) {
bool V1InUse, V2InUse;
- return lowerShuffleAsBlendOfPSHUFBs(DL, MVT::v8i16, V1, V2, Mask,
- Zeroable, DAG, V1InUse, V2InUse);
+ return lowerShuffleAsBlendOfPSHUFBs(DL, MVT::v8i16, V1, V2, Mask, Zeroable,
+ DAG, V1InUse, V2InUse);
}
// We can always bit-blend if we have to so the fallback strategy is to
@@ -14873,8 +14869,8 @@ static SDValue lowerV16I8Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
// See if we can use SSE4A Extraction / Insertion.
if (Subtarget.hasSSE4A())
- if (SDValue V = lowerShuffleWithSSE4A(DL, MVT::v16i8, V1, V2, Mask,
- Zeroable, DAG))
+ if (SDValue V =
+ lowerShuffleWithSSE4A(DL, MVT::v16i8, V1, V2, Mask, Zeroable, DAG))
return V;
int NumV2Elements = count_if(Mask, [](int M) { return M >= 16; });
@@ -14887,8 +14883,8 @@ static SDValue lowerV16I8Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
return Broadcast;
// Try to use bit rotation instructions.
- if (SDValue Rotate = lowerShuffleAsBitRotate(DL, MVT::v16i8, V1, Mask,
- Subtarget, DAG))
+ if (SDValue Rotate =
+ lowerShuffleAsBitRotate(DL, MVT::v16i8, V1, Mask, Subtarget, DAG))
return Rotate;
if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v16i8, V1, V2, Mask, DAG))
@@ -14929,7 +14925,7 @@ static SDValue lowerV16I8Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
int PreDupI16Shuffle[] = {-1, -1, -1, -1, -1, -1, -1, -1};
SmallDenseMap<int, int, 8> LaneMap;
for (int I : InPlaceInputs) {
- PreDupI16Shuffle[I/2] = I/2;
+ PreDupI16Shuffle[I / 2] = I / 2;
LaneMap[I] = I;
}
int j = TargetLo ? 0 : 4, je = j + 4;
@@ -14943,7 +14939,8 @@ static SDValue lowerV16I8Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
++j;
if (j == je)
- // We can't place the inputs into a single half with a simple i16 shuffle, so bail.
+ // We can't place the inputs into a single half with a simple i16
+ // shuffle, so bail.
return SDValue();
// Map this input with the i16 shuffle.
@@ -15064,8 +15061,8 @@ static SDValue lowerV16I8Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
// Use PALIGNR+Permute if possible - permute might become PSHUFB but the
// PALIGNR will be cheaper than the second PSHUFB+OR.
- if (SDValue V = lowerShuffleAsByteRotateAndPermute(
- DL, MVT::v16i8, V1, V2, Mask, Subtarget, DAG))
+ if (SDValue V = lowerShuffleAsByteRotateAndPermute(DL, MVT::v16i8, V1, V2,
+ Mask, Subtarget, DAG))
return V;
}
@@ -15074,8 +15071,8 @@ static SDValue lowerV16I8Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
// There are special ways we can lower some single-element blends.
if (NumV2Elements == 1)
- if (SDValue V = lowerShuffleAsElementInsertion(
- DL, MVT::v16i8, V1, V2, Mask, Zeroable, Subtarget, DAG))
+ if (SDValue V = lowerShuffleAsElementInsertion(DL, MVT::v16i8, V1, V2, Mask,
+ Zeroable, Subtarget, DAG))
return V;
if (SDValue Blend = lowerShuffleAsBitBlend(DL, MVT::v16i8, V1, V2, Mask, DAG))
@@ -15167,8 +15164,8 @@ static SDValue lowerV16I8Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
if (M >= 0)
M /= 2;
} else {
- // Otherwise just unpack the low half of V into VLoHalf and the high half into
- // VHiHalf so that we can blend them as i16s.
+ // Otherwise just unpack the low half of V into VLoHalf and the high half
+ // into VHiHalf so that we can blend them as i16s.
SDValue Zero = getZeroVector(MVT::v16i8, Subtarget, DAG, DL);
VLoHalf = DAG.getBitcast(
@@ -15177,8 +15174,10 @@ static SDValue lowerV16I8Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
MVT::v8i16, DAG.getNode(X86ISD::UNPCKH, DL, MVT::v16i8, V, Zero));
}
- SDValue LoV = DAG.getVectorShuffle(MVT::v8i16, DL, VLoHalf, VHiHalf, LoBlendMask);
- SDValue HiV = DAG.getVectorShuffle(MVT::v8i16, DL, VLoHalf, VHiHalf, HiBlendMask);
+ SDValue LoV =
+ DAG.getVectorShuffle(MVT::v8i16, DL, VLoHalf, VHiHalf, LoBlendMask);
+ SDValue HiV =
+ DAG.getVectorShuffle(MVT::v8i16, DL, VLoHalf, VHiHalf, HiBlendMask);
return DAG.getNode(X86ISD::PACKUS, DL, MVT::v16i8, LoV, HiV);
}
@@ -15187,9 +15186,8 @@ static SDValue lowerV16I8Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
///
/// This routine breaks down the specific type of 128-bit shuffle and
/// dispatches to the lowering routines accordingly.
-static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef<int> Mask,
- MVT VT, SDValue V1, SDValue V2,
- const APInt &Zeroable,
+static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
+ SDValue V1, SDValue V2, const APInt &Zeroable,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
if (VT == MVT::v8bf16) {
@@ -15371,7 +15369,7 @@ static SDValue lowerShuffleAsSplitOrBlend(const SDLoc &DL, MVT VT, SDValue V1,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
assert(!V2.isUndef() && "This routine must not be used to lower single-input "
- "shuffles as it could then recurse on itself.");
+ "shuffles as it could then recurse on itself.");
int Size = Mask.size();
// If this can be modeled as a broadcast of two elements followed by a blend,
@@ -15710,8 +15708,8 @@ static SDValue lowerV2X128Shuffle(const SDLoc &DL, MVT VT, SDValue V1,
// instruction bytes needed to explicitly generate the zero vector.
// Blends are faster and handle all the non-lane-crossing cases.
- if (SDValue Blend = lowerShuffleAsBlend(DL, VT, V1, V2, Mask, Zeroable,
- Subtarget, DAG))
+ if (SDValue Blend =
+ lowerShuffleAsBlend(DL, VT, V1, V2, Mask, Zeroable, Subtarget, DAG))
return Blend;
// If either input operand is a zero vector, use VPERM2X128 because its mask
@@ -15737,8 +15735,8 @@ static SDValue lowerV2X128Shuffle(const SDLoc &DL, MVT VT, SDValue V1,
// Try to use SHUF128 if possible.
if (Subtarget.hasVLX()) {
if (WidenedMask[0] < 2 && WidenedMask[1] >= 2) {
- unsigned PermMask = ((WidenedMask[0] % 2) << 0) |
- ((WidenedMask[1] % 2) << 1);
+ unsigned PermMask =
+ ((WidenedMask[0] % 2) << 0) | ((WidenedMask[1] % 2) << 1);
return DAG.getNode(X86ISD::SHUF128, DL, VT, V1, V2,
DAG.getTargetConstant(PermMask, DL, MVT::i8));
}
@@ -15762,7 +15760,7 @@ static SDValue lowerV2X128Shuffle(const SDLoc &DL, MVT VT, SDValue V1,
(WidenedMask[1] >= 0 || IsHighZero) && "Undef half?");
unsigned PermMask = 0;
- PermMask |= IsLowZero ? 0x08 : (WidenedMask[0] << 0);
+ PermMask |= IsLowZero ? 0x08 : (WidenedMask[0] << 0);
PermMask |= IsHighZero ? 0x80 : (WidenedMask[1] << 4);
// Check the immediate mask and replace unused sources with undef.
@@ -15954,9 +15952,9 @@ static SDValue lowerShuffleAsLanePermuteAndRepeatedMask(
/// adjusted to access the extracted halves of the original shuffle operands is
/// returned in HalfMask. HalfIdx1 and HalfIdx2 return whether the upper or
/// lower half of each input operand is accessed.
-static bool
-getHalfShuffleMask(ArrayRef<int> Mask, MutableArrayRef<int> HalfMask,
- int &HalfIdx1, int &HalfIdx2) {
+static bool getHalfShuffleMask(ArrayRef<int> Mask,
+ MutableArrayRef<int> HalfMask, int &HalfIdx1,
+ int &HalfIdx2) {
assert((Mask.size() == HalfMask.size() * 2) &&
"Expected input mask to be twice as long as output");
@@ -16009,7 +16007,8 @@ getHalfShuffleMask(ArrayRef<int> Mask, MutableArrayRef<int> HalfMask,
static SDValue getShuffleHalfVectors(const SDLoc &DL, SDValue V1, SDValue V2,
ArrayRef<int> HalfMask, int HalfIdx1,
int HalfIdx2, bool UndefLower,
- SelectionDAG &DAG, bool UseConcat = false) {
+ SelectionDAG &DAG,
+ bool UseConcat = false) {
assert(V1.getValueType() == V2.getValueType() && "Different sized vectors?");
assert(V1.getValueType().isSimple() && "Expecting only simple types");
@@ -16371,7 +16370,7 @@ static bool matchShuffleWithSHUFPD(MVT VT, SDValue &V1, SDValue &V2,
assert(isUndefOrZeroOrInRange(Mask, 0, 2 * NumElts) &&
"Illegal shuffle mask");
- bool ZeroLane[2] = { true, true };
+ bool ZeroLane[2] = {true, true};
for (int i = 0; i < NumElts; ++i)
ZeroLane[i & 1] &= Zeroable[i];
@@ -16456,9 +16455,9 @@ static SDValue lowerShuffleAsVTRUNCAndUnpack(const SDLoc &DL, MVT VT,
// The VTRUNCs will put 0s in the upper 12 bytes. Use them to put zeroes in
// the upper bits of the result using an unpckldq.
- SDValue Unpack = DAG.getVectorShuffle(MVT::v16i8, DL, V1, V2,
- { 0, 1, 2, 3, 16, 17, 18, 19,
- 4, 5, 6, 7, 20, 21, 22, 23 });
+ SDValue Unpack = DAG.getVectorShuffle(
+ MVT::v16i8, DL, V1, V2,
+ {0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23});
// Insert the unpckldq into a zero vector to widen to v32i8.
return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, MVT::v32i8,
DAG.getConstant(0, DL, MVT::v32i8), Unpack,
@@ -16695,8 +16694,8 @@ static SDValue lowerV4I64Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
return Blend;
// Check for being able to broadcast a single element.
- if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v4i64, V1, V2, Mask,
- Subtarget, DAG))
+ if (SDValue Broadcast =
+ lowerShuffleAsBroadcast(DL, MVT::v4i64, V1, V2, Mask, Subtarget, DAG))
return Broadcast;
// Try to use shift instructions if fast.
@@ -16803,8 +16802,8 @@ static SDValue lowerV8F32Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
return Blend;
// Check for being able to broadcast a single element.
- if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v8f32, V1, V2, Mask,
- Subtarget, DAG))
+ if (SDValue Broadcast =
+ lowerShuffleAsBroadcast(DL, MVT::v8f32, V1, V2, Mask, Subtarget, DAG))
return Broadcast;
if (!Subtarget.hasAVX2()) {
@@ -16951,8 +16950,8 @@ static SDValue lowerV8I32Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
return Blend;
// Check for being able to broadcast a single element.
- if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v8i32, V1, V2, Mask,
- Subtarget, DAG))
+ if (SDValue Broadcast =
+ lowerShuffleAsBroadcast(DL, MVT::v8i32, V1, V2, Mask, Subtarget, DAG))
return Broadcast;
// Try to use shift instructions if fast.
@@ -17119,7 +17118,8 @@ static SDValue lowerV16I16Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
// Try to produce a fixed cross-128-bit lane permute followed by unpack
// because that should be faster than the variable permute alternatives.
- if (SDValue V = lowerShuffleWithUNPCK256(DL, MVT::v16i16, V1, V2, Mask, DAG))
+ if (SDValue V =
+ lowerShuffleWithUNPCK256(DL, MVT::v16i16, V1, V2, Mask, DAG))
return V;
// There are no generalized cross-lane shuffle operations available on i16
@@ -17138,8 +17138,8 @@ static SDValue lowerV16I16Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
// As this is a single-input shuffle, the repeated mask should be
// a strictly valid v8i16 mask that we can pass through to the v8i16
// lowering to handle even the v16 case.
- return lowerV8I16GeneralSingleInputShuffle(
- DL, MVT::v16i16, V1, RepeatedMask, Subtarget, DAG);
+ return lowerV8I16GeneralSingleInputShuffle(DL, MVT::v16i16, V1,
+ RepeatedMask, Subtarget, DAG);
}
}
@@ -17158,8 +17158,8 @@ static SDValue lowerV16I16Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
return Result;
// Try to permute the lanes and then use a per-lane permute.
- if (SDValue V = lowerShuffleAsLanePermuteAndPermute(
- DL, MVT::v16i16, V1, V2, Mask, DAG, Subtarget))
+ if (SDValue V = lowerShuffleAsLanePermuteAndPermute(DL, MVT::v16i16, V1, V2,
+ Mask, DAG, Subtarget))
return V;
// Try to match an interleave of two v16i16s and lower them as unpck and
@@ -17195,8 +17195,8 @@ static SDValue lowerV32I8Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
return ZExt;
// Check for being able to broadcast a single element.
- if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v32i8, V1, V2, Mask,
- Subtarget, DAG))
+ if (SDValue Broadcast =
+ lowerShuffleAsBroadcast(DL, MVT::v32i8, V1, V2, Mask, Subtarget, DAG))
return Broadcast;
if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v32i8, V1, V2, Mask,
@@ -17248,8 +17248,8 @@ static SDValue lowerV32I8Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
if (SDValue V = lowerShuffleWithUNPCK256(DL, MVT::v32i8, V1, V2, Mask, DAG))
return V;
- if (SDValue V = lowerShuffleAsLanePermuteAndPermute(
- DL, MVT::v32i8, V1, V2, Mask, DAG, Subtarget))
+ if (SDValue V = lowerShuffleAsLanePermuteAndPermute(DL, MVT::v32i8, V1, V2,
+ Mask, DAG, Subtarget))
return V;
return lowerShuffleAsLanePermuteAndShuffle(DL, MVT::v32i8, V1, V2, Mask,
@@ -17271,16 +17271,16 @@ static SDValue lowerV32I8Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
return Result;
// Try to permute the lanes and then use a per-lane permute.
- if (SDValue V = lowerShuffleAsLanePermuteAndPermute(
- DL, MVT::v32i8, V1, V2, Mask, DAG, Subtarget))
+ if (SDValue V = lowerShuffleAsLanePermuteAndPermute(DL, MVT::v32i8, V1, V2,
+ Mask, DAG, Subtarget))
return V;
// Look for {0, 8, 16, 24, 32, 40, 48, 56 } in the first 8 elements. Followed
// by zeroable elements in the remaining 24 elements. Turn this into two
// vmovqb instructions shuffled together.
if (Subtarget.hasVLX())
- if (SDValue V = lowerShuffleAsVTRUNCAndUnpack(DL, MVT::v32i8, V1, V2,
- Mask, Zeroable, DAG))
+ if (SDValue V = lowerShuffleAsVTRUNCAndUnpack(DL, MVT::v32i8, V1, V2, Mask,
+ Zeroable, DAG))
return V;
// Try to match an interleave of two v32i8s and lower them as unpck and
@@ -17335,7 +17335,8 @@ static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
return V;
if (SDValue V = lowerShuffleAsBitBlend(DL, VT, V1, V2, Mask, DAG))
return V;
- return splitAndLowerShuffle(DL, VT, V1, V2, Mask, DAG, /*SimpleOnly*/ false);
+ return splitAndLowerShuffle(DL, VT, V1, V2, Mask, DAG,
+ /*SimpleOnly*/ false);
}
MVT FpVT = MVT::getVectorVT(MVT::getFloatingPointVT(ElementBits),
@@ -17584,8 +17585,7 @@ static SDValue lowerV16F32Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
// If we have a single input shuffle with different shuffle patterns in the
// 128-bit lanes and don't lane cross, use variable mask VPERMILPS.
- if (V2.isUndef() &&
- !is128BitLaneCrossingShuffleMask(MVT::v16f32, Mask)) {
+ if (V2.isUndef() && !is128BitLaneCrossingShuffleMask(MVT::v16f32, Mask)) {
SDValue VPermMask = getConstVector(Mask, MVT::v16i32, DAG, DL, true);
return DAG.getNode(X86ISD::VPERMILPV, DL, MVT::v16f32, V1, VPermMask);
}
@@ -17852,8 +17852,8 @@ static SDValue lowerV64I8Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
// Whenever we can lower this as a zext, that instruction is strictly faster
// than any alternative. It also allows us to fold memory operands into the
// shuffle in many cases.
- if (SDValue ZExt = lowerShuffleAsZeroOrAnyExtend(
- DL, MVT::v64i8, V1, V2, Mask, Zeroable, Subtarget, DAG))
+ if (SDValue ZExt = lowerShuffleAsZeroOrAnyExtend(DL, MVT::v64i8, V1, V2, Mask,
+ Zeroable, Subtarget, DAG))
return ZExt;
// Use dedicated unpack instructions for masks that match their pattern.
@@ -17930,7 +17930,8 @@ static SDValue lowerV64I8Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
if (Subtarget.hasVBMI())
return lowerShuffleWithPERMV(DL, MVT::v64i8, Mask, V1, V2, Subtarget, DAG);
- return splitAndLowerShuffle(DL, MVT::v64i8, V1, V2, Mask, DAG, /*SimpleOnly*/ false);
+ return splitAndLowerShuffle(DL, MVT::v64i8, V1, V2, Mask, DAG,
+ /*SimpleOnly*/ false);
}
/// High-level routine to lower various 512-bit x86 vector shuffles.
@@ -17938,13 +17939,11 @@ static SDValue lowerV64I8Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
/// This routine either breaks down the specific type of a 512-bit x86 vector
/// shuffle or splits it into two 256-bit shuffles and fuses the results back
/// together based on the available instructions.
-static SDValue lower512BitShuffle(const SDLoc &DL, ArrayRef<int> Mask,
- MVT VT, SDValue V1, SDValue V2,
- const APInt &Zeroable,
+static SDValue lower512BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
+ SDValue V1, SDValue V2, const APInt &Zeroable,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
- assert(Subtarget.hasAVX512() &&
- "Cannot lower 512-bit vectors w/ basic ISA!");
+ assert(Subtarget.hasAVX512() && "Cannot lower 512-bit vectors w/ basic ISA!");
// If we have a single input to the zero element, insert that into V1 if we
// can do so cheaply.
@@ -17962,8 +17961,8 @@ static SDValue lower512BitShuffle(const SDLoc &DL, ArrayRef<int> Mask,
return V;
// Check for being able to broadcast a single element.
- if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, VT, V1, V2, Mask,
- Subtarget, DAG))
+ if (SDValue Broadcast =
+ lowerShuffleAsBroadcast(DL, VT, V1, V2, Mask, Subtarget, DAG))
return Broadcast;
if ((VT == MVT::v32i16 || VT == MVT::v64i8) && !Subtarget.hasBWI()) {
@@ -17975,7 +17974,8 @@ static SDValue lower512BitShuffle(const SDLoc &DL, ArrayRef<int> Mask,
if (SDValue V = lowerShuffleAsBitBlend(DL, VT, V1, V2, Mask, DAG))
return V;
- return splitAndLowerShuffle(DL, VT, V1, V2, Mask, DAG, /*SimpleOnly*/ false);
+ return splitAndLowerShuffle(DL, VT, V1, V2, Mask, DAG,
+ /*SimpleOnly*/ false);
}
if (VT == MVT::v32f16 || VT == MVT::v32bf16) {
@@ -18082,14 +18082,12 @@ static int match1BitShuffleAsKSHIFT(unsigned &Opcode, ArrayRef<int> Mask,
return -1;
}
-
// Lower vXi1 vector shuffles.
// There is no a dedicated instruction on AVX-512 that shuffles the masks.
// The only way to shuffle bits is to sign-extend the mask vector to SIMD
// vector, shuffle and then truncate it back.
-static SDValue lower1BitShuffle(const SDLoc &DL, ArrayRef<int> Mask,
- MVT VT, SDValue V1, SDValue V2,
- const APInt &Zeroable,
+static SDValue lower1BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
+ SDValue V1, SDValue V2, const APInt &Zeroable,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
assert(Subtarget.hasAVX512() &&
@@ -18220,8 +18218,8 @@ static SDValue lower1BitShuffle(const SDLoc &DL, ArrayRef<int> Mask,
int NumElems = VT.getVectorNumElements();
if ((Subtarget.hasBWI() && (NumElems >= 32)) ||
(Subtarget.hasDQI() && (NumElems < 32)))
- return DAG.getSetCC(DL, VT, DAG.getConstant(0, DL, ExtVT),
- Shuffle, ISD::SETGT);
+ return DAG.getSetCC(DL, VT, DAG.getConstant(0, DL, ExtVT), Shuffle,
+ ISD::SETGT);
return DAG.getNode(ISD::TRUNCATE, DL, VT, Shuffle);
}
@@ -18348,7 +18346,7 @@ static SDValue canonicalizeShuffleMaskWithHorizOp(
unsigned RootSizeInBits, const SDLoc &DL, SelectionDAG &DAG,
const X86Subtarget &Subtarget);
- /// Top-level lowering for x86 vector shuffles.
+/// Top-level lowering for x86 vector shuffles.
///
/// This handles decomposition, canonicalization, and lowering of all x86
/// vector shuffles. Most of the specific lowering strategies are encapsulated
@@ -18424,8 +18422,8 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, const X86Subtarget &Subtarget,
// by obfuscating the operands with bitcasts.
// TODO: Avoid lowering directly from this top-level function: make this
// a query (canLowerAsBroadcast) and defer lowering to the type-based calls.
- if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, VT, V1, V2, OrigMask,
- Subtarget, DAG))
+ if (SDValue Broadcast =
+ lowerShuffleAsBroadcast(DL, VT, V1, V2, OrigMask, Subtarget, DAG))
return Broadcast;
MVT NewEltVT = VT.isFloatingPoint()
@@ -18652,8 +18650,7 @@ SDValue X86TargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const {
// Build a mask by testing the condition against zero.
MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
SDValue Mask = DAG.getSetCC(dl, MaskVT, Cond,
- DAG.getConstant(0, dl, CondVT),
- ISD::SETNE);
+ DAG.getConstant(0, dl, CondVT), ISD::SETNE);
// Now return a new VSELECT using the mask.
return DAG.getSelect(dl, VT, Mask, LHS, RHS);
}
@@ -18760,7 +18757,7 @@ static SDValue LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) {
}
if (VT == MVT::i32 || VT == MVT::i64)
- return Op;
+ return Op;
return SDValue();
}
@@ -18773,7 +18770,7 @@ static SDValue ExtractBitFromMaskVector(SDValue Op, SelectionDAG &DAG,
SDLoc dl(Vec);
MVT VecVT = Vec.getSimpleValueType();
SDValue Idx = Op.getOperand(1);
- auto* IdxC = dyn_cast<ConstantSDNode>(Idx);
+ auto *IdxC = dyn_cast<ConstantSDNode>(Idx);
MVT EltVT = Op.getSimpleValueType();
assert((VecVT.getVectorNumElements() <= 16 || Subtarget.hasBWI()) &&
@@ -18788,7 +18785,8 @@ static SDValue ExtractBitFromMaskVector(SDValue Op, SelectionDAG &DAG,
if (NumElts == 1) {
Vec = widenMaskVector(Vec, false, Subtarget, DAG, dl);
MVT IntVT = MVT::getIntegerVT(Vec.getValueType().getVectorNumElements());
- return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, DAG.getBitcast(IntVT, Vec));
+ return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
+ DAG.getBitcast(IntVT, Vec));
}
MVT ExtEltVT = (NumElts <= 8) ? MVT::getIntegerVT(128 / NumElts) : MVT::i8;
MVT ExtVecVT = MVT::getVectorVT(ExtEltVT, NumElts);
@@ -18846,14 +18844,13 @@ static APInt getExtractedDemandedElts(SDNode *N) {
return DemandedElts;
}
-SDValue
-X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
- SelectionDAG &DAG) const {
+SDValue X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
+ SelectionDAG &DAG) const {
SDLoc dl(Op);
SDValue Vec = Op.getOperand(0);
MVT VecVT = Vec.getSimpleValueType();
SDValue Idx = Op.getOperand(1);
- auto* IdxC = dyn_cast<ConstantSDNode>(Idx);
+ auto *IdxC = dyn_cast<ConstantSDNode>(Idx);
if (VecVT.getVectorElementType() == MVT::i1)
return ExtractBitFromMaskVector(Op, DAG, Subtarget);
@@ -18884,10 +18881,10 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
// | | Ports pressure in cycles | |
// |Uops| 1 | 2 - D |3 - D | 4 | 5 | |
// ---------------------------------------------------------
- // |2^ | | 0.5 | 0.5 |1.0| |CP| vmovaps xmmword ptr [rsp-0x18], xmm0
- // |1 |0.5| | | |0.5| | lea rax, ptr [rsp-0x18]
- // |1 | |0.5, 0.5|0.5, 0.5| | |CP| mov al, byte ptr [rdi+rax*1]
- // Total Num Of Uops: 4
+ // |2^ | | 0.5 | 0.5 |1.0| |CP| vmovaps xmmword ptr [rsp-0x18],
+ // xmm0 |1 |0.5| | | |0.5| | lea rax, ptr [rsp-0x18] |1
+ // | |0.5, 0.5|0.5, 0.5| | |CP| mov al, byte ptr [rdi+rax*1] Total Num
+ // Of Uops: 4
return SDValue();
}
@@ -18992,7 +18989,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
// UNPCKHPD the element to the lowest double word, then movsd.
// Note if the lower 64 bits of the result of the UNPCKHPD is then stored
// to a f64mem, the whole operation is folded into a single MOVHPDmr.
- int Mask[2] = { 1, -1 };
+ int Mask[2] = {1, -1};
Vec = DAG.getVectorShuffle(VecVT, dl, Vec, DAG.getUNDEF(VecVT), Mask);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Vec,
DAG.getVectorIdxConstant(0, dl));
@@ -19017,9 +19014,10 @@ static SDValue InsertBitToMaskVector(SDValue Op, SelectionDAG &DAG,
unsigned NumElts = VecVT.getVectorNumElements();
MVT ExtEltVT = (NumElts <= 8) ? MVT::getIntegerVT(128 / NumElts) : MVT::i8;
MVT ExtVecVT = MVT::getVectorVT(ExtEltVT, NumElts);
- SDValue ExtOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, ExtVecVT,
- DAG.getNode(ISD::SIGN_EXTEND, dl, ExtVecVT, Vec),
- DAG.getNode(ISD::SIGN_EXTEND, dl, ExtEltVT, Elt), Idx);
+ SDValue ExtOp =
+ DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, ExtVecVT,
+ DAG.getNode(ISD::SIGN_EXTEND, dl, ExtVecVT, Vec),
+ DAG.getNode(ISD::SIGN_EXTEND, dl, ExtEltVT, Elt), Idx);
return DAG.getNode(ISD::TRUNCATE, dl, VecVT, ExtOp);
}
@@ -19046,9 +19044,9 @@ SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
if (EltVT == MVT::bf16) {
MVT IVT = VT.changeVectorElementTypeToInteger();
- SDValue Res = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, IVT,
- DAG.getBitcast(IVT, N0),
- DAG.getBitcast(MVT::i16, N1), N2);
+ SDValue Res =
+ DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, IVT, DAG.getBitcast(IVT, N0),
+ DAG.getBitcast(MVT::i16, N1), N2);
return DAG.getBitcast(VT, Res);
}
@@ -19375,8 +19373,9 @@ static SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, const X86Subtarget &Subtarget,
}
// Returns the appropriate wrapper opcode for a global reference.
-unsigned X86TargetLowering::getGlobalWrapperKind(
- const GlobalValue *GV, const unsigned char OpFlags) const {
+unsigned
+X86TargetLowering::getGlobalWrapperKind(const GlobalValue *GV,
+ const unsigned char OpFlags) const {
// References to absolute symbols are never PC-relative.
if (GV && GV->isAbsoluteSymbolRef())
return X86ISD::Wrapper;
@@ -19400,8 +19399,8 @@ unsigned X86TargetLowering::getGlobalWrapperKind(
// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
// be used to form addressing mode. These wrapped nodes will be selected
// into MOV32ri.
-SDValue
-X86TargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const {
+SDValue X86TargetLowering::LowerConstantPool(SDValue Op,
+ SelectionDAG &DAG) const {
ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
// In PIC mode (unless we're in RIPRel PIC mode) we add an offset to the
@@ -19451,11 +19450,10 @@ SDValue X86TargetLowering::LowerExternalSymbol(SDValue Op,
return LowerGlobalOrExternal(Op, DAG, /*ForCall=*/false, nullptr);
}
-SDValue
-X86TargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const {
+SDValue X86TargetLowering::LowerBlockAddress(SDValue Op,
+ SelectionDAG &DAG) const {
// Create the TargetBlockAddressAddress node.
- unsigned char OpFlags =
- Subtarget.classifyBlockAddressReference();
+ unsigned char OpFlags = Subtarget.classifyBlockAddressReference();
const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
int64_t Offset = cast<BlockAddressSDNode>(Op)->getOffset();
SDLoc dl(Op);
@@ -19560,8 +19558,8 @@ SDValue X86TargetLowering::LowerGlobalOrExternal(SDValue Op, SelectionDAG &DAG,
return Result;
}
-SDValue
-X86TargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const {
+SDValue X86TargetLowering::LowerGlobalAddress(SDValue Op,
+ SelectionDAG &DAG) const {
return LowerGlobalOrExternal(Op, DAG, /*ForCall=*/false, nullptr);
}
@@ -19639,24 +19637,24 @@ static SDValue GetTLSADDR(SelectionDAG &DAG, GlobalAddressSDNode *GA,
}
// Lower ISD::GlobalTLSAddress using the "general dynamic" model, 32 bit
-static SDValue
-LowerToTLSGeneralDynamicModel32(GlobalAddressSDNode *GA, SelectionDAG &DAG,
- const EVT PtrVT) {
+static SDValue LowerToTLSGeneralDynamicModel32(GlobalAddressSDNode *GA,
+ SelectionDAG &DAG,
+ const EVT PtrVT) {
return GetTLSADDR(DAG, GA, PtrVT, X86::EAX, X86II::MO_TLSGD,
/*LoadGlobalBaseReg=*/true);
}
// Lower ISD::GlobalTLSAddress using the "general dynamic" model, 64 bit LP64
-static SDValue
-LowerToTLSGeneralDynamicModel64(GlobalAddressSDNode *GA, SelectionDAG &DAG,
- const EVT PtrVT) {
+static SDValue LowerToTLSGeneralDynamicModel64(GlobalAddressSDNode *GA,
+ SelectionDAG &DAG,
+ const EVT PtrVT) {
return GetTLSADDR(DAG, GA, PtrVT, X86::RAX, X86II::MO_TLSGD);
}
// Lower ISD::GlobalTLSAddress using the "general dynamic" model, 64 bit ILP32
-static SDValue
-LowerToTLSGeneralDynamicModelX32(GlobalAddressSDNode *GA, SelectionDAG &DAG,
- const EVT PtrVT) {
+static SDValue LowerToTLSGeneralDynamicModelX32(GlobalAddressSDNode *GA,
+ SelectionDAG &DAG,
+ const EVT PtrVT) {
return GetTLSADDR(DAG, GA, PtrVT, X86::EAX, X86II::MO_TLSGD);
}
@@ -19688,9 +19686,8 @@ static SDValue LowerToTLSLocalDynamicModel(GlobalAddressSDNode *GA,
// Build x@dtpoff.
unsigned char OperandFlags = X86II::MO_DTPOFF;
unsigned WrapperKind = X86ISD::Wrapper;
- SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl,
- GA->getValueType(0),
- GA->getOffset(), OperandFlags);
+ SDValue TGA = DAG.getTargetGlobalAddress(
+ GA->getGlobal(), dl, GA->getValueType(0), GA->getOffset(), OperandFlags);
SDValue Offset = DAG.getNode(WrapperKind, dl, PtrVT, TGA);
// Add x@dtpoff with the base.
@@ -19731,9 +19728,8 @@ static SDValue LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG,
// emit "addl x at ntpoff,%eax" (local exec)
// or "addl x at indntpoff,%eax" (initial exec)
// or "addl x at gotntpoff(%ebx) ,%eax" (initial exec, 32-bit pic)
- SDValue TGA =
- DAG.getTargetGlobalAddress(GA->getGlobal(), dl, GA->getValueType(0),
- GA->getOffset(), OperandFlags);
+ SDValue TGA = DAG.getTargetGlobalAddress(
+ GA->getGlobal(), dl, GA->getValueType(0), GA->getOffset(), OperandFlags);
SDValue Offset = DAG.getNode(WrapperKind, dl, PtrVT, TGA);
if (model == TLSModel::InitialExec) {
@@ -19752,8 +19748,8 @@ static SDValue LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG,
return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset);
}
-SDValue
-X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
+SDValue X86TargetLowering::LowerGlobalTLSAddress(SDValue Op,
+ SelectionDAG &DAG) const {
GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
@@ -19767,20 +19763,20 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
if (Subtarget.isTargetELF()) {
TLSModel::Model model = DAG.getTarget().getTLSModel(GV);
switch (model) {
- case TLSModel::GeneralDynamic:
- if (Subtarget.is64Bit()) {
- if (Subtarget.isTarget64BitLP64())
- return LowerToTLSGeneralDynamicModel64(GA, DAG, PtrVT);
- return LowerToTLSGeneralDynamicModelX32(GA, DAG, PtrVT);
- }
- return LowerToTLSGeneralDynamicModel32(GA, DAG, PtrVT);
- case TLSModel::LocalDynamic:
- return LowerToTLSLocalDynamicModel(GA, DAG, PtrVT, Subtarget.is64Bit(),
- Subtarget.isTarget64BitLP64());
- case TLSModel::InitialExec:
- case TLSModel::LocalExec:
- return LowerToTLSExecModel(GA, DAG, PtrVT, model, Subtarget.is64Bit(),
- PositionIndependent);
+ case TLSModel::GeneralDynamic:
+ if (Subtarget.is64Bit()) {
+ if (Subtarget.isTarget64BitLP64())
+ return LowerToTLSGeneralDynamicModel64(GA, DAG, PtrVT);
+ return LowerToTLSGeneralDynamicModelX32(GA, DAG, PtrVT);
+ }
+ return LowerToTLSGeneralDynamicModel32(GA, DAG, PtrVT);
+ case TLSModel::LocalDynamic:
+ return LowerToTLSLocalDynamicModel(GA, DAG, PtrVT, Subtarget.is64Bit(),
+ Subtarget.isTarget64BitLP64());
+ case TLSModel::InitialExec:
+ case TLSModel::LocalExec:
+ return LowerToTLSExecModel(GA, DAG, PtrVT, model, Subtarget.is64Bit(),
+ PositionIndependent);
}
llvm_unreachable("Unknown TLS model.");
}
@@ -19801,9 +19797,8 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
WrapperKind = X86ISD::WrapperRIP;
}
SDLoc DL(Op);
- SDValue Result = DAG.getTargetGlobalAddress(GA->getGlobal(), DL,
- GA->getValueType(0),
- GA->getOffset(), OpFlag);
+ SDValue Result = DAG.getTargetGlobalAddress(
+ GA->getGlobal(), DL, GA->getValueType(0), GA->getOffset(), OpFlag);
SDValue Offset = DAG.getNode(WrapperKind, DL, PtrVT, Result);
// With PIC32, the address is actually $g + Offset.
@@ -19817,7 +19812,7 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
SDValue Chain = DAG.getEntryNode();
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
Chain = DAG.getCALLSEQ_START(Chain, 0, 0, DL);
- SDValue Args[] = { Chain, Offset };
+ SDValue Args[] = {Chain, Offset};
Chain = DAG.getNode(X86ISD::TLSCALL, DL, NodeTys, Args);
Chain = DAG.getCALLSEQ_END(Chain, 0, 0, Chain.getValue(1), DL);
@@ -19885,9 +19880,9 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
res = DAG.getLoad(PtrVT, dl, Chain, res, MachinePointerInfo());
// Get the offset of start of .tls section
- SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl,
- GA->getValueType(0),
- GA->getOffset(), X86II::MO_SECREL);
+ SDValue TGA =
+ DAG.getTargetGlobalAddress(GA->getGlobal(), dl, GA->getValueType(0),
+ GA->getOffset(), X86II::MO_SECREL);
SDValue Offset = DAG.getNode(X86ISD::Wrapper, dl, PtrVT, TGA);
// The address of the thread local variable is the add of the thread
@@ -19947,8 +19942,8 @@ static SDValue LowerI64IntToFP_AVX512DQ(SDValue Op, const SDLoc &dl,
MVT SrcVT = Src.getSimpleValueType();
MVT VT = Op.getSimpleValueType();
- if (!Subtarget.hasDQI() || SrcVT != MVT::i64 || Subtarget.is64Bit() ||
- (VT != MVT::f32 && VT != MVT::f64))
+ if (!Subtarget.hasDQI() || SrcVT != MVT::i64 || Subtarget.is64Bit() ||
+ (VT != MVT::f32 && VT != MVT::f64))
return SDValue();
// Pack the i64 into a vector, do the operation and extract.
@@ -20013,22 +20008,22 @@ static SDValue LowerI64IntToFP16(SDValue Op, const SDLoc &dl, SelectionDAG &DAG,
static bool useVectorCast(unsigned Opcode, MVT FromVT, MVT ToVT,
const X86Subtarget &Subtarget) {
switch (Opcode) {
- case ISD::SINT_TO_FP:
- // TODO: Handle wider types with AVX/AVX512.
- if (!Subtarget.hasSSE2() || FromVT != MVT::v4i32)
- return false;
- // CVTDQ2PS or (V)CVTDQ2PD
- return ToVT == MVT::v4f32 || (Subtarget.hasAVX() && ToVT == MVT::v4f64);
-
- case ISD::UINT_TO_FP:
- // TODO: Handle wider types and i64 elements.
- if (!Subtarget.hasAVX512() || FromVT != MVT::v4i32)
- return false;
- // VCVTUDQ2PS or VCVTUDQ2PD
- return ToVT == MVT::v4f32 || ToVT == MVT::v4f64;
+ case ISD::SINT_TO_FP:
+ // TODO: Handle wider types with AVX/AVX512.
+ if (!Subtarget.hasSSE2() || FromVT != MVT::v4i32)
+ return false;
+ // CVTDQ2PS or (V)CVTDQ2PD
+ return ToVT == MVT::v4f32 || (Subtarget.hasAVX() && ToVT == MVT::v4f64);
- default:
+ case ISD::UINT_TO_FP:
+ // TODO: Handle wider types and i64 elements.
+ if (!Subtarget.hasAVX512() || FromVT != MVT::v4i32)
return false;
+ // VCVTUDQ2PS or VCVTUDQ2PD
+ return ToVT == MVT::v4f32 || ToVT == MVT::v4f64;
+
+ default:
+ return false;
}
}
@@ -20172,7 +20167,7 @@ static SDValue lowerINT_TO_FP_vXi64(SDValue Op, const SDLoc &DL,
return SDValue();
SDValue Zero = DAG.getConstant(0, DL, MVT::v4i64);
- SDValue One = DAG.getConstant(1, DL, MVT::v4i64);
+ SDValue One = DAG.getConstant(1, DL, MVT::v4i64);
SDValue Sign = DAG.getNode(ISD::OR, DL, MVT::v4i64,
DAG.getNode(ISD::SRL, DL, MVT::v4i64, Src, One),
DAG.getNode(ISD::AND, DL, MVT::v4i64, Src, One));
@@ -20392,7 +20387,7 @@ std::pair<SDValue, SDValue> X86TargetLowering::BuildFILD(
Chain = Result.getValue(1);
}
- return { Result, Chain };
+ return {Result, Chain};
}
/// Horizontal vector math instructions may be slower than normal math with
@@ -20429,18 +20424,18 @@ static SDValue LowerUINT_TO_FP_i64(SDValue Op, const SDLoc &dl,
LLVMContext *Context = DAG.getContext();
// Build some magic constants.
- static const uint32_t CV0[] = { 0x43300000, 0x45300000, 0, 0 };
+ static const uint32_t CV0[] = {0x43300000, 0x45300000, 0, 0};
Constant *C0 = ConstantDataVector::get(*Context, CV0);
auto PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
SDValue CPIdx0 = DAG.getConstantPool(C0, PtrVT, Align(16));
- SmallVector<Constant*,2> CV1;
+ SmallVector<Constant *, 2> CV1;
CV1.push_back(
- ConstantFP::get(*Context, APFloat(APFloat::IEEEdouble(),
- APInt(64, 0x4330000000000000ULL))));
+ ConstantFP::get(*Context, APFloat(APFloat::IEEEdouble(),
+ APInt(64, 0x4330000000000000ULL))));
CV1.push_back(
- ConstantFP::get(*Context, APFloat(APFloat::IEEEdouble(),
- APInt(64, 0x4530000000000000ULL))));
+ ConstantFP::get(*Context, APFloat(APFloat::IEEEdouble(),
+ APInt(64, 0x4530000000000000ULL))));
Constant *C1 = ConstantVector::get(CV1);
SDValue CPIdx1 = DAG.getConstantPool(C1, PtrVT, Align(16));
@@ -20461,11 +20456,10 @@ static SDValue LowerUINT_TO_FP_i64(SDValue Op, const SDLoc &dl,
SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::v2f64, XR2F, CLod1);
SDValue Result;
- if (Subtarget.hasSSE3() &&
- shouldUseHorizontalOp(true, DAG, Subtarget)) {
+ if (Subtarget.hasSSE3() && shouldUseHorizontalOp(true, DAG, Subtarget)) {
Result = DAG.getNode(X86ISD::FHADD, dl, MVT::v2f64, Sub, Sub);
} else {
- SDValue Shuffle = DAG.getVectorShuffle(MVT::v2f64, dl, Sub, Sub, {1,-1});
+ SDValue Shuffle = DAG.getVectorShuffle(MVT::v2f64, dl, Sub, Sub, {1, -1});
Result = DAG.getNode(ISD::FADD, dl, MVT::v2f64, Shuffle, Sub);
}
Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Result,
@@ -20491,8 +20485,7 @@ static SDValue LowerUINT_TO_FP_i32(SDValue Op, const SDLoc &dl,
// Or the load with the bias.
SDValue Or = DAG.getNode(
- ISD::OR, dl, MVT::v2i64,
- DAG.getBitcast(MVT::v2i64, Load),
+ ISD::OR, dl, MVT::v2i64, DAG.getBitcast(MVT::v2i64, Load),
DAG.getBitcast(MVT::v2i64,
DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f64, Bias)));
Or = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
@@ -20696,8 +20689,9 @@ static SDValue lowerUINT_TO_FP_vXi32(SDValue Op, const SDLoc &DL,
SDValue VecBitcast = DAG.getBitcast(VecI16VT, V);
// Low will be bitcasted right away, so do not bother bitcasting back to its
// original type.
- Low = DAG.getNode(X86ISD::BLENDI, DL, VecI16VT, VecBitcast,
- VecCstLowBitcast, DAG.getTargetConstant(0xaa, DL, MVT::i8));
+ Low =
+ DAG.getNode(X86ISD::BLENDI, DL, VecI16VT, VecBitcast, VecCstLowBitcast,
+ DAG.getTargetConstant(0xaa, DL, MVT::i8));
// uint4 hi = _mm_blend_epi16( _mm_srli_epi32(v,16),
// (uint4) 0x53000000, 0xaa);
SDValue VecCstHighBitcast = DAG.getBitcast(VecI16VT, VecCstHigh);
@@ -20705,7 +20699,8 @@ static SDValue lowerUINT_TO_FP_vXi32(SDValue Op, const SDLoc &DL,
// High will be bitcasted right away, so do not bother bitcasting back to
// its original type.
High = DAG.getNode(X86ISD::BLENDI, DL, VecI16VT, VecShiftBitcast,
- VecCstHighBitcast, DAG.getTargetConstant(0xaa, DL, MVT::i8));
+ VecCstHighBitcast,
+ DAG.getTargetConstant(0xaa, DL, MVT::i8));
} else {
SDValue VecCstMask = DAG.getConstant(0xffff, DL, VecIntVT);
// uint4 lo = (v & (uint4) 0xffff) | (uint4) 0x4b000000;
@@ -20741,7 +20736,8 @@ static SDValue lowerUINT_TO_FP_vXi32(SDValue Op, const SDLoc &DL,
return DAG.getNode(ISD::FADD, DL, VecFloatVT, LowBitcast, FHigh);
}
-static SDValue lowerUINT_TO_FP_vec(SDValue Op, const SDLoc &dl, SelectionDAG &DAG,
+static SDValue lowerUINT_TO_FP_vec(SDValue Op, const SDLoc &dl,
+ SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
unsigned OpNo = Op.getNode()->isStrictFPOpcode() ? 1 : 0;
SDValue N0 = Op.getOperand(OpNo);
@@ -20952,8 +20948,7 @@ SDValue X86TargetLowering::FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
DstTy = MVT::i64;
}
- assert(DstTy.getSimpleVT() <= MVT::i64 &&
- DstTy.getSimpleVT() >= MVT::i16 &&
+ assert(DstTy.getSimpleVT() <= MVT::i64 && DstTy.getSimpleVT() >= MVT::i16 &&
"Unknown FP_TO_INT to lower!");
// We lower FP->int64 into FISTP64 followed by a load from a temporary
@@ -20991,8 +20986,8 @@ SDValue X86TargetLowering::FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
bool LosesInfo = false;
if (TheVT == MVT::f64)
// The rounding mode is irrelevant as the conversion should be exact.
- Status = Thresh.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven,
- &LosesInfo);
+ Status = Thresh.convert(APFloat::IEEEdouble(),
+ APFloat::rmNearestTiesToEven, &LosesInfo);
else if (TheVT == MVT::f80)
Status = Thresh.convert(APFloat::x87DoubleExtended(),
APFloat::rmNearestTiesToEven, &LosesInfo);
@@ -21002,8 +20997,8 @@ SDValue X86TargetLowering::FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
SDValue ThreshVal = DAG.getConstantFP(Thresh, DL, TheVT);
- EVT ResVT = getSetCCResultType(DAG.getDataLayout(),
- *DAG.getContext(), TheVT);
+ EVT ResVT =
+ getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), TheVT);
SDValue Cmp;
if (IsStrict) {
Cmp = DAG.getSetCC(DL, ResVT, Value, ThreshVal, ISD::SETGE, Chain,
@@ -21032,8 +21027,8 @@ SDValue X86TargetLowering::FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
DAG.getConstantFP(0.0, DL, TheVT));
if (IsStrict) {
- Value = DAG.getNode(ISD::STRICT_FSUB, DL, { TheVT, MVT::Other},
- { Chain, Value, FltOfs });
+ Value = DAG.getNode(ISD::STRICT_FSUB, DL, {TheVT, MVT::Other},
+ {Chain, Value, FltOfs});
Chain = Value.getValue(1);
} else
Value = DAG.getNode(ISD::FSUB, DL, TheVT, Value, FltOfs);
@@ -21047,7 +21042,7 @@ SDValue X86TargetLowering::FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
assert(DstTy == MVT::i64 && "Invalid FP_TO_SINT to lower!");
Chain = DAG.getStore(Chain, DL, Value, StackSlot, MPI);
SDVTList Tys = DAG.getVTList(MVT::f80, MVT::Other);
- SDValue Ops[] = { Chain, StackSlot };
+ SDValue Ops[] = {Chain, StackSlot};
unsigned FLDSize = TheVT.getStoreSize();
assert(FLDSize <= MemSize && "Stack slot not big enough");
@@ -21060,10 +21055,9 @@ SDValue X86TargetLowering::FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
// Build the FP_TO_INT*_IN_MEM
MachineMemOperand *MMO = MF.getMachineMemOperand(
MPI, MachineMemOperand::MOStore, MemSize, Align(MemSize));
- SDValue Ops[] = { Chain, Value, StackSlot };
- SDValue FIST = DAG.getMemIntrinsicNode(X86ISD::FP_TO_INT_IN_MEM, DL,
- DAG.getVTList(MVT::Other),
- Ops, DstTy, MMO);
+ SDValue Ops[] = {Chain, Value, StackSlot};
+ SDValue FIST = DAG.getMemIntrinsicNode(
+ X86ISD::FP_TO_INT_IN_MEM, DL, DAG.getVTList(MVT::Other), Ops, DstTy, MMO);
SDValue Res = DAG.getLoad(Op.getValueType(), DL, FIST, StackSlot, MPI);
Chain = Res.getValue(1);
@@ -21242,7 +21236,7 @@ static SDValue truncateVectorWithPACK(unsigned Opcode, EVT DstVT, SDValue In,
return In;
unsigned NumElems = SrcVT.getVectorNumElements();
- if (NumElems < 2 || !isPowerOf2_32(NumElems) )
+ if (NumElems < 2 || !isPowerOf2_32(NumElems))
return SDValue();
unsigned DstSizeInBits = DstVT.getSizeInBits();
@@ -21313,7 +21307,7 @@ static SDValue truncateVectorWithPACK(unsigned Opcode, EVT DstVT, SDValue In,
// Scale shuffle mask to avoid bitcasts and help ComputeNumSignBits.
SmallVector<int, 64> Mask;
int Scale = 64 / OutVT.getScalarSizeInBits();
- narrowShuffleMaskElts(Scale, { 0, 2, 1, 3 }, Mask);
+ narrowShuffleMaskElts(Scale, {0, 2, 1, 3}, Mask);
Res = DAG.getVectorShuffle(OutVT, DL, Res, Res, Mask);
if (DstVT.is256BitVector())
@@ -21557,14 +21551,12 @@ static SDValue LowerTruncateVecI1(SDValue Op, const SDLoc &DL,
if (DAG.ComputeNumSignBits(In) < InVT.getScalarSizeInBits()) {
// We need to shift to get the lsb into sign position.
// Shift packed bytes not supported natively, bitcast to word
- MVT ExtVT = MVT::getVectorVT(MVT::i16, InVT.getSizeInBits()/16);
- In = DAG.getNode(ISD::SHL, DL, ExtVT,
- DAG.getBitcast(ExtVT, In),
+ MVT ExtVT = MVT::getVectorVT(MVT::i16, InVT.getSizeInBits() / 16);
+ In = DAG.getNode(ISD::SHL, DL, ExtVT, DAG.getBitcast(ExtVT, In),
DAG.getConstant(ShiftInx, DL, ExtVT));
In = DAG.getBitcast(InVT, In);
}
- return DAG.getSetCC(DL, VT, DAG.getConstant(0, DL, InVT),
- In, ISD::SETGT);
+ return DAG.getSetCC(DL, VT, DAG.getConstant(0, DL, InVT), In, ISD::SETGT);
}
// Use TESTD/Q, extended vector to packed dword/qword.
assert((InVT.is256BitVector() || InVT.is128BitVector()) &&
@@ -21602,7 +21594,8 @@ static SDValue LowerTruncateVecI1(SDValue Op, const SDLoc &DL,
// We either have 8 elements or we're allowed to use 512-bit vectors.
// If we have VLX, we want to use the narrowest vector that can get the
// job done so we use vXi32.
- MVT EltVT = Subtarget.hasVLX() ? MVT::i32 : MVT::getIntegerVT(512/NumElts);
+ MVT EltVT =
+ Subtarget.hasVLX() ? MVT::i32 : MVT::getIntegerVT(512 / NumElts);
MVT ExtVT = MVT::getVectorVT(EltVT, NumElts);
In = DAG.getNode(ISD::SIGN_EXTEND, DL, ExtVT, In);
InVT = ExtVT;
@@ -21716,10 +21709,9 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
// On AVX2, v8i32 -> v8i16 becomes PSHUFB.
if (Subtarget.hasInt256()) {
// The PSHUFB mask:
- static const int ShufMask1[] = { 0, 1, 4, 5, 8, 9, 12, 13,
- -1, -1, -1, -1, -1, -1, -1, -1,
- 16, 17, 20, 21, 24, 25, 28, 29,
- -1, -1, -1, -1, -1, -1, -1, -1 };
+ static const int ShufMask1[] = {
+ 0, 1, 4, 5, 8, 9, 12, 13, -1, -1, -1, -1, -1, -1, -1, -1,
+ 16, 17, 20, 21, 24, 25, 28, 29, -1, -1, -1, -1, -1, -1, -1, -1};
In = DAG.getBitcast(MVT::v32i8, In);
In = DAG.getVectorShuffle(MVT::v32i8, DL, In, In, ShufMask1);
In = DAG.getBitcast(MVT::v4i64, In);
@@ -21910,8 +21902,8 @@ SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
dl, {NVT, MVT::Other}, {Chain, Src});
Chain = Res.getValue(1);
} else {
- Res = DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT, dl,
- NVT, Src);
+ Res = DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT, dl, NVT,
+ Src);
}
// TODO: Need to add exception check code for strict FP.
@@ -22013,8 +22005,8 @@ SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
SDValue Tmp = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, Src,
DAG.getUNDEF(MVT::v2f32));
if (IsStrict) {
- unsigned Opc = IsSigned ? X86ISD::STRICT_CVTTP2SI
- : X86ISD::STRICT_CVTTP2UI;
+ unsigned Opc =
+ IsSigned ? X86ISD::STRICT_CVTTP2SI : X86ISD::STRICT_CVTTP2UI;
return DAG.getNode(Opc, dl, {VT, MVT::Other}, {Op->getOperand(0), Tmp});
}
unsigned Opc = IsSigned ? X86ISD::CVTTP2SI : X86ISD::CVTTP2UI;
@@ -22139,7 +22131,7 @@ SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
makeLibCall(DAG, LC, VT, Src, CallOptions, dl, Chain);
if (IsStrict)
- return DAG.getMergeValues({ Tmp.first, Tmp.second }, dl);
+ return DAG.getMergeValues({Tmp.first, Tmp.second}, dl);
return Tmp.first;
}
@@ -22202,7 +22194,7 @@ SDValue X86TargetLowering::LRINT_LLRINTHelper(SDNode *N,
assert(DstVT == MVT::i64 && "Invalid LRINT/LLRINT to lower!");
Chain = DAG.getStore(Chain, DL, Src, StackPtr, MPI);
SDVTList Tys = DAG.getVTList(MVT::f80, MVT::Other);
- SDValue Ops[] = { Chain, StackPtr };
+ SDValue Ops[] = {Chain, StackPtr};
Src = DAG.getMemIntrinsicNode(X86ISD::FLD, DL, Tys, Ops, SrcVT, MPI,
/*Align*/ std::nullopt,
@@ -22210,7 +22202,7 @@ SDValue X86TargetLowering::LRINT_LLRINTHelper(SDNode *N,
Chain = Src.getValue(1);
}
- SDValue StoreOps[] = { Chain, Src, StackPtr };
+ SDValue StoreOps[] = {Chain, Src, StackPtr};
Chain = DAG.getMemIntrinsicNode(X86ISD::FIST, DL, DAG.getVTList(MVT::Other),
StoreOps, DstVT, MPI, /*Align*/ std::nullopt,
MachineMemOperand::MOStore);
@@ -22218,8 +22210,8 @@ SDValue X86TargetLowering::LRINT_LLRINTHelper(SDNode *N,
return DAG.getLoad(DstVT, DL, Chain, StackPtr, MPI);
}
-SDValue
-X86TargetLowering::LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const {
+SDValue X86TargetLowering::LowerFP_TO_INT_SAT(SDValue Op,
+ SelectionDAG &DAG) const {
// This is based on the TargetLowering::expandFP_TO_INT_SAT implementation,
// but making use of X86 specifics to produce better instruction sequences.
SDNode *Node = Op.getNode();
@@ -22281,12 +22273,12 @@ X86TargetLowering::LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const {
APFloat MinFloat(Sem);
APFloat MaxFloat(Sem);
- APFloat::opStatus MinStatus = MinFloat.convertFromAPInt(
- MinInt, IsSigned, APFloat::rmTowardZero);
- APFloat::opStatus MaxStatus = MaxFloat.convertFromAPInt(
- MaxInt, IsSigned, APFloat::rmTowardZero);
- bool AreExactFloatBounds = !(MinStatus & APFloat::opStatus::opInexact)
- && !(MaxStatus & APFloat::opStatus::opInexact);
+ APFloat::opStatus MinStatus =
+ MinFloat.convertFromAPInt(MinInt, IsSigned, APFloat::rmTowardZero);
+ APFloat::opStatus MaxStatus =
+ MaxFloat.convertFromAPInt(MaxInt, IsSigned, APFloat::rmTowardZero);
+ bool AreExactFloatBounds = !(MinStatus & APFloat::opStatus::opInexact) &&
+ !(MaxStatus & APFloat::opStatus::opInexact);
SDValue MinFloatNode = DAG.getConstantFP(MinFloat, dl, SrcVT);
SDValue MaxFloatNode = DAG.getConstantFP(MaxFloat, dl, SrcVT);
@@ -22296,11 +22288,11 @@ X86TargetLowering::LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const {
if (AreExactFloatBounds) {
if (DstVT != TmpVT) {
// Clamp by MinFloat from below. If Src is NaN, propagate NaN.
- SDValue MinClamped = DAG.getNode(
- X86ISD::FMAX, dl, SrcVT, MinFloatNode, Src);
+ SDValue MinClamped =
+ DAG.getNode(X86ISD::FMAX, dl, SrcVT, MinFloatNode, Src);
// Clamp by MaxFloat from above. If Src is NaN, propagate NaN.
- SDValue BothClamped = DAG.getNode(
- X86ISD::FMIN, dl, SrcVT, MaxFloatNode, MinClamped);
+ SDValue BothClamped =
+ DAG.getNode(X86ISD::FMIN, dl, SrcVT, MaxFloatNode, MinClamped);
// Convert clamped value to integer.
SDValue FpToInt = DAG.getNode(FpToIntOpcode, dl, TmpVT, BothClamped);
@@ -22310,11 +22302,11 @@ X86TargetLowering::LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const {
}
// Clamp by MinFloat from below. If Src is NaN, the result is MinFloat.
- SDValue MinClamped = DAG.getNode(
- X86ISD::FMAX, dl, SrcVT, Src, MinFloatNode);
+ SDValue MinClamped =
+ DAG.getNode(X86ISD::FMAX, dl, SrcVT, Src, MinFloatNode);
// Clamp by MaxFloat from above. NaN cannot occur.
- SDValue BothClamped = DAG.getNode(
- X86ISD::FMINC, dl, SrcVT, MinClamped, MaxFloatNode);
+ SDValue BothClamped =
+ DAG.getNode(X86ISD::FMINC, dl, SrcVT, MinClamped, MaxFloatNode);
// Convert clamped value to integer.
SDValue FpToInt = DAG.getNode(FpToIntOpcode, dl, DstVT, BothClamped);
@@ -22326,8 +22318,8 @@ X86TargetLowering::LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const {
// Otherwise, select zero if Src is NaN.
SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
- return DAG.getSelectCC(
- dl, Src, Src, ZeroInt, FpToInt, ISD::CondCode::SETUO);
+ return DAG.getSelectCC(dl, Src, Src, ZeroInt, FpToInt,
+ ISD::CondCode::SETUO);
}
SDValue MinIntNode = DAG.getConstant(MinInt, dl, DstVT);
@@ -22349,13 +22341,13 @@ X86TargetLowering::LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const {
if (!IsSigned || SatWidth != TmpVT.getScalarSizeInBits()) {
// If Src ULT MinFloat, select MinInt. In particular, this also selects
// MinInt if Src is NaN.
- Select = DAG.getSelectCC(
- dl, Src, MinFloatNode, MinIntNode, Select, ISD::CondCode::SETULT);
+ Select = DAG.getSelectCC(dl, Src, MinFloatNode, MinIntNode, Select,
+ ISD::CondCode::SETULT);
}
// If Src OGT MaxFloat, select MaxInt.
- Select = DAG.getSelectCC(
- dl, Src, MaxFloatNode, MaxIntNode, Select, ISD::CondCode::SETOGT);
+ Select = DAG.getSelectCC(dl, Src, MaxFloatNode, MaxIntNode, Select,
+ ISD::CondCode::SETOGT);
// In the unsigned case we are done, because we mapped NaN to MinInt, which
// is already zero. The promoted case was already handled above.
@@ -22365,8 +22357,7 @@ X86TargetLowering::LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const {
// Otherwise, select 0 if Src is NaN.
SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
- return DAG.getSelectCC(
- dl, Src, Src, ZeroInt, Select, ISD::CondCode::SETUO);
+ return DAG.getSelectCC(dl, Src, Src, ZeroInt, Select, ISD::CondCode::SETUO);
}
SDValue X86TargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
@@ -22421,15 +22412,15 @@ SDValue X86TargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
Entry.IsZExt = true;
Args.push_back(Entry);
- SDValue Callee = DAG.getExternalSymbol(
- getLibcallName(RTLIB::FPEXT_F16_F32),
- getPointerTy(DAG.getDataLayout()));
+ SDValue Callee =
+ DAG.getExternalSymbol(getLibcallName(RTLIB::FPEXT_F16_F32),
+ getPointerTy(DAG.getDataLayout()));
CLI.setDebugLoc(DL).setChain(Chain).setLibCallee(
CallingConv::C, EVT(VT).getTypeForEVT(*DAG.getContext()), Callee,
std::move(Args));
SDValue Res;
- std::tie(Res,Chain) = LowerCallTo(CLI);
+ std::tie(Res, Chain) = LowerCallTo(CLI);
if (IsStrict)
Res = DAG.getMergeValues({Res, Chain}, DL);
@@ -22696,14 +22687,14 @@ static SDValue lowerAddSubToHorizontalOp(SDValue Op, const SDLoc &DL,
// TODO: Allow commuted (f)sub by negating the result of (F)HSUB?
unsigned HOpcode;
switch (Op.getOpcode()) {
- // clang-format off
+ // clang-format off
case ISD::ADD: HOpcode = X86ISD::HADD; break;
case ISD::SUB: HOpcode = X86ISD::HSUB; break;
case ISD::FADD: HOpcode = X86ISD::FHADD; break;
case ISD::FSUB: HOpcode = X86ISD::FHSUB; break;
default:
llvm_unreachable("Trying to lower unsupported opcode to horizontal op");
- // clang-format on
+ // clang-format on
}
unsigned LExtIndex = LHS.getConstantOperandVal(1);
unsigned RExtIndex = RHS.getConstantOperandVal(1);
@@ -22761,7 +22752,7 @@ static SDValue LowerFROUND(SDValue Op, SelectionDAG &DAG) {
bool Ignored;
APFloat Point5Pred = APFloat(0.5f);
Point5Pred.convert(Sem, APFloat::rmNearestTiesToEven, &Ignored);
- Point5Pred.next(/*nextDown*/true);
+ Point5Pred.next(/*nextDown*/ true);
SDValue Adder = DAG.getNode(ISD::FCOPYSIGN, dl, VT,
DAG.getConstantFP(Point5Pred, dl, VT), N0);
@@ -22811,16 +22802,16 @@ static SDValue LowerFABSorFNEG(SDValue Op, SelectionDAG &DAG) {
unsigned EltBits = VT.getScalarSizeInBits();
// For FABS, mask is 0x7f...; for FNEG, mask is 0x80...
- APInt MaskElt = IsFABS ? APInt::getSignedMaxValue(EltBits) :
- APInt::getSignMask(EltBits);
+ APInt MaskElt =
+ IsFABS ? APInt::getSignedMaxValue(EltBits) : APInt::getSignMask(EltBits);
const fltSemantics &Sem = VT.getFltSemantics();
SDValue Mask = DAG.getConstantFP(APFloat(Sem, MaskElt), dl, LogicVT);
SDValue Op0 = Op.getOperand(0);
bool IsFNABS = !IsFABS && (Op0.getOpcode() == ISD::FABS);
- unsigned LogicOp = IsFABS ? X86ISD::FAND :
- IsFNABS ? X86ISD::FOR :
- X86ISD::FXOR;
+ unsigned LogicOp = IsFABS ? X86ISD::FAND
+ : IsFNABS ? X86ISD::FOR
+ : X86ISD::FXOR;
SDValue Operand = IsFNABS ? Op0.getOperand(0) : Op0;
if (VT.isVector() || IsF128)
@@ -22923,7 +22914,8 @@ static SDValue LowerFGETSIGN(SDValue Op, SelectionDAG &DAG) {
}
/// Helper for attempting to create a X86ISD::BT node.
-static SDValue getBT(SDValue Src, SDValue BitNo, const SDLoc &DL, SelectionDAG &DAG) {
+static SDValue getBT(SDValue Src, SDValue BitNo, const SDLoc &DL,
+ SelectionDAG &DAG) {
// If Src is i8, promote it to i32 with any_extend. There is no i8 BT
// instruction. Since the shift amount is in-range-or-undefined, we know
// that doing a bittest on the i32 value is ok. We extend to i32 because
@@ -23539,8 +23531,7 @@ static bool hasNonFlagsUse(SDValue Op) {
// the node alone and emit a 'cmp' or 'test' instruction.
static bool isProfitableToUseFlagOp(SDValue Op) {
for (SDNode *U : Op->users())
- if (U->getOpcode() != ISD::CopyToReg &&
- U->getOpcode() != ISD::SETCC &&
+ if (U->getOpcode() != ISD::CopyToReg && U->getOpcode() != ISD::SETCC &&
U->getOpcode() != ISD::STORE)
return false;
@@ -23556,14 +23547,20 @@ static SDValue EmitTest(SDValue Op, X86::CondCode X86CC, const SDLoc &dl,
bool NeedCF = false;
bool NeedOF = false;
switch (X86CC) {
- default: break;
- case X86::COND_A: case X86::COND_AE:
- case X86::COND_B: case X86::COND_BE:
+ default:
+ break;
+ case X86::COND_A:
+ case X86::COND_AE:
+ case X86::COND_B:
+ case X86::COND_BE:
NeedCF = true;
break;
- case X86::COND_G: case X86::COND_GE:
- case X86::COND_L: case X86::COND_LE:
- case X86::COND_O: case X86::COND_NO: {
+ case X86::COND_G:
+ case X86::COND_GE:
+ case X86::COND_L:
+ case X86::COND_LE:
+ case X86::COND_O:
+ case X86::COND_NO: {
// Check if we really need to set the
// Overflow flag. If NoSignedWrap is present
// that is not actually needed.
@@ -23615,14 +23612,14 @@ static SDValue EmitTest(SDValue Op, X86::CondCode X86CC, const SDLoc &dl,
// Otherwise use a regular EFLAGS-setting instruction.
switch (ArithOp.getOpcode()) {
- // clang-format off
+ // clang-format off
default: llvm_unreachable("unexpected operator!");
case ISD::ADD: Opcode = X86ISD::ADD; break;
case ISD::SUB: Opcode = X86ISD::SUB; break;
case ISD::XOR: Opcode = X86ISD::XOR; break;
case ISD::AND: Opcode = X86ISD::AND; break;
case ISD::OR: Opcode = X86ISD::OR; break;
- // clang-format on
+ // clang-format on
}
NumOperands = 2;
@@ -23637,8 +23634,9 @@ static SDValue EmitTest(SDValue Op, X86::CondCode X86CC, const SDLoc &dl,
case ISD::USUBO: {
// /USUBO/SSUBO will become a X86ISD::SUB and we can use its Z flag.
SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
- return DAG.getNode(X86ISD::SUB, dl, VTs, Op->getOperand(0),
- Op->getOperand(1)).getValue(1);
+ return DAG
+ .getNode(X86ISD::SUB, dl, VTs, Op->getOperand(0), Op->getOperand(1))
+ .getValue(1);
}
default:
break;
@@ -23667,8 +23665,9 @@ static SDValue EmitCmp(SDValue Op0, SDValue Op1, X86::CondCode X86CC,
EVT CmpVT = Op0.getValueType();
- assert((CmpVT == MVT::i8 || CmpVT == MVT::i16 ||
- CmpVT == MVT::i32 || CmpVT == MVT::i64) && "Unexpected VT!");
+ assert((CmpVT == MVT::i8 || CmpVT == MVT::i16 || CmpVT == MVT::i32 ||
+ CmpVT == MVT::i64) &&
+ "Unexpected VT!");
// Only promote the compare up to I32 if it is a 16 bit operation
// with an immediate. 16 bit immediates are to be avoided unless the target
@@ -23795,9 +23794,8 @@ bool X86TargetLowering::isFsqrtCheap(SDValue Op, SelectionDAG &DAG) const {
/// The minimum architected relative accuracy is 2^-12. We need one
/// Newton-Raphson step to have a good float result (24 bits of precision).
-SDValue X86TargetLowering::getSqrtEstimate(SDValue Op,
- SelectionDAG &DAG, int Enabled,
- int &RefinementSteps,
+SDValue X86TargetLowering::getSqrtEstimate(SDValue Op, SelectionDAG &DAG,
+ int Enabled, int &RefinementSteps,
bool &UseOneConstNR,
bool Reciprocal) const {
SDLoc DL(Op);
@@ -23904,9 +23902,7 @@ SDValue X86TargetLowering::getRecipEstimate(SDValue Op, SelectionDAG &DAG,
/// This is because we still need one division to calculate the reciprocal and
/// then we need two multiplies by that reciprocal as replacements for the
/// original divisions.
-unsigned X86TargetLowering::combineRepeatedFPDivisors() const {
- return 2;
-}
+unsigned X86TargetLowering::combineRepeatedFPDivisors() const { return 2; }
SDValue
X86TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
@@ -23914,7 +23910,7 @@ X86TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
SmallVectorImpl<SDNode *> &Created) const {
AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
if (isIntDivCheap(N->getValueType(0), Attr))
- return SDValue(N,0); // Lower SDIV as SDIV
+ return SDValue(N, 0); // Lower SDIV as SDIV
assert((Divisor.isPowerOf2() || Divisor.isNegatedPowerOf2()) &&
"Unexpected divisor!");
@@ -23983,8 +23979,8 @@ static SDValue LowerAndToBT(SDValue And, ISD::CondCode CC, const SDLoc &dl,
if ((!isUInt<32>(AndRHSVal) || (OptForSize && !isUInt<8>(AndRHSVal))) &&
isPowerOf2_64(AndRHSVal)) {
Src = AndLHS;
- BitNo = DAG.getConstant(Log2_64_Ceil(AndRHSVal), dl,
- Src.getValueType());
+ BitNo =
+ DAG.getConstant(Log2_64_Ceil(AndRHSVal), dl, Src.getValueType());
}
}
}
@@ -24030,7 +24026,7 @@ static unsigned translateX86FSETCC(ISD::CondCode SetCCOpcode, SDValue &Op0,
// 6 - NLE
// 7 - ORD
switch (SetCCOpcode) {
- // clang-format off
+ // clang-format off
default: llvm_unreachable("Unexpected SETCC condition");
case ISD::SETOEQ:
case ISD::SETEQ: SSECC = 0; break;
@@ -24052,7 +24048,7 @@ static unsigned translateX86FSETCC(ISD::CondCode SetCCOpcode, SDValue &Op0,
case ISD::SETO: SSECC = 7; break;
case ISD::SETUEQ: SSECC = 8; break;
case ISD::SETONE: SSECC = 12; break;
- // clang-format on
+ // clang-format on
}
if (Swap)
std::swap(Op0, Op1);
@@ -24337,13 +24333,13 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
Cmp1 = DAG.getNode(
Opc, dl, {VT, MVT::Other},
{Chain, Op0, Op1, DAG.getTargetConstant(CC1, dl, MVT::i8)});
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Cmp0.getValue(1),
- Cmp1.getValue(1));
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ Cmp0.getValue(1), Cmp1.getValue(1));
} else {
- Cmp0 = DAG.getNode(
- Opc, dl, VT, Op0, Op1, DAG.getTargetConstant(CC0, dl, MVT::i8));
- Cmp1 = DAG.getNode(
- Opc, dl, VT, Op0, Op1, DAG.getTargetConstant(CC1, dl, MVT::i8));
+ Cmp0 = DAG.getNode(Opc, dl, VT, Op0, Op1,
+ DAG.getTargetConstant(CC0, dl, MVT::i8));
+ Cmp1 = DAG.getNode(Opc, dl, VT, Op0, Op1,
+ DAG.getTargetConstant(CC1, dl, MVT::i8));
}
Cmp = DAG.getNode(CombineOpc, dl, VT, Cmp0, Cmp1);
} else {
@@ -24353,8 +24349,8 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
{Chain, Op0, Op1, DAG.getTargetConstant(SSECC, dl, MVT::i8)});
Chain = Cmp.getValue(1);
} else
- Cmp = DAG.getNode(
- Opc, dl, VT, Op0, Op1, DAG.getTargetConstant(SSECC, dl, MVT::i8));
+ Cmp = DAG.getNode(Opc, dl, VT, Op0, Op1,
+ DAG.getTargetConstant(SSECC, dl, MVT::i8));
}
} else {
// Handle all other FP comparisons here.
@@ -24366,8 +24362,8 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
{Chain, Op0, Op1, DAG.getTargetConstant(SSECC, dl, MVT::i8)});
Chain = Cmp.getValue(1);
} else
- Cmp = DAG.getNode(
- Opc, dl, VT, Op0, Op1, DAG.getTargetConstant(SSECC, dl, MVT::i8));
+ Cmp = DAG.getNode(Opc, dl, VT, Op0, Op1,
+ DAG.getTargetConstant(SSECC, dl, MVT::i8));
}
if (VT.getFixedSizeInBits() >
@@ -24418,7 +24414,7 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
// Translate compare code to XOP PCOM compare mode.
unsigned CmpMode = 0;
switch (Cond) {
- // clang-format off
+ // clang-format off
default: llvm_unreachable("Unexpected SETCC condition");
case ISD::SETULT:
case ISD::SETLT: CmpMode = 0x00; break;
@@ -24430,7 +24426,7 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
case ISD::SETGE: CmpMode = 0x03; break;
case ISD::SETEQ: CmpMode = 0x04; break;
case ISD::SETNE: CmpMode = 0x05; break;
- // clang-format on
+ // clang-format on
}
// Are we comparing unsigned or signed integers?
@@ -24528,13 +24524,13 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
bool Invert = false;
unsigned Opc;
switch (Cond) {
- // clang-format off
+ // clang-format off
default: llvm_unreachable("Unexpected condition code");
case ISD::SETUGT: Invert = true; [[fallthrough]];
case ISD::SETULE: Opc = ISD::UMIN; break;
case ISD::SETULT: Invert = true; [[fallthrough]];
case ISD::SETUGE: Opc = ISD::UMAX; break;
- // clang-format on
+ // clang-format on
}
SDValue Result = DAG.getNode(Opc, dl, VT, Op0, Op1);
@@ -24558,10 +24554,10 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
// operations may be required for some comparisons.
unsigned Opc = (Cond == ISD::SETEQ || Cond == ISD::SETNE) ? X86ISD::PCMPEQ
: X86ISD::PCMPGT;
- bool Swap = Cond == ISD::SETLT || Cond == ISD::SETULT ||
- Cond == ISD::SETGE || Cond == ISD::SETUGE;
- bool Invert = Cond == ISD::SETNE ||
- (Cond != ISD::SETEQ && ISD::isTrueWhenEqual(Cond));
+ bool Swap = Cond == ISD::SETLT || Cond == ISD::SETULT || Cond == ISD::SETGE ||
+ Cond == ISD::SETUGE;
+ bool Invert =
+ Cond == ISD::SETNE || (Cond != ISD::SETEQ && ISD::isTrueWhenEqual(Cond));
if (Swap)
std::swap(Op0, Op1);
@@ -24579,7 +24575,7 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
Op1 = DAG.getBitcast(MVT::v4i32, Op1);
SDValue GT = DAG.getNode(X86ISD::PCMPGT, dl, MVT::v4i32, Op0, Op1);
- static const int MaskHi[] = { 1, 1, 3, 3 };
+ static const int MaskHi[] = {1, 1, 3, 3};
SDValue Result = DAG.getVectorShuffle(MVT::v4i32, dl, GT, GT, MaskHi);
return DAG.getBitcast(VT, Result);
@@ -24590,7 +24586,7 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
Op1 = DAG.getAllOnesConstant(dl, MVT::v4i32);
SDValue GT = DAG.getNode(X86ISD::PCMPGT, dl, MVT::v4i32, Op0, Op1);
- static const int MaskHi[] = { 1, 1, 3, 3 };
+ static const int MaskHi[] = {1, 1, 3, 3};
SDValue Result = DAG.getVectorShuffle(MVT::v4i32, dl, GT, GT, MaskHi);
return DAG.getBitcast(VT, Result);
@@ -24629,8 +24625,8 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
SDValue EQ = DAG.getNode(X86ISD::PCMPEQ, dl, MVT::v4i32, Op0, Op1);
// Create masks for only the low parts/high parts of the 64 bit integers.
- static const int MaskHi[] = { 1, 1, 3, 3 };
- static const int MaskLo[] = { 0, 0, 2, 2 };
+ static const int MaskHi[] = {1, 1, 3, 3};
+ static const int MaskLo[] = {0, 0, 2, 2};
SDValue EQHi = DAG.getVectorShuffle(MVT::v4i32, dl, EQ, EQ, MaskHi);
SDValue GTLo = DAG.getVectorShuffle(MVT::v4i32, dl, GT, GT, MaskLo);
SDValue GTHi = DAG.getVectorShuffle(MVT::v4i32, dl, GT, GT, MaskHi);
@@ -24657,7 +24653,7 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
SDValue Result = DAG.getNode(Opc, dl, MVT::v4i32, Op0, Op1);
// Make sure the lower and upper halves are both all-ones.
- static const int Mask[] = { 1, 0, 3, 2 };
+ static const int Mask[] = {1, 0, 3, 2};
SDValue Shuf = DAG.getVectorShuffle(MVT::v4i32, dl, Result, Result, Mask);
Result = DAG.getNode(ISD::AND, dl, MVT::v4i32, Result, Shuf);
@@ -24672,8 +24668,8 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
// bits of the inputs before performing those operations.
if (FlipSigns) {
MVT EltVT = VT.getVectorElementType();
- SDValue SM = DAG.getConstant(APInt::getSignMask(EltVT.getSizeInBits()), dl,
- VT);
+ SDValue SM =
+ DAG.getConstant(APInt::getSignMask(EltVT.getSizeInBits()), dl, VT);
Op0 = DAG.getNode(ISD::XOR, dl, VT, Op0, SM);
Op1 = DAG.getNode(ISD::XOR, dl, VT, Op1, SM);
}
@@ -24690,8 +24686,7 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
// Try to select this as a KORTEST+SETCC or KTEST+SETCC if possible.
static SDValue EmitAVX512Test(SDValue Op0, SDValue Op1, ISD::CondCode CC,
const SDLoc &dl, SelectionDAG &DAG,
- const X86Subtarget &Subtarget,
- SDValue &X86CC) {
+ const X86Subtarget &Subtarget, SDValue &X86CC) {
assert((CC == ISD::SETEQ || CC == ISD::SETNE) && "Unsupported ISD::CondCode");
// Must be a bitcast from vXi1.
@@ -24838,7 +24833,8 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
Op.getOpcode() == ISD::STRICT_FSETCCS;
MVT VT = Op->getSimpleValueType(0);
- if (VT.isVector()) return LowerVSETCC(Op, Subtarget, DAG);
+ if (VT.isVector())
+ return LowerVSETCC(Op, Subtarget, DAG);
assert(VT == MVT::i8 && "SetCC type must be 8-bit integer");
SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
@@ -24933,7 +24929,8 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
return IsStrict ? DAG.getMergeValues({Res, Chain}, dl) : Res;
}
-SDValue X86TargetLowering::LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const {
+SDValue X86TargetLowering::LowerSETCCCARRY(SDValue Op,
+ SelectionDAG &DAG) const {
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
SDValue Carry = Op.getOperand(2);
@@ -24945,8 +24942,8 @@ SDValue X86TargetLowering::LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const
// Recreate the carry if needed.
EVT CarryVT = Carry.getValueType();
- Carry = DAG.getNode(X86ISD::ADD, DL, DAG.getVTList(CarryVT, MVT::i32),
- Carry, DAG.getAllOnesConstant(DL, CarryVT));
+ Carry = DAG.getNode(X86ISD::ADD, DL, DAG.getVTList(CarryVT, MVT::i32), Carry,
+ DAG.getAllOnesConstant(DL, CarryVT));
SDVTList VTs = DAG.getVTList(LHS.getValueType(), MVT::i32);
SDValue Cmp = DAG.getNode(X86ISD::SBB, DL, VTs, LHS, RHS, Carry.getValue(1));
@@ -24966,7 +24963,8 @@ getX86XALUOOp(X86::CondCode &Cond, SDValue Op, SelectionDAG &DAG) {
unsigned BaseOp = 0;
SDLoc DL(Op);
switch (Op.getOpcode()) {
- default: llvm_unreachable("Unknown ovf instruction!");
+ default:
+ llvm_unreachable("Unknown ovf instruction!");
case ISD::SADDO:
BaseOp = X86ISD::ADD;
Cond = X86::COND_O;
@@ -25040,7 +25038,8 @@ static bool isTruncWithZeroHighBitsInput(SDValue V, SelectionDAG &DAG) {
SDValue VOp0 = V.getOperand(0);
unsigned InBits = VOp0.getValueSizeInBits();
unsigned Bits = V.getValueSizeInBits();
- return DAG.MaskedValueIsZero(VOp0, APInt::getHighBitsSet(InBits,InBits-Bits));
+ return DAG.MaskedValueIsZero(VOp0,
+ APInt::getHighBitsSet(InBits, InBits - Bits));
}
// Lower various (select (icmp CmpVal, 0), LHS, RHS) custom patterns.
@@ -25178,7 +25177,7 @@ static SDValue LowerSELECTWithCmpZero(SDValue CmpVal, SDValue LHS, SDValue RHS,
SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
bool AddTest = true;
- SDValue Cond = Op.getOperand(0);
+ SDValue Cond = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
SDValue Op2 = Op.getOperand(2);
SDLoc DL(Op);
@@ -25329,14 +25328,13 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
// If condition flag is set by a X86ISD::CMP, then use it as the condition
// setting operand in place of the X86ISD::SETCC.
unsigned CondOpcode = Cond.getOpcode();
- if (CondOpcode == X86ISD::SETCC ||
- CondOpcode == X86ISD::SETCC_CARRY) {
+ if (CondOpcode == X86ISD::SETCC || CondOpcode == X86ISD::SETCC_CARRY) {
CC = Cond.getOperand(0);
SDValue Cmp = Cond.getOperand(1);
bool IllegalFPCMov = false;
- if (VT.isFloatingPoint() && !VT.isVector() &&
- !isScalarFPTypeInSSEReg(VT) && Subtarget.canUseCMOV()) // FPStack?
+ if (VT.isFloatingPoint() && !VT.isVector() && !isScalarFPTypeInSSEReg(VT) &&
+ Subtarget.canUseCMOV()) // FPStack?
IllegalFPCMov = !hasFPCMov(cast<ConstantSDNode>(CC)->getSExtValue());
if ((isX86LogicalCmp(Cmp) && !IllegalFPCMov) ||
@@ -25399,14 +25397,15 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
// X86 doesn't have an i8 cmov. If both operands are the result of a truncate
// widen the cmov and push the truncate through. This avoids introducing a new
// branch during isel and doesn't add any extensions.
- if (Op.getValueType() == MVT::i8 &&
- Op1.getOpcode() == ISD::TRUNCATE && Op2.getOpcode() == ISD::TRUNCATE) {
+ if (Op.getValueType() == MVT::i8 && Op1.getOpcode() == ISD::TRUNCATE &&
+ Op2.getOpcode() == ISD::TRUNCATE) {
SDValue T1 = Op1.getOperand(0), T2 = Op2.getOperand(0);
if (T1.getValueType() == T2.getValueType() &&
// Exclude CopyFromReg to avoid partial register stalls.
- T1.getOpcode() != ISD::CopyFromReg && T2.getOpcode()!=ISD::CopyFromReg){
- SDValue Cmov = DAG.getNode(X86ISD::CMOV, DL, T1.getValueType(), T2, T1,
- CC, Cond);
+ T1.getOpcode() != ISD::CopyFromReg &&
+ T2.getOpcode() != ISD::CopyFromReg) {
+ SDValue Cmov =
+ DAG.getNode(X86ISD::CMOV, DL, T1.getValueType(), T2, T1, CC, Cond);
return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Cmov);
}
}
@@ -25422,14 +25421,14 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
!X86::mayFoldLoad(Op2, Subtarget))) {
Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op1);
Op2 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op2);
- SDValue Ops[] = { Op2, Op1, CC, Cond };
+ SDValue Ops[] = {Op2, Op1, CC, Cond};
SDValue Cmov = DAG.getNode(X86ISD::CMOV, DL, MVT::i32, Ops);
return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Cmov);
}
// X86ISD::CMOV means set the result (which is operand 1) to the RHS if
// condition is true.
- SDValue Ops[] = { Op2, Op1, CC, Cond };
+ SDValue Ops[] = {Op2, Op1, CC, Cond};
return DAG.getNode(X86ISD::CMOV, DL, Op.getValueType(), Ops, Op->getFlags());
}
@@ -25539,9 +25538,9 @@ static SDValue LowerEXTEND_VECTOR_INREG(SDValue Op,
InVT = In.getSimpleValueType();
}
- // SSE41 targets can use the pmov[sz]x* instructions directly for 128-bit results,
- // so are legal and shouldn't occur here. AVX2/AVX512 pmovsx* instructions still
- // need to be handled here for 256/512-bit results.
+ // SSE41 targets can use the pmov[sz]x* instructions directly for 128-bit
+ // results, so are legal and shouldn't occur here. AVX2/AVX512 pmovsx*
+ // instructions still need to be handled here for 256/512-bit results.
if (Subtarget.hasInt256()) {
assert(VT.getSizeInBits() > 128 && "Unexpected 128-bit vector extension");
@@ -25550,9 +25549,8 @@ static SDValue LowerEXTEND_VECTOR_INREG(SDValue Op,
// FIXME: Apparently we create inreg operations that could be regular
// extends.
- unsigned ExtOpc =
- Opc == ISD::SIGN_EXTEND_VECTOR_INREG ? ISD::SIGN_EXTEND
- : ISD::ZERO_EXTEND;
+ unsigned ExtOpc = Opc == ISD::SIGN_EXTEND_VECTOR_INREG ? ISD::SIGN_EXTEND
+ : ISD::ZERO_EXTEND;
return DAG.getNode(ExtOpc, dl, VT, In);
}
@@ -25670,9 +25668,9 @@ static SDValue LowerSIGN_EXTEND(SDValue Op, const X86Subtarget &Subtarget,
SDValue OpLo = DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, dl, HalfVT, In);
unsigned NumElems = InVT.getVectorNumElements();
- SmallVector<int,8> ShufMask(NumElems, -1);
- for (unsigned i = 0; i != NumElems/2; ++i)
- ShufMask[i] = i + NumElems/2;
+ SmallVector<int, 8> ShufMask(NumElems, -1);
+ for (unsigned i = 0; i != NumElems / 2; ++i)
+ ShufMask[i] = i + NumElems / 2;
SDValue OpHi = DAG.getVectorShuffle(InVT, dl, In, In, ShufMask);
OpHi = DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, dl, HalfVT, OpHi);
@@ -25834,11 +25832,10 @@ static SDValue LowerStore(SDValue Op, const X86Subtarget &Subtarget,
// TODO: It is possible to support ZExt by zeroing the undef values during
// the shuffle phase or after the shuffle.
static SDValue LowerLoad(SDValue Op, const X86Subtarget &Subtarget,
- SelectionDAG &DAG) {
+ SelectionDAG &DAG) {
MVT RegVT = Op.getSimpleValueType();
assert(RegVT.isVector() && "We only custom lower vector loads.");
- assert(RegVT.isInteger() &&
- "We only custom lower integer vector loads.");
+ assert(RegVT.isInteger() && "We only custom lower integer vector loads.");
LoadSDNode *Ld = cast<LoadSDNode>(Op.getNode());
SDLoc dl(Ld);
@@ -25881,8 +25878,8 @@ static bool isAndOrOfSetCCs(SDValue Op, unsigned &Opc) {
SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
SDValue Chain = Op.getOperand(0);
- SDValue Cond = Op.getOperand(1);
- SDValue Dest = Op.getOperand(2);
+ SDValue Cond = Op.getOperand(1);
+ SDValue Dest = Op.getOperand(2);
SDLoc dl(Op);
// Bail out when we don't have native compare instructions.
@@ -25932,7 +25929,7 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
if (User->getOpcode() == ISD::BR) {
SDValue FalseBB = User->getOperand(1);
SDNode *NewBR =
- DAG.UpdateNodeOperands(User, User->getOperand(0), Dest);
+ DAG.UpdateNodeOperands(User, User->getOperand(0), Dest);
assert(NewBR == User);
(void)NewBR;
Dest = FalseBB;
@@ -26003,9 +26000,8 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
// bytes in one go. Touching the stack at 4K increments is necessary to ensure
// that the guard pages used by the OS virtual memory manager are allocated in
// correct sequence.
-SDValue
-X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
- SelectionDAG &DAG) const {
+SDValue X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
+ SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
bool SplitStack = MF.shouldSplitStack();
bool EmitStackProbeCall = hasStackProbeSymbol(MF);
@@ -26016,7 +26012,7 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
// Get the inputs.
SDNode *Node = Op.getNode();
SDValue Chain = Op.getOperand(0);
- SDValue Size = Op.getOperand(1);
+ SDValue Size = Op.getOperand(1);
MaybeAlign Alignment(Op.getConstantOperandVal(2));
EVT VT = Node->getValueType(0);
@@ -26139,8 +26135,9 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
MemOps.push_back(Store);
// Store ptr to reg_save_area.
- FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN, DAG.getIntPtrConstant(
- Subtarget.isTarget64BitLP64() ? 8 : 4, DL));
+ FIN = DAG.getNode(
+ ISD::ADD, DL, PtrVT, FIN,
+ DAG.getIntPtrConstant(Subtarget.isTarget64BitLP64() ? 8 : 4, DL));
SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), PtrVT);
Store = DAG.getStore(
Op.getOperand(0), DL, RSFIN, FIN,
@@ -26150,8 +26147,7 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
}
SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
- assert(Subtarget.is64Bit() &&
- "LowerVAARG only handles 64-bit va_arg!");
+ assert(Subtarget.is64Bit() && "LowerVAARG only handles 64-bit va_arg!");
assert(Op.getNumOperands() == 4);
MachineFunction &MF = DAG.getMachineFunction();
@@ -26175,11 +26171,11 @@ SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
// selection mechanism works only for the basic types.
assert(ArgVT != MVT::f80 && "va_arg for f80 not yet implemented");
if (ArgVT.isFloatingPoint() && ArgSize <= 16 /*bytes*/) {
- ArgMode = 2; // Argument passed in XMM register. Use fp_offset.
+ ArgMode = 2; // Argument passed in XMM register. Use fp_offset.
} else {
assert(ArgVT.isInteger() && ArgSize <= 32 /*bytes*/ &&
"Unhandled argument type in LowerVAARG");
- ArgMode = 1; // Argument passed in GPR64 register(s). Use gp_offset.
+ ArgMode = 1; // Argument passed in GPR64 register(s). Use gp_offset.
}
if (ArgMode == 2) {
@@ -26213,7 +26209,7 @@ static SDValue LowerVACOPY(SDValue Op, const X86Subtarget &Subtarget,
// where a va_list is still an i8*.
assert(Subtarget.is64Bit() && "This code only handles 64-bit va_copy!");
if (Subtarget.isCallingConvWin64(
- DAG.getMachineFunction().getFunction().getCallingConv()))
+ DAG.getMachineFunction().getFunction().getCallingConv()))
// Probably a Win64 va_copy.
return DAG.expandVACopy(Op.getNode());
@@ -26275,15 +26271,17 @@ static SDValue getTargetVShiftByConstNode(unsigned Opc, const SDLoc &dl, MVT VT,
return DAG.getConstant(0, dl, VT);
}
- assert((Opc == X86ISD::VSHLI || Opc == X86ISD::VSRLI || Opc == X86ISD::VSRAI)
- && "Unknown target vector shift-by-constant node");
+ assert(
+ (Opc == X86ISD::VSHLI || Opc == X86ISD::VSRLI || Opc == X86ISD::VSRAI) &&
+ "Unknown target vector shift-by-constant node");
// Fold this packed vector shift into a build vector if SrcOp is a
// vector of Constants or UNDEFs.
if (ISD::isBuildVectorOfConstantSDNodes(SrcOp.getNode())) {
unsigned ShiftOpc;
switch (Opc) {
- default: llvm_unreachable("Unknown opcode!");
+ default:
+ llvm_unreachable("Unknown opcode!");
case X86ISD::VSHLI:
ShiftOpc = ISD::SHL;
break;
@@ -26423,8 +26421,8 @@ static SDValue getMaskNode(SDValue Mask, MVT MaskVT,
Hi = DAG.getBitcast(MVT::v32i1, Hi);
return DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v64i1, Lo, Hi);
} else {
- MVT BitcastVT = MVT::getVectorVT(MVT::i1,
- Mask.getSimpleValueType().getSizeInBits());
+ MVT BitcastVT =
+ MVT::getVectorVT(MVT::i1, Mask.getSimpleValueType().getSizeInBits());
// In case when MaskVT equals v2i1 or v4i1, low 2 or 4 elements
// are extracted by EXTRACT_SUBVECTOR.
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT,
@@ -26505,9 +26503,12 @@ static int getSEHRegistrationNodeSize(const Function *Fn) {
// The RegNodeSize is 6 32-bit words for SEH and 4 for C++ EH. See
// WinEHStatePass for the full struct definition.
switch (classifyEHPersonality(Fn->getPersonalityFn())) {
- case EHPersonality::MSVC_X86SEH: return 24;
- case EHPersonality::MSVC_CXX: return 16;
- default: break;
+ case EHPersonality::MSVC_X86SEH:
+ return 24;
+ case EHPersonality::MSVC_CXX:
+ return 16;
+ default:
+ break;
}
report_fatal_error(
"can only recover FP for 32-bit MSVC EH personality functions");
@@ -26597,13 +26598,13 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
SDLoc dl(Op);
unsigned IntNo = Op.getConstantOperandVal(0);
MVT VT = Op.getSimpleValueType();
- const IntrinsicData* IntrData = getIntrinsicWithoutChain(IntNo);
+ const IntrinsicData *IntrData = getIntrinsicWithoutChain(IntNo);
// Propagate flags from original node to transformed node(s).
SelectionDAG::FlagInserter FlagsInserter(DAG, Op->getFlags());
if (IntrData) {
- switch(IntrData->Type) {
+ switch (IntrData->Type) {
case INTR_TYPE_1OP: {
// We specify 2 possible opcodes for intrinsics with rounding modes.
// First, we check if the intrinsic may have non-default rounding mode,
@@ -26729,9 +26730,8 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
if (!isRoundModeCurDirection(Rnd))
return SDValue();
}
- return getVectorMaskingNode(
- DAG.getNode(IntrData->Opc0, dl, VT, Src), Mask, PassThru,
- Subtarget, DAG);
+ return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src),
+ Mask, PassThru, Subtarget, DAG);
}
case INTR_TYPE_1OP_MASK_SAE: {
SDValue Src = Op.getOperand(1);
@@ -26772,9 +26772,9 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
if (!isRoundModeCurDirection(Rnd))
return SDValue();
}
- return getScalarMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src1,
- Src2),
- Mask, passThru, Subtarget, DAG);
+ return getScalarMaskingNode(
+ DAG.getNode(IntrData->Opc0, dl, VT, Src1, Src2), Mask, passThru,
+ Subtarget, DAG);
}
assert(Op.getNumOperands() == (6U + HasRounding) &&
@@ -26788,9 +26788,9 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
else if (!isRoundModeCurDirection(Sae))
return SDValue();
}
- return getScalarMaskingNode(DAG.getNode(Opc, dl, VT, Src1,
- Src2, RoundingMode),
- Mask, passThru, Subtarget, DAG);
+ return getScalarMaskingNode(
+ DAG.getNode(Opc, dl, VT, Src1, Src2, RoundingMode), Mask, passThru,
+ Subtarget, DAG);
}
case INTR_TYPE_SCALAR_MASK_RND: {
SDValue Src1 = Op.getOperand(1);
@@ -26825,8 +26825,8 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
else
return SDValue();
- return getScalarMaskingNode(DAG.getNode(Opc, dl, VT, Src1, Src2),
- Mask, passThru, Subtarget, DAG);
+ return getScalarMaskingNode(DAG.getNode(Opc, dl, VT, Src1, Src2), Mask,
+ passThru, Subtarget, DAG);
}
case INTR_TYPE_2OP_MASK: {
SDValue Src1 = Op.getOperand(1);
@@ -26862,8 +26862,8 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return SDValue();
}
- return getVectorMaskingNode(DAG.getNode(Opc, dl, VT, Src1, Src2),
- Mask, PassThru, Subtarget, DAG);
+ return getVectorMaskingNode(DAG.getNode(Opc, dl, VT, Src1, Src2), Mask,
+ PassThru, Subtarget, DAG);
}
case INTR_TYPE_3OP_SCALAR_MASK_SAE: {
SDValue Src1 = Op.getOperand(1);
@@ -26912,12 +26912,12 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
// Reverse the operands to match VSELECT order.
return DAG.getNode(IntrData->Opc0, dl, VT, Src3, Src2, Src1);
}
- case VPERM_2OP : {
+ case VPERM_2OP: {
SDValue Src1 = Op.getOperand(1);
SDValue Src2 = Op.getOperand(2);
// Swap Src1 and Src2 in the node creation
- return DAG.getNode(IntrData->Opc0, dl, VT,Src2, Src1);
+ return DAG.getNode(IntrData->Opc0, dl, VT, Src2, Src1);
}
case CFMA_OP_MASKZ:
case CFMA_OP_MASK: {
@@ -26961,8 +26961,8 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
SDValue Imm = Op.getOperand(2);
SDValue Mask = Op.getOperand(3);
SDValue FPclass = DAG.getNode(IntrData->Opc0, dl, MVT::v1i1, Src1, Imm);
- SDValue FPclassMask = getScalarMaskingNode(FPclass, Mask, SDValue(),
- Subtarget, DAG);
+ SDValue FPclassMask =
+ getScalarMaskingNode(FPclass, Mask, SDValue(), Subtarget, DAG);
// Need to fill with zeros to ensure the bitcast will produce zeroes
// for the upper bits. An EXTRACT_ELEMENT here wouldn't guarantee that.
SDValue Ins = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v8i1,
@@ -26986,7 +26986,7 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
if (!isRoundModeCurDirection(Sae))
return SDValue();
}
- //default rounding mode
+ // default rounding mode
return DAG.getNode(IntrData->Opc0, dl, MaskVT,
{Op.getOperand(1), Op.getOperand(2), CC, Mask});
}
@@ -27004,12 +27004,12 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
else if (!isRoundModeCurDirection(Sae))
return SDValue();
}
- //default rounding mode
+ // default rounding mode
if (!Cmp.getNode())
Cmp = DAG.getNode(IntrData->Opc0, dl, MVT::v1i1, Src1, Src2, CC);
- SDValue CmpMask = getScalarMaskingNode(Cmp, Mask, SDValue(),
- Subtarget, DAG);
+ SDValue CmpMask =
+ getScalarMaskingNode(Cmp, Mask, SDValue(), Subtarget, DAG);
// Need to fill with zeros to ensure the bitcast will produce zeroes
// for the upper bits. An EXTRACT_ELEMENT here wouldn't guarantee that.
SDValue Ins = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v8i1,
@@ -27177,8 +27177,8 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
assert(IntrData->Opc0 == X86ISD::BEXTRI && "Unexpected opcode");
uint64_t Imm = Op.getConstantOperandVal(2);
- SDValue Control = DAG.getTargetConstant(Imm & 0xffff, dl,
- Op.getValueType());
+ SDValue Control =
+ DAG.getTargetConstant(Imm & 0xffff, dl, Op.getValueType());
return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(),
Op.getOperand(1), Control);
}
@@ -27200,7 +27200,7 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
Op.getOperand(3), GenCF.getValue(1));
}
SDValue SetCC = getSETCC(X86::COND_B, Res.getValue(1), dl, DAG);
- SDValue Results[] = { SetCC, Res };
+ SDValue Results[] = {SetCC, Res};
return DAG.getMergeValues(Results, dl);
}
case CVTPD2PS_MASK:
@@ -27283,7 +27283,8 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
}
switch (IntNo) {
- default: return SDValue(); // Don't custom lower most intrinsics.
+ default:
+ return SDValue(); // Don't custom lower most intrinsics.
// ptest and testp intrinsics. The intrinsic these come from are designed to
// return an integer value, not just an instruction so lower it to the ptest
@@ -27317,7 +27318,8 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
unsigned TestOpc = X86ISD::PTEST;
X86::CondCode X86CC;
switch (IntNo) {
- default: llvm_unreachable("Bad fallthrough in Intrinsic lowering.");
+ default:
+ llvm_unreachable("Bad fallthrough in Intrinsic lowering.");
case Intrinsic::x86_avx512_ktestc_b:
case Intrinsic::x86_avx512_ktestc_w:
case Intrinsic::x86_avx512_ktestc_d:
@@ -27388,7 +27390,8 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
unsigned Opcode;
X86::CondCode X86CC;
switch (IntNo) {
- default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
+ default:
+ llvm_unreachable("Impossible intrinsic"); // Can't reach here.
case Intrinsic::x86_sse42_pcmpistria128:
Opcode = X86ISD::PCMPISTR;
X86CC = X86::COND_A;
@@ -27558,7 +27561,8 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
unsigned NewIntrinsic;
switch (IntNo) {
- default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
+ default:
+ llvm_unreachable("Impossible intrinsic"); // Can't reach here.
case Intrinsic::x86_mmx_pslli_w:
NewIntrinsic = Intrinsic::x86_mmx_psll_w;
break;
@@ -27635,16 +27639,16 @@ static SDValue getAVX2GatherNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
MemIntrinsicSDNode *MemIntr = cast<MemIntrinsicSDNode>(Op);
- SDValue Ops[] = {Chain, Src, Mask, Base, Index, Scale };
+ SDValue Ops[] = {Chain, Src, Mask, Base, Index, Scale};
SDValue Res =
DAG.getMemIntrinsicNode(X86ISD::MGATHER, dl, VTs, Ops,
MemIntr->getMemoryVT(), MemIntr->getMemOperand());
return DAG.getMergeValues({Res, Res.getValue(1)}, dl);
}
-static SDValue getGatherNode(SDValue Op, SelectionDAG &DAG,
- SDValue Src, SDValue Mask, SDValue Base,
- SDValue Index, SDValue ScaleOp, SDValue Chain,
+static SDValue getGatherNode(SDValue Op, SelectionDAG &DAG, SDValue Src,
+ SDValue Mask, SDValue Base, SDValue Index,
+ SDValue ScaleOp, SDValue Chain,
const X86Subtarget &Subtarget) {
MVT VT = Op.getSimpleValueType();
SDLoc dl(Op);
@@ -27673,7 +27677,7 @@ static SDValue getGatherNode(SDValue Op, SelectionDAG &DAG,
MemIntrinsicSDNode *MemIntr = cast<MemIntrinsicSDNode>(Op);
- SDValue Ops[] = {Chain, Src, Mask, Base, Index, Scale };
+ SDValue Ops[] = {Chain, Src, Mask, Base, Index, Scale};
SDValue Res =
DAG.getMemIntrinsicNode(X86ISD::MGATHER, dl, VTs, Ops,
MemIntr->getMemoryVT(), MemIntr->getMemOperand());
@@ -27681,9 +27685,9 @@ static SDValue getGatherNode(SDValue Op, SelectionDAG &DAG,
}
static SDValue getScatterNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
- SDValue Src, SDValue Mask, SDValue Base,
- SDValue Index, SDValue ScaleOp, SDValue Chain,
- const X86Subtarget &Subtarget) {
+ SDValue Src, SDValue Mask, SDValue Base,
+ SDValue Index, SDValue ScaleOp, SDValue Chain,
+ const X86Subtarget &Subtarget) {
SDLoc dl(Op);
auto *C = dyn_cast<ConstantSDNode>(ScaleOp);
// Scale must be constant.
@@ -27725,8 +27729,8 @@ static SDValue getPrefetchNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
TLI.getPointerTy(DAG.getDataLayout()));
SDValue Disp = DAG.getTargetConstant(0, dl, MVT::i32);
SDValue Segment = DAG.getRegister(0, MVT::i32);
- MVT MaskVT =
- MVT::getVectorVT(MVT::i1, Index.getSimpleValueType().getVectorNumElements());
+ MVT MaskVT = MVT::getVectorVT(
+ MVT::i1, Index.getSimpleValueType().getVectorNumElements());
SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl);
SDValue Ops[] = {VMask, Base, Scale, Index, Disp, Segment, Chain};
SDNode *Res = DAG.getMachineNode(Opc, dl, MVT::Other, Ops);
@@ -27742,11 +27746,11 @@ static SDValue getPrefetchNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
/// expanded intrinsics implicitly defines extra registers (i.e. not just
/// EDX:EAX).
static SDValue expandIntrinsicWChainHelper(SDNode *N, const SDLoc &DL,
- SelectionDAG &DAG,
- unsigned TargetOpcode,
- unsigned SrcReg,
- const X86Subtarget &Subtarget,
- SmallVectorImpl<SDValue> &Results) {
+ SelectionDAG &DAG,
+ unsigned TargetOpcode,
+ unsigned SrcReg,
+ const X86Subtarget &Subtarget,
+ SmallVectorImpl<SDValue> &Results) {
SDValue Chain = N->getOperand(0);
SDValue Glue;
@@ -27786,7 +27790,7 @@ static SDValue expandIntrinsicWChainHelper(SDNode *N, const SDLoc &DL,
}
// Use a buildpair to merge the two 32-bit values into a 64-bit one.
- SDValue Ops[] = { LO, HI };
+ SDValue Ops[] = {LO, HI};
SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Ops);
Results.push_back(Pair);
Results.push_back(Chain);
@@ -27803,9 +27807,9 @@ static void getReadTimeStampCounter(SDNode *N, const SDLoc &DL, unsigned Opcode,
// The processor's time-stamp counter (a 64-bit MSR) is stored into the
// EDX:EAX registers. EDX is loaded with the high-order 32 bits of the MSR
// and the EAX register is loaded with the low-order 32 bits.
- SDValue Glue = expandIntrinsicWChainHelper(N, DL, DAG, Opcode,
- /* NoRegister */0, Subtarget,
- Results);
+ SDValue Glue =
+ expandIntrinsicWChainHelper(N, DL, DAG, Opcode,
+ /* NoRegister */ 0, Subtarget, Results);
if (Opcode != X86::RDTSCP)
return;
@@ -27863,24 +27867,24 @@ static SDValue MarkEHGuard(SDValue Op, SelectionDAG &DAG) {
}
/// Emit Truncating Store with signed or unsigned saturation.
-static SDValue
-EmitTruncSStore(bool SignedSat, SDValue Chain, const SDLoc &DL, SDValue Val,
- SDValue Ptr, EVT MemVT, MachineMemOperand *MMO,
- SelectionDAG &DAG) {
+static SDValue EmitTruncSStore(bool SignedSat, SDValue Chain, const SDLoc &DL,
+ SDValue Val, SDValue Ptr, EVT MemVT,
+ MachineMemOperand *MMO, SelectionDAG &DAG) {
SDVTList VTs = DAG.getVTList(MVT::Other);
SDValue Undef = DAG.getUNDEF(Ptr.getValueType());
- SDValue Ops[] = { Chain, Val, Ptr, Undef };
+ SDValue Ops[] = {Chain, Val, Ptr, Undef};
unsigned Opc = SignedSat ? X86ISD::VTRUNCSTORES : X86ISD::VTRUNCSTOREUS;
return DAG.getMemIntrinsicNode(Opc, DL, VTs, Ops, MemVT, MMO);
}
/// Emit Masked Truncating Store with signed or unsigned saturation.
static SDValue EmitMaskedTruncSStore(bool SignedSat, SDValue Chain,
- const SDLoc &DL,
- SDValue Val, SDValue Ptr, SDValue Mask, EVT MemVT,
- MachineMemOperand *MMO, SelectionDAG &DAG) {
+ const SDLoc &DL, SDValue Val, SDValue Ptr,
+ SDValue Mask, EVT MemVT,
+ MachineMemOperand *MMO,
+ SelectionDAG &DAG) {
SDVTList VTs = DAG.getVTList(MVT::Other);
- SDValue Ops[] = { Chain, Val, Ptr, Mask };
+ SDValue Ops[] = {Chain, Val, Ptr, Mask};
unsigned Opc = SignedSat ? X86ISD::VMTRUNCSTORES : X86ISD::VMTRUNCSTOREUS;
return DAG.getMemIntrinsicNode(Opc, DL, VTs, Ops, MemVT, MMO);
}
@@ -27948,9 +27952,8 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
SDLoc dl(Op);
// Create a WRPKRU node, pass the input to the EAX parameter, and pass 0
// to the EDX and ECX parameters.
- return DAG.getNode(X86ISD::WRPKRU, dl, MVT::Other,
- Op.getOperand(0), Op.getOperand(2),
- DAG.getConstant(0, dl, MVT::i32),
+ return DAG.getNode(X86ISD::WRPKRU, dl, MVT::Other, Op.getOperand(0),
+ Op.getOperand(2), DAG.getConstant(0, dl, MVT::i32),
DAG.getConstant(0, dl, MVT::i32));
}
case llvm::Intrinsic::asan_check_memaccess: {
@@ -27981,7 +27984,8 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
unsigned Opcode;
switch (IntNo) {
- default: llvm_unreachable("Impossible intrinsic");
+ default:
+ llvm_unreachable("Impossible intrinsic");
case Intrinsic::x86_umwait:
Opcode = X86ISD::UMWAIT;
break;
@@ -27994,9 +27998,8 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
break;
}
- SDValue Operation =
- DAG.getNode(Opcode, dl, VTs, Chain, Op->getOperand(2),
- Op->getOperand(3), Op->getOperand(4));
+ SDValue Operation = DAG.getNode(Opcode, dl, VTs, Chain, Op->getOperand(2),
+ Op->getOperand(3), Op->getOperand(4));
SDValue SetCC = getSETCC(X86::COND_B, Operation.getValue(0), dl, DAG);
return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), SetCC,
Operation.getValue(1));
@@ -28008,7 +28011,8 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other);
unsigned Opcode;
switch (IntNo) {
- default: llvm_unreachable("Impossible intrinsic!");
+ default:
+ llvm_unreachable("Impossible intrinsic!");
case Intrinsic::x86_enqcmd:
Opcode = X86ISD::ENQCMD;
break;
@@ -28032,7 +28036,8 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
unsigned Opcode;
switch (IntNo) {
- default: llvm_unreachable("Impossible intrinsic");
+ default:
+ llvm_unreachable("Impossible intrinsic");
case Intrinsic::x86_aesenc128kl:
Opcode = X86ISD::AESENC128KL;
break;
@@ -28070,7 +28075,8 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
unsigned Opcode;
switch (IntNo) {
- default: llvm_unreachable("Impossible intrinsic");
+ default:
+ llvm_unreachable("Impossible intrinsic");
case Intrinsic::x86_aesencwide128kl:
Opcode = X86ISD::AESENCWIDE128KL;
break;
@@ -28164,9 +28170,9 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
SDValue Src2 = Op.getOperand(4);
SDValue CC = Op.getOperand(5);
MachineMemOperand *MMO = cast<MemIntrinsicSDNode>(Op)->getMemOperand();
- SDValue Operation = DAG.getMemIntrinsicNode(
- X86ISD::CMPCCXADD, DL, Op->getVTList(), {Chain, Addr, Src1, Src2, CC},
- MVT::i32, MMO);
+ SDValue Operation =
+ DAG.getMemIntrinsicNode(X86ISD::CMPCCXADD, DL, Op->getVTList(),
+ {Chain, Addr, Src1, Src2, CC}, MVT::i32, MMO);
return Operation;
}
case Intrinsic::x86_aadd32:
@@ -28250,8 +28256,9 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
}
SDLoc dl(Op);
- switch(IntrData->Type) {
- default: llvm_unreachable("Unknown Intrinsic Type");
+ switch (IntrData->Type) {
+ default:
+ llvm_unreachable("Unknown Intrinsic Type");
case RDSEED:
case RDRAND: {
// Emit the node with the right value type.
@@ -28272,32 +28279,32 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
}
case GATHER_AVX2: {
SDValue Chain = Op.getOperand(0);
- SDValue Src = Op.getOperand(2);
- SDValue Base = Op.getOperand(3);
+ SDValue Src = Op.getOperand(2);
+ SDValue Base = Op.getOperand(3);
SDValue Index = Op.getOperand(4);
- SDValue Mask = Op.getOperand(5);
+ SDValue Mask = Op.getOperand(5);
SDValue Scale = Op.getOperand(6);
return getAVX2GatherNode(IntrData->Opc0, Op, DAG, Src, Mask, Base, Index,
Scale, Chain, Subtarget);
}
case GATHER: {
- //gather(v1, mask, index, base, scale);
+ // gather(v1, mask, index, base, scale);
SDValue Chain = Op.getOperand(0);
- SDValue Src = Op.getOperand(2);
- SDValue Base = Op.getOperand(3);
+ SDValue Src = Op.getOperand(2);
+ SDValue Base = Op.getOperand(3);
SDValue Index = Op.getOperand(4);
- SDValue Mask = Op.getOperand(5);
+ SDValue Mask = Op.getOperand(5);
SDValue Scale = Op.getOperand(6);
- return getGatherNode(Op, DAG, Src, Mask, Base, Index, Scale,
- Chain, Subtarget);
+ return getGatherNode(Op, DAG, Src, Mask, Base, Index, Scale, Chain,
+ Subtarget);
}
case SCATTER: {
- //scatter(base, mask, index, v1, scale);
+ // scatter(base, mask, index, v1, scale);
SDValue Chain = Op.getOperand(0);
- SDValue Base = Op.getOperand(2);
- SDValue Mask = Op.getOperand(3);
+ SDValue Base = Op.getOperand(2);
+ SDValue Mask = Op.getOperand(3);
SDValue Index = Op.getOperand(4);
- SDValue Src = Op.getOperand(5);
+ SDValue Src = Op.getOperand(5);
SDValue Scale = Op.getOperand(6);
return getScatterNode(IntrData->Opc0, Op, DAG, Src, Mask, Base, Index,
Scale, Chain, Subtarget);
@@ -28308,9 +28315,9 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
"Wrong prefetch hint in intrinsic: should be 2 or 3");
unsigned Opcode = (HintVal == 2 ? IntrData->Opc1 : IntrData->Opc0);
SDValue Chain = Op.getOperand(0);
- SDValue Mask = Op.getOperand(2);
+ SDValue Mask = Op.getOperand(2);
SDValue Index = Op.getOperand(3);
- SDValue Base = Op.getOperand(4);
+ SDValue Base = Op.getOperand(4);
SDValue Scale = Op.getOperand(5);
return getPrefetchNode(Opcode, Op, DAG, Mask, Base, Index, Scale, Chain,
Subtarget);
@@ -28345,8 +28352,8 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
SDValue SetCC = getSETCC(X86::COND_NE, InTrans, dl, DAG);
SDValue Ret = DAG.getNode(ISD::ZERO_EXTEND, dl, Op->getValueType(0), SetCC);
- return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(),
- Ret, SDValue(InTrans.getNode(), 1));
+ return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), Ret,
+ SDValue(InTrans.getNode(), 1));
}
case TRUNCATE_TO_MEM_VI8:
case TRUNCATE_TO_MEM_VI16:
@@ -28359,7 +28366,7 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
MemIntrinsicSDNode *MemIntr = dyn_cast<MemIntrinsicSDNode>(Op);
assert(MemIntr && "Expected MemIntrinsicSDNode!");
- EVT MemVT = MemIntr->getMemoryVT();
+ EVT MemVT = MemIntr->getMemoryVT();
uint16_t TruncationOp = IntrData->Opc0;
switch (TruncationOp) {
@@ -28454,7 +28461,7 @@ SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
Register FrameReg =
RegInfo->getPtrSizedFrameRegister(DAG.getMachineFunction());
- SDLoc dl(Op); // FIXME probably not meaningful
+ SDLoc dl(Op); // FIXME probably not meaningful
unsigned Depth = Op.getConstantOperandVal(0);
assert(((FrameReg == X86::RBP && VT == MVT::i64) ||
(FrameReg == X86::EBP && VT == MVT::i32)) &&
@@ -28468,7 +28475,7 @@ SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
// FIXME? Maybe this could be a TableGen attribute on some registers and
// this table could be generated automatically from RegInfo.
-Register X86TargetLowering::getRegisterByName(const char* RegName, LLT VT,
+Register X86TargetLowering::getRegisterByName(const char *RegName, LLT VT,
const MachineFunction &MF) const {
const TargetFrameLowering &TFI = *Subtarget.getFrameLowering();
@@ -28525,10 +28532,10 @@ bool X86TargetLowering::needsFixedCatchObjects() const {
}
SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const {
- SDValue Chain = Op.getOperand(0);
- SDValue Offset = Op.getOperand(1);
- SDValue Handler = Op.getOperand(2);
- SDLoc dl (Op);
+ SDValue Chain = Op.getOperand(0);
+ SDValue Offset = Op.getOperand(1);
+ SDValue Handler = Op.getOperand(2);
+ SDLoc dl(Op);
EVT PtrVT = getPointerTy(DAG.getDataLayout());
const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
@@ -28539,9 +28546,9 @@ SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const {
SDValue Frame = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, PtrVT);
Register StoreAddrReg = (PtrVT == MVT::i64) ? X86::RCX : X86::ECX;
- SDValue StoreAddr = DAG.getNode(ISD::ADD, dl, PtrVT, Frame,
- DAG.getIntPtrConstant(RegInfo->getSlotSize(),
- dl));
+ SDValue StoreAddr =
+ DAG.getNode(ISD::ADD, dl, PtrVT, Frame,
+ DAG.getIntPtrConstant(RegInfo->getSlotSize(), dl));
StoreAddr = DAG.getNode(ISD::ADD, dl, PtrVT, StoreAddr, Offset);
Chain = DAG.getStore(Chain, dl, Handler, StoreAddr, MachinePointerInfo());
Chain = DAG.getCopyToReg(Chain, dl, StoreAddrReg, StoreAddr);
@@ -28564,19 +28571,20 @@ SDValue X86TargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
(void)TII->getGlobalBaseReg(&DAG.getMachineFunction());
}
return DAG.getNode(X86ISD::EH_SJLJ_SETJMP, DL,
- DAG.getVTList(MVT::i32, MVT::Other),
- Op.getOperand(0), Op.getOperand(1));
+ DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0),
+ Op.getOperand(1));
}
SDValue X86TargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
- return DAG.getNode(X86ISD::EH_SJLJ_LONGJMP, DL, MVT::Other,
- Op.getOperand(0), Op.getOperand(1));
+ return DAG.getNode(X86ISD::EH_SJLJ_LONGJMP, DL, MVT::Other, Op.getOperand(0),
+ Op.getOperand(1));
}
-SDValue X86TargetLowering::lowerEH_SJLJ_SETUP_DISPATCH(SDValue Op,
- SelectionDAG &DAG) const {
+SDValue
+X86TargetLowering::lowerEH_SJLJ_SETUP_DISPATCH(SDValue Op,
+ SelectionDAG &DAG) const {
SDLoc DL(Op);
return DAG.getNode(X86ISD::EH_SJLJ_SETUP_DISPATCH, DL, MVT::Other,
Op.getOperand(0));
@@ -28592,7 +28600,7 @@ SDValue X86TargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
SDValue Trmp = Op.getOperand(1); // trampoline
SDValue FPtr = Op.getOperand(2); // nested function
SDValue Nest = Op.getOperand(3); // 'nest' parameter value
- SDLoc dl (Op);
+ SDLoc dl(Op);
const Value *TrmpAddr = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
@@ -28601,7 +28609,7 @@ SDValue X86TargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
SDValue OutChains[6];
// Large code-model.
- const unsigned char JMP64r = 0xFF; // 64-bit jmp through register opcode.
+ const unsigned char JMP64r = 0xFF; // 64-bit jmp through register opcode.
const unsigned char MOV64ri = 0xB8; // X86::MOV64ri opcode.
const unsigned char N86R10 = TRI->getEncodingValue(X86::R10) & 0x7;
@@ -28649,7 +28657,7 @@ SDValue X86TargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
} else {
const Function *Func =
- cast<Function>(cast<SrcValueSDNode>(Op.getOperand(5))->getValue());
+ cast<Function>(cast<SrcValueSDNode>(Op.getOperand(5))->getValue());
CallingConv::ID CC = Func->getCallingConv();
unsigned NestReg;
@@ -28671,7 +28679,8 @@ SDValue X86TargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
unsigned Idx = 0;
for (FunctionType::param_iterator I = FTy->param_begin(),
- E = FTy->param_end(); I != E; ++I, ++Idx)
+ E = FTy->param_end();
+ I != E; ++I, ++Idx)
if (Attrs.hasParamAttr(Idx, Attribute::InReg)) {
const DataLayout &DL = DAG.getDataLayout();
// FIXME: should only count parameters that are lowered to integers.
@@ -28777,18 +28786,16 @@ SDValue X86TargetLowering::LowerGET_ROUNDING(SDValue Op,
Chain = CWD.getValue(1);
// Mask and turn the control bits into a shift for the lookup table.
- SDValue Shift =
- DAG.getNode(ISD::SRL, DL, MVT::i16,
- DAG.getNode(ISD::AND, DL, MVT::i16,
- CWD, DAG.getConstant(0xc00, DL, MVT::i16)),
- DAG.getConstant(9, DL, MVT::i8));
+ SDValue Shift = DAG.getNode(ISD::SRL, DL, MVT::i16,
+ DAG.getNode(ISD::AND, DL, MVT::i16, CWD,
+ DAG.getConstant(0xc00, DL, MVT::i16)),
+ DAG.getConstant(9, DL, MVT::i8));
Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, Shift);
SDValue LUT = DAG.getConstant(0x2d, DL, MVT::i32);
- SDValue RetVal =
- DAG.getNode(ISD::AND, DL, MVT::i32,
- DAG.getNode(ISD::SRL, DL, MVT::i32, LUT, Shift),
- DAG.getConstant(3, DL, MVT::i32));
+ SDValue RetVal = DAG.getNode(ISD::AND, DL, MVT::i32,
+ DAG.getNode(ISD::SRL, DL, MVT::i32, LUT, Shift),
+ DAG.getConstant(3, DL, MVT::i32));
RetVal = DAG.getZExtOrTrunc(RetVal, DL, VT);
@@ -29074,17 +29081,15 @@ static SDValue LowerVectorCTLZ_AVX512CDI(SDValue Op, SelectionDAG &DAG,
MVT EltVT = VT.getVectorElementType();
unsigned NumElems = VT.getVectorNumElements();
- assert((EltVT == MVT::i8 || EltVT == MVT::i16) &&
- "Unsupported element type");
+ assert((EltVT == MVT::i8 || EltVT == MVT::i16) && "Unsupported element type");
// Split vector, it's Lo and Hi parts will be handled in next iteration.
- if (NumElems > 16 ||
- (NumElems == 16 && !Subtarget.canExtendTo512DQ()))
+ if (NumElems > 16 || (NumElems == 16 && !Subtarget.canExtendTo512DQ()))
return splitVectorIntUnary(Op, DAG, dl);
MVT NewVT = MVT::getVectorVT(MVT::i32, NumElems);
assert((NewVT.is256BitVector() || NewVT.is512BitVector()) &&
- "Unsupported value type for operation");
+ "Unsupported value type for operation");
// Use native supported vector instruction vplzcntd.
Op = DAG.getNode(ISD::ZERO_EXTEND, dl, NewVT, Op.getOperand(0));
@@ -29782,7 +29787,7 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget &Subtarget,
// Merge the two vectors back together with a shuffle. This expands into 2
// shuffles.
- static const int ShufMask[] = { 0, 4, 2, 6 };
+ static const int ShufMask[] = {0, 4, 2, 6};
return DAG.getVectorShuffle(VT, dl, Evens, Odds, ShufMask);
}
@@ -29912,7 +29917,7 @@ static SDValue LowerMULH(SDValue Op, const X86Subtarget &Subtarget,
//
// Place the odd value at an even position (basically, shift all values 1
// step to the left):
- const int Mask[] = {1, -1, 3, -1, 5, -1, 7, -1,
+ const int Mask[] = {1, -1, 3, -1, 5, -1, 7, -1,
9, -1, 11, -1, 13, -1, 15, -1};
// <a|b|c|d> => <b|undef|d|undef>
SDValue Odd0 =
@@ -29962,7 +29967,7 @@ static SDValue LowerMULH(SDValue Op, const X86Subtarget &Subtarget,
// Only i8 vectors should need custom lowering after this.
assert((VT == MVT::v16i8 || (VT == MVT::v32i8 && Subtarget.hasInt256()) ||
- (VT == MVT::v64i8 && Subtarget.hasBWI())) &&
+ (VT == MVT::v64i8 && Subtarget.hasBWI())) &&
"Unsupported vector type");
// Lower v16i8/v32i8 as extension to v8i16/v16i16 vector pairs, multiply,
@@ -30117,7 +30122,8 @@ static SDValue LowerMULO(SDValue Op, const X86Subtarget &Subtarget,
return DAG.getMergeValues({Low, Ovf}, dl);
}
-SDValue X86TargetLowering::LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) const {
+SDValue X86TargetLowering::LowerWin64_i128OP(SDValue Op,
+ SelectionDAG &DAG) const {
assert(Subtarget.isTargetWin64() && "Unexpected target");
EVT VT = Op.getValueType();
assert(VT.isInteger() && VT.getSizeInBits() == 128 &&
@@ -30132,13 +30138,13 @@ SDValue X86TargetLowering::LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) cons
RTLIB::Libcall LC;
bool isSigned;
switch (Op->getOpcode()) {
- // clang-format off
+ // clang-format off
default: llvm_unreachable("Unexpected request for libcall!");
case ISD::SDIV: isSigned = true; LC = RTLIB::SDIV_I128; break;
case ISD::UDIV: isSigned = false; LC = RTLIB::UDIV_I128; break;
case ISD::SREM: isSigned = true; LC = RTLIB::SREM_I128; break;
case ISD::UREM: isSigned = false; LC = RTLIB::UREM_I128; break;
- // clang-format on
+ // clang-format on
}
SDLoc dl(Op);
@@ -30277,9 +30283,9 @@ static bool supportedVectorShiftWithImm(EVT VT, const X86Subtarget &Subtarget,
// The shift amount is a variable, but it is the same for all vector lanes.
// These instructions are defined together with shift-immediate.
-static
-bool supportedVectorShiftWithBaseAmnt(EVT VT, const X86Subtarget &Subtarget,
- unsigned Opcode) {
+static bool supportedVectorShiftWithBaseAmnt(EVT VT,
+ const X86Subtarget &Subtarget,
+ unsigned Opcode) {
return supportedVectorShiftWithImm(VT, Subtarget, Opcode);
}
@@ -30308,7 +30314,7 @@ static bool supportedVectorVarShift(EVT VT, const X86Subtarget &Subtarget,
return true;
bool LShift = VT.is128BitVector() || VT.is256BitVector();
- bool AShift = LShift && VT != MVT::v2i64 && VT != MVT::v4i64;
+ bool AShift = LShift && VT != MVT::v2i64 && VT != MVT::v4i64;
return (Opcode == ISD::SRA) ? AShift : LShift;
}
@@ -32299,7 +32305,8 @@ X86TargetLowering::lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const {
auto SSID = AI->getSyncScopeID();
// We must restrict the ordering to avoid generating loads with Release or
// ReleaseAcquire orderings.
- auto Order = AtomicCmpXchgInst::getStrongestFailureOrdering(AI->getOrdering());
+ auto Order =
+ AtomicCmpXchgInst::getStrongestFailureOrdering(AI->getOrdering());
// Before the load we need a fence. Here is an example lifted from
// http://www.hpl.hp.com/techreports/2012/HPL-2012-68.pdf showing why a fence
@@ -32368,31 +32375,28 @@ static SDValue emitLockedStackOp(SelectionDAG &DAG,
if (Subtarget.is64Bit()) {
SDValue Zero = DAG.getTargetConstant(0, DL, MVT::i32);
- SDValue Ops[] = {
- DAG.getRegister(X86::RSP, MVT::i64), // Base
- DAG.getTargetConstant(1, DL, MVT::i8), // Scale
- DAG.getRegister(0, MVT::i64), // Index
- DAG.getTargetConstant(SPOffset, DL, MVT::i32), // Disp
- DAG.getRegister(0, MVT::i16), // Segment.
- Zero,
- Chain};
- SDNode *Res = DAG.getMachineNode(X86::OR32mi8Locked, DL, MVT::i32,
- MVT::Other, Ops);
+ SDValue Ops[] = {DAG.getRegister(X86::RSP, MVT::i64), // Base
+ DAG.getTargetConstant(1, DL, MVT::i8), // Scale
+ DAG.getRegister(0, MVT::i64), // Index
+ DAG.getTargetConstant(SPOffset, DL, MVT::i32), // Disp
+ DAG.getRegister(0, MVT::i16), // Segment.
+ Zero,
+ Chain};
+ SDNode *Res =
+ DAG.getMachineNode(X86::OR32mi8Locked, DL, MVT::i32, MVT::Other, Ops);
return SDValue(Res, 1);
}
SDValue Zero = DAG.getTargetConstant(0, DL, MVT::i32);
- SDValue Ops[] = {
- DAG.getRegister(X86::ESP, MVT::i32), // Base
- DAG.getTargetConstant(1, DL, MVT::i8), // Scale
- DAG.getRegister(0, MVT::i32), // Index
- DAG.getTargetConstant(SPOffset, DL, MVT::i32), // Disp
- DAG.getRegister(0, MVT::i16), // Segment.
- Zero,
- Chain
- };
- SDNode *Res = DAG.getMachineNode(X86::OR32mi8Locked, DL, MVT::i32,
- MVT::Other, Ops);
+ SDValue Ops[] = {DAG.getRegister(X86::ESP, MVT::i32), // Base
+ DAG.getTargetConstant(1, DL, MVT::i8), // Scale
+ DAG.getRegister(0, MVT::i32), // Index
+ DAG.getTargetConstant(SPOffset, DL, MVT::i32), // Disp
+ DAG.getRegister(0, MVT::i16), // Segment.
+ Zero,
+ Chain};
+ SDNode *Res =
+ DAG.getMachineNode(X86::OR32mi8Locked, DL, MVT::i32, MVT::Other, Ops);
return SDValue(Res, 1);
}
@@ -32425,36 +32429,44 @@ static SDValue LowerCMP_SWAP(SDValue Op, const X86Subtarget &Subtarget,
SDLoc DL(Op);
unsigned Reg = 0;
unsigned size = 0;
- switch(T.SimpleTy) {
- default: llvm_unreachable("Invalid value type!");
- case MVT::i8: Reg = X86::AL; size = 1; break;
- case MVT::i16: Reg = X86::AX; size = 2; break;
- case MVT::i32: Reg = X86::EAX; size = 4; break;
+ switch (T.SimpleTy) {
+ default:
+ llvm_unreachable("Invalid value type!");
+ case MVT::i8:
+ Reg = X86::AL;
+ size = 1;
+ break;
+ case MVT::i16:
+ Reg = X86::AX;
+ size = 2;
+ break;
+ case MVT::i32:
+ Reg = X86::EAX;
+ size = 4;
+ break;
case MVT::i64:
assert(Subtarget.is64Bit() && "Node not type legal!");
- Reg = X86::RAX; size = 8;
+ Reg = X86::RAX;
+ size = 8;
break;
}
- SDValue cpIn = DAG.getCopyToReg(Op.getOperand(0), DL, Reg,
- Op.getOperand(2), SDValue());
- SDValue Ops[] = { cpIn.getValue(0),
- Op.getOperand(1),
- Op.getOperand(3),
- DAG.getTargetConstant(size, DL, MVT::i8),
- cpIn.getValue(1) };
+ SDValue cpIn =
+ DAG.getCopyToReg(Op.getOperand(0), DL, Reg, Op.getOperand(2), SDValue());
+ SDValue Ops[] = {cpIn.getValue(0), Op.getOperand(1), Op.getOperand(3),
+ DAG.getTargetConstant(size, DL, MVT::i8), cpIn.getValue(1)};
SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
MachineMemOperand *MMO = cast<AtomicSDNode>(Op)->getMemOperand();
- SDValue Result = DAG.getMemIntrinsicNode(X86ISD::LCMPXCHG_DAG, DL, Tys,
- Ops, T, MMO);
+ SDValue Result =
+ DAG.getMemIntrinsicNode(X86ISD::LCMPXCHG_DAG, DL, Tys, Ops, T, MMO);
SDValue cpOut =
- DAG.getCopyFromReg(Result.getValue(0), DL, Reg, T, Result.getValue(1));
+ DAG.getCopyFromReg(Result.getValue(0), DL, Reg, T, Result.getValue(1));
SDValue EFLAGS = DAG.getCopyFromReg(cpOut.getValue(1), DL, X86::EFLAGS,
MVT::i32, cpOut.getValue(2));
SDValue Success = getSETCC(X86::COND_E, EFLAGS, DL, DAG);
- return DAG.getNode(ISD::MERGE_VALUES, DL, Op->getVTList(),
- cpOut, Success, EFLAGS.getValue(1));
+ return DAG.getNode(ISD::MERGE_VALUES, DL, Op->getVTList(), cpOut, Success,
+ EFLAGS.getValue(1));
}
// Create MOVMSKB, taking into account whether we need to split for AVX1.
@@ -32516,7 +32528,8 @@ static SDValue LowerBITCAST(SDValue Op, const X86Subtarget &Subtarget,
}
assert((SrcVT == MVT::v2i32 || SrcVT == MVT::v4i16 || SrcVT == MVT::v8i8 ||
- SrcVT == MVT::i64) && "Unexpected VT!");
+ SrcVT == MVT::i64) &&
+ "Unexpected VT!");
assert(Subtarget.hasSSE2() && "Requires at least SSE2!");
if (!(DstVT == MVT::f64 && SrcVT == MVT::i64) &&
@@ -32530,8 +32543,7 @@ static SDValue LowerBITCAST(SDValue Op, const X86Subtarget &Subtarget,
// Example: from MVT::v2i32 to MVT::v4i32.
MVT NewVT = MVT::getVectorVT(SrcVT.getVectorElementType(),
SrcVT.getVectorNumElements() * 2);
- Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewVT, Src,
- DAG.getUNDEF(SrcVT));
+ Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewVT, Src, DAG.getUNDEF(SrcVT));
} else {
assert(SrcVT == MVT::i64 && !Subtarget.is64Bit() &&
"Unexpected source type in LowerBITCAST");
@@ -32677,7 +32689,8 @@ static SDValue LowerVectorCTPOP(SDValue Op, const SDLoc &DL,
if (Subtarget.hasVPOPCNTDQ()) {
unsigned NumElems = VT.getVectorNumElements();
assert((VT.getVectorElementType() == MVT::i8 ||
- VT.getVectorElementType() == MVT::i16) && "Unexpected type");
+ VT.getVectorElementType() == MVT::i16) &&
+ "Unexpected type");
if (NumElems < 16 || (NumElems == 16 && Subtarget.canExtendTo512DQ())) {
MVT NewVT = MVT::getVectorVT(MVT::i32, NumElems);
Op = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, Op0);
@@ -33076,16 +33089,16 @@ static SDValue lowerAtomicArith(SDValue N, SelectionDAG &DAG,
SDValue NewChain = DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Chain);
assert(!N->hasAnyUseOfValue(0));
// NOTE: The getUNDEF is needed to give something for the unused result 0.
- return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(),
- DAG.getUNDEF(VT), NewChain);
+ return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), DAG.getUNDEF(VT),
+ NewChain);
}
SDValue LockOp = lowerAtomicArithWithLOCK(N, DAG, Subtarget);
// RAUW the chain, but don't worry about the result, as it's unused.
assert(!N->hasAnyUseOfValue(0));
// NOTE: The getUNDEF is needed to give something for the unused result 0.
- return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(),
- DAG.getUNDEF(VT), LockOp.getValue(1));
+ return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), DAG.getUNDEF(VT),
+ LockOp.getValue(1));
}
static SDValue LowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG,
@@ -33185,17 +33198,17 @@ static SDValue LowerADDSUBO_CARRY(SDValue Op, SelectionDAG &DAG) {
// Set the carry flag.
SDValue Carry = Op.getOperand(2);
EVT CarryVT = Carry.getValueType();
- Carry = DAG.getNode(X86ISD::ADD, DL, DAG.getVTList(CarryVT, MVT::i32),
- Carry, DAG.getAllOnesConstant(DL, CarryVT));
+ Carry = DAG.getNode(X86ISD::ADD, DL, DAG.getVTList(CarryVT, MVT::i32), Carry,
+ DAG.getAllOnesConstant(DL, CarryVT));
bool IsAdd = Opc == ISD::UADDO_CARRY || Opc == ISD::SADDO_CARRY;
- SDValue Sum = DAG.getNode(IsAdd ? X86ISD::ADC : X86ISD::SBB, DL, VTs,
- Op.getOperand(0), Op.getOperand(1),
- Carry.getValue(1));
+ SDValue Sum =
+ DAG.getNode(IsAdd ? X86ISD::ADC : X86ISD::SBB, DL, VTs, Op.getOperand(0),
+ Op.getOperand(1), Carry.getValue(1));
bool IsSigned = Opc == ISD::SADDO_CARRY || Opc == ISD::SSUBO_CARRY;
- SDValue SetCC = getSETCC(IsSigned ? X86::COND_O : X86::COND_B,
- Sum.getValue(1), DL, DAG);
+ SDValue SetCC =
+ getSETCC(IsSigned ? X86::COND_O : X86::COND_B, Sum.getValue(1), DL, DAG);
if (N->getValueType(1) == MVT::i1)
SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);
@@ -33291,8 +33304,8 @@ static SDValue LowerMSCATTER(SDValue Op, const X86Subtarget &Subtarget,
if (!Subtarget.hasVLX() && !VT.is512BitVector() &&
!Index.getSimpleValueType().is512BitVector()) {
// Determine how much we need to widen by to get a 512-bit type.
- unsigned Factor = std::min(512/VT.getSizeInBits(),
- 512/IndexVT.getSizeInBits());
+ unsigned Factor =
+ std::min(512 / VT.getSizeInBits(), 512 / IndexVT.getSizeInBits());
unsigned NumElts = VT.getVectorNumElements() * Factor;
VT = MVT::getVectorVT(VT.getVectorElementType(), NumElts);
@@ -33334,7 +33347,7 @@ static SDValue LowerMLOAD(SDValue Op, const X86Subtarget &Subtarget,
N->isExpandingLoad());
// Emit a blend.
SDValue Select = DAG.getNode(ISD::VSELECT, dl, VT, Mask, NewLoad, PassThru);
- return DAG.getMergeValues({ Select, NewLoad.getValue(1) }, dl);
+ return DAG.getMergeValues({Select, NewLoad.getValue(1)}, dl);
}
assert((!N->isExpandingLoad() || Subtarget.hasAVX512()) &&
@@ -33401,7 +33414,7 @@ static SDValue LowerMSTORE(SDValue Op, const X86Subtarget &Subtarget,
// This operation is legal for targets with VLX, but without
// VLX the vector should be widened to 512 bit
- unsigned NumEltsInWideVec = 512/VT.getScalarSizeInBits();
+ unsigned NumEltsInWideVec = 512 / VT.getScalarSizeInBits();
MVT WideDataVT = MVT::getVectorVT(ScalarVT, NumEltsInWideVec);
// Mask element has to be i1.
@@ -33443,8 +33456,8 @@ static SDValue LowerMGATHER(SDValue Op, const X86Subtarget &Subtarget,
if (Subtarget.hasAVX512() && !Subtarget.hasVLX() && !VT.is512BitVector() &&
!IndexVT.is512BitVector()) {
// Determine how much we need to widen by to get a 512-bit type.
- unsigned Factor = std::min(512/VT.getSizeInBits(),
- 512/IndexVT.getSizeInBits());
+ unsigned Factor =
+ std::min(512 / VT.getSizeInBits(), 512 / IndexVT.getSizeInBits());
unsigned NumElts = VT.getVectorNumElements() * Factor;
@@ -33461,8 +33474,8 @@ static SDValue LowerMGATHER(SDValue Op, const X86Subtarget &Subtarget,
if (PassThru.isUndef())
PassThru = getZeroVector(VT, Subtarget, DAG, dl);
- SDValue Ops[] = { N->getChain(), PassThru, Mask, N->getBasePtr(), Index,
- N->getScale() };
+ SDValue Ops[] = {N->getChain(), PassThru, Mask,
+ N->getBasePtr(), Index, N->getScale()};
SDValue NewGather = DAG.getMemIntrinsicNode(
X86ISD::MGATHER, dl, DAG.getVTList(VT, MVT::Other), Ops, N->getMemoryVT(),
N->getMemOperand());
@@ -33660,7 +33673,7 @@ SDValue X86TargetLowering::visitMaskedStore(SelectionDAG &DAG, const SDLoc &DL,
/// Provide custom lowering hooks for some operations.
SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
- // clang-format off
+ // clang-format off
default: llvm_unreachable("Should not custom lower this!");
case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, Subtarget, DAG);
case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
@@ -33823,7 +33836,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
/// Replace a node with an illegal result type with a new node built out of
/// custom code.
void X86TargetLowering::ReplaceNodeResults(SDNode *N,
- SmallVectorImpl<SDValue>&Results,
+ SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) const {
SDLoc dl(N);
unsigned Opc = N->getOpcode();
@@ -34002,8 +34015,8 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
SDValue Ovf = DAG.getSetCC(dl, N->getValueType(1), Hi, HiCmp, ISD::SETNE);
// Widen the result with by padding with undef.
- Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i32, Res,
- DAG.getUNDEF(VT));
+ Res =
+ DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i32, Res, DAG.getUNDEF(VT));
Results.push_back(Res);
Results.push_back(Ovf);
return;
@@ -34020,11 +34033,10 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
"Unexpected type action!");
unsigned NumConcat = 128 / InVT.getSizeInBits();
- EVT InWideVT = EVT::getVectorVT(*DAG.getContext(),
- InVT.getVectorElementType(),
- NumConcat * InVT.getVectorNumElements());
- EVT WideVT = EVT::getVectorVT(*DAG.getContext(),
- VT.getVectorElementType(),
+ EVT InWideVT =
+ EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(),
+ NumConcat * InVT.getVectorNumElements());
+ EVT WideVT = EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(),
NumConcat * VT.getVectorNumElements());
SmallVector<SDValue, 16> Ops(NumConcat, DAG.getUNDEF(InVT));
@@ -34088,7 +34100,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
return;
}
- SDValue V = LowerWin64_i128OP(SDValue(N,0), DAG);
+ SDValue V = LowerWin64_i128OP(SDValue(N, 0), DAG);
Results.push_back(V);
return;
}
@@ -34166,9 +34178,9 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
Lo = DAG.getNode(X86ISD::VTRUNC, dl, MVT::v16i8, Lo);
Hi = DAG.getNode(X86ISD::VTRUNC, dl, MVT::v16i8, Hi);
- SDValue Res = DAG.getVectorShuffle(MVT::v16i8, dl, Lo, Hi,
- { 0, 1, 2, 3, 16, 17, 18, 19,
- -1, -1, -1, -1, -1, -1, -1, -1 });
+ SDValue Res = DAG.getVectorShuffle(
+ MVT::v16i8, dl, Lo, Hi,
+ {0, 1, 2, 3, 16, 17, 18, 19, -1, -1, -1, -1, -1, -1, -1, -1});
Results.push_back(Res);
return;
}
@@ -34200,7 +34212,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
SDValue In = N->getOperand(0);
EVT InVT = In.getValueType();
if (!Subtarget.hasSSE41() && VT == MVT::v4i64 &&
- (InVT == MVT::v4i16 || InVT == MVT::v4i8)){
+ (InVT == MVT::v4i16 || InVT == MVT::v4i8)) {
assert(getTypeAction(*DAG.getContext(), InVT) == TypeWidenVector &&
"Unexpected type action!");
assert(Opc == ISD::SIGN_EXTEND && "Unexpected opcode");
@@ -34216,11 +34228,11 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
// Create an unpackl and unpackh to interleave the sign bits then bitcast
// to v2i64.
- SDValue Lo = DAG.getVectorShuffle(MVT::v4i32, dl, In, SignBits,
- {0, 4, 1, 5});
+ SDValue Lo =
+ DAG.getVectorShuffle(MVT::v4i32, dl, In, SignBits, {0, 4, 1, 5});
Lo = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, Lo);
- SDValue Hi = DAG.getVectorShuffle(MVT::v4i32, dl, In, SignBits,
- {2, 6, 3, 7});
+ SDValue Hi =
+ DAG.getVectorShuffle(MVT::v4i32, dl, In, SignBits, {2, 6, 3, 7});
Hi = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, Hi);
SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lo, Hi);
@@ -34407,7 +34419,6 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
return;
}
-
if (VT == MVT::v2i32) {
assert((!IsStrict || IsSigned || Subtarget.hasAVX512()) &&
"Strict unsigned conversion requires AVX512");
@@ -34492,9 +34503,9 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
}
SDValue ZeroIdx = DAG.getVectorIdxConstant(0, dl);
- SDValue Res = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VecInVT,
- DAG.getConstantFP(0.0, dl, VecInVT), Src,
- ZeroIdx);
+ SDValue Res =
+ DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VecInVT,
+ DAG.getConstantFP(0.0, dl, VecInVT), Src, ZeroIdx);
SDValue Chain;
if (IsStrict) {
SDVTList Tys = DAG.getVTList(VecVT, MVT::Other);
@@ -34581,8 +34592,8 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
EVT SrcVT = Src.getValueType();
if (Subtarget.hasDQI() && Subtarget.hasVLX() && SrcVT == MVT::v2i64) {
if (IsStrict) {
- unsigned Opc = IsSigned ? X86ISD::STRICT_CVTSI2P
- : X86ISD::STRICT_CVTUI2P;
+ unsigned Opc =
+ IsSigned ? X86ISD::STRICT_CVTSI2P : X86ISD::STRICT_CVTUI2P;
SDValue Res = DAG.getNode(Opc, dl, {MVT::v4f32, MVT::Other},
{N->getOperand(0), Src});
Results.push_back(Res);
@@ -34596,7 +34607,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
if (SrcVT == MVT::v2i64 && !IsSigned && Subtarget.is64Bit() &&
Subtarget.hasSSE41() && !Subtarget.hasAVX512()) {
SDValue Zero = DAG.getConstant(0, dl, SrcVT);
- SDValue One = DAG.getConstant(1, dl, SrcVT);
+ SDValue One = DAG.getConstant(1, dl, SrcVT);
SDValue Sign = DAG.getNode(ISD::OR, dl, SrcVT,
DAG.getNode(ISD::SRL, dl, SrcVT, Src, One),
DAG.getNode(ISD::AND, dl, SrcVT, Src, One));
@@ -34662,9 +34673,9 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
if (IsStrict) {
SDValue Sub = DAG.getNode(ISD::STRICT_FSUB, dl, {MVT::v2f64, MVT::Other},
{N->getOperand(0), Or, VBias});
- SDValue Res = DAG.getNode(X86ISD::STRICT_VFPROUND, dl,
- {MVT::v4f32, MVT::Other},
- {Sub.getValue(1), Sub});
+ SDValue Res =
+ DAG.getNode(X86ISD::STRICT_VFPROUND, dl, {MVT::v4f32, MVT::Other},
+ {Sub.getValue(1), Sub});
Results.push_back(Res);
Results.push_back(Res.getValue(1));
} else {
@@ -34745,8 +34756,9 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
case ISD::INTRINSIC_W_CHAIN: {
unsigned IntNo = N->getConstantOperandVal(1);
switch (IntNo) {
- default : llvm_unreachable("Do not know how to custom type "
- "legalize this intrinsic operation!");
+ default:
+ llvm_unreachable("Do not know how to custom type "
+ "legalize this intrinsic operation!");
case Intrinsic::x86_rdtsc:
return getReadTimeStampCounter(N, dl, X86::RDTSC, DAG, Subtarget,
Results);
@@ -34759,7 +34771,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
return;
case Intrinsic::x86_rdpru:
expandIntrinsicWChainHelper(N, dl, DAG, X86::RDPRU, X86::ECX, Subtarget,
- Results);
+ Results);
return;
case Intrinsic::x86_xgetbv:
expandIntrinsicWChainHelper(N, dl, DAG, X86::XGETBV, X86::ECX, Subtarget,
@@ -34816,12 +34828,12 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
}
SDValue cpOutL = DAG.getCopyFromReg(Result.getValue(0), dl,
- Regs64bit ? X86::RAX : X86::EAX,
- HalfT, Result.getValue(1));
+ Regs64bit ? X86::RAX : X86::EAX, HalfT,
+ Result.getValue(1));
SDValue cpOutH = DAG.getCopyFromReg(cpOutL.getValue(1), dl,
- Regs64bit ? X86::RDX : X86::EDX,
- HalfT, cpOutL.getValue(2));
- SDValue OpsF[] = { cpOutL.getValue(0), cpOutH.getValue(0)};
+ Regs64bit ? X86::RDX : X86::EDX, HalfT,
+ cpOutL.getValue(2));
+ SDValue OpsF[] = {cpOutL.getValue(0), cpOutH.getValue(0)};
SDValue EFLAGS = DAG.getCopyFromReg(cpOutH.getValue(1), dl, X86::EFLAGS,
MVT::i32, cpOutH.getValue(2));
@@ -34863,7 +34875,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
// Then extract the lower 64-bits.
MVT LdVT = Subtarget.hasSSE2() ? MVT::v2i64 : MVT::v4f32;
SDVTList Tys = DAG.getVTList(LdVT, MVT::Other);
- SDValue Ops[] = { Node->getChain(), Node->getBasePtr() };
+ SDValue Ops[] = {Node->getChain(), Node->getBasePtr()};
SDValue Ld = DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops,
MVT::i64, Node->getMemOperand());
if (Subtarget.hasSSE2()) {
@@ -34887,10 +34899,9 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
// First load this into an 80-bit X87 register. This will put the whole
// integer into the significand.
SDVTList Tys = DAG.getVTList(MVT::f80, MVT::Other);
- SDValue Ops[] = { Node->getChain(), Node->getBasePtr() };
- SDValue Result = DAG.getMemIntrinsicNode(X86ISD::FILD,
- dl, Tys, Ops, MVT::i64,
- Node->getMemOperand());
+ SDValue Ops[] = {Node->getChain(), Node->getBasePtr()};
+ SDValue Result = DAG.getMemIntrinsicNode(
+ X86ISD::FILD, dl, Tys, Ops, MVT::i64, Node->getMemOperand());
SDValue Chain = Result.getValue(1);
// Now store the X87 register to a stack temporary and convert to i64.
@@ -34901,7 +34912,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
MachinePointerInfo MPI =
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI);
- SDValue StoreOps[] = { Chain, Result, StackPtr };
+ SDValue StoreOps[] = {Chain, Result, StackPtr};
Chain = DAG.getMemIntrinsicNode(
X86ISD::FIST, dl, DAG.getVTList(MVT::Other), StoreOps, MVT::i64,
MPI, std::nullopt /*Align*/, MachineMemOperand::MOStore);
@@ -34959,8 +34970,8 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
assert(getTypeAction(*DAG.getContext(), DstVT) == TypeWidenVector &&
"Unexpected type action!");
EVT WideVT = getTypeToTransformTo(*DAG.getContext(), DstVT);
- SDValue Res = DAG.getNode(X86ISD::MOVQ2DQ, dl, MVT::v2i64,
- N->getOperand(0));
+ SDValue Res =
+ DAG.getNode(X86ISD::MOVQ2DQ, dl, MVT::v2i64, N->getOperand(0));
Res = DAG.getBitcast(WideVT, Res);
Results.push_back(Res);
return;
@@ -34982,8 +34993,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
SDValue Mask = Gather->getMask();
assert(Mask.getValueType() == MVT::v2i1 && "Unexpected mask type");
SDValue PassThru = DAG.getNode(ISD::CONCAT_VECTORS, dl, WideVT,
- Gather->getPassThru(),
- DAG.getUNDEF(VT));
+ Gather->getPassThru(), DAG.getUNDEF(VT));
if (!Subtarget.hasVLX()) {
// We need to widen the mask, but the instruction will only use 2
// of its elements. So we can use undef.
@@ -34991,8 +35001,8 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
DAG.getUNDEF(MVT::v2i1));
Mask = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, Mask);
}
- SDValue Ops[] = { Gather->getChain(), PassThru, Mask,
- Gather->getBasePtr(), Index, Gather->getScale() };
+ SDValue Ops[] = {Gather->getChain(), PassThru, Mask,
+ Gather->getBasePtr(), Index, Gather->getScale()};
SDValue Res = DAG.getMemIntrinsicNode(
X86ISD::MGATHER, dl, DAG.getVTList(WideVT, MVT::Other), Ops,
Gather->getMemoryVT(), Gather->getMemOperand());
@@ -35037,7 +35047,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
return;
}
case ISD::ADDRSPACECAST: {
- SDValue V = LowerADDRSPACECAST(SDValue(N,0), DAG);
+ SDValue V = LowerADDRSPACECAST(SDValue(N, 0), DAG);
Results.push_back(V);
return;
}
@@ -35068,471 +35078,474 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
switch ((X86ISD::NodeType)Opcode) {
- case X86ISD::FIRST_NUMBER: break;
-#define NODE_NAME_CASE(NODE) case X86ISD::NODE: return "X86ISD::" #NODE;
- NODE_NAME_CASE(BSF)
- NODE_NAME_CASE(BSR)
- NODE_NAME_CASE(FSHL)
- NODE_NAME_CASE(FSHR)
- NODE_NAME_CASE(FAND)
- NODE_NAME_CASE(FANDN)
- NODE_NAME_CASE(FOR)
- NODE_NAME_CASE(FXOR)
- NODE_NAME_CASE(FILD)
- NODE_NAME_CASE(FIST)
- NODE_NAME_CASE(FP_TO_INT_IN_MEM)
- NODE_NAME_CASE(FLD)
- NODE_NAME_CASE(FST)
- NODE_NAME_CASE(CALL)
- NODE_NAME_CASE(CALL_RVMARKER)
- NODE_NAME_CASE(IMP_CALL)
- NODE_NAME_CASE(BT)
- NODE_NAME_CASE(CMP)
- NODE_NAME_CASE(FCMP)
- NODE_NAME_CASE(STRICT_FCMP)
- NODE_NAME_CASE(STRICT_FCMPS)
- NODE_NAME_CASE(COMI)
- NODE_NAME_CASE(UCOMI)
- NODE_NAME_CASE(COMX)
- NODE_NAME_CASE(UCOMX)
- NODE_NAME_CASE(CMPM)
- NODE_NAME_CASE(CMPMM)
- NODE_NAME_CASE(STRICT_CMPM)
- NODE_NAME_CASE(CMPMM_SAE)
- NODE_NAME_CASE(SETCC)
- NODE_NAME_CASE(SETCC_CARRY)
- NODE_NAME_CASE(FSETCC)
- NODE_NAME_CASE(FSETCCM)
- NODE_NAME_CASE(FSETCCM_SAE)
- NODE_NAME_CASE(CMOV)
- NODE_NAME_CASE(BRCOND)
- NODE_NAME_CASE(RET_GLUE)
- NODE_NAME_CASE(IRET)
- NODE_NAME_CASE(REP_STOS)
- NODE_NAME_CASE(REP_MOVS)
- NODE_NAME_CASE(GlobalBaseReg)
- NODE_NAME_CASE(Wrapper)
- NODE_NAME_CASE(WrapperRIP)
- NODE_NAME_CASE(MOVQ2DQ)
- NODE_NAME_CASE(MOVDQ2Q)
- NODE_NAME_CASE(MMX_MOVD2W)
- NODE_NAME_CASE(MMX_MOVW2D)
- NODE_NAME_CASE(PEXTRB)
- NODE_NAME_CASE(PEXTRW)
- NODE_NAME_CASE(INSERTPS)
- NODE_NAME_CASE(PINSRB)
- NODE_NAME_CASE(PINSRW)
- NODE_NAME_CASE(PSHUFB)
- NODE_NAME_CASE(ANDNP)
- NODE_NAME_CASE(BLENDI)
- NODE_NAME_CASE(BLENDV)
- NODE_NAME_CASE(HADD)
- NODE_NAME_CASE(HSUB)
- NODE_NAME_CASE(HADDS)
- NODE_NAME_CASE(HSUBS)
- NODE_NAME_CASE(FHADD)
- NODE_NAME_CASE(FHSUB)
- NODE_NAME_CASE(CONFLICT)
- NODE_NAME_CASE(FMAX)
- NODE_NAME_CASE(FMAXS)
- NODE_NAME_CASE(FMAX_SAE)
- NODE_NAME_CASE(FMAXS_SAE)
- NODE_NAME_CASE(STRICT_FMAX)
- NODE_NAME_CASE(FMIN)
- NODE_NAME_CASE(FMINS)
- NODE_NAME_CASE(FMIN_SAE)
- NODE_NAME_CASE(FMINS_SAE)
- NODE_NAME_CASE(STRICT_FMIN)
- NODE_NAME_CASE(FMAXC)
- NODE_NAME_CASE(FMINC)
- NODE_NAME_CASE(FRSQRT)
- NODE_NAME_CASE(FRCP)
- NODE_NAME_CASE(EXTRQI)
- NODE_NAME_CASE(INSERTQI)
- NODE_NAME_CASE(TLSADDR)
- NODE_NAME_CASE(TLSBASEADDR)
- NODE_NAME_CASE(TLSCALL)
- NODE_NAME_CASE(TLSDESC)
- NODE_NAME_CASE(EH_SJLJ_SETJMP)
- NODE_NAME_CASE(EH_SJLJ_LONGJMP)
- NODE_NAME_CASE(EH_SJLJ_SETUP_DISPATCH)
- NODE_NAME_CASE(EH_RETURN)
- NODE_NAME_CASE(TC_RETURN)
- NODE_NAME_CASE(FNSTCW16m)
- NODE_NAME_CASE(FLDCW16m)
- NODE_NAME_CASE(FNSTENVm)
- NODE_NAME_CASE(FLDENVm)
- NODE_NAME_CASE(LCMPXCHG_DAG)
- NODE_NAME_CASE(LCMPXCHG8_DAG)
- NODE_NAME_CASE(LCMPXCHG16_DAG)
- NODE_NAME_CASE(LCMPXCHG16_SAVE_RBX_DAG)
- NODE_NAME_CASE(LADD)
- NODE_NAME_CASE(LSUB)
- NODE_NAME_CASE(LOR)
- NODE_NAME_CASE(LXOR)
- NODE_NAME_CASE(LAND)
- NODE_NAME_CASE(LBTS)
- NODE_NAME_CASE(LBTC)
- NODE_NAME_CASE(LBTR)
- NODE_NAME_CASE(LBTS_RM)
- NODE_NAME_CASE(LBTC_RM)
- NODE_NAME_CASE(LBTR_RM)
- NODE_NAME_CASE(AADD)
- NODE_NAME_CASE(AOR)
- NODE_NAME_CASE(AXOR)
- NODE_NAME_CASE(AAND)
- NODE_NAME_CASE(VZEXT_MOVL)
- NODE_NAME_CASE(VZEXT_LOAD)
- NODE_NAME_CASE(VEXTRACT_STORE)
- NODE_NAME_CASE(VTRUNC)
- NODE_NAME_CASE(VTRUNCS)
- NODE_NAME_CASE(VTRUNCUS)
- NODE_NAME_CASE(VMTRUNC)
- NODE_NAME_CASE(VMTRUNCS)
- NODE_NAME_CASE(VMTRUNCUS)
- NODE_NAME_CASE(VTRUNCSTORES)
- NODE_NAME_CASE(VTRUNCSTOREUS)
- NODE_NAME_CASE(VMTRUNCSTORES)
- NODE_NAME_CASE(VMTRUNCSTOREUS)
- NODE_NAME_CASE(VFPEXT)
- NODE_NAME_CASE(STRICT_VFPEXT)
- NODE_NAME_CASE(VFPEXT_SAE)
- NODE_NAME_CASE(VFPEXTS)
- NODE_NAME_CASE(VFPEXTS_SAE)
- NODE_NAME_CASE(VFPROUND)
- NODE_NAME_CASE(VFPROUND2)
- NODE_NAME_CASE(VFPROUND2_RND)
- NODE_NAME_CASE(STRICT_VFPROUND)
- NODE_NAME_CASE(VMFPROUND)
- NODE_NAME_CASE(VFPROUND_RND)
- NODE_NAME_CASE(VFPROUNDS)
- NODE_NAME_CASE(VFPROUNDS_RND)
- NODE_NAME_CASE(VSHLDQ)
- NODE_NAME_CASE(VSRLDQ)
- NODE_NAME_CASE(VSHL)
- NODE_NAME_CASE(VSRL)
- NODE_NAME_CASE(VSRA)
- NODE_NAME_CASE(VSHLI)
- NODE_NAME_CASE(VSRLI)
- NODE_NAME_CASE(VSRAI)
- NODE_NAME_CASE(VSHLV)
- NODE_NAME_CASE(VSRLV)
- NODE_NAME_CASE(VSRAV)
- NODE_NAME_CASE(VROTLI)
- NODE_NAME_CASE(VROTRI)
- NODE_NAME_CASE(VPPERM)
- NODE_NAME_CASE(CMPP)
- NODE_NAME_CASE(STRICT_CMPP)
- NODE_NAME_CASE(PCMPEQ)
- NODE_NAME_CASE(PCMPGT)
- NODE_NAME_CASE(PHMINPOS)
- NODE_NAME_CASE(ADD)
- NODE_NAME_CASE(SUB)
- NODE_NAME_CASE(ADC)
- NODE_NAME_CASE(SBB)
- NODE_NAME_CASE(SMUL)
- NODE_NAME_CASE(UMUL)
- NODE_NAME_CASE(OR)
- NODE_NAME_CASE(XOR)
- NODE_NAME_CASE(AND)
- NODE_NAME_CASE(BEXTR)
- NODE_NAME_CASE(BEXTRI)
- NODE_NAME_CASE(BZHI)
- NODE_NAME_CASE(PDEP)
- NODE_NAME_CASE(PEXT)
- NODE_NAME_CASE(MUL_IMM)
- NODE_NAME_CASE(MOVMSK)
- NODE_NAME_CASE(PTEST)
- NODE_NAME_CASE(TESTP)
- NODE_NAME_CASE(KORTEST)
- NODE_NAME_CASE(KTEST)
- NODE_NAME_CASE(KADD)
- NODE_NAME_CASE(KSHIFTL)
- NODE_NAME_CASE(KSHIFTR)
- NODE_NAME_CASE(PACKSS)
- NODE_NAME_CASE(PACKUS)
- NODE_NAME_CASE(PALIGNR)
- NODE_NAME_CASE(VALIGN)
- NODE_NAME_CASE(VSHLD)
- NODE_NAME_CASE(VSHRD)
- NODE_NAME_CASE(PSHUFD)
- NODE_NAME_CASE(PSHUFHW)
- NODE_NAME_CASE(PSHUFLW)
- NODE_NAME_CASE(SHUFP)
- NODE_NAME_CASE(SHUF128)
- NODE_NAME_CASE(MOVLHPS)
- NODE_NAME_CASE(MOVHLPS)
- NODE_NAME_CASE(MOVDDUP)
- NODE_NAME_CASE(MOVSHDUP)
- NODE_NAME_CASE(MOVSLDUP)
- NODE_NAME_CASE(MOVSD)
- NODE_NAME_CASE(MOVSS)
- NODE_NAME_CASE(MOVSH)
- NODE_NAME_CASE(UNPCKL)
- NODE_NAME_CASE(UNPCKH)
- NODE_NAME_CASE(VBROADCAST)
- NODE_NAME_CASE(VBROADCAST_LOAD)
- NODE_NAME_CASE(VBROADCASTM)
- NODE_NAME_CASE(SUBV_BROADCAST_LOAD)
- NODE_NAME_CASE(VPERMILPV)
- NODE_NAME_CASE(VPERMILPI)
- NODE_NAME_CASE(VPERM2X128)
- NODE_NAME_CASE(VPERMV)
- NODE_NAME_CASE(VPERMV3)
- NODE_NAME_CASE(VPERMI)
- NODE_NAME_CASE(VPTERNLOG)
- NODE_NAME_CASE(FP_TO_SINT_SAT)
- NODE_NAME_CASE(FP_TO_UINT_SAT)
- NODE_NAME_CASE(VFIXUPIMM)
- NODE_NAME_CASE(VFIXUPIMM_SAE)
- NODE_NAME_CASE(VFIXUPIMMS)
- NODE_NAME_CASE(VFIXUPIMMS_SAE)
- NODE_NAME_CASE(VRANGE)
- NODE_NAME_CASE(VRANGE_SAE)
- NODE_NAME_CASE(VRANGES)
- NODE_NAME_CASE(VRANGES_SAE)
- NODE_NAME_CASE(PMULUDQ)
- NODE_NAME_CASE(PMULDQ)
- NODE_NAME_CASE(PSADBW)
- NODE_NAME_CASE(DBPSADBW)
- NODE_NAME_CASE(VASTART_SAVE_XMM_REGS)
- NODE_NAME_CASE(VAARG_64)
- NODE_NAME_CASE(VAARG_X32)
- NODE_NAME_CASE(DYN_ALLOCA)
- NODE_NAME_CASE(MFENCE)
- NODE_NAME_CASE(SEG_ALLOCA)
- NODE_NAME_CASE(PROBED_ALLOCA)
- NODE_NAME_CASE(RDRAND)
- NODE_NAME_CASE(RDSEED)
- NODE_NAME_CASE(RDPKRU)
- NODE_NAME_CASE(WRPKRU)
- NODE_NAME_CASE(VPMADDUBSW)
- NODE_NAME_CASE(VPMADDWD)
- NODE_NAME_CASE(VPSHA)
- NODE_NAME_CASE(VPSHL)
- NODE_NAME_CASE(VPCOM)
- NODE_NAME_CASE(VPCOMU)
- NODE_NAME_CASE(VPERMIL2)
- NODE_NAME_CASE(FMSUB)
- NODE_NAME_CASE(STRICT_FMSUB)
- NODE_NAME_CASE(FNMADD)
- NODE_NAME_CASE(STRICT_FNMADD)
- NODE_NAME_CASE(FNMSUB)
- NODE_NAME_CASE(STRICT_FNMSUB)
- NODE_NAME_CASE(FMADDSUB)
- NODE_NAME_CASE(FMSUBADD)
- NODE_NAME_CASE(FMADD_RND)
- NODE_NAME_CASE(FNMADD_RND)
- NODE_NAME_CASE(FMSUB_RND)
- NODE_NAME_CASE(FNMSUB_RND)
- NODE_NAME_CASE(FMADDSUB_RND)
- NODE_NAME_CASE(FMSUBADD_RND)
- NODE_NAME_CASE(VFMADDC)
- NODE_NAME_CASE(VFMADDC_RND)
- NODE_NAME_CASE(VFCMADDC)
- NODE_NAME_CASE(VFCMADDC_RND)
- NODE_NAME_CASE(VFMULC)
- NODE_NAME_CASE(VFMULC_RND)
- NODE_NAME_CASE(VFCMULC)
- NODE_NAME_CASE(VFCMULC_RND)
- NODE_NAME_CASE(VFMULCSH)
- NODE_NAME_CASE(VFMULCSH_RND)
- NODE_NAME_CASE(VFCMULCSH)
- NODE_NAME_CASE(VFCMULCSH_RND)
- NODE_NAME_CASE(VFMADDCSH)
- NODE_NAME_CASE(VFMADDCSH_RND)
- NODE_NAME_CASE(VFCMADDCSH)
- NODE_NAME_CASE(VFCMADDCSH_RND)
- NODE_NAME_CASE(VPMADD52H)
- NODE_NAME_CASE(VPMADD52L)
- NODE_NAME_CASE(VRNDSCALE)
- NODE_NAME_CASE(STRICT_VRNDSCALE)
- NODE_NAME_CASE(VRNDSCALE_SAE)
- NODE_NAME_CASE(VRNDSCALES)
- NODE_NAME_CASE(VRNDSCALES_SAE)
- NODE_NAME_CASE(VREDUCE)
- NODE_NAME_CASE(VREDUCE_SAE)
- NODE_NAME_CASE(VREDUCES)
- NODE_NAME_CASE(VREDUCES_SAE)
- NODE_NAME_CASE(VGETMANT)
- NODE_NAME_CASE(VGETMANT_SAE)
- NODE_NAME_CASE(VGETMANTS)
- NODE_NAME_CASE(VGETMANTS_SAE)
- NODE_NAME_CASE(PCMPESTR)
- NODE_NAME_CASE(PCMPISTR)
- NODE_NAME_CASE(XTEST)
- NODE_NAME_CASE(COMPRESS)
- NODE_NAME_CASE(EXPAND)
- NODE_NAME_CASE(SELECTS)
- NODE_NAME_CASE(ADDSUB)
- NODE_NAME_CASE(RCP14)
- NODE_NAME_CASE(RCP14S)
- NODE_NAME_CASE(RSQRT14)
- NODE_NAME_CASE(RSQRT14S)
- NODE_NAME_CASE(FADD_RND)
- NODE_NAME_CASE(FADDS)
- NODE_NAME_CASE(FADDS_RND)
- NODE_NAME_CASE(FSUB_RND)
- NODE_NAME_CASE(FSUBS)
- NODE_NAME_CASE(FSUBS_RND)
- NODE_NAME_CASE(FMUL_RND)
- NODE_NAME_CASE(FMULS)
- NODE_NAME_CASE(FMULS_RND)
- NODE_NAME_CASE(FDIV_RND)
- NODE_NAME_CASE(FDIVS)
- NODE_NAME_CASE(FDIVS_RND)
- NODE_NAME_CASE(FSQRT_RND)
- NODE_NAME_CASE(FSQRTS)
- NODE_NAME_CASE(FSQRTS_RND)
- NODE_NAME_CASE(FGETEXP)
- NODE_NAME_CASE(FGETEXP_SAE)
- NODE_NAME_CASE(FGETEXPS)
- NODE_NAME_CASE(FGETEXPS_SAE)
- NODE_NAME_CASE(SCALEF)
- NODE_NAME_CASE(SCALEF_RND)
- NODE_NAME_CASE(SCALEFS)
- NODE_NAME_CASE(SCALEFS_RND)
- NODE_NAME_CASE(MULHRS)
- NODE_NAME_CASE(SINT_TO_FP_RND)
- NODE_NAME_CASE(UINT_TO_FP_RND)
- NODE_NAME_CASE(CVTTP2SI)
- NODE_NAME_CASE(CVTTP2UI)
- NODE_NAME_CASE(STRICT_CVTTP2SI)
- NODE_NAME_CASE(STRICT_CVTTP2UI)
- NODE_NAME_CASE(MCVTTP2SI)
- NODE_NAME_CASE(MCVTTP2UI)
- NODE_NAME_CASE(CVTTP2SI_SAE)
- NODE_NAME_CASE(CVTTP2UI_SAE)
- NODE_NAME_CASE(CVTTS2SI)
- NODE_NAME_CASE(CVTTS2UI)
- NODE_NAME_CASE(CVTTS2SI_SAE)
- NODE_NAME_CASE(CVTTS2UI_SAE)
- NODE_NAME_CASE(CVTSI2P)
- NODE_NAME_CASE(CVTUI2P)
- NODE_NAME_CASE(STRICT_CVTSI2P)
- NODE_NAME_CASE(STRICT_CVTUI2P)
- NODE_NAME_CASE(MCVTSI2P)
- NODE_NAME_CASE(MCVTUI2P)
- NODE_NAME_CASE(VFPCLASS)
- NODE_NAME_CASE(VFPCLASSS)
- NODE_NAME_CASE(MULTISHIFT)
- NODE_NAME_CASE(SCALAR_SINT_TO_FP)
- NODE_NAME_CASE(SCALAR_SINT_TO_FP_RND)
- NODE_NAME_CASE(SCALAR_UINT_TO_FP)
- NODE_NAME_CASE(SCALAR_UINT_TO_FP_RND)
- NODE_NAME_CASE(CVTPS2PH)
- NODE_NAME_CASE(STRICT_CVTPS2PH)
- NODE_NAME_CASE(CVTPS2PH_SAE)
- NODE_NAME_CASE(MCVTPS2PH)
- NODE_NAME_CASE(MCVTPS2PH_SAE)
- NODE_NAME_CASE(CVTPH2PS)
- NODE_NAME_CASE(STRICT_CVTPH2PS)
- NODE_NAME_CASE(CVTPH2PS_SAE)
- NODE_NAME_CASE(CVTP2SI)
- NODE_NAME_CASE(CVTP2UI)
- NODE_NAME_CASE(MCVTP2SI)
- NODE_NAME_CASE(MCVTP2UI)
- NODE_NAME_CASE(CVTP2SI_RND)
- NODE_NAME_CASE(CVTP2UI_RND)
- NODE_NAME_CASE(CVTS2SI)
- NODE_NAME_CASE(CVTS2UI)
- NODE_NAME_CASE(CVTS2SI_RND)
- NODE_NAME_CASE(CVTS2UI_RND)
- NODE_NAME_CASE(CVTNEPS2BF16)
- NODE_NAME_CASE(MCVTNEPS2BF16)
- NODE_NAME_CASE(DPBF16PS)
- NODE_NAME_CASE(DPFP16PS)
- NODE_NAME_CASE(MPSADBW)
- NODE_NAME_CASE(LWPINS)
- NODE_NAME_CASE(MGATHER)
- NODE_NAME_CASE(MSCATTER)
- NODE_NAME_CASE(VPDPBUSD)
- NODE_NAME_CASE(VPDPBUSDS)
- NODE_NAME_CASE(VPDPWSSD)
- NODE_NAME_CASE(VPDPWSSDS)
- NODE_NAME_CASE(VPSHUFBITQMB)
- NODE_NAME_CASE(GF2P8MULB)
- NODE_NAME_CASE(GF2P8AFFINEQB)
- NODE_NAME_CASE(GF2P8AFFINEINVQB)
- NODE_NAME_CASE(NT_CALL)
- NODE_NAME_CASE(NT_BRIND)
- NODE_NAME_CASE(UMWAIT)
- NODE_NAME_CASE(TPAUSE)
- NODE_NAME_CASE(ENQCMD)
- NODE_NAME_CASE(ENQCMDS)
- NODE_NAME_CASE(VP2INTERSECT)
- NODE_NAME_CASE(VPDPBSUD)
- NODE_NAME_CASE(VPDPBSUDS)
- NODE_NAME_CASE(VPDPBUUD)
- NODE_NAME_CASE(VPDPBUUDS)
- NODE_NAME_CASE(VPDPBSSD)
- NODE_NAME_CASE(VPDPBSSDS)
- NODE_NAME_CASE(VPDPWSUD)
- NODE_NAME_CASE(VPDPWSUDS)
- NODE_NAME_CASE(VPDPWUSD)
- NODE_NAME_CASE(VPDPWUSDS)
- NODE_NAME_CASE(VPDPWUUD)
- NODE_NAME_CASE(VPDPWUUDS)
- NODE_NAME_CASE(VMINMAX)
- NODE_NAME_CASE(VMINMAX_SAE)
- NODE_NAME_CASE(VMINMAXS)
- NODE_NAME_CASE(VMINMAXS_SAE)
- NODE_NAME_CASE(CVTP2IBS)
- NODE_NAME_CASE(CVTP2IUBS)
- NODE_NAME_CASE(CVTP2IBS_RND)
- NODE_NAME_CASE(CVTP2IUBS_RND)
- NODE_NAME_CASE(CVTTP2IBS)
- NODE_NAME_CASE(CVTTP2IUBS)
- NODE_NAME_CASE(CVTTP2IBS_SAE)
- NODE_NAME_CASE(CVTTP2IUBS_SAE)
- NODE_NAME_CASE(VCVT2PH2BF8)
- NODE_NAME_CASE(VCVT2PH2BF8S)
- NODE_NAME_CASE(VCVT2PH2HF8)
- NODE_NAME_CASE(VCVT2PH2HF8S)
- NODE_NAME_CASE(VCVTBIASPH2BF8)
- NODE_NAME_CASE(VCVTBIASPH2BF8S)
- NODE_NAME_CASE(VCVTBIASPH2HF8)
- NODE_NAME_CASE(VCVTBIASPH2HF8S)
- NODE_NAME_CASE(VCVTPH2BF8)
- NODE_NAME_CASE(VCVTPH2BF8S)
- NODE_NAME_CASE(VCVTPH2HF8)
- NODE_NAME_CASE(VCVTPH2HF8S)
- NODE_NAME_CASE(VMCVTBIASPH2BF8)
- NODE_NAME_CASE(VMCVTBIASPH2BF8S)
- NODE_NAME_CASE(VMCVTBIASPH2HF8)
- NODE_NAME_CASE(VMCVTBIASPH2HF8S)
- NODE_NAME_CASE(VMCVTPH2BF8)
- NODE_NAME_CASE(VMCVTPH2BF8S)
- NODE_NAME_CASE(VMCVTPH2HF8)
- NODE_NAME_CASE(VMCVTPH2HF8S)
- NODE_NAME_CASE(VCVTHF82PH)
- NODE_NAME_CASE(AESENC128KL)
- NODE_NAME_CASE(AESDEC128KL)
- NODE_NAME_CASE(AESENC256KL)
- NODE_NAME_CASE(AESDEC256KL)
- NODE_NAME_CASE(AESENCWIDE128KL)
- NODE_NAME_CASE(AESDECWIDE128KL)
- NODE_NAME_CASE(AESENCWIDE256KL)
- NODE_NAME_CASE(AESDECWIDE256KL)
- NODE_NAME_CASE(CMPCCXADD)
- NODE_NAME_CASE(TESTUI)
- NODE_NAME_CASE(FP80_ADD)
- NODE_NAME_CASE(STRICT_FP80_ADD)
- NODE_NAME_CASE(CCMP)
- NODE_NAME_CASE(CTEST)
- NODE_NAME_CASE(CLOAD)
- NODE_NAME_CASE(CSTORE)
- NODE_NAME_CASE(CVTTS2SIS)
- NODE_NAME_CASE(CVTTS2UIS)
- NODE_NAME_CASE(CVTTS2SIS_SAE)
- NODE_NAME_CASE(CVTTS2UIS_SAE)
- NODE_NAME_CASE(CVTTP2SIS)
- NODE_NAME_CASE(MCVTTP2SIS)
- NODE_NAME_CASE(CVTTP2UIS_SAE)
- NODE_NAME_CASE(CVTTP2SIS_SAE)
- NODE_NAME_CASE(CVTTP2UIS)
- NODE_NAME_CASE(MCVTTP2UIS)
- NODE_NAME_CASE(POP_FROM_X87_REG)
+ case X86ISD::FIRST_NUMBER:
+ break;
+#define NODE_NAME_CASE(NODE) \
+ case X86ISD::NODE: \
+ return "X86ISD::" #NODE;
+ NODE_NAME_CASE(BSF)
+ NODE_NAME_CASE(BSR)
+ NODE_NAME_CASE(FSHL)
+ NODE_NAME_CASE(FSHR)
+ NODE_NAME_CASE(FAND)
+ NODE_NAME_CASE(FANDN)
+ NODE_NAME_CASE(FOR)
+ NODE_NAME_CASE(FXOR)
+ NODE_NAME_CASE(FILD)
+ NODE_NAME_CASE(FIST)
+ NODE_NAME_CASE(FP_TO_INT_IN_MEM)
+ NODE_NAME_CASE(FLD)
+ NODE_NAME_CASE(FST)
+ NODE_NAME_CASE(CALL)
+ NODE_NAME_CASE(CALL_RVMARKER)
+ NODE_NAME_CASE(IMP_CALL)
+ NODE_NAME_CASE(BT)
+ NODE_NAME_CASE(CMP)
+ NODE_NAME_CASE(FCMP)
+ NODE_NAME_CASE(STRICT_FCMP)
+ NODE_NAME_CASE(STRICT_FCMPS)
+ NODE_NAME_CASE(COMI)
+ NODE_NAME_CASE(UCOMI)
+ NODE_NAME_CASE(COMX)
+ NODE_NAME_CASE(UCOMX)
+ NODE_NAME_CASE(CMPM)
+ NODE_NAME_CASE(CMPMM)
+ NODE_NAME_CASE(STRICT_CMPM)
+ NODE_NAME_CASE(CMPMM_SAE)
+ NODE_NAME_CASE(SETCC)
+ NODE_NAME_CASE(SETCC_CARRY)
+ NODE_NAME_CASE(FSETCC)
+ NODE_NAME_CASE(FSETCCM)
+ NODE_NAME_CASE(FSETCCM_SAE)
+ NODE_NAME_CASE(CMOV)
+ NODE_NAME_CASE(BRCOND)
+ NODE_NAME_CASE(RET_GLUE)
+ NODE_NAME_CASE(IRET)
+ NODE_NAME_CASE(REP_STOS)
+ NODE_NAME_CASE(REP_MOVS)
+ NODE_NAME_CASE(GlobalBaseReg)
+ NODE_NAME_CASE(Wrapper)
+ NODE_NAME_CASE(WrapperRIP)
+ NODE_NAME_CASE(MOVQ2DQ)
+ NODE_NAME_CASE(MOVDQ2Q)
+ NODE_NAME_CASE(MMX_MOVD2W)
+ NODE_NAME_CASE(MMX_MOVW2D)
+ NODE_NAME_CASE(PEXTRB)
+ NODE_NAME_CASE(PEXTRW)
+ NODE_NAME_CASE(INSERTPS)
+ NODE_NAME_CASE(PINSRB)
+ NODE_NAME_CASE(PINSRW)
+ NODE_NAME_CASE(PSHUFB)
+ NODE_NAME_CASE(ANDNP)
+ NODE_NAME_CASE(BLENDI)
+ NODE_NAME_CASE(BLENDV)
+ NODE_NAME_CASE(HADD)
+ NODE_NAME_CASE(HSUB)
+ NODE_NAME_CASE(HADDS)
+ NODE_NAME_CASE(HSUBS)
+ NODE_NAME_CASE(FHADD)
+ NODE_NAME_CASE(FHSUB)
+ NODE_NAME_CASE(CONFLICT)
+ NODE_NAME_CASE(FMAX)
+ NODE_NAME_CASE(FMAXS)
+ NODE_NAME_CASE(FMAX_SAE)
+ NODE_NAME_CASE(FMAXS_SAE)
+ NODE_NAME_CASE(STRICT_FMAX)
+ NODE_NAME_CASE(FMIN)
+ NODE_NAME_CASE(FMINS)
+ NODE_NAME_CASE(FMIN_SAE)
+ NODE_NAME_CASE(FMINS_SAE)
+ NODE_NAME_CASE(STRICT_FMIN)
+ NODE_NAME_CASE(FMAXC)
+ NODE_NAME_CASE(FMINC)
+ NODE_NAME_CASE(FRSQRT)
+ NODE_NAME_CASE(FRCP)
+ NODE_NAME_CASE(EXTRQI)
+ NODE_NAME_CASE(INSERTQI)
+ NODE_NAME_CASE(TLSADDR)
+ NODE_NAME_CASE(TLSBASEADDR)
+ NODE_NAME_CASE(TLSCALL)
+ NODE_NAME_CASE(TLSDESC)
+ NODE_NAME_CASE(EH_SJLJ_SETJMP)
+ NODE_NAME_CASE(EH_SJLJ_LONGJMP)
+ NODE_NAME_CASE(EH_SJLJ_SETUP_DISPATCH)
+ NODE_NAME_CASE(EH_RETURN)
+ NODE_NAME_CASE(TC_RETURN)
+ NODE_NAME_CASE(FNSTCW16m)
+ NODE_NAME_CASE(FLDCW16m)
+ NODE_NAME_CASE(FNSTENVm)
+ NODE_NAME_CASE(FLDENVm)
+ NODE_NAME_CASE(LCMPXCHG_DAG)
+ NODE_NAME_CASE(LCMPXCHG8_DAG)
+ NODE_NAME_CASE(LCMPXCHG16_DAG)
+ NODE_NAME_CASE(LCMPXCHG16_SAVE_RBX_DAG)
+ NODE_NAME_CASE(LADD)
+ NODE_NAME_CASE(LSUB)
+ NODE_NAME_CASE(LOR)
+ NODE_NAME_CASE(LXOR)
+ NODE_NAME_CASE(LAND)
+ NODE_NAME_CASE(LBTS)
+ NODE_NAME_CASE(LBTC)
+ NODE_NAME_CASE(LBTR)
+ NODE_NAME_CASE(LBTS_RM)
+ NODE_NAME_CASE(LBTC_RM)
+ NODE_NAME_CASE(LBTR_RM)
+ NODE_NAME_CASE(AADD)
+ NODE_NAME_CASE(AOR)
+ NODE_NAME_CASE(AXOR)
+ NODE_NAME_CASE(AAND)
+ NODE_NAME_CASE(VZEXT_MOVL)
+ NODE_NAME_CASE(VZEXT_LOAD)
+ NODE_NAME_CASE(VEXTRACT_STORE)
+ NODE_NAME_CASE(VTRUNC)
+ NODE_NAME_CASE(VTRUNCS)
+ NODE_NAME_CASE(VTRUNCUS)
+ NODE_NAME_CASE(VMTRUNC)
+ NODE_NAME_CASE(VMTRUNCS)
+ NODE_NAME_CASE(VMTRUNCUS)
+ NODE_NAME_CASE(VTRUNCSTORES)
+ NODE_NAME_CASE(VTRUNCSTOREUS)
+ NODE_NAME_CASE(VMTRUNCSTORES)
+ NODE_NAME_CASE(VMTRUNCSTOREUS)
+ NODE_NAME_CASE(VFPEXT)
+ NODE_NAME_CASE(STRICT_VFPEXT)
+ NODE_NAME_CASE(VFPEXT_SAE)
+ NODE_NAME_CASE(VFPEXTS)
+ NODE_NAME_CASE(VFPEXTS_SAE)
+ NODE_NAME_CASE(VFPROUND)
+ NODE_NAME_CASE(VFPROUND2)
+ NODE_NAME_CASE(VFPROUND2_RND)
+ NODE_NAME_CASE(STRICT_VFPROUND)
+ NODE_NAME_CASE(VMFPROUND)
+ NODE_NAME_CASE(VFPROUND_RND)
+ NODE_NAME_CASE(VFPROUNDS)
+ NODE_NAME_CASE(VFPROUNDS_RND)
+ NODE_NAME_CASE(VSHLDQ)
+ NODE_NAME_CASE(VSRLDQ)
+ NODE_NAME_CASE(VSHL)
+ NODE_NAME_CASE(VSRL)
+ NODE_NAME_CASE(VSRA)
+ NODE_NAME_CASE(VSHLI)
+ NODE_NAME_CASE(VSRLI)
+ NODE_NAME_CASE(VSRAI)
+ NODE_NAME_CASE(VSHLV)
+ NODE_NAME_CASE(VSRLV)
+ NODE_NAME_CASE(VSRAV)
+ NODE_NAME_CASE(VROTLI)
+ NODE_NAME_CASE(VROTRI)
+ NODE_NAME_CASE(VPPERM)
+ NODE_NAME_CASE(CMPP)
+ NODE_NAME_CASE(STRICT_CMPP)
+ NODE_NAME_CASE(PCMPEQ)
+ NODE_NAME_CASE(PCMPGT)
+ NODE_NAME_CASE(PHMINPOS)
+ NODE_NAME_CASE(ADD)
+ NODE_NAME_CASE(SUB)
+ NODE_NAME_CASE(ADC)
+ NODE_NAME_CASE(SBB)
+ NODE_NAME_CASE(SMUL)
+ NODE_NAME_CASE(UMUL)
+ NODE_NAME_CASE(OR)
+ NODE_NAME_CASE(XOR)
+ NODE_NAME_CASE(AND)
+ NODE_NAME_CASE(BEXTR)
+ NODE_NAME_CASE(BEXTRI)
+ NODE_NAME_CASE(BZHI)
+ NODE_NAME_CASE(PDEP)
+ NODE_NAME_CASE(PEXT)
+ NODE_NAME_CASE(MUL_IMM)
+ NODE_NAME_CASE(MOVMSK)
+ NODE_NAME_CASE(PTEST)
+ NODE_NAME_CASE(TESTP)
+ NODE_NAME_CASE(KORTEST)
+ NODE_NAME_CASE(KTEST)
+ NODE_NAME_CASE(KADD)
+ NODE_NAME_CASE(KSHIFTL)
+ NODE_NAME_CASE(KSHIFTR)
+ NODE_NAME_CASE(PACKSS)
+ NODE_NAME_CASE(PACKUS)
+ NODE_NAME_CASE(PALIGNR)
+ NODE_NAME_CASE(VALIGN)
+ NODE_NAME_CASE(VSHLD)
+ NODE_NAME_CASE(VSHRD)
+ NODE_NAME_CASE(PSHUFD)
+ NODE_NAME_CASE(PSHUFHW)
+ NODE_NAME_CASE(PSHUFLW)
+ NODE_NAME_CASE(SHUFP)
+ NODE_NAME_CASE(SHUF128)
+ NODE_NAME_CASE(MOVLHPS)
+ NODE_NAME_CASE(MOVHLPS)
+ NODE_NAME_CASE(MOVDDUP)
+ NODE_NAME_CASE(MOVSHDUP)
+ NODE_NAME_CASE(MOVSLDUP)
+ NODE_NAME_CASE(MOVSD)
+ NODE_NAME_CASE(MOVSS)
+ NODE_NAME_CASE(MOVSH)
+ NODE_NAME_CASE(UNPCKL)
+ NODE_NAME_CASE(UNPCKH)
+ NODE_NAME_CASE(VBROADCAST)
+ NODE_NAME_CASE(VBROADCAST_LOAD)
+ NODE_NAME_CASE(VBROADCASTM)
+ NODE_NAME_CASE(SUBV_BROADCAST_LOAD)
+ NODE_NAME_CASE(VPERMILPV)
+ NODE_NAME_CASE(VPERMILPI)
+ NODE_NAME_CASE(VPERM2X128)
+ NODE_NAME_CASE(VPERMV)
+ NODE_NAME_CASE(VPERMV3)
+ NODE_NAME_CASE(VPERMI)
+ NODE_NAME_CASE(VPTERNLOG)
+ NODE_NAME_CASE(FP_TO_SINT_SAT)
+ NODE_NAME_CASE(FP_TO_UINT_SAT)
+ NODE_NAME_CASE(VFIXUPIMM)
+ NODE_NAME_CASE(VFIXUPIMM_SAE)
+ NODE_NAME_CASE(VFIXUPIMMS)
+ NODE_NAME_CASE(VFIXUPIMMS_SAE)
+ NODE_NAME_CASE(VRANGE)
+ NODE_NAME_CASE(VRANGE_SAE)
+ NODE_NAME_CASE(VRANGES)
+ NODE_NAME_CASE(VRANGES_SAE)
+ NODE_NAME_CASE(PMULUDQ)
+ NODE_NAME_CASE(PMULDQ)
+ NODE_NAME_CASE(PSADBW)
+ NODE_NAME_CASE(DBPSADBW)
+ NODE_NAME_CASE(VASTART_SAVE_XMM_REGS)
+ NODE_NAME_CASE(VAARG_64)
+ NODE_NAME_CASE(VAARG_X32)
+ NODE_NAME_CASE(DYN_ALLOCA)
+ NODE_NAME_CASE(MFENCE)
+ NODE_NAME_CASE(SEG_ALLOCA)
+ NODE_NAME_CASE(PROBED_ALLOCA)
+ NODE_NAME_CASE(RDRAND)
+ NODE_NAME_CASE(RDSEED)
+ NODE_NAME_CASE(RDPKRU)
+ NODE_NAME_CASE(WRPKRU)
+ NODE_NAME_CASE(VPMADDUBSW)
+ NODE_NAME_CASE(VPMADDWD)
+ NODE_NAME_CASE(VPSHA)
+ NODE_NAME_CASE(VPSHL)
+ NODE_NAME_CASE(VPCOM)
+ NODE_NAME_CASE(VPCOMU)
+ NODE_NAME_CASE(VPERMIL2)
+ NODE_NAME_CASE(FMSUB)
+ NODE_NAME_CASE(STRICT_FMSUB)
+ NODE_NAME_CASE(FNMADD)
+ NODE_NAME_CASE(STRICT_FNMADD)
+ NODE_NAME_CASE(FNMSUB)
+ NODE_NAME_CASE(STRICT_FNMSUB)
+ NODE_NAME_CASE(FMADDSUB)
+ NODE_NAME_CASE(FMSUBADD)
+ NODE_NAME_CASE(FMADD_RND)
+ NODE_NAME_CASE(FNMADD_RND)
+ NODE_NAME_CASE(FMSUB_RND)
+ NODE_NAME_CASE(FNMSUB_RND)
+ NODE_NAME_CASE(FMADDSUB_RND)
+ NODE_NAME_CASE(FMSUBADD_RND)
+ NODE_NAME_CASE(VFMADDC)
+ NODE_NAME_CASE(VFMADDC_RND)
+ NODE_NAME_CASE(VFCMADDC)
+ NODE_NAME_CASE(VFCMADDC_RND)
+ NODE_NAME_CASE(VFMULC)
+ NODE_NAME_CASE(VFMULC_RND)
+ NODE_NAME_CASE(VFCMULC)
+ NODE_NAME_CASE(VFCMULC_RND)
+ NODE_NAME_CASE(VFMULCSH)
+ NODE_NAME_CASE(VFMULCSH_RND)
+ NODE_NAME_CASE(VFCMULCSH)
+ NODE_NAME_CASE(VFCMULCSH_RND)
+ NODE_NAME_CASE(VFMADDCSH)
+ NODE_NAME_CASE(VFMADDCSH_RND)
+ NODE_NAME_CASE(VFCMADDCSH)
+ NODE_NAME_CASE(VFCMADDCSH_RND)
+ NODE_NAME_CASE(VPMADD52H)
+ NODE_NAME_CASE(VPMADD52L)
+ NODE_NAME_CASE(VRNDSCALE)
+ NODE_NAME_CASE(STRICT_VRNDSCALE)
+ NODE_NAME_CASE(VRNDSCALE_SAE)
+ NODE_NAME_CASE(VRNDSCALES)
+ NODE_NAME_CASE(VRNDSCALES_SAE)
+ NODE_NAME_CASE(VREDUCE)
+ NODE_NAME_CASE(VREDUCE_SAE)
+ NODE_NAME_CASE(VREDUCES)
+ NODE_NAME_CASE(VREDUCES_SAE)
+ NODE_NAME_CASE(VGETMANT)
+ NODE_NAME_CASE(VGETMANT_SAE)
+ NODE_NAME_CASE(VGETMANTS)
+ NODE_NAME_CASE(VGETMANTS_SAE)
+ NODE_NAME_CASE(PCMPESTR)
+ NODE_NAME_CASE(PCMPISTR)
+ NODE_NAME_CASE(XTEST)
+ NODE_NAME_CASE(COMPRESS)
+ NODE_NAME_CASE(EXPAND)
+ NODE_NAME_CASE(SELECTS)
+ NODE_NAME_CASE(ADDSUB)
+ NODE_NAME_CASE(RCP14)
+ NODE_NAME_CASE(RCP14S)
+ NODE_NAME_CASE(RSQRT14)
+ NODE_NAME_CASE(RSQRT14S)
+ NODE_NAME_CASE(FADD_RND)
+ NODE_NAME_CASE(FADDS)
+ NODE_NAME_CASE(FADDS_RND)
+ NODE_NAME_CASE(FSUB_RND)
+ NODE_NAME_CASE(FSUBS)
+ NODE_NAME_CASE(FSUBS_RND)
+ NODE_NAME_CASE(FMUL_RND)
+ NODE_NAME_CASE(FMULS)
+ NODE_NAME_CASE(FMULS_RND)
+ NODE_NAME_CASE(FDIV_RND)
+ NODE_NAME_CASE(FDIVS)
+ NODE_NAME_CASE(FDIVS_RND)
+ NODE_NAME_CASE(FSQRT_RND)
+ NODE_NAME_CASE(FSQRTS)
+ NODE_NAME_CASE(FSQRTS_RND)
+ NODE_NAME_CASE(FGETEXP)
+ NODE_NAME_CASE(FGETEXP_SAE)
+ NODE_NAME_CASE(FGETEXPS)
+ NODE_NAME_CASE(FGETEXPS_SAE)
+ NODE_NAME_CASE(SCALEF)
+ NODE_NAME_CASE(SCALEF_RND)
+ NODE_NAME_CASE(SCALEFS)
+ NODE_NAME_CASE(SCALEFS_RND)
+ NODE_NAME_CASE(MULHRS)
+ NODE_NAME_CASE(SINT_TO_FP_RND)
+ NODE_NAME_CASE(UINT_TO_FP_RND)
+ NODE_NAME_CASE(CVTTP2SI)
+ NODE_NAME_CASE(CVTTP2UI)
+ NODE_NAME_CASE(STRICT_CVTTP2SI)
+ NODE_NAME_CASE(STRICT_CVTTP2UI)
+ NODE_NAME_CASE(MCVTTP2SI)
+ NODE_NAME_CASE(MCVTTP2UI)
+ NODE_NAME_CASE(CVTTP2SI_SAE)
+ NODE_NAME_CASE(CVTTP2UI_SAE)
+ NODE_NAME_CASE(CVTTS2SI)
+ NODE_NAME_CASE(CVTTS2UI)
+ NODE_NAME_CASE(CVTTS2SI_SAE)
+ NODE_NAME_CASE(CVTTS2UI_SAE)
+ NODE_NAME_CASE(CVTSI2P)
+ NODE_NAME_CASE(CVTUI2P)
+ NODE_NAME_CASE(STRICT_CVTSI2P)
+ NODE_NAME_CASE(STRICT_CVTUI2P)
+ NODE_NAME_CASE(MCVTSI2P)
+ NODE_NAME_CASE(MCVTUI2P)
+ NODE_NAME_CASE(VFPCLASS)
+ NODE_NAME_CASE(VFPCLASSS)
+ NODE_NAME_CASE(MULTISHIFT)
+ NODE_NAME_CASE(SCALAR_SINT_TO_FP)
+ NODE_NAME_CASE(SCALAR_SINT_TO_FP_RND)
+ NODE_NAME_CASE(SCALAR_UINT_TO_FP)
+ NODE_NAME_CASE(SCALAR_UINT_TO_FP_RND)
+ NODE_NAME_CASE(CVTPS2PH)
+ NODE_NAME_CASE(STRICT_CVTPS2PH)
+ NODE_NAME_CASE(CVTPS2PH_SAE)
+ NODE_NAME_CASE(MCVTPS2PH)
+ NODE_NAME_CASE(MCVTPS2PH_SAE)
+ NODE_NAME_CASE(CVTPH2PS)
+ NODE_NAME_CASE(STRICT_CVTPH2PS)
+ NODE_NAME_CASE(CVTPH2PS_SAE)
+ NODE_NAME_CASE(CVTP2SI)
+ NODE_NAME_CASE(CVTP2UI)
+ NODE_NAME_CASE(MCVTP2SI)
+ NODE_NAME_CASE(MCVTP2UI)
+ NODE_NAME_CASE(CVTP2SI_RND)
+ NODE_NAME_CASE(CVTP2UI_RND)
+ NODE_NAME_CASE(CVTS2SI)
+ NODE_NAME_CASE(CVTS2UI)
+ NODE_NAME_CASE(CVTS2SI_RND)
+ NODE_NAME_CASE(CVTS2UI_RND)
+ NODE_NAME_CASE(CVTNEPS2BF16)
+ NODE_NAME_CASE(MCVTNEPS2BF16)
+ NODE_NAME_CASE(DPBF16PS)
+ NODE_NAME_CASE(DPFP16PS)
+ NODE_NAME_CASE(MPSADBW)
+ NODE_NAME_CASE(LWPINS)
+ NODE_NAME_CASE(MGATHER)
+ NODE_NAME_CASE(MSCATTER)
+ NODE_NAME_CASE(VPDPBUSD)
+ NODE_NAME_CASE(VPDPBUSDS)
+ NODE_NAME_CASE(VPDPWSSD)
+ NODE_NAME_CASE(VPDPWSSDS)
+ NODE_NAME_CASE(VPSHUFBITQMB)
+ NODE_NAME_CASE(GF2P8MULB)
+ NODE_NAME_CASE(GF2P8AFFINEQB)
+ NODE_NAME_CASE(GF2P8AFFINEINVQB)
+ NODE_NAME_CASE(NT_CALL)
+ NODE_NAME_CASE(NT_BRIND)
+ NODE_NAME_CASE(UMWAIT)
+ NODE_NAME_CASE(TPAUSE)
+ NODE_NAME_CASE(ENQCMD)
+ NODE_NAME_CASE(ENQCMDS)
+ NODE_NAME_CASE(VP2INTERSECT)
+ NODE_NAME_CASE(VPDPBSUD)
+ NODE_NAME_CASE(VPDPBSUDS)
+ NODE_NAME_CASE(VPDPBUUD)
+ NODE_NAME_CASE(VPDPBUUDS)
+ NODE_NAME_CASE(VPDPBSSD)
+ NODE_NAME_CASE(VPDPBSSDS)
+ NODE_NAME_CASE(VPDPWSUD)
+ NODE_NAME_CASE(VPDPWSUDS)
+ NODE_NAME_CASE(VPDPWUSD)
+ NODE_NAME_CASE(VPDPWUSDS)
+ NODE_NAME_CASE(VPDPWUUD)
+ NODE_NAME_CASE(VPDPWUUDS)
+ NODE_NAME_CASE(VMINMAX)
+ NODE_NAME_CASE(VMINMAX_SAE)
+ NODE_NAME_CASE(VMINMAXS)
+ NODE_NAME_CASE(VMINMAXS_SAE)
+ NODE_NAME_CASE(CVTP2IBS)
+ NODE_NAME_CASE(CVTP2IUBS)
+ NODE_NAME_CASE(CVTP2IBS_RND)
+ NODE_NAME_CASE(CVTP2IUBS_RND)
+ NODE_NAME_CASE(CVTTP2IBS)
+ NODE_NAME_CASE(CVTTP2IUBS)
+ NODE_NAME_CASE(CVTTP2IBS_SAE)
+ NODE_NAME_CASE(CVTTP2IUBS_SAE)
+ NODE_NAME_CASE(VCVT2PH2BF8)
+ NODE_NAME_CASE(VCVT2PH2BF8S)
+ NODE_NAME_CASE(VCVT2PH2HF8)
+ NODE_NAME_CASE(VCVT2PH2HF8S)
+ NODE_NAME_CASE(VCVTBIASPH2BF8)
+ NODE_NAME_CASE(VCVTBIASPH2BF8S)
+ NODE_NAME_CASE(VCVTBIASPH2HF8)
+ NODE_NAME_CASE(VCVTBIASPH2HF8S)
+ NODE_NAME_CASE(VCVTPH2BF8)
+ NODE_NAME_CASE(VCVTPH2BF8S)
+ NODE_NAME_CASE(VCVTPH2HF8)
+ NODE_NAME_CASE(VCVTPH2HF8S)
+ NODE_NAME_CASE(VMCVTBIASPH2BF8)
+ NODE_NAME_CASE(VMCVTBIASPH2BF8S)
+ NODE_NAME_CASE(VMCVTBIASPH2HF8)
+ NODE_NAME_CASE(VMCVTBIASPH2HF8S)
+ NODE_NAME_CASE(VMCVTPH2BF8)
+ NODE_NAME_CASE(VMCVTPH2BF8S)
+ NODE_NAME_CASE(VMCVTPH2HF8)
+ NODE_NAME_CASE(VMCVTPH2HF8S)
+ NODE_NAME_CASE(VCVTHF82PH)
+ NODE_NAME_CASE(AESENC128KL)
+ NODE_NAME_CASE(AESDEC128KL)
+ NODE_NAME_CASE(AESENC256KL)
+ NODE_NAME_CASE(AESDEC256KL)
+ NODE_NAME_CASE(AESENCWIDE128KL)
+ NODE_NAME_CASE(AESDECWIDE128KL)
+ NODE_NAME_CASE(AESENCWIDE256KL)
+ NODE_NAME_CASE(AESDECWIDE256KL)
+ NODE_NAME_CASE(CMPCCXADD)
+ NODE_NAME_CASE(TESTUI)
+ NODE_NAME_CASE(FP80_ADD)
+ NODE_NAME_CASE(STRICT_FP80_ADD)
+ NODE_NAME_CASE(CCMP)
+ NODE_NAME_CASE(CTEST)
+ NODE_NAME_CASE(CLOAD)
+ NODE_NAME_CASE(CSTORE)
+ NODE_NAME_CASE(CVTTS2SIS)
+ NODE_NAME_CASE(CVTTS2UIS)
+ NODE_NAME_CASE(CVTTS2SIS_SAE)
+ NODE_NAME_CASE(CVTTS2UIS_SAE)
+ NODE_NAME_CASE(CVTTP2SIS)
+ NODE_NAME_CASE(MCVTTP2SIS)
+ NODE_NAME_CASE(CVTTP2UIS_SAE)
+ NODE_NAME_CASE(CVTTP2SIS_SAE)
+ NODE_NAME_CASE(CVTTP2UIS)
+ NODE_NAME_CASE(MCVTTP2UIS)
+ NODE_NAME_CASE(POP_FROM_X87_REG)
}
return nullptr;
#undef NODE_NAME_CASE
@@ -35585,7 +35598,7 @@ bool X86TargetLowering::isLegalAddressingMode(const DataLayout &DL,
if (AM.HasBaseReg)
return false;
break;
- default: // Other stuff never works.
+ default: // Other stuff never works.
return false;
}
@@ -35690,12 +35703,13 @@ bool X86TargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
if (Val.getOpcode() != ISD::LOAD)
return false;
- if (!VT1.isSimple() || !VT1.isInteger() ||
- !VT2.isSimple() || !VT2.isInteger())
+ if (!VT1.isSimple() || !VT1.isInteger() || !VT2.isSimple() ||
+ !VT2.isInteger())
return false;
switch (VT1.getSimpleVT().SimpleTy) {
- default: break;
+ default:
+ break;
case MVT::i8:
case MVT::i16:
case MVT::i32:
@@ -35926,8 +35940,10 @@ static MachineBasicBlock *emitXBegin(MachineInstr &MI, MachineBasicBlock *MBB,
// sinkMBB:
// DstReg := phi(mainDstReg/mainBB, fallDstReg/fallBB)
BuildMI(*sinkMBB, sinkMBB->begin(), MIMD, TII->get(X86::PHI), DstReg)
- .addReg(mainDstReg).addMBB(mainMBB)
- .addReg(fallDstReg).addMBB(fallMBB);
+ .addReg(mainDstReg)
+ .addMBB(mainMBB)
+ .addReg(fallDstReg)
+ .addMBB(fallMBB);
MI.eraseFromParent();
return sinkMBB;
@@ -35993,8 +36009,8 @@ X86TargetLowering::EmitVAARGWithCustomInserter(MachineInstr &MI,
unsigned TotalNumXMMRegs = 8;
bool UseGPOffset = (ArgMode == 1);
bool UseFPOffset = (ArgMode == 2);
- unsigned MaxOffset = TotalNumIntRegs * 8 +
- (UseFPOffset ? TotalNumXMMRegs * 16 : 0);
+ unsigned MaxOffset =
+ TotalNumIntRegs * 8 + (UseFPOffset ? TotalNumXMMRegs * 16 : 0);
/* Align ArgSize to a multiple of 8 */
unsigned ArgSizeA8 = (ArgSize + 7) & ~7;
@@ -36072,13 +36088,14 @@ X86TargetLowering::EmitVAARGWithCustomInserter(MachineInstr &MI,
// Check if there is enough room left to pull this argument.
BuildMI(thisMBB, MIMD, TII->get(X86::CMP32ri))
- .addReg(OffsetReg)
- .addImm(MaxOffset + 8 - ArgSizeA8);
+ .addReg(OffsetReg)
+ .addImm(MaxOffset + 8 - ArgSizeA8);
// Branch to "overflowMBB" if offset >= max
// Fall through to "offsetMBB" otherwise
BuildMI(thisMBB, MIMD, TII->get(X86::JCC_1))
- .addMBB(overflowMBB).addImm(X86::COND_AE);
+ .addMBB(overflowMBB)
+ .addImm(X86::COND_AE);
}
// In offsetMBB, emit code to use the reg_save_area.
@@ -36120,8 +36137,8 @@ X86TargetLowering::EmitVAARGWithCustomInserter(MachineInstr &MI,
// Compute the offset for the next argument
Register NextOffsetReg = MRI.createVirtualRegister(OffsetRegClass);
BuildMI(offsetMBB, MIMD, TII->get(X86::ADD32ri), NextOffsetReg)
- .addReg(OffsetReg)
- .addImm(UseFPOffset ? 16 : 8);
+ .addReg(OffsetReg)
+ .addImm(UseFPOffset ? 16 : 8);
// Store it back into the va_list.
BuildMI(offsetMBB, MIMD, TII->get(X86::MOV32mr))
@@ -36134,8 +36151,7 @@ X86TargetLowering::EmitVAARGWithCustomInserter(MachineInstr &MI,
.setMemRefs(StoreOnlyMMO);
// Jump to endMBB
- BuildMI(offsetMBB, MIMD, TII->get(X86::JMP_1))
- .addMBB(endMBB);
+ BuildMI(offsetMBB, MIMD, TII->get(X86::JMP_1)).addMBB(endMBB);
}
//
@@ -36176,7 +36192,7 @@ X86TargetLowering::EmitVAARGWithCustomInserter(MachineInstr &MI,
.addImm(~(uint64_t)(Alignment.value() - 1));
} else {
BuildMI(overflowMBB, MIMD, TII->get(TargetOpcode::COPY), OverflowDestReg)
- .addReg(OverflowAddrReg);
+ .addReg(OverflowAddrReg);
}
// Compute the next overflow address after this argument.
@@ -36202,10 +36218,11 @@ X86TargetLowering::EmitVAARGWithCustomInserter(MachineInstr &MI,
// If we branched, emit the PHI to the front of endMBB.
if (offsetMBB) {
- BuildMI(*endMBB, endMBB->begin(), MIMD,
- TII->get(X86::PHI), DestReg)
- .addReg(OffsetDestReg).addMBB(offsetMBB)
- .addReg(OverflowDestReg).addMBB(overflowMBB);
+ BuildMI(*endMBB, endMBB->begin(), MIMD, TII->get(X86::PHI), DestReg)
+ .addReg(OffsetDestReg)
+ .addMBB(offsetMBB)
+ .addReg(OverflowDestReg)
+ .addMBB(overflowMBB);
}
// Erase the pseudo instruction
@@ -36220,8 +36237,8 @@ X86TargetLowering::EmitVAARGWithCustomInserter(MachineInstr &MI,
// kill marker, and set it if it should. Returns the correct kill
// marker value.
static bool checkAndUpdateEFLAGSKill(MachineBasicBlock::iterator SelectItr,
- MachineBasicBlock* BB,
- const TargetRegisterInfo* TRI) {
+ MachineBasicBlock *BB,
+ const TargetRegisterInfo *TRI) {
if (isPhysRegUsedAfter(X86::EFLAGS, SelectItr))
return false;
@@ -36688,11 +36705,21 @@ X86TargetLowering::EmitLoweredProbedAlloca(MachineInstr &MI,
//
// + ---- <- ------------ <- ------------- <- ------------ +
// | |
- // [free probe] -> [page alloc] -> [alloc probe] -> [tail alloc] + -> [dyn probe] -> [page alloc] -> [dyn probe] -> [tail alloc] +
- // | |
- // + <- ----------- <- ------------ <- ----------- <- ------------ +
+ // [free probe] -> [page alloc] -> [alloc probe] -> [tail alloc] + -> [dyn probe] -> [page alloc] -> [dyn probe] -> [tail alloc] +
+ // | |
+ // + <- ----------- <- ------------ <- ----------- <- ------------ +
//
- // The property we want to enforce is to never have more than [page alloc] between two probes.
+ // The property we want to enforce is to never have more than [page alloc]
+ // between two probes.
const unsigned XORMIOpc =
TFI.Uses64BitFramePtr ? X86::XOR64mi32 : X86::XOR32mi;
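// Hedged illustration (assumptions: 4 KiB pages, a downward-growing stack,
// plain C++ rather than the MIR emitted below) of the invariant stated above:
// a large dynamic allocation is touched one page at a time, so at most one
// page is ever allocated between two probes.
#include <cstddef>

void probeStackPages(volatile char *StackTop, std::size_t Bytes,
                     std::size_t PageSize = 4096) {
  // Touch each page of the new allocation in order, then probe the tail.
  for (std::size_t Off = PageSize; Off <= Bytes; Off += PageSize)
    StackTop[-(std::ptrdiff_t)Off] = 0;
  if (Bytes % PageSize != 0)
    StackTop[-(std::ptrdiff_t)Bytes] = 0;
}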
@@ -36784,56 +36811,61 @@ X86TargetLowering::EmitLoweredSegAlloca(MachineInstr &MI,
// Add code to the main basic block to check if the stack limit has been hit,
// and if so, jump to mallocMBB otherwise to bumpMBB.
BuildMI(BB, MIMD, TII->get(TargetOpcode::COPY), tmpSPVReg).addReg(physSPReg);
- BuildMI(BB, MIMD, TII->get(IsLP64 ? X86::SUB64rr:X86::SUB32rr), SPLimitVReg)
- .addReg(tmpSPVReg).addReg(sizeVReg);
- BuildMI(BB, MIMD, TII->get(IsLP64 ? X86::CMP64mr:X86::CMP32mr))
- .addReg(0).addImm(1).addReg(0).addImm(TlsOffset).addReg(TlsReg)
- .addReg(SPLimitVReg);
+ BuildMI(BB, MIMD, TII->get(IsLP64 ? X86::SUB64rr : X86::SUB32rr), SPLimitVReg)
+ .addReg(tmpSPVReg)
+ .addReg(sizeVReg);
+ BuildMI(BB, MIMD, TII->get(IsLP64 ? X86::CMP64mr : X86::CMP32mr))
+ .addReg(0)
+ .addImm(1)
+ .addReg(0)
+ .addImm(TlsOffset)
+ .addReg(TlsReg)
+ .addReg(SPLimitVReg);
BuildMI(BB, MIMD, TII->get(X86::JCC_1)).addMBB(mallocMBB).addImm(X86::COND_G);
// bumpMBB simply decreases the stack pointer, since we know the current
// stacklet has enough space.
BuildMI(bumpMBB, MIMD, TII->get(TargetOpcode::COPY), physSPReg)
- .addReg(SPLimitVReg);
+ .addReg(SPLimitVReg);
BuildMI(bumpMBB, MIMD, TII->get(TargetOpcode::COPY), bumpSPPtrVReg)
- .addReg(SPLimitVReg);
+ .addReg(SPLimitVReg);
BuildMI(bumpMBB, MIMD, TII->get(X86::JMP_1)).addMBB(continueMBB);
// Calls into a routine in libgcc to allocate more space from the heap.
const uint32_t *RegMask =
Subtarget.getRegisterInfo()->getCallPreservedMask(*MF, CallingConv::C);
if (IsLP64) {
- BuildMI(mallocMBB, MIMD, TII->get(X86::MOV64rr), X86::RDI)
- .addReg(sizeVReg);
+ BuildMI(mallocMBB, MIMD, TII->get(X86::MOV64rr), X86::RDI).addReg(sizeVReg);
BuildMI(mallocMBB, MIMD, TII->get(X86::CALL64pcrel32))
- .addExternalSymbol("__morestack_allocate_stack_space")
- .addRegMask(RegMask)
- .addReg(X86::RDI, RegState::Implicit)
- .addReg(X86::RAX, RegState::ImplicitDefine);
+ .addExternalSymbol("__morestack_allocate_stack_space")
+ .addRegMask(RegMask)
+ .addReg(X86::RDI, RegState::Implicit)
+ .addReg(X86::RAX, RegState::ImplicitDefine);
} else if (Is64Bit) {
- BuildMI(mallocMBB, MIMD, TII->get(X86::MOV32rr), X86::EDI)
- .addReg(sizeVReg);
+ BuildMI(mallocMBB, MIMD, TII->get(X86::MOV32rr), X86::EDI).addReg(sizeVReg);
BuildMI(mallocMBB, MIMD, TII->get(X86::CALL64pcrel32))
- .addExternalSymbol("__morestack_allocate_stack_space")
- .addRegMask(RegMask)
- .addReg(X86::EDI, RegState::Implicit)
- .addReg(X86::EAX, RegState::ImplicitDefine);
+ .addExternalSymbol("__morestack_allocate_stack_space")
+ .addRegMask(RegMask)
+ .addReg(X86::EDI, RegState::Implicit)
+ .addReg(X86::EAX, RegState::ImplicitDefine);
} else {
- BuildMI(mallocMBB, MIMD, TII->get(X86::SUB32ri), physSPReg).addReg(physSPReg)
- .addImm(12);
+ BuildMI(mallocMBB, MIMD, TII->get(X86::SUB32ri), physSPReg)
+ .addReg(physSPReg)
+ .addImm(12);
BuildMI(mallocMBB, MIMD, TII->get(X86::PUSH32r)).addReg(sizeVReg);
BuildMI(mallocMBB, MIMD, TII->get(X86::CALLpcrel32))
- .addExternalSymbol("__morestack_allocate_stack_space")
- .addRegMask(RegMask)
- .addReg(X86::EAX, RegState::ImplicitDefine);
+ .addExternalSymbol("__morestack_allocate_stack_space")
+ .addRegMask(RegMask)
+ .addReg(X86::EAX, RegState::ImplicitDefine);
}
if (!Is64Bit)
- BuildMI(mallocMBB, MIMD, TII->get(X86::ADD32ri), physSPReg).addReg(physSPReg)
- .addImm(16);
+ BuildMI(mallocMBB, MIMD, TII->get(X86::ADD32ri), physSPReg)
+ .addReg(physSPReg)
+ .addImm(16);
BuildMI(mallocMBB, MIMD, TII->get(TargetOpcode::COPY), mallocPtrVReg)
- .addReg(IsLP64 ? X86::RAX : X86::EAX);
+ .addReg(IsLP64 ? X86::RAX : X86::EAX);
BuildMI(mallocMBB, MIMD, TII->get(X86::JMP_1)).addMBB(continueMBB);
// Set up the CFG correctly.
@@ -36888,7 +36920,8 @@ X86TargetLowering::EmitLoweredCatchRet(MachineInstr &MI,
RestoreMBB->setIsEHPad(true);
auto RestoreMBBI = RestoreMBB->begin();
- BuildMI(*RestoreMBB, RestoreMBBI, MIMD, TII.get(X86::JMP_4)).addMBB(TargetMBB);
+ BuildMI(*RestoreMBB, RestoreMBBI, MIMD, TII.get(X86::JMP_4))
+ .addMBB(TargetMBB);
return BB;
}
@@ -36910,9 +36943,10 @@ X86TargetLowering::EmitLoweredTLSCall(MachineInstr &MI,
// FIXME: The 32-bit calls have non-standard calling conventions. Use a
// proper register mask.
const uint32_t *RegMask =
- Subtarget.is64Bit() ?
- Subtarget.getRegisterInfo()->getDarwinTLSCallPreservedMask() :
- Subtarget.getRegisterInfo()->getCallPreservedMask(*F, CallingConv::C);
+ Subtarget.is64Bit()
+ ? Subtarget.getRegisterInfo()->getDarwinTLSCallPreservedMask()
+ : Subtarget.getRegisterInfo()->getCallPreservedMask(*F,
+ CallingConv::C);
if (Subtarget.is64Bit()) {
MachineInstrBuilder MIB =
BuildMI(*BB, MI, MIMD, TII->get(X86::MOV64rm), X86::RDI)
@@ -37168,8 +37202,7 @@ X86TargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
MemOpndSlot = CurOp;
MVT PVT = getPointerTy(MF->getDataLayout());
- assert((PVT == MVT::i64 || PVT == MVT::i32) &&
- "Invalid Pointer Size!");
+ assert((PVT == MVT::i64 || PVT == MVT::i32) && "Invalid Pointer Size!");
// For v = setjmp(buf), we generate
//
@@ -37217,19 +37250,19 @@ X86TargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
LabelReg = MRI.createVirtualRegister(PtrRC);
if (Subtarget.is64Bit()) {
MIB = BuildMI(*thisMBB, MI, MIMD, TII->get(X86::LEA64r), LabelReg)
- .addReg(X86::RIP)
- .addImm(0)
- .addReg(0)
- .addMBB(restoreMBB)
- .addReg(0);
+ .addReg(X86::RIP)
+ .addImm(0)
+ .addReg(0)
+ .addMBB(restoreMBB)
+ .addReg(0);
} else {
- const X86InstrInfo *XII = static_cast<const X86InstrInfo*>(TII);
+ const X86InstrInfo *XII = static_cast<const X86InstrInfo *>(TII);
MIB = BuildMI(*thisMBB, MI, MIMD, TII->get(X86::LEA32r), LabelReg)
- .addReg(XII->getGlobalBaseReg(MF))
- .addImm(0)
- .addReg(0)
- .addMBB(restoreMBB, Subtarget.classifyBlockAddressReference())
- .addReg(0);
+ .addReg(XII->getGlobalBaseReg(MF))
+ .addImm(0)
+ .addReg(0)
+ .addMBB(restoreMBB, Subtarget.classifyBlockAddressReference())
+ .addReg(0);
}
} else
PtrStoreOpc = (PVT == MVT::i64) ? X86::MOV64mi32 : X86::MOV32mi;
@@ -37253,7 +37286,7 @@ X86TargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
// Setup
MIB = BuildMI(*thisMBB, MI, MIMD, TII->get(X86::EH_SjLj_Setup))
- .addMBB(restoreMBB);
+ .addMBB(restoreMBB);
const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
MIB.addRegMask(RegInfo->getNoPreservedMask());
@@ -37280,9 +37313,9 @@ X86TargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
Register FramePtr = RegInfo->getFrameRegister(*MF);
Register BasePtr = RegInfo->getBaseRegister();
unsigned Opm = Uses64BitFramePtr ? X86::MOV64rm : X86::MOV32rm;
- addRegOffset(BuildMI(restoreMBB, MIMD, TII->get(Opm), BasePtr),
- FramePtr, true, X86FI->getRestoreBasePointerOffset())
- .setMIFlag(MachineInstr::FrameSetup);
+ addRegOffset(BuildMI(restoreMBB, MIMD, TII->get(Opm), BasePtr), FramePtr,
+ true, X86FI->getRestoreBasePointerOffset())
+ .setMIFlag(MachineInstr::FrameSetup);
}
BuildMI(restoreMBB, MIMD, TII->get(X86::MOV32ri), restoreDstReg).addImm(1);
BuildMI(restoreMBB, MIMD, TII->get(X86::JMP_1)).addMBB(sinkMBB);
@@ -37365,9 +37398,9 @@ X86TargetLowering::emitLongJmpShadowStackFix(MachineInstr &MI,
if (PVT == MVT::i64) {
Register TmpZReg = MRI.createVirtualRegister(PtrRC);
BuildMI(checkSspMBB, MIMD, TII->get(X86::SUBREG_TO_REG), TmpZReg)
- .addImm(0)
- .addReg(ZReg)
- .addImm(X86::sub_32bit);
+ .addImm(0)
+ .addReg(ZReg)
+ .addImm(X86::sub_32bit);
ZReg = TmpZReg;
}
@@ -37498,11 +37531,10 @@ X86TargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
SmallVector<MachineMemOperand *, 2> MMOs(MI.memoperands());
MVT PVT = getPointerTy(MF->getDataLayout());
- assert((PVT == MVT::i64 || PVT == MVT::i32) &&
- "Invalid Pointer Size!");
+ assert((PVT == MVT::i64 || PVT == MVT::i32) && "Invalid Pointer Size!");
const TargetRegisterClass *RC =
- (PVT == MVT::i64) ? &X86::GR64RegClass : &X86::GR32RegClass;
+ (PVT == MVT::i64) ? &X86::GR64RegClass : &X86::GR32RegClass;
Register Tmp = MRI.createVirtualRegister(RC);
// Since FP is only updated here but NOT referenced, it's treated as GPR.
const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
@@ -37883,7 +37915,7 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
const MIMetadata MIMD(MI);
auto TMMImmToTMMReg = [](unsigned Imm) {
- assert (Imm < 8 && "Illegal tmm index");
+ assert(Imm < 8 && "Illegal tmm index");
return X86::TMM0 + Imm;
};
switch (MI.getOpcode()) {
@@ -38019,29 +38051,30 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
// OR 0b11 into bit 10 and 11. 0b11 is the encoding for round toward zero.
Register NewCW = MF->getRegInfo().createVirtualRegister(&X86::GR32RegClass);
BuildMI(*BB, MI, MIMD, TII->get(X86::OR32ri), NewCW)
- .addReg(OldCW, RegState::Kill).addImm(0xC00);
+ .addReg(OldCW, RegState::Kill)
+ .addImm(0xC00);
// Extract to 16 bits.
Register NewCW16 =
MF->getRegInfo().createVirtualRegister(&X86::GR16RegClass);
BuildMI(*BB, MI, MIMD, TII->get(TargetOpcode::COPY), NewCW16)
- .addReg(NewCW, RegState::Kill, X86::sub_16bit);
+ .addReg(NewCW, RegState::Kill, X86::sub_16bit);
// Prepare memory for FLDCW.
int NewCWFrameIdx =
MF->getFrameInfo().CreateStackObject(2, Align(2), false);
addFrameReference(BuildMI(*BB, MI, MIMD, TII->get(X86::MOV16mr)),
NewCWFrameIdx)
- .addReg(NewCW16, RegState::Kill);
+ .addReg(NewCW16, RegState::Kill);
// Reload the modified control word now...
- addFrameReference(BuildMI(*BB, MI, MIMD,
- TII->get(X86::FLDCW16m)), NewCWFrameIdx);
+ addFrameReference(BuildMI(*BB, MI, MIMD, TII->get(X86::FLDCW16m)),
+ NewCWFrameIdx);
// Get the X86 opcode to use.
unsigned Opc;
switch (MI.getOpcode()) {
- // clang-format off
+ // clang-format off
default: llvm_unreachable("illegal opcode!");
case X86::FP32_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m32; break;
case X86::FP32_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m32; break;
@@ -38052,7 +38085,7 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
case X86::FP80_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m80; break;
case X86::FP80_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m80; break;
case X86::FP80_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m80; break;
- // clang-format on
+ // clang-format on
}
X86AddressMode AM = getAddressFromInstr(&MI, 0);
@@ -38269,7 +38302,8 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
case X86::PTMMULTF32PS: {
unsigned Opc;
switch (MI.getOpcode()) {
- default: llvm_unreachable("illegal opcode!");
+ default:
+ llvm_unreachable("illegal opcode!");
// clang-format off
case X86::PTDPBSSD: Opc = X86::TDPBSSD; break;
case X86::PTDPBSUD: Opc = X86::TDPBSUD; break;
@@ -38316,7 +38350,8 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
case X86::PTILESTORED: {
unsigned Opc;
switch (MI.getOpcode()) {
- default: llvm_unreachable("illegal opcode!");
+ default:
+ llvm_unreachable("illegal opcode!");
#define GET_EGPR_IF_ENABLED(OPC) (Subtarget.hasEGPR() ? OPC##_EVEX : OPC)
case X86::PTILELOADD:
Opc = GET_EGPR_IF_ENABLED(X86::TILELOADD);
@@ -38438,11 +38473,9 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
// X86 Optimization Hooks
//===----------------------------------------------------------------------===//
-bool
-X86TargetLowering::targetShrinkDemandedConstant(SDValue Op,
- const APInt &DemandedBits,
- const APInt &DemandedElts,
- TargetLoweringOpt &TLO) const {
+bool X86TargetLowering::targetShrinkDemandedConstant(
+ SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
+ TargetLoweringOpt &TLO) const {
EVT VT = Op.getValueType();
unsigned Opcode = Op.getOpcode();
unsigned EltSize = VT.getScalarSizeInBits();
@@ -38627,16 +38660,15 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
unsigned NumElts = DemandedElts.getBitWidth();
unsigned Opc = Op.getOpcode();
EVT VT = Op.getValueType();
- assert((Opc >= ISD::BUILTIN_OP_END ||
- Opc == ISD::INTRINSIC_WO_CHAIN ||
- Opc == ISD::INTRINSIC_W_CHAIN ||
- Opc == ISD::INTRINSIC_VOID) &&
+ assert((Opc >= ISD::BUILTIN_OP_END || Opc == ISD::INTRINSIC_WO_CHAIN ||
+ Opc == ISD::INTRINSIC_W_CHAIN || Opc == ISD::INTRINSIC_VOID) &&
"Should use MaskedValueIsZero if you don't know whether Op"
" is a target node!");
Known.resetAll();
switch (Opc) {
- default: break;
+ default:
+ break;
case X86ISD::MUL_IMM: {
KnownBits Known2;
Known = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
@@ -38865,7 +38897,7 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
- if (auto* Cst1 = dyn_cast<ConstantSDNode>(Op1)) {
+ if (auto *Cst1 = dyn_cast<ConstantSDNode>(Op1)) {
unsigned Shift = Cst1->getAPIntValue().extractBitsAsZExtValue(8, 0);
unsigned Length = Cst1->getAPIntValue().extractBitsAsZExtValue(8, 8);
@@ -39059,7 +39091,8 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
unsigned NumElts = VT.getVectorNumElements();
if (Mask.size() == NumElts) {
SmallVector<APInt, 2> DemandedOps(NumOps, APInt(NumElts, 0));
- Known.Zero.setAllBits(); Known.One.setAllBits();
+ Known.Zero.setAllBits();
+ Known.One.setAllBits();
for (unsigned i = 0; i != NumElts; ++i) {
if (!DemandedElts[i])
continue;
@@ -39204,16 +39237,18 @@ unsigned X86TargetLowering::ComputeNumSignBitsForTargetNode(
case X86ISD::ANDNP: {
unsigned Tmp0 =
DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
- if (Tmp0 == 1) return 1; // Early out.
+ if (Tmp0 == 1)
+ return 1; // Early out.
unsigned Tmp1 =
DAG.ComputeNumSignBits(Op.getOperand(1), DemandedElts, Depth + 1);
return std::min(Tmp0, Tmp1);
}
case X86ISD::CMOV: {
- unsigned Tmp0 = DAG.ComputeNumSignBits(Op.getOperand(0), Depth+1);
- if (Tmp0 == 1) return 1; // Early out.
- unsigned Tmp1 = DAG.ComputeNumSignBits(Op.getOperand(1), Depth+1);
+ unsigned Tmp0 = DAG.ComputeNumSignBits(Op.getOperand(0), Depth + 1);
+ if (Tmp0 == 1)
+ return 1; // Early out.
+ unsigned Tmp1 = DAG.ComputeNumSignBits(Op.getOperand(1), Depth + 1);
return std::min(Tmp0, Tmp1);
}
}
@@ -39589,7 +39624,6 @@ static bool matchUnaryPermuteShuffle(MVT MaskVT, ArrayRef<int> Mask,
PermuteImm = (unsigned)ShiftAmt;
return true;
}
-
}
}
@@ -39649,7 +39683,8 @@ static bool matchBinaryShuffle(MVT MaskVT, ArrayRef<int> Mask,
// Attempt to match against either an unary or binary PACKSS/PACKUS shuffle.
if (((MaskVT == MVT::v8i16 || MaskVT == MVT::v16i8) && Subtarget.hasSSE2()) ||
- ((MaskVT == MVT::v16i16 || MaskVT == MVT::v32i8) && Subtarget.hasInt256()) ||
+ ((MaskVT == MVT::v16i16 || MaskVT == MVT::v32i8) &&
+ Subtarget.hasInt256()) ||
((MaskVT == MVT::v32i16 || MaskVT == MVT::v64i8) && Subtarget.hasBWI())) {
if (matchShuffleWithPACK(MaskVT, SrcVT, V1, V2, Shuffle, Mask, DAG,
Subtarget)) {
@@ -40208,9 +40243,9 @@ static SDValue combineX86ShuffleChain(
SDValue LHS = isInRange(Mask[0], 0, 2) ? V1 : V2;
SDValue RHS = isInRange(Mask[1], 0, 2) ? V1 : V2;
return DAG.getNode(X86ISD::VPERM2X128, DL, RootVT,
- CanonicalizeShuffleInput(RootVT, LHS),
- CanonicalizeShuffleInput(RootVT, RHS),
- DAG.getTargetConstant(PermMask, DL, MVT::i8));
+ CanonicalizeShuffleInput(RootVT, LHS),
+ CanonicalizeShuffleInput(RootVT, RHS),
+ DAG.getTargetConstant(PermMask, DL, MVT::i8));
}
}
}
@@ -40304,8 +40339,8 @@ static SDValue combineX86ShuffleChain(
}
if (matchUnaryPermuteShuffle(MaskVT, Mask, Zeroable, AllowFloatDomain,
- AllowIntDomain, DAG, Subtarget, Shuffle, ShuffleVT,
- PermuteImm) &&
+ AllowIntDomain, DAG, Subtarget, Shuffle,
+ ShuffleVT, PermuteImm) &&
(!IsMaskedShuffle ||
(NumRootElts == ShuffleVT.getVectorNumElements()))) {
if (Depth == 0 && RootOpc == Shuffle)
@@ -41185,11 +41220,9 @@ static SDValue combineX86ShufflesConstants(MVT VT, ArrayRef<SDValue> Ops,
}
namespace llvm {
- namespace X86 {
- enum {
- MaxShuffleCombineDepth = 8
- };
- } // namespace X86
+namespace X86 {
+enum { MaxShuffleCombineDepth = 8 };
+} // namespace X86
} // namespace llvm
/// Fully generic combining of x86 shuffle instructions.
@@ -41593,7 +41626,8 @@ static SDValue combineX86ShufflesRecursively(
// The Op itself may be of different VT, so we need to scale the mask.
unsigned NumOpElts = Op.getValueType().getVectorNumElements();
- APInt OpScaledDemandedElts = APIntOps::ScaleBitMask(OpDemandedElts, NumOpElts);
+ APInt OpScaledDemandedElts =
+ APIntOps::ScaleBitMask(OpDemandedElts, NumOpElts);
// Can this operand be simplified any further, given it's demanded elements?
if (SDValue NewOp = TLI.SimplifyMultipleUseDemandedVectorElts(
@@ -42399,7 +42433,7 @@ static SDValue combineTargetShuffle(SDValue N, const SDLoc &DL,
ISD::isNormalLoad(Src.getNode())) {
LoadSDNode *LN = cast<LoadSDNode>(Src);
SDVTList Tys = DAG.getVTList(VT, MVT::Other);
- SDValue Ops[] = { LN->getChain(), LN->getBasePtr() };
+ SDValue Ops[] = {LN->getChain(), LN->getBasePtr()};
SDValue BcastLd =
DAG.getMemIntrinsicNode(X86ISD::VBROADCAST_LOAD, DL, Tys, Ops,
LN->getMemoryVT(), LN->getMemOperand());
@@ -42431,7 +42465,7 @@ static SDValue combineTargetShuffle(SDValue N, const SDLoc &DL,
// Unless its volatile or atomic.
if (LN->isSimple()) {
SDVTList Tys = DAG.getVTList(VT, MVT::Other);
- SDValue Ops[] = { LN->getChain(), LN->getBasePtr() };
+ SDValue Ops[] = {LN->getChain(), LN->getBasePtr()};
SDValue BcastLd = DAG.getMemIntrinsicNode(
X86ISD::VBROADCAST_LOAD, DL, Tys, Ops, MVT::i16,
LN->getPointerInfo(), LN->getBaseAlign(),
@@ -42449,7 +42483,7 @@ static SDValue combineTargetShuffle(SDValue N, const SDLoc &DL,
LoadSDNode *LN = cast<LoadSDNode>(Src.getOperand(0));
if (LN->getMemoryVT().getSizeInBits() == 16) {
SDVTList Tys = DAG.getVTList(VT, MVT::Other);
- SDValue Ops[] = { LN->getChain(), LN->getBasePtr() };
+ SDValue Ops[] = {LN->getChain(), LN->getBasePtr()};
SDValue BcastLd =
DAG.getMemIntrinsicNode(X86ISD::VBROADCAST_LOAD, DL, Tys, Ops,
LN->getMemoryVT(), LN->getMemOperand());
@@ -42476,7 +42510,7 @@ static SDValue combineTargetShuffle(SDValue N, const SDLoc &DL,
SDVTList Tys = DAG.getVTList(VT, MVT::Other);
SDValue Ptr = DAG.getMemBasePlusOffset(
LN->getBasePtr(), TypeSize::getFixed(Offset), DL);
- SDValue Ops[] = { LN->getChain(), Ptr };
+ SDValue Ops[] = {LN->getChain(), Ptr};
SDValue BcastLd = DAG.getMemIntrinsicNode(
X86ISD::VBROADCAST_LOAD, DL, Tys, Ops, MVT::i16,
LN->getPointerInfo().getWithOffset(Offset), LN->getBaseAlign(),
@@ -42494,7 +42528,7 @@ static SDValue combineTargetShuffle(SDValue N, const SDLoc &DL,
MemSDNode *LN = cast<MemIntrinsicSDNode>(Src);
if (LN->getMemoryVT().getSizeInBits() == VT.getScalarSizeInBits()) {
SDVTList Tys = DAG.getVTList(VT, MVT::Other);
- SDValue Ops[] = { LN->getChain(), LN->getBasePtr() };
+ SDValue Ops[] = {LN->getChain(), LN->getBasePtr()};
SDValue BcastLd =
DAG.getMemIntrinsicNode(X86ISD::VBROADCAST_LOAD, DL, Tys, Ops,
LN->getMemoryVT(), LN->getMemOperand());
@@ -43003,13 +43037,13 @@ static SDValue combineTargetShuffle(SDValue N, const SDLoc &DL,
if (Op1.getOpcode() == X86ISD::VBROADCAST_LOAD && Op1.hasOneUse()) {
auto *MemIntr = cast<MemIntrinsicSDNode>(Op1);
if (MemIntr->getMemoryVT().getScalarSizeInBits() == 32) {
- SDValue Load = DAG.getLoad(MVT::f32, DL, MemIntr->getChain(),
- MemIntr->getBasePtr(),
- MemIntr->getMemOperand());
- SDValue Insert = DAG.getNode(X86ISD::INSERTPS, DL, VT, Op0,
- DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT,
- Load),
- DAG.getTargetConstant(InsertPSMask & 0x3f, DL, MVT::i8));
+ SDValue Load =
+ DAG.getLoad(MVT::f32, DL, MemIntr->getChain(),
+ MemIntr->getBasePtr(), MemIntr->getMemOperand());
+ SDValue Insert = DAG.getNode(
+ X86ISD::INSERTPS, DL, VT, Op0,
+ DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Load),
+ DAG.getTargetConstant(InsertPSMask & 0x3f, DL, MVT::i8));
DAG.ReplaceAllUsesOfValueWith(SDValue(MemIntr, 1), Load.getValue(1));
return Insert;
}
@@ -43163,8 +43197,8 @@ static SDValue combineTargetShuffle(SDValue N, const SDLoc &DL,
if (Mask[0] == Mask[1] && Mask[2] == Mask[3] &&
(V.getOpcode() == X86ISD::PSHUFLW ||
V.getOpcode() == X86ISD::PSHUFHW) &&
- V.getOpcode() != N.getOpcode() &&
- V.hasOneUse() && V.getOperand(0).hasOneUse()) {
+ V.getOpcode() != N.getOpcode() && V.hasOneUse() &&
+ V.getOperand(0).hasOneUse()) {
SDValue D = peekThroughOneUseBitcasts(V.getOperand(0));
if (D.getOpcode() == X86ISD::PSHUFD) {
SmallVector<int, 4> VMask = getPSHUFShuffleMask(V);
@@ -43238,11 +43272,11 @@ static bool isAddSubOrSubAddMask(ArrayRef<int> Mask, bool &Op0Even) {
/// operation. If true is returned then the operands of ADDSUB(SUBADD) operation
/// are written to the parameters \p Opnd0 and \p Opnd1.
///
-/// We combine shuffle to ADDSUB(SUBADD) directly on the abstract vector shuffle nodes
-/// so it is easier to generically match. We also insert dummy vector shuffle
-/// nodes for the operands which explicitly discard the lanes which are unused
-/// by this operation to try to flow through the rest of the combiner
-/// the fact that they're unused.
+/// We combine shuffle to ADDSUB(SUBADD) directly on the abstract vector shuffle
+/// nodes so it is easier to generically match. We also insert dummy vector
+/// shuffle nodes for the operands which explicitly discard the lanes which are
+/// unused by this operation to try to flow through the rest of the combiner the
+/// fact that they're unused.
static bool isAddSubOrSubAdd(SDNode *N, const X86Subtarget &Subtarget,
SelectionDAG &DAG, SDValue &Opnd0, SDValue &Opnd1,
bool &IsSubAdd, bool &HasAllowContract) {
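// Hedged scalar reference (not part of the patch) for the ADDSUB/SUBADD shape
// the matcher above looks for: even lanes take the FSUB result, odd lanes the
// FADD result (SUBADD is the mirror image).
#include <array>
#include <cstddef>

template <std::size_t N>
std::array<double, N> addsubRef(const std::array<double, N> &A,
                                const std::array<double, N> &B) {
  std::array<double, N> R{};
  for (std::size_t I = 0; I != N; ++I)
    R[I] = (I % 2 == 0) ? A[I] - B[I] : A[I] + B[I];
  return R;
}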
@@ -43276,13 +43310,15 @@ static bool isAddSubOrSubAdd(SDNode *N, const X86Subtarget &Subtarget,
// commute the FADD operands.
SDValue LHS, RHS;
if (V1.getOpcode() == ISD::FSUB) {
- LHS = V1->getOperand(0); RHS = V1->getOperand(1);
+ LHS = V1->getOperand(0);
+ RHS = V1->getOperand(1);
if ((V2->getOperand(0) != LHS || V2->getOperand(1) != RHS) &&
(V2->getOperand(0) != RHS || V2->getOperand(1) != LHS))
return false;
} else {
assert(V2.getOpcode() == ISD::FSUB && "Unexpected opcode");
- LHS = V2->getOperand(0); RHS = V2->getOperand(1);
+ LHS = V2->getOperand(0);
+ RHS = V2->getOperand(1);
if ((V1->getOperand(0) != LHS || V1->getOperand(1) != RHS) &&
(V1->getOperand(0) != RHS || V1->getOperand(1) != LHS))
return false;
@@ -43294,8 +43330,8 @@ static bool isAddSubOrSubAdd(SDNode *N, const X86Subtarget &Subtarget,
return false;
// It's a subadd if the vector in the even parity is an FADD.
- IsSubAdd = Op0Even ? V1->getOpcode() == ISD::FADD
- : V2->getOpcode() == ISD::FADD;
+ IsSubAdd =
+ Op0Even ? V1->getOpcode() == ISD::FADD : V2->getOpcode() == ISD::FADD;
HasAllowContract =
V1->getFlags().hasAllowContract() && V2->getFlags().hasAllowContract();
@@ -43584,7 +43620,8 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
// TODO: Multiply by zero.
- // If RHS/LHS elements are known zero then we don't need the LHS/RHS equivalent.
+ // If RHS/LHS elements are known zero then we don't need the LHS/RHS
+ // equivalent.
APInt DemandedLHSElts = DemandedSrcElts & ~RHSZero;
if (SimplifyDemandedVectorElts(LHS, DemandedLHSElts, LHSUndef, LHSZero, TLO,
Depth + 1))
@@ -44358,7 +44395,7 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
// For splats, unless we *only* demand the 0'th element,
// stop attempts at simplification here, we aren't going to improve things,
// this is better than any potential shuffle.
- if (!DemandedElts.isOne() && TLO.DAG.isSplatValue(Op, /*AllowUndefs*/false))
+ if (!DemandedElts.isOne() && TLO.DAG.isSplatValue(Op, /*AllowUndefs*/ false))
return false;
// Get target/faux shuffle mask.
@@ -44456,7 +44493,7 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode(
EVT VT = Op.getValueType();
unsigned BitWidth = OriginalDemandedBits.getBitWidth();
unsigned Opc = Op.getOpcode();
- switch(Opc) {
+ switch (Opc) {
case X86ISD::VTRUNC: {
KnownBits KnownOp;
SDValue Src = Op.getOperand(0);
@@ -44464,8 +44501,10 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode(
// Simplify the input, using demanded bit information.
APInt TruncMask = OriginalDemandedBits.zext(SrcVT.getScalarSizeInBits());
- APInt DemandedElts = OriginalDemandedElts.trunc(SrcVT.getVectorNumElements());
- if (SimplifyDemandedBits(Src, TruncMask, DemandedElts, KnownOp, TLO, Depth + 1))
+ APInt DemandedElts =
+ OriginalDemandedElts.trunc(SrcVT.getVectorNumElements());
+ if (SimplifyDemandedBits(Src, TruncMask, DemandedElts, KnownOp, TLO,
+ Depth + 1))
return true;
break;
}
@@ -44569,7 +44608,8 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode(
}
}
- // If we are only demanding sign bits then we can use the shift source directly.
+ // If we are only demanding sign bits then we can use the shift source
+ // directly.
unsigned NumSignBits =
TLO.DAG.ComputeNumSignBits(Op0, OriginalDemandedElts, Depth + 1);
unsigned UpperDemandedBits = BitWidth - OriginalDemandedBits.countr_zero();
@@ -44760,8 +44800,8 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode(
return true;
KnownBits KnownVec;
- if (SimplifyDemandedBits(Vec, DemandedVecBits, DemandedVecElts,
- KnownVec, TLO, Depth + 1))
+ if (SimplifyDemandedBits(Vec, DemandedVecBits, DemandedVecElts, KnownVec,
+ TLO, Depth + 1))
return true;
if (SDValue V = SimplifyMultipleUseDemandedBits(
@@ -45424,13 +45464,13 @@ static bool checkBitcastSrcVectorSize(SDValue Src, unsigned Size,
// Helper to flip between AND/OR/XOR opcodes and their X86ISD FP equivalents.
static unsigned getAltBitOpcode(unsigned Opcode) {
- switch(Opcode) {
- // clang-format off
+ switch (Opcode) {
+ // clang-format off
case ISD::AND: return X86ISD::FAND;
case ISD::OR: return X86ISD::FOR;
case ISD::XOR: return X86ISD::FXOR;
case X86ISD::ANDNP: return X86ISD::FANDN;
- // clang-format on
+ // clang-format on
}
llvm_unreachable("Unknown bitwise opcode");
}
@@ -45653,8 +45693,7 @@ static SDValue combineBitcastvxi1(SelectionDAG &DAG, EVT VT, SDValue Src,
// Convert a vXi1 constant build vector to the same width scalar integer.
static SDValue combinevXi1ConstantToInteger(SDValue Op, SelectionDAG &DAG) {
EVT SrcVT = Op.getValueType();
- assert(SrcVT.getVectorElementType() == MVT::i1 &&
- "Expected a vXi1 vector");
+ assert(SrcVT.getVectorElementType() == MVT::i1 && "Expected a vXi1 vector");
assert(ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) &&
"Expected a constant build vector");
@@ -45972,7 +46011,7 @@ static SDValue combineBitcast(SDNode *N, SelectionDAG &DAG,
// and the vbroadcast_load are both integer or both fp. In some cases this
// will remove the bitcast entirely.
if (N0.getOpcode() == X86ISD::VBROADCAST_LOAD && N0.hasOneUse() &&
- VT.isFloatingPoint() != SrcVT.isFloatingPoint() && VT.isVector()) {
+ VT.isFloatingPoint() != SrcVT.isFloatingPoint() && VT.isVector()) {
auto *BCast = cast<MemIntrinsicSDNode>(N0);
unsigned SrcVTSize = SrcVT.getScalarSizeInBits();
unsigned MemSize = BCast->getMemoryVT().getScalarSizeInBits();
@@ -45985,7 +46024,7 @@ static SDValue combineBitcast(SDNode *N, SelectionDAG &DAG,
LoadVT = MVT::getVectorVT(LoadVT, SrcVT.getVectorNumElements());
SDVTList Tys = DAG.getVTList(LoadVT, MVT::Other);
- SDValue Ops[] = { BCast->getChain(), BCast->getBasePtr() };
+ SDValue Ops[] = {BCast->getChain(), BCast->getBasePtr()};
SDValue ResNode =
DAG.getMemIntrinsicNode(X86ISD::VBROADCAST_LOAD, SDLoc(N), Tys, Ops,
MemVT, BCast->getMemOperand());
@@ -46035,7 +46074,7 @@ static SDValue combineBitcast(SDNode *N, SelectionDAG &DAG,
bool LowUndef = true, AllUndefOrZero = true;
for (unsigned i = 1, e = SrcVT.getVectorNumElements(); i != e; ++i) {
SDValue Op = N0.getOperand(i);
- LowUndef &= Op.isUndef() || (i >= e/2);
+ LowUndef &= Op.isUndef() || (i >= e / 2);
AllUndefOrZero &= isNullConstantOrUndef(Op);
}
if (AllUndefOrZero) {
@@ -46077,8 +46116,8 @@ static SDValue combineBitcast(SDNode *N, SelectionDAG &DAG,
// Try to remove a bitcast of constant vXi1 vector. We have to legalize
// most of these to scalar anyway.
- if (Subtarget.hasAVX512() && VT.isScalarInteger() &&
- SrcVT.isVector() && SrcVT.getVectorElementType() == MVT::i1 &&
+ if (Subtarget.hasAVX512() && VT.isScalarInteger() && SrcVT.isVector() &&
+ SrcVT.getVectorElementType() == MVT::i1 &&
ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
return combinevXi1ConstantToInteger(N0, DAG);
}
@@ -46096,8 +46135,8 @@ static SDValue combineBitcast(SDNode *N, SelectionDAG &DAG,
// Look for MOVMSK that is maybe truncated and then bitcasted to vXi1.
// Turn it into a sign bit compare that produces a k-register. This avoids
// a trip through a GPR.
- if (Subtarget.hasAVX512() && SrcVT.isScalarInteger() &&
- VT.isVector() && VT.getVectorElementType() == MVT::i1 &&
+ if (Subtarget.hasAVX512() && SrcVT.isScalarInteger() && VT.isVector() &&
+ VT.getVectorElementType() == MVT::i1 &&
isPowerOf2_32(VT.getVectorNumElements())) {
unsigned NumElts = VT.getVectorNumElements();
SDValue Src = N0;
@@ -46151,12 +46190,12 @@ static SDValue combineBitcast(SDNode *N, SelectionDAG &DAG,
// transferring the SSE operand to integer register and back.
unsigned FPOpcode;
switch (N0.getOpcode()) {
- // clang-format off
+ // clang-format off
case ISD::AND: FPOpcode = X86ISD::FAND; break;
case ISD::OR: FPOpcode = X86ISD::FOR; break;
case ISD::XOR: FPOpcode = X86ISD::FXOR; break;
default: return SDValue();
- // clang-format on
+ // clang-format on
}
// Check if we have a bitcast from another integer type as well.
@@ -46257,7 +46296,7 @@ static SDValue createVPDPBUSD(SelectionDAG &DAG, SDValue LHS, SDValue RHS,
// Actually build the DotProduct, split as 256/512 bits for
// AVXVNNI/AVX512VNNI.
auto DpBuilder = [](SelectionDAG &DAG, const SDLoc &DL,
- ArrayRef<SDValue> Ops) {
+ ArrayRef<SDValue> Ops) {
MVT VT = MVT::getVectorVT(MVT::i32, Ops[0].getValueSizeInBits() / 32);
return DAG.getNode(X86ISD::VPDPBUSD, DL, VT, Ops);
};
@@ -46372,7 +46411,8 @@ static SDValue combineMinMaxReduction(SDNode *Extract, SelectionDAG &DAG,
DAG.getVectorIdxConstant(0, DL));
}
-// Attempt to replace an all_of/any_of/parity style horizontal reduction with a MOVMSK.
+// Attempt to replace an all_of/any_of/parity style horizontal reduction with a
+// MOVMSK.
static SDValue combinePredicateReduction(SDNode *Extract, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
// Bail without SSE2.
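// Hedged illustration (assumption: plain SSE2 intrinsics rather than the DAG
// nodes built below) of the all_of shape this combine targets: a vector
// compare reduced through MOVMSK instead of an element-by-element reduction.
#include <emmintrin.h>

bool allBytesEqual(__m128i A, __m128i B) {
  __m128i Eq = _mm_cmpeq_epi8(A, B);      // 0xFF in every lane that matches
  return _mm_movemask_epi8(Eq) == 0xFFFF; // all 16 sign bits set => all_of
}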
@@ -46647,9 +46687,9 @@ static SDValue combineBasicSADPattern(SDNode *Extract, SelectionDAG &DAG,
if (Stages > 3) {
unsigned SadElems = SadVT.getVectorNumElements();
- for(unsigned i = Stages - 3; i > 0; --i) {
+ for (unsigned i = Stages - 3; i > 0; --i) {
SmallVector<int, 16> Mask(SadElems, -1);
- for(unsigned j = 0, MaskEnd = 1 << (i - 1); j < MaskEnd; ++j)
+ for (unsigned j = 0, MaskEnd = 1 << (i - 1); j < MaskEnd; ++j)
Mask[j] = MaskEnd + j;
SDValue Shuffle =
@@ -46965,10 +47005,10 @@ static SDValue scalarizeExtEltFP(SDNode *ExtElt, SelectionDAG &DAG,
SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
Vec.getOperand(0).getValueType().getScalarType(),
Vec.getOperand(0), Index);
- SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT,
- Vec.getOperand(1), Index);
- SDValue Ext2 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT,
- Vec.getOperand(2), Index);
+ SDValue Ext1 =
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Vec.getOperand(1), Index);
+ SDValue Ext2 =
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Vec.getOperand(2), Index);
return DAG.getNode(ISD::SELECT, DL, VT, Ext0, Ext1, Ext2);
}
@@ -47248,8 +47288,9 @@ static SDValue combineExtractVectorElt(SDNode *N, SelectionDAG &DAG,
return DAG.getConstant(EltBits[Idx].zext(NumEltBits), dl, VT);
}
- // Convert extract_element(bitcast(<X x i1>) -> bitcast(extract_subvector()).
- // Improves lowering of bool masks on rust which splits them into byte array.
+ // Convert extract_element(bitcast(<X x i1>) ->
+ // bitcast(extract_subvector()). Improves lowering of bool masks on rust
+ // which splits them into byte array.
if (InputVector.getOpcode() == ISD::BITCAST && (NumEltBits % 8) == 0) {
SDValue Src = peekThroughBitcasts(InputVector);
if (Src.getValueType().getScalarType() == MVT::i1 &&
@@ -47599,8 +47640,7 @@ static SDValue combineVSelectToBLENDV(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
SDValue Cond = N->getOperand(0);
- if ((N->getOpcode() != ISD::VSELECT &&
- N->getOpcode() != X86ISD::BLENDV) ||
+ if ((N->getOpcode() != ISD::VSELECT && N->getOpcode() != X86ISD::BLENDV) ||
ISD::isBuildVectorOfConstantSDNodes(Cond.getNode()))
return SDValue();
@@ -47882,7 +47922,8 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
// Check for x CC y ? x : y.
if (DAG.isEqualTo(LHS, Op0) && DAG.isEqualTo(RHS, Op1)) {
switch (CC) {
- default: break;
+ default:
+ break;
case ISD::SETULT:
// Converting this to a min would handle NaNs incorrectly, and swapping
// the operands would cause it to handle comparisons between positive
@@ -47947,10 +47988,11 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
Opcode = X86ISD::FMAX;
break;
}
- // Check for x CC y ? y : x -- a min/max with reversed arms.
+ // Check for x CC y ? y : x -- a min/max with reversed arms.
} else if (DAG.isEqualTo(LHS, Op1) && DAG.isEqualTo(RHS, Op0)) {
switch (CC) {
- default: break;
+ default:
+ break;
case ISD::SETOGE:
// Converting this to a min would handle comparisons between positive
// and negative zero incorrectly, and swapping the operands would
@@ -48154,13 +48196,13 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
Cond1 == InnerSetCC.getOperand(1)) {
ISD::CondCode NewCC;
switch (CC == ISD::SETEQ ? InnerCC : CC) {
- // clang-format off
+ // clang-format off
case ISD::SETGT: NewCC = ISD::SETGE; break;
case ISD::SETLT: NewCC = ISD::SETLE; break;
case ISD::SETUGT: NewCC = ISD::SETUGE; break;
case ISD::SETULT: NewCC = ISD::SETULE; break;
default: NewCC = ISD::SETCC_INVALID; break;
- // clang-format on
+ // clang-format on
}
if (NewCC != ISD::SETCC_INVALID) {
Cond = DAG.getSetCC(DL, CondVT, Cond0, Cond1, NewCC);
@@ -48330,9 +48372,9 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
// 16-bit lacks a proper blendv.
unsigned EltBitWidth = VT.getScalarSizeInBits();
bool CanShiftBlend =
- TLI.isTypeLegal(VT) && ((Subtarget.hasAVX() && EltBitWidth == 32) ||
- (Subtarget.hasAVX2() && EltBitWidth == 64) ||
- (Subtarget.hasXOP()));
+ TLI.isTypeLegal(VT) &&
+ ((Subtarget.hasAVX() && EltBitWidth == 32) ||
+ (Subtarget.hasAVX2() && EltBitWidth == 64) || (Subtarget.hasXOP()));
if (CanShiftBlend &&
ISD::matchUnaryPredicate(And.getOperand(1), [](ConstantSDNode *C) {
return C->getAPIntValue().isPowerOf2();
@@ -48571,7 +48613,7 @@ static SDValue checkBoolTestSetCCCombine(SDValue Cmp, X86::CondCode &CC) {
SDValue Op2 = Cmp.getOperand(1);
SDValue SetCC;
- const ConstantSDNode* C = nullptr;
+ const ConstantSDNode *C = nullptr;
bool needOppositeCond = (CC == X86::COND_E);
bool checkAgainstTrue = false; // Is it a comparison against 1?
@@ -48592,8 +48634,7 @@ static SDValue checkBoolTestSetCCCombine(SDValue Cmp, X86::CondCode &CC) {
bool truncatedToBoolWithAnd = false;
// Skip (zext $x), (trunc $x), or (and $x, 1) node.
while (SetCC.getOpcode() == ISD::ZERO_EXTEND ||
- SetCC.getOpcode() == ISD::TRUNCATE ||
- SetCC.getOpcode() == ISD::AND) {
+ SetCC.getOpcode() == ISD::TRUNCATE || SetCC.getOpcode() == ISD::AND) {
if (SetCC.getOpcode() == ISD::AND) {
int OpIdx = -1;
if (isOneConstant(SetCC.getOperand(0)))
@@ -48636,13 +48677,13 @@ static SDValue checkBoolTestSetCCCombine(SDValue Cmp, X86::CondCode &CC) {
if (!FVal) {
SDValue Op = SetCC.getOperand(0);
// Skip 'zext' or 'trunc' node.
- if (Op.getOpcode() == ISD::ZERO_EXTEND ||
- Op.getOpcode() == ISD::TRUNCATE)
+ if (Op.getOpcode() == ISD::ZERO_EXTEND || Op.getOpcode() == ISD::TRUNCATE)
Op = Op.getOperand(0);
// A special case for rdrand/rdseed, where 0 is set if false cond is
// found.
if ((Op.getOpcode() != X86ISD::RDRAND &&
- Op.getOpcode() != X86ISD::RDSEED) || Op.getResNo() != 0)
+ Op.getOpcode() != X86ISD::RDSEED) ||
+ Op.getResNo() != 0)
return SDValue();
}
// Quit if false value is not the constant 0 or 1.
@@ -48687,7 +48728,8 @@ static bool checkBoolTestAndOrSetCCCombine(SDValue Cond, X86::CondCode &CC0,
SDValue SetCC0, SetCC1;
switch (Cond->getOpcode()) {
- default: return false;
+ default:
+ return false;
case ISD::AND:
case X86ISD::AND:
isAnd = true;
@@ -48752,8 +48794,7 @@ static SDValue combineCarryThroughADD(SDValue EFLAGS, SelectionDAG &DAG) {
}
// If this is a check of the z flag of an add with 1, switch to the
// C flag.
- if (CarryCC == X86::COND_E &&
- CarryOp1.getOpcode() == X86ISD::ADD &&
+ if (CarryCC == X86::COND_E && CarryOp1.getOpcode() == X86ISD::ADD &&
isOneConstant(CarryOp1.getOperand(1)))
return CarryOp1;
} else if (FoundAndLSB) {
@@ -49286,12 +49327,11 @@ static SDValue combineCMov(SDNode *N, SelectionDAG &DAG,
// Optimize Cond ? cst+1 : cst -> zext(setcc(C)+cst. This is efficient
// for any integer data type, including i8/i16.
- if (FalseC->getAPIntValue()+1 == TrueC->getAPIntValue()) {
+ if (FalseC->getAPIntValue() + 1 == TrueC->getAPIntValue()) {
Cond = getSETCC(CC, Cond, DL, DAG);
// Zero extend the condition if needed.
- Cond = DAG.getNode(ISD::ZERO_EXTEND, DL,
- FalseC->getValueType(0), Cond);
+ Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, FalseC->getValueType(0), Cond);
Cond = DAG.getNode(ISD::ADD, DL, Cond.getValueType(), Cond,
SDValue(FalseC, 0));
return Cond;
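// Hedged scalar illustration (not from the patch) of the transform above: a
// select between two consecutive constants becomes a zero-extended condition
// plus the smaller constant, avoiding a CMOV.
int selectConsecutive(bool Cond) {
  // Equivalent to: Cond ? 6 : 5
  return static_cast<int>(Cond) + 5;
}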
@@ -49307,24 +49347,25 @@ static SDValue combineCMov(SDNode *N, SelectionDAG &DAG,
bool isFastMultiplier = false;
if (Diff.ult(10)) {
switch (Diff.getZExtValue()) {
- default: break;
- case 1: // result = add base, cond
- case 2: // result = lea base( , cond*2)
- case 3: // result = lea base(cond, cond*2)
- case 4: // result = lea base( , cond*4)
- case 5: // result = lea base(cond, cond*4)
- case 8: // result = lea base( , cond*8)
- case 9: // result = lea base(cond, cond*8)
+ default:
+ break;
+ case 1: // result = add base, cond
+ case 2: // result = lea base( , cond*2)
+ case 3: // result = lea base(cond, cond*2)
+ case 4: // result = lea base( , cond*4)
+ case 5: // result = lea base(cond, cond*4)
+ case 8: // result = lea base( , cond*8)
+ case 9: // result = lea base(cond, cond*8)
isFastMultiplier = true;
break;
}
}
if (isFastMultiplier) {
- Cond = getSETCC(CC, Cond, DL ,DAG);
+ Cond = getSETCC(CC, Cond, DL, DAG);
// Zero extend the condition if needed.
- Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, FalseC->getValueType(0),
- Cond);
+ Cond =
+ DAG.getNode(ISD::ZERO_EXTEND, DL, FalseC->getValueType(0), Cond);
// Scale the condition by the difference.
if (Diff != 1)
Cond = DAG.getNode(ISD::MUL, DL, Cond.getValueType(), Cond,
@@ -49892,11 +49933,8 @@ static SDValue combineMul(SDNode *N, SelectionDAG &DAG,
}
// Optimization 2: Use VPMADD52L (52-bit multiply-add).
- // On targets with slow VPMULLQ (e.g., Ice Lake), VPMADD52L is significantly
- // faster (lower latency/better throughput).
- // VPMADD52L performs (A * B) + C. We can use it for pure multiplication if
- // the operands fit within 52 bits (top 12 bits are zero) by setting the
- // accumulator (C) to zero.
+ // On targets with slow VPMULLQ (e.g., Ice Lake),
+ // VPMADD52L is significantly faster (lower latency/better throughput).
if (Subtarget.hasAVX512() && Subtarget.hasIFMA()) {
if (Count0 >= 12 && Count1 >= 12) {
SDValue Zero = getZeroVector(VT.getSimpleVT(), Subtarget, DAG, DL);
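// Hedged reference model (not from the patch; uses the GCC/Clang __int128
// extension) of the per-element operation VPMADD52L performs, for readers of
// the optimization above: the accumulator receives the low 52 bits of the
// 104-bit product of the low 52 bits of each source element.
#include <cstdint>

static inline uint64_t madd52lo(uint64_t Acc, uint64_t A, uint64_t B) {
  const uint64_t Mask52 = (1ULL << 52) - 1;
  unsigned __int128 Prod =
      static_cast<unsigned __int128>(A & Mask52) * (B & Mask52);
  return Acc + static_cast<uint64_t>(Prod & Mask52);
}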
@@ -50150,8 +50188,7 @@ static SDValue combineShiftLeft(SDNode *N, SelectionDAG &DAG,
// fold (shl (and (setcc_c), c1), c2) -> (and setcc_c, (c1 << c2))
// since the result of setcc_c is all zero's or all ones.
- if (VT.isInteger() && !VT.isVector() &&
- N1C && N0.getOpcode() == ISD::AND &&
+ if (VT.isInteger() && !VT.isVector() && N1C && N0.getOpcode() == ISD::AND &&
N0.getOperand(1).getOpcode() == ISD::Constant) {
SDValue N00 = N0.getOperand(0);
APInt Mask = N0.getConstantOperandAPInt(1);
@@ -50235,7 +50272,7 @@ static SDValue combineShiftRightArithmetic(SDNode *N, SelectionDAG &DAG,
if (SraConst.isNegative())
return SDValue();
- for (MVT SVT : { MVT::i8, MVT::i16, MVT::i32 }) {
+ for (MVT SVT : {MVT::i8, MVT::i16, MVT::i32}) {
unsigned ShiftSize = SVT.getSizeInBits();
// Only deal with (Size - ShlConst) being equal to 8, 16 or 32.
if (ShiftSize >= Size || ShlConst != Size - ShiftSize)
@@ -50569,8 +50606,8 @@ static SDValue combineVectorPack(SDNode *N, SelectionDAG &DAG,
// Try to combine a PACKUSWB/PACKSSWB implemented truncate with a regular
// truncate to create a larger truncate.
- if (Subtarget.hasAVX512() &&
- N0.getOpcode() == ISD::TRUNCATE && N1.isUndef() && VT == MVT::v16i8 &&
+ if (Subtarget.hasAVX512() && N0.getOpcode() == ISD::TRUNCATE &&
+ N1.isUndef() && VT == MVT::v16i8 &&
N0.getOperand(0).getValueType() == MVT::v8i32) {
if ((IsSigned && DAG.ComputeNumSignBits(N0) > 8) ||
(!IsSigned &&
@@ -50917,7 +50954,7 @@ static SDValue combineCompareEqual(SDNode *N, SelectionDAG &DAG,
SDValue CMP00 = CMP0->getOperand(0);
SDValue CMP01 = CMP0->getOperand(1);
- EVT VT = CMP00.getValueType();
+ EVT VT = CMP00.getValueType();
if (VT == MVT::f32 || VT == MVT::f64 ||
(VT == MVT::f16 && Subtarget.hasFP16())) {
@@ -50943,8 +50980,10 @@ static SDValue combineCompareEqual(SDNode *N, SelectionDAG &DAG,
}
if (!ExpectingFlags) {
- enum X86::CondCode cc0 = (enum X86::CondCode)N0.getConstantOperandVal(0);
- enum X86::CondCode cc1 = (enum X86::CondCode)N1.getConstantOperandVal(0);
+ enum X86::CondCode cc0 =
+ (enum X86::CondCode)N0.getConstantOperandVal(0);
+ enum X86::CondCode cc1 =
+ (enum X86::CondCode)N1.getConstantOperandVal(0);
if (cc1 == X86::COND_E || cc1 == X86::COND_NE) {
X86::CondCode tmp = cc0;
@@ -50952,7 +50991,7 @@ static SDValue combineCompareEqual(SDNode *N, SelectionDAG &DAG,
cc1 = tmp;
}
- if ((cc0 == X86::COND_E && cc1 == X86::COND_NP) ||
+ if ((cc0 == X86::COND_E && cc1 == X86::COND_NP) ||
(cc0 == X86::COND_NE && cc1 == X86::COND_P)) {
// FIXME: need symbolic constants for these magic numbers.
// See X86ATTInstPrinter.cpp:printSSECC().
@@ -50962,7 +51001,8 @@ static SDValue combineCompareEqual(SDNode *N, SelectionDAG &DAG,
DAG.getNode(X86ISD::FSETCCM, DL, MVT::v1i1, CMP00, CMP01,
DAG.getTargetConstant(x86cc, DL, MVT::i8));
// Need to fill with zeros to ensure the bitcast will produce zeroes
- // for the upper bits. An EXTRACT_ELEMENT here wouldn't guarantee that.
+ // for the upper bits. An EXTRACT_ELEMENT here wouldn't guarantee
+ // that.
SDValue Ins = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, MVT::v16i1,
DAG.getConstant(0, DL, MVT::v16i1),
FSetCC, DAG.getVectorIdxConstant(0, DL));
@@ -50994,8 +51034,8 @@ static SDValue combineCompareEqual(SDNode *N, SelectionDAG &DAG,
SDValue OnesOrZeroesI = DAG.getBitcast(IntVT, OnesOrZeroesF);
SDValue ANDed = DAG.getNode(ISD::AND, DL, IntVT, OnesOrZeroesI,
DAG.getConstant(1, DL, IntVT));
- SDValue OneBitOfTruth = DAG.getNode(ISD::TRUNCATE, DL, MVT::i8,
- ANDed);
+ SDValue OneBitOfTruth =
+ DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, ANDed);
return OneBitOfTruth;
}
}
@@ -51190,7 +51230,8 @@ static SDValue PromoteMaskArithmetic(SDValue N, const SDLoc &DL,
assert(VT.isVector() && "Expected vector type");
assert((N.getOpcode() == ISD::ANY_EXTEND ||
N.getOpcode() == ISD::ZERO_EXTEND ||
- N.getOpcode() == ISD::SIGN_EXTEND) && "Invalid Node");
+ N.getOpcode() == ISD::SIGN_EXTEND) &&
+ "Invalid Node");
SDValue Narrow = N.getOperand(0);
EVT NarrowVT = Narrow.getValueType();
@@ -51200,26 +51241,27 @@ static SDValue PromoteMaskArithmetic(SDValue N, const SDLoc &DL,
if (!Op)
return SDValue();
switch (N.getOpcode()) {
- default: llvm_unreachable("Unexpected opcode");
+ default:
+ llvm_unreachable("Unexpected opcode");
case ISD::ANY_EXTEND:
return Op;
case ISD::ZERO_EXTEND:
return DAG.getZeroExtendInReg(Op, DL, NarrowVT);
case ISD::SIGN_EXTEND:
- return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT,
- Op, DAG.getValueType(NarrowVT));
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Op,
+ DAG.getValueType(NarrowVT));
}
}
static unsigned convertIntLogicToFPLogicOpcode(unsigned Opcode) {
unsigned FPOpcode;
switch (Opcode) {
- // clang-format off
+ // clang-format off
default: llvm_unreachable("Unexpected input node for FP logic conversion");
case ISD::AND: FPOpcode = X86ISD::FAND; break;
case ISD::OR: FPOpcode = X86ISD::FOR; break;
case ISD::XOR: FPOpcode = X86ISD::FXOR; break;
- // clang-format on
+ // clang-format on
}
return FPOpcode;
}
@@ -51662,8 +51704,7 @@ static SDValue combineScalarAndWithMaskSetcc(SDNode *N, SelectionDAG &DAG,
SmallVector<SDValue, 4> Ops(Src.getNumOperands(),
DAG.getConstant(0, dl, SubVecVT));
Ops[0] = SubVec;
- SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, dl, SrcVT,
- Ops);
+ SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, dl, SrcVT, Ops);
EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcVT.getSizeInBits());
return DAG.getZExtOrTrunc(DAG.getBitcast(IntVT, Concat), dl, VT);
}
@@ -52012,7 +52053,7 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
if (SDValue R = combineCompareEqual(N, DAG, DCI, Subtarget))
return R;
- if (SDValue R = combineAndNotIntoANDNP(N, dl ,DAG))
+ if (SDValue R = combineAndNotIntoANDNP(N, dl, DAG))
return R;
if (SDValue ShiftRight = combineAndMaskToShift(N, dl, DAG, Subtarget))
@@ -52788,7 +52829,8 @@ static SDValue combineOr(SDNode *N, SelectionDAG &DAG,
if (NotCond) {
SDValue R = DAG.getZExtOrTrunc(NotCond, dl, VT);
- R = DAG.getNode(ISD::MUL, dl, VT, R, DAG.getConstant(Val + 1, dl, VT));
+ R = DAG.getNode(ISD::MUL, dl, VT, R,
+ DAG.getConstant(Val + 1, dl, VT));
R = DAG.getNode(ISD::SUB, dl, VT, R, DAG.getConstant(1, dl, VT));
return R;
}
@@ -52925,7 +52967,7 @@ static SDValue foldVectorXorShiftIntoCmp(SDNode *N, SelectionDAG &DAG,
return SDValue();
switch (VT.getSimpleVT().SimpleTy) {
- // clang-format off
+ // clang-format off
default: return SDValue();
case MVT::v16i8:
case MVT::v8i16:
@@ -53055,8 +53097,8 @@ static SDValue combineTruncateWithSat(SDValue In, EVT VT, const SDLoc &DL,
// split across two registers. We can use a packusdw+perm to clamp to 0-65535
// and concatenate at the same time. Then we can use a final vpmovuswb to
// clip to 0-255.
- if (Subtarget.hasBWI() && !Subtarget.useAVX512Regs() &&
- InVT == MVT::v16i32 && VT == MVT::v16i8) {
+ if (Subtarget.hasBWI() && !Subtarget.useAVX512Regs() && InVT == MVT::v16i32 &&
+ VT == MVT::v16i8) {
if (SDValue USatVal = detectSSatPattern(In, VT, true)) {
// Emit a VPACKUSDW+VPERMQ followed by a VPMOVUSWB.
SDValue Mid = truncateVectorWithPACK(X86ISD::PACKUS, MVT::v16i16, USatVal,
@@ -53072,11 +53114,12 @@ static SDValue combineTruncateWithSat(SDValue In, EVT VT, const SDLoc &DL,
// FIXME: We could widen truncates to 512 to remove the VLX restriction.
// If the result type is 256-bits or larger and we have disable 512-bit
// registers, we should go ahead and use the pack instructions if possible.
- bool PreferAVX512 = ((Subtarget.hasAVX512() && InSVT == MVT::i32) ||
- (Subtarget.hasBWI() && InSVT == MVT::i16)) &&
- (InVT.getSizeInBits() > 128) &&
- (Subtarget.hasVLX() || InVT.getSizeInBits() > 256) &&
- !(!Subtarget.useAVX512Regs() && VT.getSizeInBits() >= 256);
+ bool PreferAVX512 =
+ ((Subtarget.hasAVX512() && InSVT == MVT::i32) ||
+ (Subtarget.hasBWI() && InSVT == MVT::i16)) &&
+ (InVT.getSizeInBits() > 128) &&
+ (Subtarget.hasVLX() || InVT.getSizeInBits() > 256) &&
+ !(!Subtarget.useAVX512Regs() && VT.getSizeInBits() >= 256);
if (!PreferAVX512 && VT.getVectorNumElements() > 1 &&
isPowerOf2_32(VT.getVectorNumElements()) &&
@@ -53089,8 +53132,8 @@ static SDValue combineTruncateWithSat(SDValue In, EVT VT, const SDLoc &DL,
SDValue Mid = truncateVectorWithPACK(X86ISD::PACKSS, MidVT, USatVal, DL,
DAG, Subtarget);
assert(Mid && "Failed to pack!");
- SDValue V = truncateVectorWithPACK(X86ISD::PACKUS, VT, Mid, DL, DAG,
- Subtarget);
+ SDValue V =
+ truncateVectorWithPACK(X86ISD::PACKUS, VT, Mid, DL, DAG, Subtarget);
assert(V && "Failed to pack!");
return V;
} else if (SVT == MVT::i8 || Subtarget.hasSSE41())
@@ -53414,10 +53457,9 @@ reduceMaskedLoadToScalarLoad(MaskedLoadSDNode *ML, SelectionDAG &DAG,
CastVT = VT.changeVectorElementType(EltVT);
}
- SDValue Load =
- DAG.getLoad(EltVT, DL, ML->getChain(), Addr,
- ML->getPointerInfo().getWithOffset(Offset),
- Alignment, ML->getMemOperand()->getFlags());
+ SDValue Load = DAG.getLoad(EltVT, DL, ML->getChain(), Addr,
+ ML->getPointerInfo().getWithOffset(Offset),
+ Alignment, ML->getMemOperand()->getFlags());
SDValue PassThru = DAG.getBitcast(CastVT, ML->getPassThru());
@@ -53448,8 +53490,8 @@ combineMaskedLoadConstantMask(MaskedLoadSDNode *ML, SelectionDAG &DAG,
if (LoadFirstElt && LoadLastElt) {
SDValue VecLd = DAG.getLoad(VT, DL, ML->getChain(), ML->getBasePtr(),
ML->getMemOperand());
- SDValue Blend = DAG.getSelect(DL, VT, ML->getMask(), VecLd,
- ML->getPassThru());
+ SDValue Blend =
+ DAG.getSelect(DL, VT, ML->getMask(), VecLd, ML->getPassThru());
return DCI.CombineTo(ML, Blend, VecLd.getValue(1), true);
}
@@ -53471,8 +53513,8 @@ combineMaskedLoadConstantMask(MaskedLoadSDNode *ML, SelectionDAG &DAG,
VT, DL, ML->getChain(), ML->getBasePtr(), ML->getOffset(), ML->getMask(),
DAG.getUNDEF(VT), ML->getMemoryVT(), ML->getMemOperand(),
ML->getAddressingMode(), ML->getExtensionType());
- SDValue Blend = DAG.getSelect(DL, VT, ML->getMask(), NewML,
- ML->getPassThru());
+ SDValue Blend =
+ DAG.getSelect(DL, VT, ML->getMask(), NewML, ML->getPassThru());
return DCI.CombineTo(ML, Blend, NewML.getValue(1), true);
}
@@ -53552,8 +53594,8 @@ static SDValue reduceMaskedStoreToScalarStore(MaskedStoreSDNode *MS,
// Store that element at the appropriate offset from the base pointer.
return DAG.getStore(MS->getChain(), DL, Extract, Addr,
- MS->getPointerInfo().getWithOffset(Offset),
- Alignment, MS->getMemOperand()->getFlags());
+ MS->getPointerInfo().getWithOffset(Offset), Alignment,
+ MS->getMemOperand()->getFlags());
}
static SDValue combineMaskedStore(SDNode *N, SelectionDAG &DAG,
@@ -53791,15 +53833,16 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
// Turn vXi1 stores of constants into a scalar store.
if ((VT == MVT::v8i1 || VT == MVT::v16i1 || VT == MVT::v32i1 ||
- VT == MVT::v64i1) && VT == StVT && TLI.isTypeLegal(VT) &&
+ VT == MVT::v64i1) &&
+ VT == StVT && TLI.isTypeLegal(VT) &&
ISD::isBuildVectorOfConstantSDNodes(StoredVal.getNode())) {
// If its a v64i1 store without 64-bit support, we need two stores.
if (!DCI.isBeforeLegalize() && VT == MVT::v64i1 && !Subtarget.is64Bit()) {
- SDValue Lo = DAG.getBuildVector(MVT::v32i1, dl,
- StoredVal->ops().slice(0, 32));
+ SDValue Lo =
+ DAG.getBuildVector(MVT::v32i1, dl, StoredVal->ops().slice(0, 32));
Lo = combinevXi1ConstantToInteger(Lo, DAG);
- SDValue Hi = DAG.getBuildVector(MVT::v32i1, dl,
- StoredVal->ops().slice(32, 32));
+ SDValue Hi =
+ DAG.getBuildVector(MVT::v32i1, dl, StoredVal->ops().slice(32, 32));
Hi = combinevXi1ConstantToInteger(Hi, DAG);
SDValue Ptr0 = St->getBasePtr();
@@ -53899,9 +53942,9 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
StoredVal.hasOneUse() &&
TLI.isTruncStoreLegal(StoredVal.getOperand(0).getValueType(), VT)) {
bool IsSigned = StoredVal.getOpcode() == X86ISD::VTRUNCS;
- return EmitTruncSStore(IsSigned, St->getChain(),
- dl, StoredVal.getOperand(0), St->getBasePtr(),
- VT, St->getMemOperand(), DAG);
+ return EmitTruncSStore(IsSigned, St->getChain(), dl,
+ StoredVal.getOperand(0), St->getBasePtr(), VT,
+ St->getMemOperand(), DAG);
}
// Try to fold a extract_element(VTRUNC) pattern into a truncating store.
@@ -53940,14 +53983,14 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
if (St->isTruncatingStore() && VT.isVector()) {
if (TLI.isTruncStoreLegal(VT, StVT)) {
if (SDValue Val = detectSSatPattern(St->getValue(), St->getMemoryVT()))
- return EmitTruncSStore(true /* Signed saturation */, St->getChain(),
- dl, Val, St->getBasePtr(),
- St->getMemoryVT(), St->getMemOperand(), DAG);
- if (SDValue Val = detectUSatPattern(St->getValue(), St->getMemoryVT(),
- DAG, dl))
+ return EmitTruncSStore(true /* Signed saturation */, St->getChain(), dl,
+ Val, St->getBasePtr(), St->getMemoryVT(),
+ St->getMemOperand(), DAG);
+ if (SDValue Val =
+ detectUSatPattern(St->getValue(), St->getMemoryVT(), DAG, dl))
return EmitTruncSStore(false /* Unsigned saturation */, St->getChain(),
- dl, Val, St->getBasePtr(),
- St->getMemoryVT(), St->getMemOperand(), DAG);
+ dl, Val, St->getBasePtr(), St->getMemoryVT(),
+ St->getMemOperand(), DAG);
}
return SDValue();
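// Hedged scalar references (not from the patch) for the per-element behaviour
// the saturating truncating stores above rely on: clamp to the destination
// range, then narrow.
#include <algorithm>
#include <cstdint>

static inline int16_t ssatTrunc32to16(int32_t X) {
  return static_cast<int16_t>(std::clamp(X, -32768, 32767));
}
static inline uint16_t usatTrunc32to16(uint32_t X) {
  return static_cast<uint16_t>(std::min<uint32_t>(X, 0xFFFFu));
}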
@@ -54757,8 +54800,7 @@ static SDValue combinePMULH(SDValue Src, EVT VT, const SDLoc &DL,
// (i16 (ssat (add (mul (zext (even elts (i8 A))), (sext (even elts (i8 B)))),
// (mul (zext (odd elts (i8 A)), (sext (odd elts (i8 B))))))))
static SDValue detectPMADDUBSW(SDValue In, EVT VT, SelectionDAG &DAG,
- const X86Subtarget &Subtarget,
- const SDLoc &DL) {
+ const X86Subtarget &Subtarget, const SDLoc &DL) {
using namespace SDPatternMatch;
if (!VT.isVector() || !Subtarget.hasSSSE3())
return SDValue();
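// Hedged scalar reference (not from the patch) for the PMADDUBSW pattern the
// comment above describes: unsigned bytes from one source multiplied by signed
// bytes from the other, adjacent products summed with signed 16-bit
// saturation.
#include <algorithm>
#include <cstdint>

static inline int16_t pmaddubswRef(uint8_t A0, uint8_t A1, int8_t B0,
                                   int8_t B1) {
  int32_t Sum = static_cast<int32_t>(A0) * B0 + static_cast<int32_t>(A1) * B1;
  return static_cast<int16_t>(std::clamp(Sum, -32768, 32767));
}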
@@ -54832,8 +54874,8 @@ static SDValue detectPMADDUBSW(SDValue In, EVT VT, SelectionDAG &DAG,
std::swap(IdxN01, IdxN11);
}
// N0 indices be the even element. N1 indices must be the next odd element.
- if (IdxN00 != 2 * i || IdxN10 != 2 * i + 1 ||
- IdxN01 != 2 * i || IdxN11 != 2 * i + 1)
+ if (IdxN00 != 2 * i || IdxN10 != 2 * i + 1 || IdxN01 != 2 * i ||
+ IdxN11 != 2 * i + 1)
return SDValue();
SDValue N00In = N00Elt.getOperand(0);
SDValue N01In = N01Elt.getOperand(0);
@@ -54844,8 +54886,8 @@ static SDValue detectPMADDUBSW(SDValue In, EVT VT, SelectionDAG &DAG,
ZExtIn = N00In;
SExtIn = N01In;
}
- if (ZExtIn != N00In || SExtIn != N01In ||
- ZExtIn != N10In || SExtIn != N11In)
+ if (ZExtIn != N00In || SExtIn != N01In || ZExtIn != N10In ||
+ SExtIn != N11In)
return SDValue();
}
@@ -54865,14 +54907,13 @@ static SDValue detectPMADDUBSW(SDValue In, EVT VT, SelectionDAG &DAG,
// Shrink by adding truncate nodes and let DAGCombine fold with the
// sources.
EVT InVT = Ops[0].getValueType();
- assert(InVT.getScalarType() == MVT::i8 &&
- "Unexpected scalar element type");
+ assert(InVT.getScalarType() == MVT::i8 && "Unexpected scalar element type");
assert(InVT == Ops[1].getValueType() && "Operands' types mismatch");
EVT ResVT = EVT::getVectorVT(*DAG.getContext(), MVT::i16,
InVT.getVectorNumElements() / 2);
return DAG.getNode(X86ISD::VPMADDUBSW, DL, ResVT, Ops[0], Ops[1]);
};
- return SplitOpsAndApply(DAG, Subtarget, DL, VT, { ZExtIn, SExtIn },
+ return SplitOpsAndApply(DAG, Subtarget, DL, VT, {ZExtIn, SExtIn},
PMADDBuilder);
}
@@ -55061,7 +55102,7 @@ static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc,
bool NegRes) {
if (NegMul) {
switch (Opcode) {
- // clang-format off
+ // clang-format off
default: llvm_unreachable("Unexpected opcode");
case ISD::FMA: Opcode = X86ISD::FNMADD; break;
case ISD::STRICT_FMA: Opcode = X86ISD::STRICT_FNMADD; break;
@@ -55075,13 +55116,13 @@ static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc,
case X86ISD::FNMSUB: Opcode = X86ISD::FMSUB; break;
case X86ISD::STRICT_FNMSUB: Opcode = X86ISD::STRICT_FMSUB; break;
case X86ISD::FNMSUB_RND: Opcode = X86ISD::FMSUB_RND; break;
- // clang-format on
+ // clang-format on
}
}
if (NegAcc) {
switch (Opcode) {
- // clang-format off
+ // clang-format off
default: llvm_unreachable("Unexpected opcode");
case ISD::FMA: Opcode = X86ISD::FMSUB; break;
case ISD::STRICT_FMA: Opcode = X86ISD::STRICT_FMSUB; break;
@@ -55099,7 +55140,7 @@ static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc,
case X86ISD::FMADDSUB_RND: Opcode = X86ISD::FMSUBADD_RND; break;
case X86ISD::FMSUBADD: Opcode = X86ISD::FMADDSUB; break;
case X86ISD::FMSUBADD_RND: Opcode = X86ISD::FMADDSUB_RND; break;
- // clang-format on
+ // clang-format on
}
}
@@ -55116,7 +55157,7 @@ static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc,
case X86ISD::FNMADD_RND: Opcode = X86ISD::FMSUB_RND; break;
case X86ISD::FNMSUB: Opcode = ISD::FMA; break;
case X86ISD::FNMSUB_RND: Opcode = X86ISD::FMADD_RND; break;
- // clang-format on
+ // clang-format on
}
}
@@ -55248,13 +55289,13 @@ static SDValue lowerX86FPLogicOp(SDNode *N, SelectionDAG &DAG,
SDValue Op1 = DAG.getBitcast(IntVT, N->getOperand(1));
unsigned IntOpcode;
switch (N->getOpcode()) {
- // clang-format off
+ // clang-format off
default: llvm_unreachable("Unexpected FP logic op");
case X86ISD::FOR: IntOpcode = ISD::OR; break;
case X86ISD::FXOR: IntOpcode = ISD::XOR; break;
case X86ISD::FAND: IntOpcode = ISD::AND; break;
case X86ISD::FANDN: IntOpcode = X86ISD::ANDNP; break;
- // clang-format on
+ // clang-format on
}
SDValue IntOp = DAG.getNode(IntOpcode, dl, IntVT, Op0, Op1);
return DAG.getBitcast(VT, IntOp);
@@ -55606,13 +55647,18 @@ static SDValue combineFMinFMax(SDNode *N, SelectionDAG &DAG) {
// into FMINC and FMAXC, which are Commutative operations.
unsigned NewOp = 0;
switch (N->getOpcode()) {
- default: llvm_unreachable("unknown opcode");
- case X86ISD::FMIN: NewOp = X86ISD::FMINC; break;
- case X86ISD::FMAX: NewOp = X86ISD::FMAXC; break;
+ default:
+ llvm_unreachable("unknown opcode");
+ case X86ISD::FMIN:
+ NewOp = X86ISD::FMINC;
+ break;
+ case X86ISD::FMAX:
+ NewOp = X86ISD::FMAXC;
+ break;
}
- return DAG.getNode(NewOp, SDLoc(N), N->getValueType(0),
- N->getOperand(0), N->getOperand(1));
+ return DAG.getNode(NewOp, SDLoc(N), N->getValueType(0), N->getOperand(0),
+ N->getOperand(1));
}
static SDValue combineFMinNumFMaxNum(SDNode *N, SelectionDAG &DAG,
@@ -55658,8 +55704,8 @@ static SDValue combineFMinNumFMaxNum(SDNode *N, SelectionDAG &DAG,
if (!VT.isVector() && DAG.getMachineFunction().getFunction().hasMinSize())
return SDValue();
- EVT SetCCType = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
- VT);
+ EVT SetCCType =
+ TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
// There are 4 possibilities involving NaN inputs, and these are the required
// outputs:
@@ -55709,8 +55755,8 @@ static SDValue combineX86INT_TO_FP(SDNode *N, SelectionDAG &DAG,
MVT LoadVT = MVT::getVectorVT(MemVT, 128 / NumBits);
if (SDValue VZLoad = narrowLoadToVZLoad(LN, MemVT, LoadVT, DAG)) {
SDLoc dl(N);
- SDValue Convert = DAG.getNode(N->getOpcode(), dl, VT,
- DAG.getBitcast(InVT, VZLoad));
+ SDValue Convert =
+ DAG.getNode(N->getOpcode(), dl, VT, DAG.getBitcast(InVT, VZLoad));
DCI.CombineTo(N, Convert);
DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), VZLoad.getValue(1));
DCI.recursivelyDeleteUnusedNodes(LN);
@@ -56205,8 +56251,8 @@ static SDValue combineExtSetcc(SDNode *N, SelectionDAG &DAG,
// Only combine legal element types.
EVT SVT = VT.getVectorElementType();
- if (SVT != MVT::i8 && SVT != MVT::i16 && SVT != MVT::i32 &&
- SVT != MVT::i64 && SVT != MVT::f32 && SVT != MVT::f64)
+ if (SVT != MVT::i8 && SVT != MVT::i16 && SVT != MVT::i32 && SVT != MVT::i64 &&
+ SVT != MVT::f32 && SVT != MVT::f64)
return SDValue();
// We don't have CMPP Instruction for vxf16
@@ -56246,16 +56292,15 @@ static SDValue combineSext(SDNode *N, SelectionDAG &DAG,
SDLoc DL(N);
// (i32 (sext (i8 (x86isd::setcc_carry)))) -> (i32 (x86isd::setcc_carry))
- if (!DCI.isBeforeLegalizeOps() &&
- N0.getOpcode() == X86ISD::SETCC_CARRY) {
+ if (!DCI.isBeforeLegalizeOps() && N0.getOpcode() == X86ISD::SETCC_CARRY) {
SDValue Setcc = DAG.getNode(X86ISD::SETCC_CARRY, DL, VT, N0->getOperand(0),
- N0->getOperand(1));
+ N0->getOperand(1));
bool ReplaceOtherUses = !N0.hasOneUse();
DCI.CombineTo(N, Setcc);
// Replace other uses with a truncate of the widened setcc_carry.
if (ReplaceOtherUses) {
- SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
- N0.getValueType(), Setcc);
+ SDValue Trunc =
+ DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), Setcc);
DCI.CombineTo(N0.getNode(), Trunc);
}
@@ -56548,13 +56593,13 @@ static SDValue combineZext(SDNode *N, SelectionDAG &DAG,
if (!DCI.isBeforeLegalizeOps() && N->getOpcode() == ISD::ANY_EXTEND &&
N0.getOpcode() == X86ISD::SETCC_CARRY) {
SDValue Setcc = DAG.getNode(X86ISD::SETCC_CARRY, dl, VT, N0->getOperand(0),
- N0->getOperand(1));
+ N0->getOperand(1));
bool ReplaceOtherUses = !N0.hasOneUse();
DCI.CombineTo(N, Setcc);
// Replace other uses with a truncate of the widened setcc_carry.
if (ReplaceOtherUses) {
- SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
- N0.getValueType(), Setcc);
+ SDValue Trunc =
+ DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), Setcc);
DCI.CombineTo(N0.getNode(), Trunc);
}
@@ -56830,8 +56875,8 @@ static SDValue combineSetCC(SDNode *N, SelectionDAG &DAG,
if (CInt.isPowerOf2() && !CInt.isMinSignedValue()) {
SDValue BaseOp = LHS.getOperand(0);
SDValue SETCC0 = DAG.getSetCC(DL, VT, BaseOp, RHS, CC);
- SDValue SETCC1 = DAG.getSetCC(
- DL, VT, BaseOp, DAG.getConstant(-CInt, DL, OpVT), CC);
+ SDValue SETCC1 = DAG.getSetCC(DL, VT, BaseOp,
+ DAG.getConstant(-CInt, DL, OpVT), CC);
return DAG.getNode(CC == ISD::SETEQ ? ISD::OR : ISD::AND, DL, VT,
SETCC0, SETCC1);
}
@@ -57191,19 +57236,25 @@ static SDValue rebuildGatherScatter(MaskedGatherScatterSDNode *GorS,
SDLoc DL(GorS);
if (auto *Gather = dyn_cast<MaskedGatherSDNode>(GorS)) {
- SDValue Ops[] = { Gather->getChain(), Gather->getPassThru(),
- Gather->getMask(), Base, Index, Scale } ;
- return DAG.getMaskedGather(Gather->getVTList(),
- Gather->getMemoryVT(), DL, Ops,
- Gather->getMemOperand(),
+ SDValue Ops[] = {Gather->getChain(),
+ Gather->getPassThru(),
+ Gather->getMask(),
+ Base,
+ Index,
+ Scale};
+ return DAG.getMaskedGather(Gather->getVTList(), Gather->getMemoryVT(), DL,
+ Ops, Gather->getMemOperand(),
Gather->getIndexType(),
Gather->getExtensionType());
}
auto *Scatter = cast<MaskedScatterSDNode>(GorS);
- SDValue Ops[] = { Scatter->getChain(), Scatter->getValue(),
- Scatter->getMask(), Base, Index, Scale };
- return DAG.getMaskedScatter(Scatter->getVTList(),
- Scatter->getMemoryVT(), DL,
+ SDValue Ops[] = {Scatter->getChain(),
+ Scatter->getValue(),
+ Scatter->getMask(),
+ Base,
+ Index,
+ Scale};
+ return DAG.getMaskedScatter(Scatter->getVTList(), Scatter->getMemoryVT(), DL,
Ops, Scatter->getMemOperand(),
Scatter->getIndexType(),
Scatter->isTruncatingStore());
@@ -57434,8 +57485,8 @@ static SDValue combineVectorCompareAndMaskUnaryOp(SDNode *N,
SourceConst = DAG.getNode(N->getOpcode(), DL, VT, SDValue(BV, 0));
// The AND node needs bitcasts to/from an integer vector type around it.
SDValue MaskConst = DAG.getBitcast(IntVT, SourceConst);
- SDValue NewAnd = DAG.getNode(ISD::AND, DL, IntVT, Op0->getOperand(0),
- MaskConst);
+ SDValue NewAnd =
+ DAG.getNode(ISD::AND, DL, IntVT, Op0->getOperand(0), MaskConst);
SDValue Res = DAG.getBitcast(VT, NewAnd);
if (IsStrict)
return DAG.getMergeValues({Res, SourceConst.getValue(1)}, DL);
@@ -57621,8 +57672,8 @@ static SDValue combineSIntToFP(SDNode *N, SelectionDAG &DAG,
// use CVTSI2P.
assert(InVT == MVT::v2i64 && "Unexpected VT!");
SDValue Cast = DAG.getBitcast(MVT::v4i32, Op0);
- SDValue Shuf = DAG.getVectorShuffle(MVT::v4i32, dl, Cast, Cast,
- { 0, 2, -1, -1 });
+ SDValue Shuf =
+ DAG.getVectorShuffle(MVT::v4i32, dl, Cast, Cast, {0, 2, -1, -1});
if (IsStrict)
return DAG.getNode(X86ISD::STRICT_CVTSI2P, dl, {VT, MVT::Other},
{N->getOperand(0), Shuf});
@@ -57757,7 +57808,7 @@ static bool needCarryOrOverflowFlag(SDValue Flags) {
}
switch (CC) {
- // clang-format off
+ // clang-format off
default: break;
case X86::COND_A: case X86::COND_AE:
case X86::COND_B: case X86::COND_BE:
@@ -57765,7 +57816,7 @@ static bool needCarryOrOverflowFlag(SDValue Flags) {
case X86::COND_G: case X86::COND_GE:
case X86::COND_L: case X86::COND_LE:
return true;
- // clang-format on
+ // clang-format on
}
}
@@ -57901,11 +57952,12 @@ static SDValue combineCMP(SDNode *N, SelectionDAG &DAG,
// After this the truncate and arithmetic op must have a single use.
if (!Trunc.hasOneUse() || !Op.hasOneUse())
- return SDValue();
+ return SDValue();
unsigned NewOpc;
switch (Op.getOpcode()) {
- default: return SDValue();
+ default:
+ return SDValue();
case ISD::AND:
// Skip and with constant. We have special handling for and with immediate
// during isel to generate test instructions.
@@ -57913,8 +57965,12 @@ static SDValue combineCMP(SDNode *N, SelectionDAG &DAG,
return SDValue();
NewOpc = X86ISD::AND;
break;
- case ISD::OR: NewOpc = X86ISD::OR; break;
- case ISD::XOR: NewOpc = X86ISD::XOR; break;
+ case ISD::OR:
+ NewOpc = X86ISD::OR;
+ break;
+ case ISD::XOR:
+ NewOpc = X86ISD::XOR;
+ break;
case ISD::ADD:
// If the carry or overflow flag is used, we can't truncate.
if (needCarryOrOverflowFlag(SDValue(N, 0)))
@@ -58091,9 +58147,8 @@ static SDValue combineADC(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
-static SDValue matchPMADDWD(SelectionDAG &DAG, SDNode *N,
- const SDLoc &DL, EVT VT,
- const X86Subtarget &Subtarget) {
+static SDValue matchPMADDWD(SelectionDAG &DAG, SDNode *N, const SDLoc &DL,
+ EVT VT, const X86Subtarget &Subtarget) {
using namespace SDPatternMatch;
// Example of pattern we try to detect:
@@ -58201,9 +58256,8 @@ static SDValue matchPMADDWD(SelectionDAG &DAG, SDNode *N,
// Attempt to turn this pattern into PMADDWD.
// (add (mul (sext (build_vector)), (sext (build_vector))),
// (mul (sext (build_vector)), (sext (build_vector)))
-static SDValue matchPMADDWD_2(SelectionDAG &DAG, SDNode *N,
- const SDLoc &DL, EVT VT,
- const X86Subtarget &Subtarget) {
+static SDValue matchPMADDWD_2(SelectionDAG &DAG, SDNode *N, const SDLoc &DL,
+ EVT VT, const X86Subtarget &Subtarget) {
using namespace SDPatternMatch;
if (!Subtarget.hasSSE2())
@@ -58299,7 +58353,7 @@ static SDValue matchPMADDWD_2(SelectionDAG &DAG, SDNode *N,
// If the output is narrower than an input, extract the low part of the input
// vector.
EVT OutVT16 = EVT::getVectorVT(*DAG.getContext(), MVT::i16,
- VT.getVectorNumElements() * 2);
+ VT.getVectorNumElements() * 2);
if (OutVT16.bitsLT(In0.getValueType())) {
In0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, OutVT16, In0,
DAG.getVectorIdxConstant(0, DL));
@@ -58308,8 +58362,7 @@ static SDValue matchPMADDWD_2(SelectionDAG &DAG, SDNode *N,
In1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, OutVT16, In1,
DAG.getVectorIdxConstant(0, DL));
}
- return SplitOpsAndApply(DAG, Subtarget, DL, VT, { In0, In1 },
- PMADDBuilder);
+ return SplitOpsAndApply(DAG, Subtarget, DL, VT, {In0, In1}, PMADDBuilder);
}
// ADD(VPMADDWD(X,Y),VPMADDWD(Z,W)) -> VPMADDWD(SHUFFLE(X,Z), SHUFFLE(Y,W))
@@ -59278,8 +59331,8 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
unsigned Imm1 = Ops[1].getConstantOperandVal(2);
// TODO: Handle zero'd subvectors.
if ((Imm0 & 0x88) == 0 && (Imm1 & 0x88) == 0) {
- int Mask[4] = {(int)(Imm0 & 0x03), (int)((Imm0 >> 4) & 0x3), (int)(Imm1 & 0x03),
- (int)((Imm1 >> 4) & 0x3)};
+ int Mask[4] = {(int)(Imm0 & 0x03), (int)((Imm0 >> 4) & 0x3),
+ (int)(Imm1 & 0x03), (int)((Imm1 >> 4) & 0x3)};
MVT ShuffleVT = VT.isFloatingPoint() ? MVT::v8f64 : MVT::v8i64;
SDValue LHS = concatSubVectors(Ops[0].getOperand(0),
Ops[0].getOperand(1), DAG, DL);
@@ -59469,8 +59522,7 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
break;
}
- ISD::CondCode ICC =
- Opcode == X86ISD::PCMPEQ ? ISD::SETEQ : ISD::SETGT;
+ ISD::CondCode ICC = Opcode == X86ISD::PCMPEQ ? ISD::SETEQ : ISD::SETGT;
ISD::CondCode FCC =
Opcode == X86ISD::PCMPEQ ? ISD::SETOEQ : ISD::SETOGT;
@@ -59875,7 +59927,8 @@ static SDValue combineCONCAT_VECTORS(SDNode *N, SelectionDAG &DAG,
APInt Constant = APInt::getZero(VT.getSizeInBits());
for (unsigned I = 0, E = Ops.size(); I != E; ++I) {
auto *C = dyn_cast<ConstantSDNode>(peekThroughBitcasts(Ops[I]));
- if (!C) break;
+ if (!C)
+ break;
Constant.insertBits(C->getAPIntValue(), I * SubSizeInBits);
if (I == (E - 1)) {
EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
@@ -59959,9 +60012,9 @@ static SDValue combineINSERT_SUBVECTOR(SDNode *N, SelectionDAG &DAG,
ISD::isBuildVectorAllZeros(Ins.getOperand(0).getNode()) &&
Ins.getOperand(1).getValueSizeInBits().getFixedValue() <=
SubVecVT.getFixedSizeInBits())
- return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, OpVT,
- getZeroVector(OpVT, Subtarget, DAG, dl),
- Ins.getOperand(1), N->getOperand(2));
+ return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, OpVT,
+ getZeroVector(OpVT, Subtarget, DAG, dl),
+ Ins.getOperand(1), N->getOperand(2));
}
}
@@ -60651,7 +60704,7 @@ static SDValue combinePMULDQ(SDNode *N, SelectionDAG &DAG,
LHS.getOperand(0).getValueType() == MVT::v4i32) {
SDLoc dl(N);
LHS = DAG.getVectorShuffle(MVT::v4i32, dl, LHS.getOperand(0),
- LHS.getOperand(0), { 0, -1, 1, -1 });
+ LHS.getOperand(0), {0, -1, 1, -1});
LHS = DAG.getBitcast(MVT::v2i64, LHS);
return DAG.getNode(N->getOpcode(), dl, MVT::v2i64, LHS, RHS);
}
@@ -60661,7 +60714,7 @@ static SDValue combinePMULDQ(SDNode *N, SelectionDAG &DAG,
RHS.getOperand(0).getValueType() == MVT::v4i32) {
SDLoc dl(N);
RHS = DAG.getVectorShuffle(MVT::v4i32, dl, RHS.getOperand(0),
- RHS.getOperand(0), { 0, -1, 1, -1 });
+ RHS.getOperand(0), {0, -1, 1, -1});
RHS = DAG.getBitcast(MVT::v2i64, RHS);
return DAG.getNode(N->getOpcode(), dl, MVT::v2i64, LHS, RHS);
}
@@ -60932,16 +60985,16 @@ static SDValue combineFP_EXTEND(SDNode *N, SelectionDAG &DAG,
// Widen to at least 8 input elements.
if (NumElts < 8) {
unsigned NumConcats = 8 / NumElts;
- SDValue Fill = NumElts == 4 ? DAG.getUNDEF(IntVT)
- : DAG.getConstant(0, dl, IntVT);
+ SDValue Fill =
+ NumElts == 4 ? DAG.getUNDEF(IntVT) : DAG.getConstant(0, dl, IntVT);
SmallVector<SDValue, 4> Ops(NumConcats, Fill);
Ops[0] = Src;
Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, Ops);
}
// Destination is vXf32 with at least 4 elements.
- EVT CvtVT = EVT::getVectorVT(*DAG.getContext(), MVT::f32,
- std::max(4U, NumElts));
+ EVT CvtVT =
+ EVT::getVectorVT(*DAG.getContext(), MVT::f32, std::max(4U, NumElts));
SDValue Cvt, Chain;
if (IsStrict) {
Cvt = DAG.getNode(X86ISD::STRICT_CVTPH2PS, dl, {CvtVT, MVT::Other},
@@ -61211,7 +61264,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
switch (N->getOpcode()) {
- // clang-format off
+ // clang-format off
default: break;
case ISD::SCALAR_TO_VECTOR:
return combineSCALAR_TO_VECTOR(N, DAG, Subtarget);
@@ -61566,7 +61619,8 @@ bool X86TargetLowering::IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const {
bool Commute = false;
switch (Op.getOpcode()) {
- default: return false;
+ default:
+ return false;
case ISD::SIGN_EXTEND:
case ISD::ZERO_EXTEND:
case ISD::ANY_EXTEND:
@@ -61606,8 +61660,7 @@ bool X86TargetLowering::IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const {
((Commute && !isa<ConstantSDNode>(N1)) ||
(Op.getOpcode() != ISD::MUL && IsFoldableRMW(N0, Op))))
return false;
- if (IsFoldableAtomicRMW(N0, Op) ||
- (Commute && IsFoldableAtomicRMW(N1, Op)))
+ if (IsFoldableAtomicRMW(N0, Op) || (Commute && IsFoldableAtomicRMW(N1, Op)))
return false;
}
}
@@ -61694,8 +61747,7 @@ X86TargetLowering::getConstraintType(StringRef Constraint) const {
default:
break;
}
- }
- else if (Constraint.size() == 2) {
+ } else if (Constraint.size() == 2) {
switch (Constraint[0]) {
default:
break;
@@ -61884,8 +61936,7 @@ X86TargetLowering::getSingleConstraintMatchWeight(
/// Try to replace an X constraint, which matches anything, with another that
/// has more specific requirements based on the type of the corresponding
/// operand.
-const char *X86TargetLowering::
-LowerXConstraint(EVT ConstraintVT) const {
+const char *X86TargetLowering::LowerXConstraint(EVT ConstraintVT) const {
// FP X constraints get lowered to SSE1/2 registers if available, otherwise
// 'f' like normal targets.
if (ConstraintVT.isFloatingPoint()) {
@@ -61931,7 +61982,8 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
SDValue Result;
char ConstraintLetter = Constraint[0];
switch (ConstraintLetter) {
- default: break;
+ default:
+ break;
case 'I':
if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
if (C->getZExtValue() <= 31) {
@@ -62005,8 +62057,8 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
Result = DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op), MVT::i64);
break;
}
- // FIXME gcc accepts some relocatable values here too, but only in certain
- // memory models; it's complicated.
+ // FIXME gcc accepts some relocatable values here too, but only in certain
+ // memory models; it's complicated.
}
return;
}
@@ -62049,8 +62101,8 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
if (auto *CST = dyn_cast<ConstantSDNode>(Op)) {
bool IsBool = CST->getConstantIntValue()->getBitWidth() == 1;
BooleanContent BCont = getBooleanContents(MVT::i64);
- ISD::NodeType ExtOpc = IsBool ? getExtendForContent(BCont)
- : ISD::SIGN_EXTEND;
+ ISD::NodeType ExtOpc =
+ IsBool ? getExtendForContent(BCont) : ISD::SIGN_EXTEND;
int64_t ExtVal = ExtOpc == ISD::ZERO_EXTEND ? CST->getZExtValue()
: CST->getSExtValue();
Result = DAG.getTargetConstant(ExtVal, SDLoc(Op), MVT::i64);
@@ -62129,7 +62181,8 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
if (Constraint.size() == 1) {
// GCC Constraint Letters
switch (Constraint[0]) {
- default: break;
+ default:
+ break;
// 'A' means [ER]AX + [ER]DX.
case 'A':
if (Subtarget.is64Bit())
@@ -62157,7 +62210,7 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
return std::make_pair(0U, &X86::VK64RegClass);
}
break;
- case 'q': // GENERAL_REGS in 64-bit mode, Q_REGS in 32-bit mode.
+ case 'q': // GENERAL_REGS in 64-bit mode, Q_REGS in 32-bit mode.
if (Subtarget.is64Bit()) {
if (VT == MVT::i8 || VT == MVT::i1)
return std::make_pair(0U, useEGPRInlineAsm(Subtarget)
@@ -62179,7 +62232,7 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
}
[[fallthrough]];
// 32-bit fallthrough
- case 'Q': // Q_REGS
+ case 'Q': // Q_REGS
if (VT == MVT::i8 || VT == MVT::i1)
return std::make_pair(0U, &X86::GR8_ABCD_LRegClass);
if (VT == MVT::i16)
@@ -62190,8 +62243,8 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
if (VT != MVT::f80 && !VT.isVector())
return std::make_pair(0U, &X86::GR64_ABCDRegClass);
break;
- case 'r': // GENERAL_REGS
- case 'l': // INDEX_REGS
+ case 'r': // GENERAL_REGS
+ case 'l': // INDEX_REGS
if (VT == MVT::i8 || VT == MVT::i1)
return std::make_pair(0U, useEGPRInlineAsm(Subtarget)
? &X86::GR8RegClass
@@ -62210,7 +62263,7 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
? &X86::GR64RegClass
: &X86::GR64_NOREX2RegClass);
break;
- case 'R': // LEGACY_REGS
+ case 'R': // LEGACY_REGS
if (VT == MVT::i8 || VT == MVT::i1)
return std::make_pair(0U, &X86::GR8_NOREXRegClass);
if (VT == MVT::i16)
@@ -62221,7 +62274,7 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
if (VT != MVT::f80 && !VT.isVector())
return std::make_pair(0U, &X86::GR64_NOREXRegClass);
break;
- case 'f': // FP Stack registers.
+ case 'f': // FP Stack registers.
// If SSE is enabled for this VT, use f80 to ensure the isel moves the
// value to the correct fpstack register class.
if (VT == MVT::f32 && !isScalarFPTypeInSSEReg(VT))
@@ -62231,16 +62284,19 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
if (VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f80)
return std::make_pair(0U, &X86::RFP80RegClass);
break;
- case 'y': // MMX_REGS if MMX allowed.
- if (!Subtarget.hasMMX()) break;
+ case 'y': // MMX_REGS if MMX allowed.
+ if (!Subtarget.hasMMX())
+ break;
return std::make_pair(0U, &X86::VR64RegClass);
case 'v':
- case 'x': // SSE_REGS if SSE1 allowed or AVX_REGS if AVX allowed
- if (!Subtarget.hasSSE1()) break;
+ case 'x': // SSE_REGS if SSE1 allowed or AVX_REGS if AVX allowed
+ if (!Subtarget.hasSSE1())
+ break;
bool VConstraint = (Constraint[0] == 'v');
switch (VT.SimpleTy) {
- default: break;
+ default:
+ break;
// Scalar SSE types.
case MVT::f16:
if (VConstraint && Subtarget.hasFP16())
@@ -62331,7 +62387,8 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
case MVT::v16f32:
case MVT::v16i32:
case MVT::v8i64:
- if (!Subtarget.hasAVX512()) break;
+ if (!Subtarget.hasAVX512())
+ break;
if (VConstraint)
return std::make_pair(0U, &X86::VR512RegClass);
return std::make_pair(0U, &X86::VR512_0_15RegClass);
@@ -62347,12 +62404,15 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
case '2':
return getRegForInlineAsmConstraint(TRI, "x", VT);
case 'm':
- if (!Subtarget.hasMMX()) break;
+ if (!Subtarget.hasMMX())
+ break;
return std::make_pair(0U, &X86::VR64RegClass);
case 'z':
- if (!Subtarget.hasSSE1()) break;
+ if (!Subtarget.hasSSE1())
+ break;
switch (VT.SimpleTy) {
- default: break;
+ default:
+ break;
// Scalar SSE types.
case MVT::f16:
if (!Subtarget.hasFP16())
@@ -62467,14 +62527,15 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
// Use the default implementation in TargetLowering to convert the register
// constraint into a member of a register class.
- std::pair<Register, const TargetRegisterClass*> Res;
+ std::pair<Register, const TargetRegisterClass *> Res;
Res = TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
// Not found as a standard register?
if (!Res.second) {
// Only match x87 registers if the VT is one SelectionDAGBuilder can convert
// to/from f80.
- if (VT == MVT::Other || VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f80) {
+ if (VT == MVT::Other || VT == MVT::f32 || VT == MVT::f64 ||
+ VT == MVT::f80) {
// Map st(0) -> st(7) -> ST0
if (Constraint.size() == 7 && Constraint[0] == '{' &&
tolower(Constraint[1]) == 's' && tolower(Constraint[2]) == 't' &&
@@ -62532,7 +62593,7 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
// turn into {ax},{dx}.
// MVT::Other is used to specify clobber names.
if (TRI->isTypeLegalForClass(*Res.second, VT) || VT == MVT::Other)
- return Res; // Correct type already, nothing to do.
+ return Res; // Correct type already, nothing to do.
// Get a matching integer of the correct size. i.e. "ax" with MVT::32 should
// return "eax". This should even work for things like getting 64bit integer
@@ -62544,7 +62605,8 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
// Therefore, use a helper method.
if (isGRClass(*Class)) {
unsigned Size = VT.getSizeInBits();
- if (Size == 1) Size = 8;
+ if (Size == 1)
+ Size = 8;
if (Size != 8 && Size != 16 && Size != 32 && Size != 64)
return std::make_pair(0, nullptr);
Register DestReg = getX86SubSuperRegister(Res.first, Size);
@@ -62552,9 +62614,11 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
bool is64Bit = Subtarget.is64Bit();
const TargetRegisterClass *RC =
Size == 8 ? (is64Bit ? &X86::GR8RegClass : &X86::GR8_NOREXRegClass)
- : Size == 16 ? (is64Bit ? &X86::GR16RegClass : &X86::GR16_NOREXRegClass)
- : Size == 32 ? (is64Bit ? &X86::GR32RegClass : &X86::GR32_NOREXRegClass)
- : /*Size == 64*/ (is64Bit ? &X86::GR64RegClass : nullptr);
+ : Size == 16
+ ? (is64Bit ? &X86::GR16RegClass : &X86::GR16_NOREXRegClass)
+ : Size == 32
+ ? (is64Bit ? &X86::GR32RegClass : &X86::GR32_NOREXRegClass)
+ : /*Size == 64*/ (is64Bit ? &X86::GR64RegClass : nullptr);
if (Size == 64 && !is64Bit) {
// Model GCC's behavior here and select a fixed pair of 32-bit
// registers.
@@ -62806,8 +62870,7 @@ X86TargetLowering::getStackProbeSymbolName(const MachineFunction &MF) const {
return Subtarget.isTargetCygMing() ? "_alloca" : "_chkstk";
}
-unsigned
-X86TargetLowering::getStackProbeSize(const MachineFunction &MF) const {
+unsigned X86TargetLowering::getStackProbeSize(const MachineFunction &MF) const {
// The default stack probe size is 4096 if the function has no stackprobesize
// attribute.
return MF.getFunction().getFnAttributeAsParsedInteger("stack-probe-size",