[llvm] [NVPTX] Propagate ISD::TRUNCATE to operands to reduce register pressure (PR #98666)
Justin Fargnoli via llvm-commits
llvm-commits at lists.llvm.org
Fri Jul 12 10:34:56 PDT 2024
https://github.com/justinfargnoli created https://github.com/llvm/llvm-project/pull/98666
None
>From 9342d1d71538303603e9386e8f0aa5283e518c75 Mon Sep 17 00:00:00 2001
From: Justin Fargnoli <jfargnoli at nvidia.com>
Date: Sun, 30 Jun 2024 18:50:30 -0700
Subject: [PATCH 1/8] Initial commit
---
llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp | 54 ++++++++++++++++++++-
llvm/test/CodeGen/NVPTX/combine-truncate.ll | 10 ++++
2 files changed, 63 insertions(+), 1 deletion(-)
create mode 100644 llvm/test/CodeGen/NVPTX/combine-truncate.ll
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index 476a532db0a37..e2a5aef6ddff6 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -725,7 +725,7 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
// We have some custom DAG combine patterns for these nodes
setTargetDAGCombine({ISD::ADD, ISD::AND, ISD::EXTRACT_VECTOR_ELT, ISD::FADD,
ISD::LOAD, ISD::MUL, ISD::SHL, ISD::SREM, ISD::UREM,
- ISD::VSELECT});
+ ISD::TRUNCATE, ISD::VSELECT});
// setcc for f16x2 and bf16x2 needs special handling to prevent
// legalizer's attempt to scalarize it due to v2i1 not being legal.
@@ -5541,6 +5541,52 @@ static SDValue PerformREMCombine(SDNode *N,
return SDValue();
}
+// truncate (logic_op x, y) --> logic_op (truncate x), (truncate y)
+// This will reduce register pressure.
+static SDValue PerformTruncCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ if (DCI.isBeforeLegalizeOps()) {
+ SDValue LogicalOp = N->getOperand(0);
+ switch (LogicalOp.getOpcode()) {
+ default:
+ break;
+ case ISD::ADD:
+ case ISD::SUB:
+ case ISD::MUL:
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR: {
+ EVT VT = N->getValueType(0);
+ EVT LogicalVT = LogicalOp.getValueType();
+ if (VT == MVT::i32 && LogicalVT == MVT::i64) {
+ const TargetLowering &TLI = DCI.DAG.getTargetLoweringInfo();
+ if (VT.isScalarInteger() ||
+ TLI.isOperationLegal(LogicalOp.getOpcode(), VT)) {
+ if (all_of(LogicalOp.getNode()->uses(), [](SDNode *U) {
+ return U->isMachineOpcode() ?
+ U->getMachineOpcode() == NVPTX::CVT_u32_u64 :
+ U->getOpcode() == ISD::TRUNCATE;
+ })) {
+
+ SDLoc DL(N);
+ SDValue None = DCI.DAG.getTargetConstant(NVPTX::PTXCvtMode::NONE,
+ DL, MVT::i32);
+ SDNode *NarrowL = DCI.DAG.getMachineNode(
+ NVPTX::CVT_u32_u64, DL, VT, LogicalOp.getOperand(0), None);
+ SDNode *NarrowR = DCI.DAG.getMachineNode(
+ NVPTX::CVT_u32_u64, DL, VT, LogicalOp.getOperand(1), None);
+ return DCI.DAG.getNode(LogicalOp.getOpcode(), DL, VT,
+ SDValue(NarrowL, 0), SDValue(NarrowR, 0));
+ }
+ }
+ }
+ break;
+ }
+ }
+ }
+ return SDValue();
+}
+
enum OperandSignedness {
Signed = 0,
Unsigned,
@@ -5957,6 +6003,8 @@ SDValue NVPTXTargetLowering::PerformDAGCombine(SDNode *N,
case ISD::UREM:
case ISD::SREM:
return PerformREMCombine(N, DCI, OptLevel);
+ case ISD::TRUNCATE:
+ return PerformTruncCombine(N, DCI);
case ISD::SETCC:
return PerformSETCCCombine(N, DCI, STI.getSmVersion());
case ISD::LOAD:
@@ -5974,6 +6022,10 @@ SDValue NVPTXTargetLowering::PerformDAGCombine(SDNode *N,
case ISD::VSELECT:
return PerformVSELECTCombine(N, DCI);
}
+
+ if (N->isMachineOpcode() && N->getMachineOpcode() == NVPTX::CVT_u32_u64)
+ return PerformTruncCombine(N, DCI);
+
return SDValue();
}
diff --git a/llvm/test/CodeGen/NVPTX/combine-truncate.ll b/llvm/test/CodeGen/NVPTX/combine-truncate.ll
new file mode 100644
index 0000000000000..f43887064acc0
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/combine-truncate.ll
@@ -0,0 +1,10 @@
+; RUN: llc < %s | FileCheck %s
+
+define i32 @foo(i64 %a, i64 %b) {
+; CHECK: or.b32
+; CHECK-NOT: or.b64
+entry:
+ %or = or i64 %a, %b
+ %trunc = trunc i64 %or to i32
+ ret i32 %trunc
+}
>From 21710eed553b00ba0cf25781dee6956dd2b744b7 Mon Sep 17 00:00:00 2001
From: Justin Fargnoli <jfargnoli at nvidia.com>
Date: Mon, 8 Jul 2024 14:07:53 -0700
Subject: [PATCH 2/8] Prefer early return
---
llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp | 71 +++++++++++----------
1 file changed, 36 insertions(+), 35 deletions(-)
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index e2a5aef6ddff6..9ca524d84b66a 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -5545,45 +5545,46 @@ static SDValue PerformREMCombine(SDNode *N,
// This will reduce register pressure.
static SDValue PerformTruncCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI) {
- if (DCI.isBeforeLegalizeOps()) {
- SDValue LogicalOp = N->getOperand(0);
- switch (LogicalOp.getOpcode()) {
- default:
- break;
- case ISD::ADD:
- case ISD::SUB:
- case ISD::MUL:
- case ISD::AND:
- case ISD::OR:
- case ISD::XOR: {
- EVT VT = N->getValueType(0);
- EVT LogicalVT = LogicalOp.getValueType();
- if (VT == MVT::i32 && LogicalVT == MVT::i64) {
- const TargetLowering &TLI = DCI.DAG.getTargetLoweringInfo();
- if (VT.isScalarInteger() ||
- TLI.isOperationLegal(LogicalOp.getOpcode(), VT)) {
- if (all_of(LogicalOp.getNode()->uses(), [](SDNode *U) {
- return U->isMachineOpcode() ?
- U->getMachineOpcode() == NVPTX::CVT_u32_u64 :
- U->getOpcode() == ISD::TRUNCATE;
- })) {
-
- SDLoc DL(N);
- SDValue None = DCI.DAG.getTargetConstant(NVPTX::PTXCvtMode::NONE,
- DL, MVT::i32);
- SDNode *NarrowL = DCI.DAG.getMachineNode(
- NVPTX::CVT_u32_u64, DL, VT, LogicalOp.getOperand(0), None);
- SDNode *NarrowR = DCI.DAG.getMachineNode(
- NVPTX::CVT_u32_u64, DL, VT, LogicalOp.getOperand(1), None);
- return DCI.DAG.getNode(LogicalOp.getOpcode(), DL, VT,
- SDValue(NarrowL, 0), SDValue(NarrowR, 0));
- }
+ if (!DCI.isBeforeLegalizeOps())
+ return SDValue();
+
+ SDValue LogicalOp = N->getOperand(0);
+ switch (LogicalOp.getOpcode()) {
+ default:
+ break;
+ case ISD::ADD:
+ case ISD::SUB:
+ case ISD::MUL:
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR: {
+ EVT VT = N->getValueType(0);
+ EVT LogicalVT = LogicalOp.getValueType();
+ if (VT == MVT::i32 && LogicalVT == MVT::i64) {
+ const TargetLowering &TLI = DCI.DAG.getTargetLoweringInfo();
+ if (VT.isScalarInteger() ||
+ TLI.isOperationLegal(LogicalOp.getOpcode(), VT)) {
+ if (all_of(LogicalOp.getNode()->uses(), [](SDNode *U) {
+ return U->isMachineOpcode() ?
+ U->getMachineOpcode() == NVPTX::CVT_u32_u64 :
+ U->getOpcode() == ISD::TRUNCATE;
+ })) {
+ SDLoc DL(N);
+ SDValue None = DCI.DAG.getTargetConstant(NVPTX::PTXCvtMode::NONE,
+ DL, MVT::i32);
+ SDNode *NarrowL = DCI.DAG.getMachineNode(
+ NVPTX::CVT_u32_u64, DL, VT, LogicalOp.getOperand(0), None);
+ SDNode *NarrowR = DCI.DAG.getMachineNode(
+ NVPTX::CVT_u32_u64, DL, VT, LogicalOp.getOperand(1), None);
+ return DCI.DAG.getNode(LogicalOp.getOpcode(), DL, VT,
+ SDValue(NarrowL, 0), SDValue(NarrowR, 0));
}
}
- break;
- }
}
+ break;
}
+ }
+
return SDValue();
}
>From 505a38b47eab8137f3173b2f8a7aa6b3b79d1a9a Mon Sep 17 00:00:00 2001
From: Justin Fargnoli <jfargnoli at nvidia.com>
Date: Mon, 8 Jul 2024 14:08:15 -0700
Subject: [PATCH 3/8] clang-format
---
llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp | 18 +++++++++---------
1 file changed, 9 insertions(+), 9 deletions(-)
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index 9ca524d84b66a..32e1d366390da 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -5542,7 +5542,7 @@ static SDValue PerformREMCombine(SDNode *N,
}
// truncate (logic_op x, y) --> logic_op (truncate x), (truncate y)
-// This will reduce register pressure.
+// This will reduce register pressure.
static SDValue PerformTruncCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI) {
if (!DCI.isBeforeLegalizeOps())
@@ -5550,7 +5550,7 @@ static SDValue PerformTruncCombine(SDNode *N,
SDValue LogicalOp = N->getOperand(0);
switch (LogicalOp.getOpcode()) {
- default:
+ default:
break;
case ISD::ADD:
case ISD::SUB:
@@ -5565,19 +5565,19 @@ static SDValue PerformTruncCombine(SDNode *N,
if (VT.isScalarInteger() ||
TLI.isOperationLegal(LogicalOp.getOpcode(), VT)) {
if (all_of(LogicalOp.getNode()->uses(), [](SDNode *U) {
- return U->isMachineOpcode() ?
- U->getMachineOpcode() == NVPTX::CVT_u32_u64 :
- U->getOpcode() == ISD::TRUNCATE;
- })) {
+ return U->isMachineOpcode()
+ ? U->getMachineOpcode() == NVPTX::CVT_u32_u64
+ : U->getOpcode() == ISD::TRUNCATE;
+ })) {
SDLoc DL(N);
- SDValue None = DCI.DAG.getTargetConstant(NVPTX::PTXCvtMode::NONE,
- DL, MVT::i32);
+ SDValue None =
+ DCI.DAG.getTargetConstant(NVPTX::PTXCvtMode::NONE, DL, MVT::i32);
SDNode *NarrowL = DCI.DAG.getMachineNode(
NVPTX::CVT_u32_u64, DL, VT, LogicalOp.getOperand(0), None);
SDNode *NarrowR = DCI.DAG.getMachineNode(
NVPTX::CVT_u32_u64, DL, VT, LogicalOp.getOperand(1), None);
return DCI.DAG.getNode(LogicalOp.getOpcode(), DL, VT,
- SDValue(NarrowL, 0), SDValue(NarrowR, 0));
+ SDValue(NarrowL, 0), SDValue(NarrowR, 0));
}
}
}
>From 9fec6d8516f03eebb2a5899944866eee51093ddb Mon Sep 17 00:00:00 2001
From: Justin Fargnoli <jfargnoli at nvidia.com>
Date: Mon, 8 Jul 2024 14:17:55 -0700
Subject: [PATCH 4/8] Add negative test for `ISD::TRUNCATE`
---
llvm/test/CodeGen/NVPTX/combine-truncate.ll | 14 +++++++++++++-
1 file changed, 13 insertions(+), 1 deletion(-)
diff --git a/llvm/test/CodeGen/NVPTX/combine-truncate.ll b/llvm/test/CodeGen/NVPTX/combine-truncate.ll
index f43887064acc0..3d0827606dbc2 100644
--- a/llvm/test/CodeGen/NVPTX/combine-truncate.ll
+++ b/llvm/test/CodeGen/NVPTX/combine-truncate.ll
@@ -1,6 +1,7 @@
; RUN: llc < %s | FileCheck %s
-define i32 @foo(i64 %a, i64 %b) {
+define i32 @trunc(i64 %a, i64 %b) {
+; CHECK-LABEL: trunc
; CHECK: or.b32
; CHECK-NOT: or.b64
entry:
@@ -8,3 +9,14 @@ entry:
%trunc = trunc i64 %or to i32
ret i32 %trunc
}
+
+define i32 @trunc_not(i64 %a, i64 %b, ptr %p) {
+; CHECK-LABEL: trunc_not
+; CHECK: or.b64
+; CHECK-NOT: or.b32
+entry:
+ %or = or i64 %a, %b
+ %trunc = trunc i64 %or to i32
+ store i64 %or, ptr %p
+ ret i32 %trunc
+}
>From fba1675b4ca60835dd9695491277a763c1f584c0 Mon Sep 17 00:00:00 2001
From: Justin Fargnoli <jfargnoli at nvidia.com>
Date: Thu, 11 Jul 2024 19:42:56 -0700
Subject: [PATCH 5/8] Add cvt and cvt_not test
---
llvm/test/CodeGen/NVPTX/combine-truncate.ll | 90 ++++++++++++++++++---
1 file changed, 79 insertions(+), 11 deletions(-)
diff --git a/llvm/test/CodeGen/NVPTX/combine-truncate.ll b/llvm/test/CodeGen/NVPTX/combine-truncate.ll
index 3d0827606dbc2..1e0d63ca24880 100644
--- a/llvm/test/CodeGen/NVPTX/combine-truncate.ll
+++ b/llvm/test/CodeGen/NVPTX/combine-truncate.ll
@@ -1,22 +1,90 @@
-; RUN: llc < %s | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -march=nvptx64 | FileCheck %s
+; RUN: %if ptxas %{ llc < %s -march=nvptx64 | %ptxas-verify %}
+
+target triple = "nvptx64-nvidia-cuda"
define i32 @trunc(i64 %a, i64 %b) {
-; CHECK-LABEL: trunc
-; CHECK: or.b32
-; CHECK-NOT: or.b64
-entry:
+; CHECK-LABEL: trunc(
+; CHECK: {
+; CHECK-NEXT: .reg .b32 %r<4>;
+; CHECK-NEXT: .reg .b64 %rd<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.u64 %rd1, [trunc_param_0];
+; CHECK-NEXT: ld.param.u64 %rd2, [trunc_param_1];
+; CHECK-NEXT: cvt.u32.u64 %r1, %rd2;
+; CHECK-NEXT: cvt.u32.u64 %r2, %rd1;
+; CHECK-NEXT: or.b32 %r3, %r2, %r1;
+; CHECK-NEXT: st.param.b32 [func_retval0+0], %r3;
+; CHECK-NEXT: ret;
%or = or i64 %a, %b
%trunc = trunc i64 %or to i32
ret i32 %trunc
}
-define i32 @trunc_not(i64 %a, i64 %b, ptr %p) {
-; CHECK-LABEL: trunc_not
-; CHECK: or.b64
-; CHECK-NOT: or.b32
-entry:
+define i32 @trunc_not(i64 %a, i64 %b) {
+; CHECK-LABEL: trunc_not(
+; CHECK: {
+; CHECK-NEXT: .reg .b32 %r<2>;
+; CHECK-NEXT: .reg .b64 %rd<5>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.u64 %rd1, [trunc_not_param_0];
+; CHECK-NEXT: ld.param.u64 %rd2, [trunc_not_param_1];
+; CHECK-NEXT: or.b64 %rd3, %rd1, %rd2;
+; CHECK-NEXT: cvt.u32.u64 %r1, %rd3;
+; CHECK-NEXT: mov.u64 %rd4, 0;
+; CHECK-NEXT: st.u64 [%rd4], %rd3;
+; CHECK-NEXT: st.param.b32 [func_retval0+0], %r1;
+; CHECK-NEXT: ret;
%or = or i64 %a, %b
%trunc = trunc i64 %or to i32
- store i64 %or, ptr %p
+ store i64 %or, ptr null
+ ret i32 %trunc
+}
+
+define i32 @trunc_cvt(i64 %a, i64 %b) {
+; CHECK-LABEL: trunc_cvt(
+; CHECK: {
+; CHECK-NEXT: .reg .b32 %r<5>;
+; CHECK-NEXT: .reg .b64 %rd<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.u64 %rd1, [trunc_cvt_param_0];
+; CHECK-NEXT: ld.param.u64 %rd2, [trunc_cvt_param_1];
+; CHECK-NEXT: cvt.u32.u64 %r1, %rd2;
+; CHECK-NEXT: cvt.u32.u64 %r2, %rd1;
+; CHECK-NEXT: add.s32 %r3, %r2, %r1;
+; CHECK-NEXT: or.b32 %r4, %r3, %r2;
+; CHECK-NEXT: st.param.b32 [func_retval0+0], %r4;
+; CHECK-NEXT: ret;
+ %add = add i64 %a, %b
+ %or = or i64 %add, %a
+ %trunc = trunc i64 %or to i32
+ ret i32 %trunc
+}
+
+define i32 @trunc_cvt_not(i64 %a, i64 %b) {
+; CHECK-LABEL: trunc_cvt_not(
+; CHECK: {
+; CHECK-NEXT: .reg .b32 %r<5>;
+; CHECK-NEXT: .reg .b64 %rd<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.u64 %rd1, [trunc_cvt_not_param_0];
+; CHECK-NEXT: ld.param.u64 %rd2, [trunc_cvt_not_param_1];
+; CHECK-NEXT: mov.u64 %rd3, 0;
+; CHECK-NEXT: st.u64 [%rd3], %rd2;
+; CHECK-NEXT: cvt.u32.u64 %r1, %rd2;
+; CHECK-NEXT: cvt.u32.u64 %r2, %rd1;
+; CHECK-NEXT: add.s32 %r3, %r2, %r1;
+; CHECK-NEXT: or.b32 %r4, %r3, %r2;
+; CHECK-NEXT: st.param.b32 [func_retval0+0], %r4;
+; CHECK-NEXT: ret;
+ %add = add i64 %a, %b
+ store i64 %b, ptr null
+ %or = or i64 %add, %a
+ %trunc = trunc i64 %or to i32
ret i32 %trunc
}
>From 8c167f12fc64967e65fcc462f80dc05d8636339b Mon Sep 17 00:00:00 2001
From: Justin Fargnoli <jfargnoli at nvidia.com>
Date: Thu, 11 Jul 2024 19:52:32 -0700
Subject: [PATCH 6/8] Prefer early return
---
llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp | 44 ++++++++++-----------
1 file changed, 22 insertions(+), 22 deletions(-)
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index 32e1d366390da..c02ea844353c0 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -5560,28 +5560,28 @@ static SDValue PerformTruncCombine(SDNode *N,
case ISD::XOR: {
EVT VT = N->getValueType(0);
EVT LogicalVT = LogicalOp.getValueType();
- if (VT == MVT::i32 && LogicalVT == MVT::i64) {
- const TargetLowering &TLI = DCI.DAG.getTargetLoweringInfo();
- if (VT.isScalarInteger() ||
- TLI.isOperationLegal(LogicalOp.getOpcode(), VT)) {
- if (all_of(LogicalOp.getNode()->uses(), [](SDNode *U) {
- return U->isMachineOpcode()
- ? U->getMachineOpcode() == NVPTX::CVT_u32_u64
- : U->getOpcode() == ISD::TRUNCATE;
- })) {
- SDLoc DL(N);
- SDValue None =
- DCI.DAG.getTargetConstant(NVPTX::PTXCvtMode::NONE, DL, MVT::i32);
- SDNode *NarrowL = DCI.DAG.getMachineNode(
- NVPTX::CVT_u32_u64, DL, VT, LogicalOp.getOperand(0), None);
- SDNode *NarrowR = DCI.DAG.getMachineNode(
- NVPTX::CVT_u32_u64, DL, VT, LogicalOp.getOperand(1), None);
- return DCI.DAG.getNode(LogicalOp.getOpcode(), DL, VT,
- SDValue(NarrowL, 0), SDValue(NarrowR, 0));
- }
- }
- }
- break;
+ if (VT != MVT::i32 || LogicalVT != MVT::i64)
+ break;
+ const TargetLowering &TLI = DCI.DAG.getTargetLoweringInfo();
+ if (!VT.isScalarInteger() &&
+ !TLI.isOperationLegal(LogicalOp.getOpcode(), VT))
+ break;
+ if (!all_of(LogicalOp.getNode()->uses(), [](SDNode *U) {
+ return U->isMachineOpcode()
+ ? U->getMachineOpcode() == NVPTX::CVT_u32_u64
+ : U->getOpcode() == ISD::TRUNCATE;
+ }))
+ break;
+
+ SDLoc DL(N);
+ SDValue CVTNone =
+ DCI.DAG.getTargetConstant(NVPTX::PTXCvtMode::NONE, DL, MVT::i32);
+ SDNode *NarrowL = DCI.DAG.getMachineNode(
+ NVPTX::CVT_u32_u64, DL, VT, LogicalOp.getOperand(0), CVTNone);
+ SDNode *NarrowR = DCI.DAG.getMachineNode(
+ NVPTX::CVT_u32_u64, DL, VT, LogicalOp.getOperand(1), CVTNone);
+ return DCI.DAG.getNode(LogicalOp.getOpcode(), DL, VT,
+ SDValue(NarrowL, 0), SDValue(NarrowR, 0));
}
}
>From 7813ca6a0e6c13aa96c97116fb21e97bf1845e6b Mon Sep 17 00:00:00 2001
From: Justin Fargnoli <jfargnoli at nvidia.com>
Date: Thu, 11 Jul 2024 19:53:08 -0700
Subject: [PATCH 7/8] clang-format
---
llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp | 20 ++++++++++----------
1 file changed, 10 insertions(+), 10 deletions(-)
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index c02ea844353c0..26729c7adb020 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -5560,28 +5560,28 @@ static SDValue PerformTruncCombine(SDNode *N,
case ISD::XOR: {
EVT VT = N->getValueType(0);
EVT LogicalVT = LogicalOp.getValueType();
- if (VT != MVT::i32 || LogicalVT != MVT::i64)
+ if (VT != MVT::i32 || LogicalVT != MVT::i64)
break;
const TargetLowering &TLI = DCI.DAG.getTargetLoweringInfo();
- if (!VT.isScalarInteger() &&
+ if (!VT.isScalarInteger() &&
!TLI.isOperationLegal(LogicalOp.getOpcode(), VT))
break;
if (!all_of(LogicalOp.getNode()->uses(), [](SDNode *U) {
return U->isMachineOpcode()
- ? U->getMachineOpcode() == NVPTX::CVT_u32_u64
- : U->getOpcode() == ISD::TRUNCATE;
+ ? U->getMachineOpcode() == NVPTX::CVT_u32_u64
+ : U->getOpcode() == ISD::TRUNCATE;
}))
break;
SDLoc DL(N);
SDValue CVTNone =
DCI.DAG.getTargetConstant(NVPTX::PTXCvtMode::NONE, DL, MVT::i32);
- SDNode *NarrowL = DCI.DAG.getMachineNode(
- NVPTX::CVT_u32_u64, DL, VT, LogicalOp.getOperand(0), CVTNone);
- SDNode *NarrowR = DCI.DAG.getMachineNode(
- NVPTX::CVT_u32_u64, DL, VT, LogicalOp.getOperand(1), CVTNone);
- return DCI.DAG.getNode(LogicalOp.getOpcode(), DL, VT,
- SDValue(NarrowL, 0), SDValue(NarrowR, 0));
+ SDNode *NarrowL = DCI.DAG.getMachineNode(NVPTX::CVT_u32_u64, DL, VT,
+ LogicalOp.getOperand(0), CVTNone);
+ SDNode *NarrowR = DCI.DAG.getMachineNode(NVPTX::CVT_u32_u64, DL, VT,
+ LogicalOp.getOperand(1), CVTNone);
+ return DCI.DAG.getNode(LogicalOp.getOpcode(), DL, VT, SDValue(NarrowL, 0),
+ SDValue(NarrowR, 0));
}
}
>From f2583eadc069619092217da5d1c0f8637b65254b Mon Sep 17 00:00:00 2001
From: Justin Fargnoli <jfargnoli at nvidia.com>
Date: Thu, 11 Jul 2024 19:55:20 -0700
Subject: [PATCH 8/8] Correct trunc_cvt_not test
---
llvm/test/CodeGen/NVPTX/combine-truncate.ll | 18 +++++++++---------
1 file changed, 9 insertions(+), 9 deletions(-)
diff --git a/llvm/test/CodeGen/NVPTX/combine-truncate.ll b/llvm/test/CodeGen/NVPTX/combine-truncate.ll
index 1e0d63ca24880..30e415ebe9527 100644
--- a/llvm/test/CodeGen/NVPTX/combine-truncate.ll
+++ b/llvm/test/CodeGen/NVPTX/combine-truncate.ll
@@ -68,22 +68,22 @@ define i32 @trunc_cvt(i64 %a, i64 %b) {
define i32 @trunc_cvt_not(i64 %a, i64 %b) {
; CHECK-LABEL: trunc_cvt_not(
; CHECK: {
-; CHECK-NEXT: .reg .b32 %r<5>;
-; CHECK-NEXT: .reg .b64 %rd<4>;
+; CHECK-NEXT: .reg .b32 %r<4>;
+; CHECK-NEXT: .reg .b64 %rd<5>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [trunc_cvt_not_param_0];
; CHECK-NEXT: ld.param.u64 %rd2, [trunc_cvt_not_param_1];
-; CHECK-NEXT: mov.u64 %rd3, 0;
-; CHECK-NEXT: st.u64 [%rd3], %rd2;
-; CHECK-NEXT: cvt.u32.u64 %r1, %rd2;
+; CHECK-NEXT: add.s64 %rd3, %rd1, %rd2;
+; CHECK-NEXT: mov.u64 %rd4, 0;
+; CHECK-NEXT: st.u64 [%rd4], %rd3;
+; CHECK-NEXT: cvt.u32.u64 %r1, %rd3;
; CHECK-NEXT: cvt.u32.u64 %r2, %rd1;
-; CHECK-NEXT: add.s32 %r3, %r2, %r1;
-; CHECK-NEXT: or.b32 %r4, %r3, %r2;
-; CHECK-NEXT: st.param.b32 [func_retval0+0], %r4;
+; CHECK-NEXT: or.b32 %r3, %r1, %r2;
+; CHECK-NEXT: st.param.b32 [func_retval0+0], %r3;
; CHECK-NEXT: ret;
%add = add i64 %a, %b
- store i64 %b, ptr null
+ store i64 %add, ptr null
%or = or i64 %add, %a
%trunc = trunc i64 %or to i32
ret i32 %trunc
More information about the llvm-commits mailing list