[llvm] [NVPTX] Remove redundant addressing mode instrs (PR #128044)
Alex MacLean via llvm-commits
llvm-commits at lists.llvm.org
Thu Feb 20 12:17:36 PST 2025
https://github.com/AlexMaclean updated https://github.com/llvm/llvm-project/pull/128044
>From 422065b9f6f9aa95409442dc8bc5368cea33569d Mon Sep 17 00:00:00 2001
From: Alex Maclean <amaclean at nvidia.com>
Date: Thu, 20 Feb 2025 02:04:32 +0000
Subject: [PATCH 1/2] [NVPTX] Remove redundant addressing mode instrs
---
llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp | 419 +++---------------
llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h | 6 +-
llvm/lib/Target/NVPTX/NVPTXInstrInfo.td | 114 -----
llvm/lib/Target/NVPTX/NVPTXIntrinsics.td | 30 --
.../Target/NVPTX/NVPTXReplaceImageHandles.cpp | 2 +-
.../NVPTX/expected-floating-point-literal.mir | 2 +-
.../floating-point-immediate-operands.mir | 8 +-
.../floating-point-invalid-type-error.mir | 2 +-
llvm/test/CodeGen/NVPTX/variadics-backend.ll | 36 +-
9 files changed, 78 insertions(+), 541 deletions(-)
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index e96c1758676a1..6e990058958e0 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -13,8 +13,10 @@
#include "NVPTXISelDAGToDAG.h"
#include "NVPTX.h"
#include "NVPTXUtilities.h"
+#include "llvm/ADT/APInt.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Instructions.h"
@@ -964,7 +966,6 @@ bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
// Create the machine instruction DAG
SDValue N1 = N->getOperand(1);
- SDValue Addr;
SDValue Offset, Base;
std::optional<unsigned> Opcode;
MVT::SimpleValueType TargetVT = LD->getSimpleValueType(0).SimpleTy;
@@ -974,23 +975,17 @@ bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
getI32Imm(VecType, DL), getI32Imm(FromType, DL),
getI32Imm(FromTypeWidth, DL)});
- if (SelectDirectAddr(N1, Addr)) {
- Opcode = pickOpcodeForVT(TargetVT, NVPTX::LD_i8_avar, NVPTX::LD_i16_avar,
- NVPTX::LD_i32_avar, NVPTX::LD_i64_avar,
- NVPTX::LD_f32_avar, NVPTX::LD_f64_avar);
- if (!Opcode)
- return false;
- Ops.append({Addr, Chain});
- } else if (PointerSize == 64 ? SelectADDRsi64(N1.getNode(), N1, Base, Offset)
- : SelectADDRsi(N1.getNode(), N1, Base, Offset)) {
+ if (SelectADDRsi(N1.getNode(), N1, Base, Offset)) {
Opcode = pickOpcodeForVT(TargetVT, NVPTX::LD_i8_asi, NVPTX::LD_i16_asi,
NVPTX::LD_i32_asi, NVPTX::LD_i64_asi,
NVPTX::LD_f32_asi, NVPTX::LD_f64_asi);
if (!Opcode)
return false;
Ops.append({Base, Offset, Chain});
- } else if (PointerSize == 64 ? SelectADDRri64(N1.getNode(), N1, Base, Offset)
- : SelectADDRri(N1.getNode(), N1, Base, Offset)) {
+ } else {
+ PointerSize == 64 ? SelectADDRri64(N1.getNode(), N1, Base, Offset)
+ : SelectADDRri(N1.getNode(), N1, Base, Offset);
+
if (PointerSize == 64)
Opcode =
pickOpcodeForVT(TargetVT, NVPTX::LD_i8_ari_64, NVPTX::LD_i16_ari_64,
@@ -1003,19 +998,6 @@ bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
if (!Opcode)
return false;
Ops.append({Base, Offset, Chain});
- } else {
- if (PointerSize == 64)
- Opcode =
- pickOpcodeForVT(TargetVT, NVPTX::LD_i8_areg_64, NVPTX::LD_i16_areg_64,
- NVPTX::LD_i32_areg_64, NVPTX::LD_i64_areg_64,
- NVPTX::LD_f32_areg_64, NVPTX::LD_f64_areg_64);
- else
- Opcode = pickOpcodeForVT(TargetVT, NVPTX::LD_i8_areg, NVPTX::LD_i16_areg,
- NVPTX::LD_i32_areg, NVPTX::LD_i64_areg,
- NVPTX::LD_f32_areg, NVPTX::LD_f64_areg);
- if (!Opcode)
- return false;
- Ops.append({N1, Chain});
}
SDNode *NVPTXLD =
@@ -1102,7 +1084,7 @@ bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {
}
SDValue Op1 = N->getOperand(1);
- SDValue Addr, Offset, Base;
+ SDValue Offset, Base;
std::optional<unsigned> Opcode;
SDNode *LD;
@@ -1111,29 +1093,7 @@ bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {
getI32Imm(VecType, DL), getI32Imm(FromType, DL),
getI32Imm(FromTypeWidth, DL)});
- if (SelectDirectAddr(Op1, Addr)) {
- switch (N->getOpcode()) {
- default:
- return false;
- case NVPTXISD::LoadV2:
- Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
- NVPTX::LDV_i8_v2_avar, NVPTX::LDV_i16_v2_avar,
- NVPTX::LDV_i32_v2_avar, NVPTX::LDV_i64_v2_avar,
- NVPTX::LDV_f32_v2_avar, NVPTX::LDV_f64_v2_avar);
- break;
- case NVPTXISD::LoadV4:
- Opcode =
- pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_avar,
- NVPTX::LDV_i16_v4_avar, NVPTX::LDV_i32_v4_avar,
- std::nullopt, NVPTX::LDV_f32_v4_avar, std::nullopt);
- break;
- }
- if (!Opcode)
- return false;
- Ops.append({Addr, Chain});
- } else if (PointerSize == 64
- ? SelectADDRsi64(Op1.getNode(), Op1, Base, Offset)
- : SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) {
+ if (SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) {
switch (N->getOpcode()) {
default:
return false;
@@ -1153,9 +1113,10 @@ bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {
if (!Opcode)
return false;
Ops.append({Base, Offset, Chain});
- } else if (PointerSize == 64
- ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
- : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
+ } else {
+ PointerSize == 64 ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
+ : SelectADDRri(Op1.getNode(), Op1, Base, Offset);
+
if (PointerSize == 64) {
switch (N->getOpcode()) {
default:
@@ -1195,47 +1156,6 @@ bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {
if (!Opcode)
return false;
Ops.append({Base, Offset, Chain});
- } else {
- if (PointerSize == 64) {
- switch (N->getOpcode()) {
- default:
- return false;
- case NVPTXISD::LoadV2:
- Opcode = pickOpcodeForVT(
- EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v2_areg_64,
- NVPTX::LDV_i16_v2_areg_64, NVPTX::LDV_i32_v2_areg_64,
- NVPTX::LDV_i64_v2_areg_64, NVPTX::LDV_f32_v2_areg_64,
- NVPTX::LDV_f64_v2_areg_64);
- break;
- case NVPTXISD::LoadV4:
- Opcode = pickOpcodeForVT(
- EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_areg_64,
- NVPTX::LDV_i16_v4_areg_64, NVPTX::LDV_i32_v4_areg_64, std::nullopt,
- NVPTX::LDV_f32_v4_areg_64, std::nullopt);
- break;
- }
- } else {
- switch (N->getOpcode()) {
- default:
- return false;
- case NVPTXISD::LoadV2:
- Opcode =
- pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v2_areg,
- NVPTX::LDV_i16_v2_areg, NVPTX::LDV_i32_v2_areg,
- NVPTX::LDV_i64_v2_areg, NVPTX::LDV_f32_v2_areg,
- NVPTX::LDV_f64_v2_areg);
- break;
- case NVPTXISD::LoadV4:
- Opcode =
- pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_areg,
- NVPTX::LDV_i16_v4_areg, NVPTX::LDV_i32_v4_areg,
- std::nullopt, NVPTX::LDV_f32_v4_areg, std::nullopt);
- break;
- }
- }
- if (!Opcode)
- return false;
- Ops.append({Op1, Chain});
}
LD = CurDAG->getMachineNode(*Opcode, DL, N->getVTList(), Ops);
@@ -1344,8 +1264,10 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
return false;
SDValue Ops[] = { Addr, Chain };
LD = CurDAG->getMachineNode(*Opcode, DL, InstVTList, Ops);
- } else if (TM.is64Bit() ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
- : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
+ } else {
+ TM.is64Bit() ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
+ : SelectADDRri(Op1.getNode(), Op1, Base, Offset);
+
if (TM.is64Bit()) {
switch (N->getOpcode()) {
default:
@@ -1457,122 +1379,6 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
return false;
SDValue Ops[] = {Base, Offset, Chain};
LD = CurDAG->getMachineNode(*Opcode, DL, InstVTList, Ops);
- } else {
- if (TM.is64Bit()) {
- switch (N->getOpcode()) {
- default:
- return false;
- case ISD::LOAD:
- Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
- NVPTX::INT_PTX_LDG_GLOBAL_i8areg64,
- NVPTX::INT_PTX_LDG_GLOBAL_i16areg64,
- NVPTX::INT_PTX_LDG_GLOBAL_i32areg64,
- NVPTX::INT_PTX_LDG_GLOBAL_i64areg64,
- NVPTX::INT_PTX_LDG_GLOBAL_f32areg64,
- NVPTX::INT_PTX_LDG_GLOBAL_f64areg64);
- break;
- case ISD::INTRINSIC_W_CHAIN:
- Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
- NVPTX::INT_PTX_LDU_GLOBAL_i8areg64,
- NVPTX::INT_PTX_LDU_GLOBAL_i16areg64,
- NVPTX::INT_PTX_LDU_GLOBAL_i32areg64,
- NVPTX::INT_PTX_LDU_GLOBAL_i64areg64,
- NVPTX::INT_PTX_LDU_GLOBAL_f32areg64,
- NVPTX::INT_PTX_LDU_GLOBAL_f64areg64);
- break;
- case NVPTXISD::LoadV2:
- Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
- NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg64,
- NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg64,
- NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg64,
- NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg64,
- NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg64,
- NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg64);
- break;
- case NVPTXISD::LDUV2:
- Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
- NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg64,
- NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg64,
- NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg64,
- NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg64,
- NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg64,
- NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg64);
- break;
- case NVPTXISD::LoadV4:
- Opcode = pickOpcodeForVT(
- EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg64,
- NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg64,
- NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg64, std::nullopt,
- NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg64, std::nullopt);
- break;
- case NVPTXISD::LDUV4:
- Opcode = pickOpcodeForVT(
- EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg64,
- NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg64,
- NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg64, std::nullopt,
- NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg64, std::nullopt);
- break;
- }
- } else {
- switch (N->getOpcode()) {
- default:
- return false;
- case ISD::LOAD:
- Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
- NVPTX::INT_PTX_LDG_GLOBAL_i8areg,
- NVPTX::INT_PTX_LDG_GLOBAL_i16areg,
- NVPTX::INT_PTX_LDG_GLOBAL_i32areg,
- NVPTX::INT_PTX_LDG_GLOBAL_i64areg,
- NVPTX::INT_PTX_LDG_GLOBAL_f32areg,
- NVPTX::INT_PTX_LDG_GLOBAL_f64areg);
- break;
- case ISD::INTRINSIC_W_CHAIN:
- Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
- NVPTX::INT_PTX_LDU_GLOBAL_i8areg,
- NVPTX::INT_PTX_LDU_GLOBAL_i16areg,
- NVPTX::INT_PTX_LDU_GLOBAL_i32areg,
- NVPTX::INT_PTX_LDU_GLOBAL_i64areg,
- NVPTX::INT_PTX_LDU_GLOBAL_f32areg,
- NVPTX::INT_PTX_LDU_GLOBAL_f64areg);
- break;
- case NVPTXISD::LoadV2:
- Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
- NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg32,
- NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg32,
- NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg32,
- NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg32,
- NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg32,
- NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg32);
- break;
- case NVPTXISD::LDUV2:
- Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
- NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg32,
- NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg32,
- NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg32,
- NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg32,
- NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg32,
- NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg32);
- break;
- case NVPTXISD::LoadV4:
- Opcode = pickOpcodeForVT(
- EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg32,
- NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg32,
- NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg32, std::nullopt,
- NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg32, std::nullopt);
- break;
- case NVPTXISD::LDUV4:
- Opcode = pickOpcodeForVT(
- EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg32,
- NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg32,
- NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg32, std::nullopt,
- NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg32, std::nullopt);
- break;
- }
- }
- if (!Opcode)
- return false;
- SDValue Ops[] = { Op1, Chain };
- LD = CurDAG->getMachineNode(*Opcode, DL, InstVTList, Ops);
}
// For automatic generation of LDG (through SelectLoad[Vector], not the
@@ -1658,7 +1464,6 @@ bool NVPTXDAGToDAGISel::tryStore(SDNode *N) {
// Create the machine instruction DAG
SDValue Value = PlainStore ? PlainStore->getValue() : AtomicStore->getVal();
SDValue BasePtr = ST->getBasePtr();
- SDValue Addr;
SDValue Offset, Base;
std::optional<unsigned> Opcode;
MVT::SimpleValueType SourceVT =
@@ -1669,25 +1474,16 @@ bool NVPTXDAGToDAGISel::tryStore(SDNode *N) {
getI32Imm(CodeAddrSpace, DL), getI32Imm(VecType, DL),
getI32Imm(ToType, DL), getI32Imm(ToTypeWidth, DL)});
- if (SelectDirectAddr(BasePtr, Addr)) {
- Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_avar, NVPTX::ST_i16_avar,
- NVPTX::ST_i32_avar, NVPTX::ST_i64_avar,
- NVPTX::ST_f32_avar, NVPTX::ST_f64_avar);
- if (!Opcode)
- return false;
- Ops.append({Addr, Chain});
- } else if (PointerSize == 64
- ? SelectADDRsi64(BasePtr.getNode(), BasePtr, Base, Offset)
- : SelectADDRsi(BasePtr.getNode(), BasePtr, Base, Offset)) {
+ if (SelectADDRsi(BasePtr.getNode(), BasePtr, Base, Offset)) {
Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_asi, NVPTX::ST_i16_asi,
NVPTX::ST_i32_asi, NVPTX::ST_i64_asi,
NVPTX::ST_f32_asi, NVPTX::ST_f64_asi);
if (!Opcode)
return false;
Ops.append({Base, Offset, Chain});
- } else if (PointerSize == 64
- ? SelectADDRri64(BasePtr.getNode(), BasePtr, Base, Offset)
- : SelectADDRri(BasePtr.getNode(), BasePtr, Base, Offset)) {
+ } else {
+ PointerSize == 64 ? SelectADDRri64(BasePtr.getNode(), BasePtr, Base, Offset)
+ : SelectADDRri(BasePtr.getNode(), BasePtr, Base, Offset);
if (PointerSize == 64)
Opcode =
pickOpcodeForVT(SourceVT, NVPTX::ST_i8_ari_64, NVPTX::ST_i16_ari_64,
@@ -1700,19 +1496,6 @@ bool NVPTXDAGToDAGISel::tryStore(SDNode *N) {
if (!Opcode)
return false;
Ops.append({Base, Offset, Chain});
- } else {
- if (PointerSize == 64)
- Opcode =
- pickOpcodeForVT(SourceVT, NVPTX::ST_i8_areg_64, NVPTX::ST_i16_areg_64,
- NVPTX::ST_i32_areg_64, NVPTX::ST_i64_areg_64,
- NVPTX::ST_f32_areg_64, NVPTX::ST_f64_areg_64);
- else
- Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_areg, NVPTX::ST_i16_areg,
- NVPTX::ST_i32_areg, NVPTX::ST_i64_areg,
- NVPTX::ST_f32_areg, NVPTX::ST_f64_areg);
- if (!Opcode)
- return false;
- Ops.append({BasePtr, Chain});
}
SDNode *NVPTXST = CurDAG->getMachineNode(*Opcode, DL, MVT::Other, Ops);
@@ -1728,7 +1511,7 @@ bool NVPTXDAGToDAGISel::tryStore(SDNode *N) {
bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) {
SDValue Op1 = N->getOperand(1);
- SDValue Addr, Offset, Base;
+ SDValue Offset, Base;
std::optional<unsigned> Opcode;
SDNode *ST;
EVT EltVT = Op1.getValueType();
@@ -1785,26 +1568,7 @@ bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) {
getI32Imm(CodeAddrSpace, DL), getI32Imm(VecType, DL),
getI32Imm(ToType, DL), getI32Imm(ToTypeWidth, DL)});
- if (SelectDirectAddr(N2, Addr)) {
- switch (N->getOpcode()) {
- default:
- return false;
- case NVPTXISD::StoreV2:
- Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
- NVPTX::STV_i8_v2_avar, NVPTX::STV_i16_v2_avar,
- NVPTX::STV_i32_v2_avar, NVPTX::STV_i64_v2_avar,
- NVPTX::STV_f32_v2_avar, NVPTX::STV_f64_v2_avar);
- break;
- case NVPTXISD::StoreV4:
- Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
- NVPTX::STV_i8_v4_avar, NVPTX::STV_i16_v4_avar,
- NVPTX::STV_i32_v4_avar, std::nullopt,
- NVPTX::STV_f32_v4_avar, std::nullopt);
- break;
- }
- Ops.push_back(Addr);
- } else if (PointerSize == 64 ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
- : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
+ if (SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
switch (N->getOpcode()) {
default:
return false;
@@ -1822,8 +1586,10 @@ bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) {
break;
}
Ops.append({Base, Offset});
- } else if (PointerSize == 64 ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
- : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
+ } else {
+ PointerSize == 64 ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
+ : SelectADDRri(N2.getNode(), N2, Base, Offset);
+
if (PointerSize == 64) {
switch (N->getOpcode()) {
default:
@@ -1861,47 +1627,7 @@ bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) {
}
}
Ops.append({Base, Offset});
- } else {
- if (PointerSize == 64) {
- switch (N->getOpcode()) {
- default:
- return false;
- case NVPTXISD::StoreV2:
- Opcode = pickOpcodeForVT(
- EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v2_areg_64,
- NVPTX::STV_i16_v2_areg_64, NVPTX::STV_i32_v2_areg_64,
- NVPTX::STV_i64_v2_areg_64, NVPTX::STV_f32_v2_areg_64,
- NVPTX::STV_f64_v2_areg_64);
- break;
- case NVPTXISD::StoreV4:
- Opcode = pickOpcodeForVT(
- EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_areg_64,
- NVPTX::STV_i16_v4_areg_64, NVPTX::STV_i32_v4_areg_64, std::nullopt,
- NVPTX::STV_f32_v4_areg_64, std::nullopt);
- break;
- }
- } else {
- switch (N->getOpcode()) {
- default:
- return false;
- case NVPTXISD::StoreV2:
- Opcode =
- pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v2_areg,
- NVPTX::STV_i16_v2_areg, NVPTX::STV_i32_v2_areg,
- NVPTX::STV_i64_v2_areg, NVPTX::STV_f32_v2_areg,
- NVPTX::STV_f64_v2_areg);
- break;
- case NVPTXISD::StoreV4:
- Opcode =
- pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_areg,
- NVPTX::STV_i16_v4_areg, NVPTX::STV_i32_v4_areg,
- std::nullopt, NVPTX::STV_f32_v4_areg, std::nullopt);
- break;
- }
- }
- Ops.push_back(N2);
}
-
if (!Opcode)
return false;
@@ -2581,93 +2307,56 @@ bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) {
return false;
}
-// symbol+offset
-bool NVPTXDAGToDAGISel::SelectADDRsi_imp(SDNode *OpNode, SDValue Addr,
- SDValue &Base, SDValue &Offset,
- MVT VT) {
- std::function<std::optional<uint64_t>(SDValue, uint64_t)>
- FindRootAddressAndTotalOffset =
- [&](SDValue Addr,
- uint64_t AccumulatedOffset) -> std::optional<uint64_t> {
- if (isAddLike(Addr)) {
- if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
- SDValue PossibleBaseAddr = Addr.getOperand(0);
- AccumulatedOffset += CN->getZExtValue();
- if (SelectDirectAddr(PossibleBaseAddr, Base))
- return AccumulatedOffset;
- return FindRootAddressAndTotalOffset(PossibleBaseAddr,
- AccumulatedOffset);
- }
- }
- return std::nullopt;
- };
- if (auto AccumulatedOffset = FindRootAddressAndTotalOffset(Addr, 0)) {
- Offset = CurDAG->getTargetConstant(*AccumulatedOffset, SDLoc(OpNode), VT);
- return true;
+static SDValue accumulateOffset(SDValue &Addr, SDLoc DL, SelectionDAG *DAG) {
+ APInt AccumulatedOffset(64u, 0);
+ while (isAddLike(Addr)) {
+ const auto *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
+ if (!CN)
+ break;
+
+ const APInt CI = CN->getAPIntValue().sext(64);
+ if (!(CI + AccumulatedOffset).isSignedIntN(32))
+ break;
+
+ AccumulatedOffset += CI;
+ Addr = Addr->getOperand(0);
}
- return false;
+ return DAG->getSignedTargetConstant(AccumulatedOffset.getSExtValue(), DL,
+ MVT::i32);
}
// symbol+offset
bool NVPTXDAGToDAGISel::SelectADDRsi(SDNode *OpNode, SDValue Addr,
SDValue &Base, SDValue &Offset) {
- return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i32);
-}
-
-// symbol+offset
-bool NVPTXDAGToDAGISel::SelectADDRsi64(SDNode *OpNode, SDValue Addr,
- SDValue &Base, SDValue &Offset) {
- return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i64);
+ Offset = accumulateOffset(Addr, SDLoc(OpNode), CurDAG);
+ return SelectDirectAddr(Addr, Base);
}
// register+offset
-bool NVPTXDAGToDAGISel::SelectADDRri_imp(SDNode *OpNode, SDValue Addr,
+void NVPTXDAGToDAGISel::SelectADDRri_imp(SDNode *OpNode, SDValue Addr,
SDValue &Base, SDValue &Offset,
MVT VT) {
- if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
- Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), VT);
- Offset = CurDAG->getTargetConstant(0, SDLoc(OpNode), VT);
- return true;
- }
- if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
- Addr.getOpcode() == ISD::TargetGlobalAddress)
- return false; // direct calls.
- if (isAddLike(Addr)) {
- if (SelectDirectAddr(Addr.getOperand(0), Addr)) {
- return false;
- }
- if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
- if (FrameIndexSDNode *FIN =
- dyn_cast<FrameIndexSDNode>(Addr.getOperand(0)))
- // Constant offset from frame ref.
- Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), VT);
- else
- Base = Addr.getOperand(0);
-
- // Offset must fit in a 32-bit signed int in PTX [register+offset] address
- // mode
- if (!CN->getAPIntValue().isSignedIntN(32))
- return false;
-
- Offset = CurDAG->getSignedTargetConstant(CN->getSExtValue(),
- SDLoc(OpNode), MVT::i32);
- return true;
- }
+ Offset = accumulateOffset(Addr, SDLoc(OpNode), CurDAG);
+ if (auto *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), VT);
+ return;
}
- return false;
+ Base = Addr;
}
// register+offset
bool NVPTXDAGToDAGISel::SelectADDRri(SDNode *OpNode, SDValue Addr,
SDValue &Base, SDValue &Offset) {
- return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i32);
+ SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i32);
+ return true;
}
// register+offset
bool NVPTXDAGToDAGISel::SelectADDRri64(SDNode *OpNode, SDValue Addr,
SDValue &Base, SDValue &Offset) {
- return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i64);
+ SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i64);
+ return true;
}
bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N,
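For context on the new offset-folding helper above, here is a minimal standalone sketch (plain C++; isAddLike and the SelectionDAG node types are stubbed out as a simple struct, and the names are illustrative only, not the patch's actual API): it walks down a chain of add-like nodes, folding each constant right-hand operand into a running total as long as that total still fits in a signed 32-bit immediate (the limit of the PTX [reg+offset] addressing mode), and leaves Addr pointing at the remaining base.

  #include <cstdint>
  #include <limits>
  #include <optional>

  struct Node {
    Node *Base = nullptr;                 // operand 0 of an add-like node
    std::optional<int64_t> ConstOperand;  // constant operand 1, if any
  };

  // Fold constant offsets along a chain of add-like nodes into one signed
  // 32-bit immediate, advancing Addr to the remaining base address.
  static int32_t accumulateOffset(Node *&Addr) {
    int64_t Accumulated = 0;
    while (Addr->Base && Addr->ConstOperand) {
      const int64_t Next = Accumulated + *Addr->ConstOperand;
      if (Next > std::numeric_limits<int32_t>::max() ||
          Next < std::numeric_limits<int32_t>::min())
        break;                            // would overflow the 32-bit offset field
      Accumulated = Next;
      Addr = Addr->Base;                  // descend toward the base pointer
    }
    return static_cast<int32_t>(Accumulated);
  }

The real accumulateOffset() in the patch works on SDValue/APInt and returns the total as a target constant, but the control flow is the same.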
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
index 8dc6bc86c6828..1d02ae333c86b 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
+++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
@@ -107,18 +107,14 @@ class LLVM_LIBRARY_VISIBILITY NVPTXDAGToDAGISel : public SelectionDAGISel {
// Match direct address complex pattern.
bool SelectDirectAddr(SDValue N, SDValue &Address);
- bool SelectADDRri_imp(SDNode *OpNode, SDValue Addr, SDValue &Base,
+ void SelectADDRri_imp(SDNode *OpNode, SDValue Addr, SDValue &Base,
SDValue &Offset, MVT VT);
bool SelectADDRri(SDNode *OpNode, SDValue Addr, SDValue &Base,
SDValue &Offset);
bool SelectADDRri64(SDNode *OpNode, SDValue Addr, SDValue &Base,
SDValue &Offset);
- bool SelectADDRsi_imp(SDNode *OpNode, SDValue Addr, SDValue &Base,
- SDValue &Offset, MVT VT);
bool SelectADDRsi(SDNode *OpNode, SDValue Addr, SDValue &Base,
SDValue &Offset);
- bool SelectADDRsi64(SDNode *OpNode, SDValue Addr, SDValue &Base,
- SDValue &Offset);
bool ChkMemSDNodeAddressSpace(SDNode *N, unsigned int spN) const;
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index 7d9697e40e6ab..f75a70409340f 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -2754,24 +2754,6 @@ foreach vt = [v2f16, v2bf16, v2i16, v4i8] in {
// Load / Store Handling
//
multiclass LD<NVPTXRegClass regclass> {
- def _avar : NVPTXInst<
- (outs regclass:$dst),
- (ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
- i32imm:$fromWidth, imem:$addr),
- "ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
- "\t$dst, [$addr];", []>;
- def _areg : NVPTXInst<
- (outs regclass:$dst),
- (ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
- i32imm:$fromWidth, Int32Regs:$addr),
- "ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
- "\t$dst, [$addr];", []>;
- def _areg_64 : NVPTXInst<
- (outs regclass:$dst),
- (ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
- i32imm:$fromWidth, Int64Regs:$addr),
- "ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
- "\t$dst, [$addr];", []>;
def _ari : NVPTXInst<
(outs regclass:$dst),
(ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
@@ -2802,24 +2784,6 @@ let mayLoad=1, hasSideEffects=0 in {
}
multiclass ST<NVPTXRegClass regclass> {
- def _avar : NVPTXInst<
- (outs),
- (ins regclass:$src, LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp,
- LdStCode:$Vec, LdStCode:$Sign, i32imm:$toWidth, imem:$addr),
- "st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth"
- " \t[$addr], $src;", []>;
- def _areg : NVPTXInst<
- (outs),
- (ins regclass:$src, LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp,
- LdStCode:$Vec, LdStCode:$Sign, i32imm:$toWidth, Int32Regs:$addr),
- "st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth"
- " \t[$addr], $src;", []>;
- def _areg_64 : NVPTXInst<
- (outs),
- (ins regclass:$src, LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp,
- LdStCode:$Vec, LdStCode:$Sign, i32imm:$toWidth, Int64Regs:$addr),
- "st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth"
- " \t[$addr], $src;", []>;
def _ari : NVPTXInst<
(outs),
(ins regclass:$src, LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp,
@@ -2856,24 +2820,6 @@ let mayStore=1, hasSideEffects=0 in {
// elementization happens at the machine instruction level, so the following
// instructions never appear in the DAG.
multiclass LD_VEC<NVPTXRegClass regclass> {
- def _v2_avar : NVPTXInst<
- (outs regclass:$dst1, regclass:$dst2),
- (ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
- LdStCode:$Sign, i32imm:$fromWidth, imem:$addr),
- "ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
- "\t{{$dst1, $dst2}}, [$addr];", []>;
- def _v2_areg : NVPTXInst<
- (outs regclass:$dst1, regclass:$dst2),
- (ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
- LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr),
- "ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
- "\t{{$dst1, $dst2}}, [$addr];", []>;
- def _v2_areg_64 : NVPTXInst<
- (outs regclass:$dst1, regclass:$dst2),
- (ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
- LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr),
- "ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
- "\t{{$dst1, $dst2}}, [$addr];", []>;
def _v2_ari : NVPTXInst<
(outs regclass:$dst1, regclass:$dst2),
(ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
@@ -2892,24 +2838,6 @@ multiclass LD_VEC<NVPTXRegClass regclass> {
LdStCode:$Sign, i32imm:$fromWidth, imem:$addr, Offseti32imm:$offset),
"ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
"\t{{$dst1, $dst2}}, [$addr$offset];", []>;
- def _v4_avar : NVPTXInst<
- (outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4),
- (ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
- LdStCode:$Sign, i32imm:$fromWidth, imem:$addr),
- "ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
- "\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];", []>;
- def _v4_areg : NVPTXInst<
- (outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4),
- (ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
- LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr),
- "ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
- "\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];", []>;
- def _v4_areg_64 : NVPTXInst<
- (outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4),
- (ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
- LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr),
- "ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
- "\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];", []>;
def _v4_ari : NVPTXInst<
(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4),
(ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
@@ -2939,27 +2867,6 @@ let mayLoad=1, hasSideEffects=0 in {
}
multiclass ST_VEC<NVPTXRegClass regclass> {
- def _v2_avar : NVPTXInst<
- (outs),
- (ins regclass:$src1, regclass:$src2, LdStCode:$sem, LdStCode:$scope,
- LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth,
- imem:$addr),
- "st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
- "\t[$addr], {{$src1, $src2}};", []>;
- def _v2_areg : NVPTXInst<
- (outs),
- (ins regclass:$src1, regclass:$src2, LdStCode:$sem, LdStCode:$scope,
- LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth,
- Int32Regs:$addr),
- "st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
- "\t[$addr], {{$src1, $src2}};", []>;
- def _v2_areg_64 : NVPTXInst<
- (outs),
- (ins regclass:$src1, regclass:$src2, LdStCode:$sem, LdStCode:$scope,
- LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth,
- Int64Regs:$addr),
- "st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
- "\t[$addr], {{$src1, $src2}};", []>;
def _v2_ari : NVPTXInst<
(outs),
(ins regclass:$src1, regclass:$src2, LdStCode:$sem, LdStCode:$scope,
@@ -2981,27 +2888,6 @@ multiclass ST_VEC<NVPTXRegClass regclass> {
imem:$addr, Offseti32imm:$offset),
"st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
"\t[$addr$offset], {{$src1, $src2}};", []>;
- def _v4_avar : NVPTXInst<
- (outs),
- (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
- LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
- LdStCode:$Sign, i32imm:$fromWidth, imem:$addr),
- "st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
- "\t[$addr], {{$src1, $src2, $src3, $src4}};", []>;
- def _v4_areg : NVPTXInst<
- (outs),
- (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
- LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
- LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr),
- "st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
- "\t[$addr], {{$src1, $src2, $src3, $src4}};", []>;
- def _v4_areg_64 : NVPTXInst<
- (outs),
- (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
- LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
- LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr),
- "st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
- "\t[$addr], {{$src1, $src2, $src3, $src4}};", []>;
def _v4_ari : NVPTXInst<
(outs),
(ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
index f20502521829e..842c403103785 100644
--- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
+++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
@@ -2693,12 +2693,6 @@ defm INT_PTX_SATOM_XOR : ATOM2_bitwise_impl<"xor">;
// Scalar
multiclass LDU_G<string TyStr, NVPTXRegClass regclass> {
- def areg: NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
- !strconcat("ldu.global.", TyStr),
- []>, Requires<[hasLDU]>;
- def areg64: NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
- !strconcat("ldu.global.", TyStr),
- []>, Requires<[hasLDU]>;
def avar: NVPTXInst<(outs regclass:$result), (ins imemAny:$src),
!strconcat("ldu.global.", TyStr),
[]>, Requires<[hasLDU]>;
@@ -2721,12 +2715,6 @@ defm INT_PTX_LDU_GLOBAL_f64 : LDU_G<"f64 \t$result, [$src];", Float64Regs>;
// Elementized vector ldu
multiclass VLDU_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
- def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
- (ins Int32Regs:$src),
- !strconcat("ldu.global.", TyStr), []>;
- def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
- (ins Int64Regs:$src),
- !strconcat("ldu.global.", TyStr), []>;
def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
(ins MEMri:$src),
!strconcat("ldu.global.", TyStr), []>;
@@ -2739,12 +2727,6 @@ multiclass VLDU_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
}
multiclass VLDU_G_ELE_V4<string TyStr, NVPTXRegClass regclass> {
- def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
- regclass:$dst4), (ins Int32Regs:$src),
- !strconcat("ldu.global.", TyStr), []>;
- def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
- regclass:$dst4), (ins Int64Regs:$src),
- !strconcat("ldu.global.", TyStr), []>;
def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
regclass:$dst4), (ins MEMri:$src),
!strconcat("ldu.global.", TyStr), []>;
@@ -2796,12 +2778,6 @@ defm INT_PTX_LDU_G_v4f32_ELE
// during the lifetime of the kernel.
multiclass LDG_G<string TyStr, NVPTXRegClass regclass> {
- def areg: NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
- !strconcat("ld.global.nc.", TyStr),
- []>, Requires<[hasLDG]>;
- def areg64: NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
- !strconcat("ld.global.nc.", TyStr),
- []>, Requires<[hasLDG]>;
def avar: NVPTXInst<(outs regclass:$result), (ins imemAny:$src),
!strconcat("ld.global.nc.", TyStr),
[]>, Requires<[hasLDG]>;
@@ -2830,12 +2806,6 @@ defm INT_PTX_LDG_GLOBAL_f64
// Elementized vector ldg
multiclass VLDG_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
- def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
- (ins Int32Regs:$src),
- !strconcat("ld.global.nc.", TyStr), []>;
- def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
- (ins Int64Regs:$src),
- !strconcat("ld.global.nc.", TyStr), []>;
def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
(ins MEMri:$src),
!strconcat("ld.global.nc.", TyStr), []>;
diff --git a/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp b/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp
index a3e3978cbbfe2..528a1ad288b82 100644
--- a/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp
@@ -1809,7 +1809,7 @@ findIndexForHandle(MachineOperand &Op, MachineFunction &MF, unsigned &Idx) {
MachineInstr &TexHandleDef = *MRI.getVRegDef(Op.getReg());
switch (TexHandleDef.getOpcode()) {
- case NVPTX::LD_i64_avar: {
+ case NVPTX::LD_i64_asi: {
// The handle is a parameter value being loaded, replace with the
// parameter symbol
const NVPTXTargetMachine &TM =
diff --git a/llvm/test/CodeGen/MIR/NVPTX/expected-floating-point-literal.mir b/llvm/test/CodeGen/MIR/NVPTX/expected-floating-point-literal.mir
index d6f792b354e04..62ede3b9eef3b 100644
--- a/llvm/test/CodeGen/MIR/NVPTX/expected-floating-point-literal.mir
+++ b/llvm/test/CodeGen/MIR/NVPTX/expected-floating-point-literal.mir
@@ -16,7 +16,7 @@ registers:
- { id: 1, class: float32regs }
body: |
bb.0.entry:
- %0 = LD_f32_avar 0, 4, 1, 2, 32, &test_param_0
+ %0 = LD_f32_asi 0, 4, 1, 2, 32, &test_param_0, 0
; CHECK: [[@LINE+1]]:33: expected a floating point literal
%1 = FADD_rnf32ri %0, float 3
StoreRetvalF32 %1, 0
diff --git a/llvm/test/CodeGen/MIR/NVPTX/floating-point-immediate-operands.mir b/llvm/test/CodeGen/MIR/NVPTX/floating-point-immediate-operands.mir
index 9122ef7e60136..69c1e25a06024 100644
--- a/llvm/test/CodeGen/MIR/NVPTX/floating-point-immediate-operands.mir
+++ b/llvm/test/CodeGen/MIR/NVPTX/floating-point-immediate-operands.mir
@@ -40,9 +40,9 @@ registers:
- { id: 7, class: float32regs }
body: |
bb.0.entry:
- %0 = LD_f32_avar 0, 0, 4, 1, 2, 32, &test_param_0
+ %0 = LD_f32_asi 0, 0, 4, 1, 2, 32, &test_param_0, 0
%1 = CVT_f64_f32 %0, 0
- %2 = LD_i32_avar 0, 0, 4, 1, 0, 32, &test_param_1
+ %2 = LD_i32_asi 0, 0, 4, 1, 0, 32, &test_param_1, 0
; CHECK: %3:float64regs = FADD_rnf64ri %1, double 3.250000e+00
%3 = FADD_rnf64ri %1, double 3.250000e+00
%4 = CVT_f32_f64 %3, 5
@@ -66,9 +66,9 @@ registers:
- { id: 7, class: float32regs }
body: |
bb.0.entry:
- %0 = LD_f32_avar 0, 0, 4, 1, 2, 32, &test2_param_0
+ %0 = LD_f32_asi 0, 0, 4, 1, 2, 32, &test2_param_0, 0
%1 = CVT_f64_f32 %0, 0
- %2 = LD_i32_avar 0, 0, 4, 1, 0, 32, &test2_param_1
+ %2 = LD_i32_asi 0, 0, 4, 1, 0, 32, &test2_param_1, 0
; CHECK: %3:float64regs = FADD_rnf64ri %1, double 0x7FF8000000000000
%3 = FADD_rnf64ri %1, double 0x7FF8000000000000
%4 = CVT_f32_f64 %3, 5
diff --git a/llvm/test/CodeGen/MIR/NVPTX/floating-point-invalid-type-error.mir b/llvm/test/CodeGen/MIR/NVPTX/floating-point-invalid-type-error.mir
index 6280d4e90ebf1..cc9a36509db33 100644
--- a/llvm/test/CodeGen/MIR/NVPTX/floating-point-invalid-type-error.mir
+++ b/llvm/test/CodeGen/MIR/NVPTX/floating-point-invalid-type-error.mir
@@ -16,7 +16,7 @@ registers:
- { id: 1, class: float32regs }
body: |
bb.0.entry:
- %0 = LD_f32_avar 0, 4, 1, 2, 32, &test_param_0
+ %0 = LD_f32_asi 0, 4, 1, 2, 32, &test_param_0, 0
; CHECK: [[@LINE+1]]:33: floating point constant does not have type 'float'
%1 = FADD_rnf32ri %0, float 0xH3C00
StoreRetvalF32 %1, 0
diff --git a/llvm/test/CodeGen/NVPTX/variadics-backend.ll b/llvm/test/CodeGen/NVPTX/variadics-backend.ll
index f7ed690efabcf..eda4121fee702 100644
--- a/llvm/test/CodeGen/NVPTX/variadics-backend.ll
+++ b/llvm/test/CodeGen/NVPTX/variadics-backend.ll
@@ -215,21 +215,18 @@ define dso_local i32 @bar() {
; CHECK-PTX-NEXT: .reg .b64 %SPL;
; CHECK-PTX-NEXT: .reg .b16 %rs<10>;
; CHECK-PTX-NEXT: .reg .b32 %r<4>;
-; CHECK-PTX-NEXT: .reg .b64 %rd<7>;
+; CHECK-PTX-NEXT: .reg .b64 %rd<4>;
; CHECK-PTX-EMPTY:
; CHECK-PTX-NEXT: // %bb.0: // %entry
; CHECK-PTX-NEXT: mov.u64 %SPL, __local_depot3;
; CHECK-PTX-NEXT: cvta.local.u64 %SP, %SPL;
; CHECK-PTX-NEXT: mov.u64 %rd1, __const_$_bar_$_s1;
-; CHECK-PTX-NEXT: add.s64 %rd2, %rd1, 7;
-; CHECK-PTX-NEXT: ld.global.nc.u8 %rs1, [%rd2];
+; CHECK-PTX-NEXT: ld.global.nc.u8 %rs1, [%rd1+7];
; CHECK-PTX-NEXT: cvt.u16.u8 %rs2, %rs1;
; CHECK-PTX-NEXT: st.u8 [%SP+2], %rs2;
-; CHECK-PTX-NEXT: add.s64 %rd3, %rd1, 5;
-; CHECK-PTX-NEXT: ld.global.nc.u8 %rs3, [%rd3];
+; CHECK-PTX-NEXT: ld.global.nc.u8 %rs3, [%rd1+5];
; CHECK-PTX-NEXT: cvt.u16.u8 %rs4, %rs3;
-; CHECK-PTX-NEXT: add.s64 %rd4, %rd1, 6;
-; CHECK-PTX-NEXT: ld.global.nc.u8 %rs5, [%rd4];
+; CHECK-PTX-NEXT: ld.global.nc.u8 %rs5, [%rd1+6];
; CHECK-PTX-NEXT: cvt.u16.u8 %rs6, %rs5;
; CHECK-PTX-NEXT: shl.b16 %rs7, %rs6, 8;
; CHECK-PTX-NEXT: or.b16 %rs8, %rs7, %rs4;
@@ -238,14 +235,14 @@ define dso_local i32 @bar() {
; CHECK-PTX-NEXT: st.u32 [%SP+8], %r1;
; CHECK-PTX-NEXT: mov.b16 %rs9, 1;
; CHECK-PTX-NEXT: st.u8 [%SP+12], %rs9;
-; CHECK-PTX-NEXT: mov.b64 %rd5, 1;
-; CHECK-PTX-NEXT: st.u64 [%SP+16], %rd5;
-; CHECK-PTX-NEXT: add.u64 %rd6, %SP, 8;
+; CHECK-PTX-NEXT: mov.b64 %rd2, 1;
+; CHECK-PTX-NEXT: st.u64 [%SP+16], %rd2;
+; CHECK-PTX-NEXT: add.u64 %rd3, %SP, 8;
; CHECK-PTX-NEXT: { // callseq 1, 0
; CHECK-PTX-NEXT: .param .b32 param0;
; CHECK-PTX-NEXT: st.param.b32 [param0], 1;
; CHECK-PTX-NEXT: .param .b64 param1;
-; CHECK-PTX-NEXT: st.param.b64 [param1], %rd6;
+; CHECK-PTX-NEXT: st.param.b64 [param1], %rd3;
; CHECK-PTX-NEXT: .param .b32 retval0;
; CHECK-PTX-NEXT: call.uni (retval0),
; CHECK-PTX-NEXT: variadics2,
@@ -384,7 +381,7 @@ define dso_local void @qux() {
; CHECK-PTX-NEXT: .reg .b64 %SP;
; CHECK-PTX-NEXT: .reg .b64 %SPL;
; CHECK-PTX-NEXT: .reg .b32 %r<3>;
-; CHECK-PTX-NEXT: .reg .b64 %rd<7>;
+; CHECK-PTX-NEXT: .reg .b64 %rd<6>;
; CHECK-PTX-EMPTY:
; CHECK-PTX-NEXT: // %bb.0: // %entry
; CHECK-PTX-NEXT: mov.u64 %SPL, __local_depot7;
@@ -392,18 +389,17 @@ define dso_local void @qux() {
; CHECK-PTX-NEXT: ld.global.nc.u64 %rd1, [__const_$_qux_$_s];
; CHECK-PTX-NEXT: st.u64 [%SP], %rd1;
; CHECK-PTX-NEXT: mov.u64 %rd2, __const_$_qux_$_s;
-; CHECK-PTX-NEXT: add.s64 %rd3, %rd2, 8;
-; CHECK-PTX-NEXT: ld.global.nc.u64 %rd4, [%rd3];
-; CHECK-PTX-NEXT: st.u64 [%SP+8], %rd4;
-; CHECK-PTX-NEXT: mov.b64 %rd5, 1;
-; CHECK-PTX-NEXT: st.u64 [%SP+16], %rd5;
-; CHECK-PTX-NEXT: add.u64 %rd6, %SP, 16;
+; CHECK-PTX-NEXT: ld.global.nc.u64 %rd3, [%rd2+8];
+; CHECK-PTX-NEXT: st.u64 [%SP+8], %rd3;
+; CHECK-PTX-NEXT: mov.b64 %rd4, 1;
+; CHECK-PTX-NEXT: st.u64 [%SP+16], %rd4;
+; CHECK-PTX-NEXT: add.u64 %rd5, %SP, 16;
; CHECK-PTX-NEXT: { // callseq 3, 0
; CHECK-PTX-NEXT: .param .align 8 .b8 param0[16];
; CHECK-PTX-NEXT: st.param.b64 [param0], %rd1;
-; CHECK-PTX-NEXT: st.param.b64 [param0+8], %rd4;
+; CHECK-PTX-NEXT: st.param.b64 [param0+8], %rd3;
; CHECK-PTX-NEXT: .param .b64 param1;
-; CHECK-PTX-NEXT: st.param.b64 [param1], %rd6;
+; CHECK-PTX-NEXT: st.param.b64 [param1], %rd5;
; CHECK-PTX-NEXT: .param .b32 retval0;
; CHECK-PTX-NEXT: call.uni (retval0),
; CHECK-PTX-NEXT: variadics4,
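The PTX and MIR test updates above follow directly from the folding: because the _asi and _ari instruction forms carry the offset as an operand, the previously separate add.s64 instructions disappear into the [reg+imm] address, and the MIR tests gain an explicit trailing 0 offset when moving from the removed _avar forms to _asi. A rough sketch of the reduced selection logic for the plain ld/st path, with the NVPTX details abstracted away (the enum and function names here are illustrative, not the actual ISel API):

  #include <cstdint>

  enum class AddrKind { SymbolPlusImm, RegPlusImm }; // _asi vs. _ari forms

  struct SelectedAddr {
    AddrKind Kind;
    int32_t Offset; // always present; 0 when no constant was folded
  };

  // After this patch only two addressing modes remain for plain loads and
  // stores: symbol+imm when the base resolves to a direct address, otherwise
  // register+imm, which can no longer fail (the bare-register _areg variants
  // are deleted and _avar is subsumed by _asi with a zero offset).
  SelectedAddr selectAddr(bool BaseIsDirectSymbol, int32_t FoldedOffset) {
    if (BaseIsDirectSymbol)
      return {AddrKind::SymbolPlusImm, FoldedOffset};
    return {AddrKind::RegPlusImm, FoldedOffset};
  }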
>From 29aa2a8489ddc22b69f8d9feb3742d1b06ba5f8b Mon Sep 17 00:00:00 2001
From: Alex Maclean <amaclean at nvidia.com>
Date: Thu, 20 Feb 2025 20:17:22 +0000
Subject: [PATCH 2/2] address comments
---
llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp | 52 +++++++++------------
1 file changed, 22 insertions(+), 30 deletions(-)
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index 6e990058958e0..6621aa06ac268 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -979,26 +979,23 @@ bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
Opcode = pickOpcodeForVT(TargetVT, NVPTX::LD_i8_asi, NVPTX::LD_i16_asi,
NVPTX::LD_i32_asi, NVPTX::LD_i64_asi,
NVPTX::LD_f32_asi, NVPTX::LD_f64_asi);
- if (!Opcode)
- return false;
- Ops.append({Base, Offset, Chain});
} else {
- PointerSize == 64 ? SelectADDRri64(N1.getNode(), N1, Base, Offset)
- : SelectADDRri(N1.getNode(), N1, Base, Offset);
-
- if (PointerSize == 64)
+ if (PointerSize == 64) {
+ SelectADDRri64(N1.getNode(), N1, Base, Offset);
Opcode =
pickOpcodeForVT(TargetVT, NVPTX::LD_i8_ari_64, NVPTX::LD_i16_ari_64,
NVPTX::LD_i32_ari_64, NVPTX::LD_i64_ari_64,
NVPTX::LD_f32_ari_64, NVPTX::LD_f64_ari_64);
- else
+ } else {
+ SelectADDRri(N1.getNode(), N1, Base, Offset);
Opcode = pickOpcodeForVT(TargetVT, NVPTX::LD_i8_ari, NVPTX::LD_i16_ari,
NVPTX::LD_i32_ari, NVPTX::LD_i64_ari,
NVPTX::LD_f32_ari, NVPTX::LD_f64_ari);
- if (!Opcode)
- return false;
- Ops.append({Base, Offset, Chain});
+ }
}
+ if (!Opcode)
+ return false;
+ Ops.append({Base, Offset, Chain});
SDNode *NVPTXLD =
CurDAG->getMachineNode(*Opcode, DL, TargetVT, MVT::Other, Ops);
@@ -1114,10 +1111,8 @@ bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {
return false;
Ops.append({Base, Offset, Chain});
} else {
- PointerSize == 64 ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
- : SelectADDRri(Op1.getNode(), Op1, Base, Offset);
-
if (PointerSize == 64) {
+ SelectADDRri64(Op1.getNode(), Op1, Base, Offset);
switch (N->getOpcode()) {
default:
return false;
@@ -1136,6 +1131,7 @@ bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {
break;
}
} else {
+ SelectADDRri(Op1.getNode(), Op1, Base, Offset);
switch (N->getOpcode()) {
default:
return false;
@@ -1265,10 +1261,8 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
SDValue Ops[] = { Addr, Chain };
LD = CurDAG->getMachineNode(*Opcode, DL, InstVTList, Ops);
} else {
- TM.is64Bit() ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
- : SelectADDRri(Op1.getNode(), Op1, Base, Offset);
-
if (TM.is64Bit()) {
+ SelectADDRri64(Op1.getNode(), Op1, Base, Offset);
switch (N->getOpcode()) {
default:
return false;
@@ -1324,6 +1318,7 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
break;
}
} else {
+ SelectADDRri(Op1.getNode(), Op1, Base, Offset);
switch (N->getOpcode()) {
default:
return false;
@@ -1478,25 +1473,23 @@ bool NVPTXDAGToDAGISel::tryStore(SDNode *N) {
Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_asi, NVPTX::ST_i16_asi,
NVPTX::ST_i32_asi, NVPTX::ST_i64_asi,
NVPTX::ST_f32_asi, NVPTX::ST_f64_asi);
- if (!Opcode)
- return false;
- Ops.append({Base, Offset, Chain});
} else {
- PointerSize == 64 ? SelectADDRri64(BasePtr.getNode(), BasePtr, Base, Offset)
- : SelectADDRri(BasePtr.getNode(), BasePtr, Base, Offset);
- if (PointerSize == 64)
+ if (PointerSize == 64) {
+ SelectADDRri64(BasePtr.getNode(), BasePtr, Base, Offset);
Opcode =
pickOpcodeForVT(SourceVT, NVPTX::ST_i8_ari_64, NVPTX::ST_i16_ari_64,
NVPTX::ST_i32_ari_64, NVPTX::ST_i64_ari_64,
NVPTX::ST_f32_ari_64, NVPTX::ST_f64_ari_64);
- else
+ } else {
+ SelectADDRri(BasePtr.getNode(), BasePtr, Base, Offset);
Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_ari, NVPTX::ST_i16_ari,
NVPTX::ST_i32_ari, NVPTX::ST_i64_ari,
NVPTX::ST_f32_ari, NVPTX::ST_f64_ari);
- if (!Opcode)
- return false;
- Ops.append({Base, Offset, Chain});
+ }
}
+ if (!Opcode)
+ return false;
+ Ops.append({Base, Offset, Chain});
SDNode *NVPTXST = CurDAG->getMachineNode(*Opcode, DL, MVT::Other, Ops);
@@ -1587,10 +1580,8 @@ bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) {
}
Ops.append({Base, Offset});
} else {
- PointerSize == 64 ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
- : SelectADDRri(N2.getNode(), N2, Base, Offset);
-
if (PointerSize == 64) {
+ SelectADDRri64(N2.getNode(), N2, Base, Offset);
switch (N->getOpcode()) {
default:
return false;
@@ -1609,6 +1600,7 @@ bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) {
break;
}
} else {
+ SelectADDRri(N2.getNode(), N2, Base, Offset);
switch (N->getOpcode()) {
default:
return false;