[llvm] [NVPTX] Combine addressing-mode variants of ld, st, wmma (PR #129102)
Alex MacLean via llvm-commits
llvm-commits at lists.llvm.org
Thu Feb 27 12:04:19 PST 2025
https://github.com/AlexMaclean updated https://github.com/llvm/llvm-project/pull/129102
>From ba1129b84d08ff5994253768aaf029ee90138d27 Mon Sep 17 00:00:00 2001
From: Alex Maclean <amaclean at nvidia.com>
Date: Tue, 25 Feb 2025 18:03:59 +0000
Subject: [PATCH] [NVPTX] Combine addressing-mode variants of ld, st, wmma
---
llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp | 567 +++++-------------
llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h | 12 +-
llvm/lib/Target/NVPTX/NVPTXInstrInfo.td | 190 ++----
llvm/lib/Target/NVPTX/NVPTXIntrinsics.td | 206 +++----
.../Target/NVPTX/NVPTXReplaceImageHandles.cpp | 2 +-
.../NVPTX/expected-floating-point-literal.mir | 2 +-
.../floating-point-immediate-operands.mir | 8 +-
.../floating-point-invalid-type-error.mir | 2 +-
8 files changed, 294 insertions(+), 695 deletions(-)
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index 971a128aadfdb..08022104bfedf 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -930,8 +930,6 @@ bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
if (canLowerToLDG(LD, *Subtarget, CodeAddrSpace, MF)) {
return tryLDGLDU(N);
}
- unsigned int PointerSize =
- CurDAG->getDataLayout().getPointerSizeInBits(LD->getAddressSpace());
SDLoc DL(N);
SDValue Chain = N->getOperand(0);
@@ -964,37 +962,24 @@ bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
FromType = getLdStRegType(ScalarVT);
// Create the machine instruction DAG
- SDValue N1 = N->getOperand(1);
SDValue Offset, Base;
- std::optional<unsigned> Opcode;
- MVT::SimpleValueType TargetVT = LD->getSimpleValueType(0).SimpleTy;
-
- SmallVector<SDValue, 12> Ops({getI32Imm(Ordering, DL), getI32Imm(Scope, DL),
- getI32Imm(CodeAddrSpace, DL),
- getI32Imm(VecType, DL), getI32Imm(FromType, DL),
- getI32Imm(FromTypeWidth, DL)});
-
- if (SelectADDRsi(N1.getNode(), N1, Base, Offset)) {
- Opcode = pickOpcodeForVT(TargetVT, NVPTX::LD_i8_asi, NVPTX::LD_i16_asi,
- NVPTX::LD_i32_asi, NVPTX::LD_i64_asi,
- NVPTX::LD_f32_asi, NVPTX::LD_f64_asi);
- } else {
- if (PointerSize == 64) {
- SelectADDRri64(N1.getNode(), N1, Base, Offset);
- Opcode =
- pickOpcodeForVT(TargetVT, NVPTX::LD_i8_ari_64, NVPTX::LD_i16_ari_64,
- NVPTX::LD_i32_ari_64, NVPTX::LD_i64_ari_64,
- NVPTX::LD_f32_ari_64, NVPTX::LD_f64_ari_64);
- } else {
- SelectADDRri(N1.getNode(), N1, Base, Offset);
- Opcode = pickOpcodeForVT(TargetVT, NVPTX::LD_i8_ari, NVPTX::LD_i16_ari,
- NVPTX::LD_i32_ari, NVPTX::LD_i64_ari,
- NVPTX::LD_f32_ari, NVPTX::LD_f64_ari);
- }
- }
+ SelectADDR(N->getOperand(1), Base, Offset);
+ SDValue Ops[] = {getI32Imm(Ordering, DL),
+ getI32Imm(Scope, DL),
+ getI32Imm(CodeAddrSpace, DL),
+ getI32Imm(VecType, DL),
+ getI32Imm(FromType, DL),
+ getI32Imm(FromTypeWidth, DL),
+ Base,
+ Offset,
+ Chain};
+
+ const MVT::SimpleValueType TargetVT = LD->getSimpleValueType(0).SimpleTy;
+ const std::optional<unsigned> Opcode =
+ pickOpcodeForVT(TargetVT, NVPTX::LD_i8, NVPTX::LD_i16, NVPTX::LD_i32,
+ NVPTX::LD_i64, NVPTX::LD_f32, NVPTX::LD_f64);
if (!Opcode)
return false;
- Ops.append({Base, Offset, Chain});
SDNode *NVPTXLD =
CurDAG->getMachineNode(*Opcode, DL, TargetVT, MVT::Other, Ops);
@@ -1030,8 +1015,6 @@ bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {
if (canLowerToLDG(MemSD, *Subtarget, CodeAddrSpace, MF)) {
return tryLDGLDU(N);
}
- unsigned int PointerSize =
- CurDAG->getDataLayout().getPointerSizeInBits(MemSD->getAddressSpace());
SDLoc DL(N);
SDValue Chain = N->getOperand(0);
@@ -1079,77 +1062,38 @@ bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {
FromTypeWidth = 32;
}
- SDValue Op1 = N->getOperand(1);
SDValue Offset, Base;
- std::optional<unsigned> Opcode;
- SDNode *LD;
+ SelectADDR(N->getOperand(1), Base, Offset);
+ SDValue Ops[] = {getI32Imm(Ordering, DL),
+ getI32Imm(Scope, DL),
+ getI32Imm(CodeAddrSpace, DL),
+ getI32Imm(VecType, DL),
+ getI32Imm(FromType, DL),
+ getI32Imm(FromTypeWidth, DL),
+ Base,
+ Offset,
+ Chain};
- SmallVector<SDValue, 12> Ops({getI32Imm(Ordering, DL), getI32Imm(Scope, DL),
- getI32Imm(CodeAddrSpace, DL),
- getI32Imm(VecType, DL), getI32Imm(FromType, DL),
- getI32Imm(FromTypeWidth, DL)});
-
- if (SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) {
- switch (N->getOpcode()) {
- default:
- return false;
- case NVPTXISD::LoadV2:
- Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
- NVPTX::LDV_i8_v2_asi, NVPTX::LDV_i16_v2_asi,
- NVPTX::LDV_i32_v2_asi, NVPTX::LDV_i64_v2_asi,
- NVPTX::LDV_f32_v2_asi, NVPTX::LDV_f64_v2_asi);
- break;
- case NVPTXISD::LoadV4:
- Opcode =
- pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_asi,
- NVPTX::LDV_i16_v4_asi, NVPTX::LDV_i32_v4_asi,
- std::nullopt, NVPTX::LDV_f32_v4_asi, std::nullopt);
- break;
- }
- } else {
- if (PointerSize == 64) {
- SelectADDRri64(Op1.getNode(), Op1, Base, Offset);
- switch (N->getOpcode()) {
- default:
- return false;
- case NVPTXISD::LoadV2:
- Opcode =
- pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
- NVPTX::LDV_i8_v2_ari_64, NVPTX::LDV_i16_v2_ari_64,
- NVPTX::LDV_i32_v2_ari_64, NVPTX::LDV_i64_v2_ari_64,
- NVPTX::LDV_f32_v2_ari_64, NVPTX::LDV_f64_v2_ari_64);
- break;
- case NVPTXISD::LoadV4:
- Opcode = pickOpcodeForVT(
- EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_ari_64,
- NVPTX::LDV_i16_v4_ari_64, NVPTX::LDV_i32_v4_ari_64, std::nullopt,
- NVPTX::LDV_f32_v4_ari_64, std::nullopt);
- break;
- }
- } else {
- SelectADDRri(Op1.getNode(), Op1, Base, Offset);
- switch (N->getOpcode()) {
- default:
- return false;
- case NVPTXISD::LoadV2:
- Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
- NVPTX::LDV_i8_v2_ari, NVPTX::LDV_i16_v2_ari,
- NVPTX::LDV_i32_v2_ari, NVPTX::LDV_i64_v2_ari,
- NVPTX::LDV_f32_v2_ari, NVPTX::LDV_f64_v2_ari);
- break;
- case NVPTXISD::LoadV4:
- Opcode =
- pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_ari,
- NVPTX::LDV_i16_v4_ari, NVPTX::LDV_i32_v4_ari,
- std::nullopt, NVPTX::LDV_f32_v4_ari, std::nullopt);
- break;
- }
- }
+ std::optional<unsigned> Opcode;
+ switch (N->getOpcode()) {
+ default:
+ return false;
+ case NVPTXISD::LoadV2:
+ Opcode =
+ pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v2,
+ NVPTX::LDV_i16_v2, NVPTX::LDV_i32_v2, NVPTX::LDV_i64_v2,
+ NVPTX::LDV_f32_v2, NVPTX::LDV_f64_v2);
+ break;
+ case NVPTXISD::LoadV4:
+ Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4,
+ NVPTX::LDV_i16_v4, NVPTX::LDV_i32_v4, std::nullopt,
+ NVPTX::LDV_f32_v4, std::nullopt);
+ break;
}
if (!Opcode)
return false;
- Ops.append({Base, Offset, Chain});
- LD = CurDAG->getMachineNode(*Opcode, DL, N->getVTList(), Ops);
+
+ SDNode *LD = CurDAG->getMachineNode(*Opcode, DL, N->getVTList(), Ops);
MachineMemOperand *MemRef = cast<MemSDNode>(N)->getMemOperand();
CurDAG->setNodeMemRefs(cast<MachineSDNode>(LD), {MemRef});
@@ -1197,176 +1141,58 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
SDValue Chain = N->getOperand(0);
std::optional<unsigned> Opcode;
- SDLoc DL(N);
- SDNode *LD;
- SDValue Base, Offset;
-
- if (SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) {
- switch (N->getOpcode()) {
- default:
- return false;
- case ISD::LOAD:
- Opcode = pickOpcodeForVT(
- EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_GLOBAL_i8asi,
- NVPTX::INT_PTX_LDG_GLOBAL_i16asi, NVPTX::INT_PTX_LDG_GLOBAL_i32asi,
- NVPTX::INT_PTX_LDG_GLOBAL_i64asi, NVPTX::INT_PTX_LDG_GLOBAL_f32asi,
- NVPTX::INT_PTX_LDG_GLOBAL_f64asi);
- break;
- case ISD::INTRINSIC_W_CHAIN:
- Opcode = pickOpcodeForVT(
- EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDU_GLOBAL_i8asi,
- NVPTX::INT_PTX_LDU_GLOBAL_i16asi, NVPTX::INT_PTX_LDU_GLOBAL_i32asi,
- NVPTX::INT_PTX_LDU_GLOBAL_i64asi, NVPTX::INT_PTX_LDU_GLOBAL_f32asi,
- NVPTX::INT_PTX_LDU_GLOBAL_f64asi);
- break;
- case NVPTXISD::LoadV2:
- Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
- NVPTX::INT_PTX_LDG_G_v2i8_ELE_asi,
- NVPTX::INT_PTX_LDG_G_v2i16_ELE_asi,
- NVPTX::INT_PTX_LDG_G_v2i32_ELE_asi,
- NVPTX::INT_PTX_LDG_G_v2i64_ELE_asi,
- NVPTX::INT_PTX_LDG_G_v2f32_ELE_asi,
- NVPTX::INT_PTX_LDG_G_v2f64_ELE_asi);
- break;
- case NVPTXISD::LDUV2:
- Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
- NVPTX::INT_PTX_LDU_G_v2i8_ELE_asi,
- NVPTX::INT_PTX_LDU_G_v2i16_ELE_asi,
- NVPTX::INT_PTX_LDU_G_v2i32_ELE_asi,
- NVPTX::INT_PTX_LDU_G_v2i64_ELE_asi,
- NVPTX::INT_PTX_LDU_G_v2f32_ELE_asi,
- NVPTX::INT_PTX_LDU_G_v2f64_ELE_asi);
- break;
- case NVPTXISD::LoadV4:
- Opcode = pickOpcodeForVT(
- EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_G_v4i8_ELE_asi,
- NVPTX::INT_PTX_LDG_G_v4i16_ELE_asi,
- NVPTX::INT_PTX_LDG_G_v4i32_ELE_asi, std::nullopt,
- NVPTX::INT_PTX_LDG_G_v4f32_ELE_asi, std::nullopt);
- break;
- case NVPTXISD::LDUV4:
- Opcode = pickOpcodeForVT(
- EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDU_G_v4i8_ELE_asi,
- NVPTX::INT_PTX_LDU_G_v4i16_ELE_asi,
- NVPTX::INT_PTX_LDU_G_v4i32_ELE_asi, std::nullopt,
- NVPTX::INT_PTX_LDU_G_v4f32_ELE_asi, std::nullopt);
- break;
- }
- } else {
- if (TM.is64Bit()) {
- SelectADDRri64(Op1.getNode(), Op1, Base, Offset);
- switch (N->getOpcode()) {
- default:
- return false;
- case ISD::LOAD:
- Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
- NVPTX::INT_PTX_LDG_GLOBAL_i8ari64,
- NVPTX::INT_PTX_LDG_GLOBAL_i16ari64,
- NVPTX::INT_PTX_LDG_GLOBAL_i32ari64,
- NVPTX::INT_PTX_LDG_GLOBAL_i64ari64,
- NVPTX::INT_PTX_LDG_GLOBAL_f32ari64,
- NVPTX::INT_PTX_LDG_GLOBAL_f64ari64);
- break;
- case ISD::INTRINSIC_W_CHAIN:
- Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
- NVPTX::INT_PTX_LDU_GLOBAL_i8ari64,
- NVPTX::INT_PTX_LDU_GLOBAL_i16ari64,
- NVPTX::INT_PTX_LDU_GLOBAL_i32ari64,
- NVPTX::INT_PTX_LDU_GLOBAL_i64ari64,
- NVPTX::INT_PTX_LDU_GLOBAL_f32ari64,
- NVPTX::INT_PTX_LDU_GLOBAL_f64ari64);
- break;
- case NVPTXISD::LoadV2:
- Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
- NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari64,
- NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari64,
- NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari64,
- NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari64,
- NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari64,
- NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari64);
- break;
- case NVPTXISD::LDUV2:
- Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
- NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari64,
- NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari64,
- NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari64,
- NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari64,
- NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari64,
- NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari64);
- break;
- case NVPTXISD::LoadV4:
- Opcode = pickOpcodeForVT(
- EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari64,
- NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari64,
- NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari64, std::nullopt,
- NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari64, std::nullopt);
- break;
- case NVPTXISD::LDUV4:
- Opcode = pickOpcodeForVT(
- EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari64,
- NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari64,
- NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari64, std::nullopt,
- NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari64, std::nullopt);
- break;
- }
- } else {
- SelectADDRri(Op1.getNode(), Op1, Base, Offset);
- switch (N->getOpcode()) {
- default:
- return false;
- case ISD::LOAD:
- Opcode = pickOpcodeForVT(
- EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_GLOBAL_i8ari,
- NVPTX::INT_PTX_LDG_GLOBAL_i16ari, NVPTX::INT_PTX_LDG_GLOBAL_i32ari,
- NVPTX::INT_PTX_LDG_GLOBAL_i64ari, NVPTX::INT_PTX_LDG_GLOBAL_f32ari,
- NVPTX::INT_PTX_LDG_GLOBAL_f64ari);
- break;
- case ISD::INTRINSIC_W_CHAIN:
- Opcode = pickOpcodeForVT(
- EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDU_GLOBAL_i8ari,
- NVPTX::INT_PTX_LDU_GLOBAL_i16ari, NVPTX::INT_PTX_LDU_GLOBAL_i32ari,
- NVPTX::INT_PTX_LDU_GLOBAL_i64ari, NVPTX::INT_PTX_LDU_GLOBAL_f32ari,
- NVPTX::INT_PTX_LDU_GLOBAL_f64ari);
- break;
- case NVPTXISD::LoadV2:
- Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
- NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari32,
- NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari32,
- NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari32,
- NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari32,
- NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari32,
- NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari32);
- break;
- case NVPTXISD::LDUV2:
- Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
- NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari32,
- NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari32,
- NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari32,
- NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari32,
- NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari32,
- NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari32);
- break;
- case NVPTXISD::LoadV4:
- Opcode = pickOpcodeForVT(
- EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari32,
- NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari32,
- NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari32, std::nullopt,
- NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari32, std::nullopt);
- break;
- case NVPTXISD::LDUV4:
- Opcode = pickOpcodeForVT(
- EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari32,
- NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari32,
- NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari32, std::nullopt,
- NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari32, std::nullopt);
- break;
- }
- }
+ switch (N->getOpcode()) {
+ default:
+ return false;
+ case ISD::LOAD:
+ Opcode = pickOpcodeForVT(
+ EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_GLOBAL_i8,
+ NVPTX::INT_PTX_LDG_GLOBAL_i16, NVPTX::INT_PTX_LDG_GLOBAL_i32,
+ NVPTX::INT_PTX_LDG_GLOBAL_i64, NVPTX::INT_PTX_LDG_GLOBAL_f32,
+ NVPTX::INT_PTX_LDG_GLOBAL_f64);
+ break;
+ case ISD::INTRINSIC_W_CHAIN:
+ Opcode = pickOpcodeForVT(
+ EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDU_GLOBAL_i8,
+ NVPTX::INT_PTX_LDU_GLOBAL_i16, NVPTX::INT_PTX_LDU_GLOBAL_i32,
+ NVPTX::INT_PTX_LDU_GLOBAL_i64, NVPTX::INT_PTX_LDU_GLOBAL_f32,
+ NVPTX::INT_PTX_LDU_GLOBAL_f64);
+ break;
+ case NVPTXISD::LoadV2:
+ Opcode = pickOpcodeForVT(
+ EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_G_v2i8_ELE,
+ NVPTX::INT_PTX_LDG_G_v2i16_ELE, NVPTX::INT_PTX_LDG_G_v2i32_ELE,
+ NVPTX::INT_PTX_LDG_G_v2i64_ELE, NVPTX::INT_PTX_LDG_G_v2f32_ELE,
+ NVPTX::INT_PTX_LDG_G_v2f64_ELE);
+ break;
+ case NVPTXISD::LDUV2:
+ Opcode = pickOpcodeForVT(
+ EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDU_G_v2i8_ELE,
+ NVPTX::INT_PTX_LDU_G_v2i16_ELE, NVPTX::INT_PTX_LDU_G_v2i32_ELE,
+ NVPTX::INT_PTX_LDU_G_v2i64_ELE, NVPTX::INT_PTX_LDU_G_v2f32_ELE,
+ NVPTX::INT_PTX_LDU_G_v2f64_ELE);
+ break;
+ case NVPTXISD::LoadV4:
+ Opcode = pickOpcodeForVT(
+ EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_G_v4i8_ELE,
+ NVPTX::INT_PTX_LDG_G_v4i16_ELE, NVPTX::INT_PTX_LDG_G_v4i32_ELE,
+ std::nullopt, NVPTX::INT_PTX_LDG_G_v4f32_ELE, std::nullopt);
+ break;
+ case NVPTXISD::LDUV4:
+ Opcode = pickOpcodeForVT(
+ EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDU_G_v4i8_ELE,
+ NVPTX::INT_PTX_LDU_G_v4i16_ELE, NVPTX::INT_PTX_LDU_G_v4i32_ELE,
+ std::nullopt, NVPTX::INT_PTX_LDU_G_v4f32_ELE, std::nullopt);
+ break;
}
if (!Opcode)
return false;
+
+ SDLoc DL(N);
+ SDValue Base, Offset;
+ SelectADDR(Op1, Base, Offset);
SDValue Ops[] = {Base, Offset, Chain};
- LD = CurDAG->getMachineNode(*Opcode, DL, InstVTList, Ops);
+ SDNode *LD = CurDAG->getMachineNode(*Opcode, DL, InstVTList, Ops);
// For automatic generation of LDG (through SelectLoad[Vector], not the
// intrinsics), we may have an extending load like:
@@ -1424,8 +1250,6 @@ bool NVPTXDAGToDAGISel::tryStore(SDNode *N) {
// Address Space Setting
unsigned int CodeAddrSpace = getCodeAddrSpace(ST);
- unsigned int PointerSize =
- CurDAG->getDataLayout().getPointerSizeInBits(ST->getAddressSpace());
SDLoc DL(N);
SDValue Chain = ST->getChain();
@@ -1450,38 +1274,28 @@ bool NVPTXDAGToDAGISel::tryStore(SDNode *N) {
// Create the machine instruction DAG
SDValue Value = PlainStore ? PlainStore->getValue() : AtomicStore->getVal();
- SDValue BasePtr = ST->getBasePtr();
+
SDValue Offset, Base;
- std::optional<unsigned> Opcode;
- MVT::SimpleValueType SourceVT =
+ SelectADDR(ST->getBasePtr(), Base, Offset);
+
+ SDValue Ops[] = {Value,
+ getI32Imm(Ordering, DL),
+ getI32Imm(Scope, DL),
+ getI32Imm(CodeAddrSpace, DL),
+ getI32Imm(VecType, DL),
+ getI32Imm(ToType, DL),
+ getI32Imm(ToTypeWidth, DL),
+ Base,
+ Offset,
+ Chain};
+
+ const MVT::SimpleValueType SourceVT =
Value.getNode()->getSimpleValueType(0).SimpleTy;
-
- SmallVector<SDValue, 12> Ops(
- {Value, getI32Imm(Ordering, DL), getI32Imm(Scope, DL),
- getI32Imm(CodeAddrSpace, DL), getI32Imm(VecType, DL),
- getI32Imm(ToType, DL), getI32Imm(ToTypeWidth, DL)});
-
- if (SelectADDRsi(BasePtr.getNode(), BasePtr, Base, Offset)) {
- Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_asi, NVPTX::ST_i16_asi,
- NVPTX::ST_i32_asi, NVPTX::ST_i64_asi,
- NVPTX::ST_f32_asi, NVPTX::ST_f64_asi);
- } else {
- if (PointerSize == 64) {
- SelectADDRri64(BasePtr.getNode(), BasePtr, Base, Offset);
- Opcode =
- pickOpcodeForVT(SourceVT, NVPTX::ST_i8_ari_64, NVPTX::ST_i16_ari_64,
- NVPTX::ST_i32_ari_64, NVPTX::ST_i64_ari_64,
- NVPTX::ST_f32_ari_64, NVPTX::ST_f64_ari_64);
- } else {
- SelectADDRri(BasePtr.getNode(), BasePtr, Base, Offset);
- Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_ari, NVPTX::ST_i16_ari,
- NVPTX::ST_i32_ari, NVPTX::ST_i64_ari,
- NVPTX::ST_f32_ari, NVPTX::ST_f64_ari);
- }
- }
+ const std::optional<unsigned> Opcode =
+ pickOpcodeForVT(SourceVT, NVPTX::ST_i8, NVPTX::ST_i16, NVPTX::ST_i32,
+ NVPTX::ST_i64, NVPTX::ST_f32, NVPTX::ST_f64);
if (!Opcode)
return false;
- Ops.append({Base, Offset, Chain});
SDNode *NVPTXST = CurDAG->getMachineNode(*Opcode, DL, MVT::Other, Ops);
@@ -1496,9 +1310,6 @@ bool NVPTXDAGToDAGISel::tryStore(SDNode *N) {
bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) {
SDValue Op1 = N->getOperand(1);
- SDValue Offset, Base;
- std::optional<unsigned> Opcode;
- SDNode *ST;
EVT EltVT = Op1.getValueType();
MemSDNode *MemSD = cast<MemSDNode>(N);
EVT StoreVT = MemSD->getMemoryVT();
@@ -1509,8 +1320,6 @@ bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) {
report_fatal_error("Cannot store to pointer that points to constant "
"memory space");
}
- unsigned int PointerSize =
- CurDAG->getDataLayout().getPointerSizeInBits(MemSD->getAddressSpace());
SDLoc DL(N);
SDValue Chain = N->getOperand(0);
@@ -1549,72 +1358,35 @@ bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) {
ToTypeWidth = 32;
}
+ SDValue Offset, Base;
+ SelectADDR(N2, Base, Offset);
+
Ops.append({getI32Imm(Ordering, DL), getI32Imm(Scope, DL),
getI32Imm(CodeAddrSpace, DL), getI32Imm(VecType, DL),
- getI32Imm(ToType, DL), getI32Imm(ToTypeWidth, DL)});
+ getI32Imm(ToType, DL), getI32Imm(ToTypeWidth, DL), Base, Offset,
+ Chain});
- if (SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
- switch (N->getOpcode()) {
- default:
- return false;
- case NVPTXISD::StoreV2:
- Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
- NVPTX::STV_i8_v2_asi, NVPTX::STV_i16_v2_asi,
- NVPTX::STV_i32_v2_asi, NVPTX::STV_i64_v2_asi,
- NVPTX::STV_f32_v2_asi, NVPTX::STV_f64_v2_asi);
- break;
- case NVPTXISD::StoreV4:
- Opcode =
- pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_asi,
- NVPTX::STV_i16_v4_asi, NVPTX::STV_i32_v4_asi,
- std::nullopt, NVPTX::STV_f32_v4_asi, std::nullopt);
- break;
- }
- } else {
- if (PointerSize == 64) {
- SelectADDRri64(N2.getNode(), N2, Base, Offset);
- switch (N->getOpcode()) {
- default:
- return false;
- case NVPTXISD::StoreV2:
- Opcode =
- pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
- NVPTX::STV_i8_v2_ari_64, NVPTX::STV_i16_v2_ari_64,
- NVPTX::STV_i32_v2_ari_64, NVPTX::STV_i64_v2_ari_64,
- NVPTX::STV_f32_v2_ari_64, NVPTX::STV_f64_v2_ari_64);
- break;
- case NVPTXISD::StoreV4:
- Opcode = pickOpcodeForVT(
- EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_ari_64,
- NVPTX::STV_i16_v4_ari_64, NVPTX::STV_i32_v4_ari_64, std::nullopt,
- NVPTX::STV_f32_v4_ari_64, std::nullopt);
- break;
- }
- } else {
- SelectADDRri(N2.getNode(), N2, Base, Offset);
- switch (N->getOpcode()) {
- default:
- return false;
- case NVPTXISD::StoreV2:
- Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
- NVPTX::STV_i8_v2_ari, NVPTX::STV_i16_v2_ari,
- NVPTX::STV_i32_v2_ari, NVPTX::STV_i64_v2_ari,
- NVPTX::STV_f32_v2_ari, NVPTX::STV_f64_v2_ari);
- break;
- case NVPTXISD::StoreV4:
- Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
- NVPTX::STV_i8_v4_ari, NVPTX::STV_i16_v4_ari,
- NVPTX::STV_i32_v4_ari, std::nullopt,
- NVPTX::STV_f32_v4_ari, std::nullopt);
- break;
- }
- }
+ std::optional<unsigned> Opcode;
+ switch (N->getOpcode()) {
+ default:
+ return false;
+ case NVPTXISD::StoreV2:
+ Opcode =
+ pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v2,
+ NVPTX::STV_i16_v2, NVPTX::STV_i32_v2, NVPTX::STV_i64_v2,
+ NVPTX::STV_f32_v2, NVPTX::STV_f64_v2);
+ break;
+ case NVPTXISD::StoreV4:
+ Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4,
+ NVPTX::STV_i16_v4, NVPTX::STV_i32_v4, std::nullopt,
+ NVPTX::STV_f32_v4, std::nullopt);
+ break;
}
+
if (!Opcode)
return false;
- Ops.append({Base, Offset, Chain});
- ST = CurDAG->getMachineNode(*Opcode, DL, MVT::Other, Ops);
+ SDNode *ST = CurDAG->getMachineNode(*Opcode, DL, MVT::Other, Ops);
MachineMemOperand *MemRef = cast<MemSDNode>(N)->getMemOperand();
CurDAG->setNodeMemRefs(cast<MachineSDNode>(ST), {MemRef});
@@ -2265,27 +2037,28 @@ static inline bool isAddLike(const SDValue V) {
(V->getOpcode() == ISD::OR && V->getFlags().hasDisjoint());
}
-// SelectDirectAddr - Match a direct address for DAG.
-// A direct address could be a globaladdress or externalsymbol.
-bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) {
+// selectBaseADDR - Match a dag node which will serve as the base address for an
+// ADDR operand pair.
+static SDValue selectBaseADDR(SDValue N, SelectionDAG *DAG) {
// Return true if TGA or ES.
if (N.getOpcode() == ISD::TargetGlobalAddress ||
- N.getOpcode() == ISD::TargetExternalSymbol) {
- Address = N;
- return true;
- }
- if (N.getOpcode() == NVPTXISD::Wrapper) {
- Address = N.getOperand(0);
- return true;
- }
+ N.getOpcode() == ISD::TargetExternalSymbol)
+ return N;
+
+ if (N.getOpcode() == NVPTXISD::Wrapper)
+ return N.getOperand(0);
+
// addrspacecast(MoveParam(arg_symbol) to addrspace(PARAM)) -> arg_symbol
- if (AddrSpaceCastSDNode *CastN = dyn_cast<AddrSpaceCastSDNode>(N)) {
+ if (AddrSpaceCastSDNode *CastN = dyn_cast<AddrSpaceCastSDNode>(N))
if (CastN->getSrcAddressSpace() == ADDRESS_SPACE_GENERIC &&
CastN->getDestAddressSpace() == ADDRESS_SPACE_PARAM &&
CastN->getOperand(0).getOpcode() == NVPTXISD::MoveParam)
- return SelectDirectAddr(CastN->getOperand(0).getOperand(0), Address);
- }
- return false;
+ return selectBaseADDR(CastN->getOperand(0).getOperand(0), DAG);
+
+ if (auto *FIN = dyn_cast<FrameIndexSDNode>(N))
+ return DAG->getTargetFrameIndex(FIN->getIndex(), FIN->getValueType(0));
+
+ return N;
}
static SDValue accumulateOffset(SDValue &Addr, SDLoc DL, SelectionDAG *DAG) {
@@ -2306,37 +2079,16 @@ static SDValue accumulateOffset(SDValue &Addr, SDLoc DL, SelectionDAG *DAG) {
MVT::i32);
}
-// symbol+offset
-bool NVPTXDAGToDAGISel::SelectADDRsi(SDNode *OpNode, SDValue Addr,
- SDValue &Base, SDValue &Offset) {
- Offset = accumulateOffset(Addr, SDLoc(OpNode), CurDAG);
- return SelectDirectAddr(Addr, Base);
-}
-
-// register+offset
-void NVPTXDAGToDAGISel::SelectADDRri_imp(SDNode *OpNode, SDValue Addr,
- SDValue &Base, SDValue &Offset,
- MVT VT) {
-
- Offset = accumulateOffset(Addr, SDLoc(OpNode), CurDAG);
- if (auto *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
- Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), VT);
- return;
- }
- Base = Addr;
-}
-
-// register+offset
-bool NVPTXDAGToDAGISel::SelectADDRri(SDNode *OpNode, SDValue Addr,
- SDValue &Base, SDValue &Offset) {
- SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i32);
- return true;
-}
-
-// register+offset
-bool NVPTXDAGToDAGISel::SelectADDRri64(SDNode *OpNode, SDValue Addr,
- SDValue &Base, SDValue &Offset) {
- SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i64);
+// Select a pair of operands which represent a valid PTX address, this could be
+// one of the following things:
+// - [var] - Offset is simply set to 0
+// - [reg] - Offset is simply set to 0
+// - [reg+immOff]
+// - [var+immOff]
+bool NVPTXDAGToDAGISel::SelectADDR(SDValue Addr, SDValue &Base,
+ SDValue &Offset) {
+ Offset = accumulateOffset(Addr, SDLoc(Addr), CurDAG);
+ Base = selectBaseADDR(Addr, CurDAG);
return true;
}
@@ -2365,12 +2117,7 @@ bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand(
default:
return true;
case InlineAsm::ConstraintCode::m: // memory
- if (SelectDirectAddr(Op, Op0)) {
- OutOps.push_back(Op0);
- OutOps.push_back(CurDAG->getTargetConstant(0, SDLoc(Op), MVT::i32));
- return false;
- }
- if (SelectADDRri(Op.getNode(), Op, Op0, Op1)) {
+ if (SelectADDR(Op, Op0, Op1)) {
OutOps.push_back(Op0);
OutOps.push_back(Op1);
return false;
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
index 1d02ae333c86b..0a33001249e7e 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
+++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
@@ -104,17 +104,7 @@ class LLVM_LIBRARY_VISIBILITY NVPTXDAGToDAGISel : public SelectionDAGISel {
return CurDAG->getTargetConstant(Imm, DL, MVT::i32);
}
- // Match direct address complex pattern.
- bool SelectDirectAddr(SDValue N, SDValue &Address);
-
- void SelectADDRri_imp(SDNode *OpNode, SDValue Addr, SDValue &Base,
- SDValue &Offset, MVT VT);
- bool SelectADDRri(SDNode *OpNode, SDValue Addr, SDValue &Base,
- SDValue &Offset);
- bool SelectADDRri64(SDNode *OpNode, SDValue Addr, SDValue &Base,
- SDValue &Offset);
- bool SelectADDRsi(SDNode *OpNode, SDValue Addr, SDValue &Base,
- SDValue &Offset);
+ bool SelectADDR(SDValue Addr, SDValue &Base, SDValue &Offset);
bool ChkMemSDNodeAddressSpace(SDNode *N, unsigned int spN) const;
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index 6a0f708021a16..36a0a06bdb8aa 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -1917,27 +1917,15 @@ defm SET_f64 : SET<"f64", Float64Regs, f64imm>;
// Data Movement (Load / Store, Move)
//-----------------------------------
-let WantsRoot = true in {
- def ADDRri : ComplexPattern<i32, 2, "SelectADDRri", [frameindex]>;
- def ADDRri64 : ComplexPattern<i64, 2, "SelectADDRri64", [frameindex]>;
-}
-def ADDRvar : ComplexPattern<iPTR, 1, "SelectDirectAddr", [], []>;
+def addr : ComplexPattern<pAny, 2, "SelectADDR">;
-def MEMri : Operand<i32> {
- let PrintMethod = "printMemOperand";
- let MIOperandInfo = (ops Int32Regs, i32imm);
-}
-def MEMri64 : Operand<i64> {
- let PrintMethod = "printMemOperand";
- let MIOperandInfo = (ops Int64Regs, i64imm);
-}
-
-def imem : Operand<iPTR> {
+def ADDR_base : Operand<pAny> {
let PrintMethod = "printOperand";
}
-def imemAny : Operand<pAny> {
- let PrintMethod = "printOperand";
+def ADDR : Operand<pAny> {
+ let PrintMethod = "printMemOperand";
+ let MIOperandInfo = (ops ADDR_base, i32imm);
}
def LdStCode : Operand<i32> {
@@ -1956,10 +1944,10 @@ def SDTWrapper : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>;
def Wrapper : SDNode<"NVPTXISD::Wrapper", SDTWrapper>;
// Load a memory address into a u32 or u64 register.
-def MOV_ADDR : NVPTXInst<(outs Int32Regs:$dst), (ins imem:$a),
+def MOV_ADDR : NVPTXInst<(outs Int32Regs:$dst), (ins ADDR_base:$a),
"mov.u32 \t$dst, $a;",
[(set i32:$dst, (Wrapper tglobaladdr:$a))]>;
-def MOV_ADDR64 : NVPTXInst<(outs Int64Regs:$dst), (ins imem:$a),
+def MOV_ADDR64 : NVPTXInst<(outs Int64Regs:$dst), (ins ADDR_base:$a),
"mov.u64 \t$dst, $a;",
[(set i64:$dst, (Wrapper tglobaladdr:$a))]>;
@@ -2021,12 +2009,17 @@ def : Pat<(i32 (Wrapper texternalsym:$dst)), (IMOV32ri texternalsym:$dst)>;
def : Pat<(i64 (Wrapper texternalsym:$dst)), (IMOV64ri texternalsym:$dst)>;
//---- Copy Frame Index ----
-def LEA_ADDRi : NVPTXInst<(outs Int32Regs:$dst), (ins MEMri:$addr),
- "add.u32 \t$dst, ${addr:add};",
- [(set i32:$dst, ADDRri:$addr)]>;
-def LEA_ADDRi64 : NVPTXInst<(outs Int64Regs:$dst), (ins MEMri64:$addr),
- "add.u64 \t$dst, ${addr:add};",
- [(set i64:$dst, ADDRri64:$addr)]>;
+def LEA_ADDRi : NVPTXInst<(outs Int32Regs:$dst), (ins ADDR:$addr),
+ "add.u32 \t$dst, ${addr:add};", []>;
+def LEA_ADDRi64 : NVPTXInst<(outs Int64Regs:$dst), (ins ADDR:$addr),
+ "add.u64 \t$dst, ${addr:add};", []>;
+
+def to_tframeindex : SDNodeXForm<frameindex, [{
+ return CurDAG->getTargetFrameIndex(N->getIndex(), N->getValueType(0));
+}]>;
+
+def : Pat<(i32 frameindex:$fi), (LEA_ADDRi (to_tframeindex $fi), 0)>;
+def : Pat<(i64 frameindex:$fi), (LEA_ADDRi64 (to_tframeindex $fi), 0)>;
//-----------------------------------
// Comparison and Selection
@@ -2660,7 +2653,7 @@ def CallArgParam : NVPTXInst<(outs), (ins i32imm:$a), "param$a, ",
def LastCallArgParam : NVPTXInst<(outs), (ins i32imm:$a), "param$a",
[(LastCallArg (i32 1), (i32 imm:$a))]>;
-def CallVoidInst : NVPTXInst<(outs), (ins imem:$addr), "$addr, ",
+def CallVoidInst : NVPTXInst<(outs), (ins ADDR_base:$addr), "$addr, ",
[(CallVoid (Wrapper tglobaladdr:$addr))]>;
def CallVoidInstReg : NVPTXInst<(outs), (ins Int32Regs:$addr), "$addr, ",
[(CallVoid i32:$addr)]>;
@@ -2753,109 +2746,56 @@ foreach vt = [v2f16, v2bf16, v2i16, v4i8] in {
//
// Load / Store Handling
//
-multiclass LD<NVPTXRegClass regclass> {
- def _ari : NVPTXInst<
+class LD<NVPTXRegClass regclass>
+ : NVPTXInst<
(outs regclass:$dst),
(ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
- i32imm:$fromWidth, Int32Regs:$addr, Offseti32imm:$offset),
- "ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
- "\t$dst, [$addr$offset];", []>;
- def _ari_64 : NVPTXInst<
- (outs regclass:$dst),
- (ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
- LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr, Offseti32imm:$offset),
- "ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
- "\t$dst, [$addr$offset];", []>;
- def _asi : NVPTXInst<
- (outs regclass:$dst),
- (ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
- LdStCode:$Sign, i32imm:$fromWidth, imem:$addr, Offseti32imm:$offset),
+ i32imm:$fromWidth, ADDR:$addr),
"ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
- "\t$dst, [$addr$offset];", []>;
-}
+ "\t$dst, [$addr];", []>;
let mayLoad=1, hasSideEffects=0 in {
- defm LD_i8 : LD<Int16Regs>;
- defm LD_i16 : LD<Int16Regs>;
- defm LD_i32 : LD<Int32Regs>;
- defm LD_i64 : LD<Int64Regs>;
- defm LD_f32 : LD<Float32Regs>;
- defm LD_f64 : LD<Float64Regs>;
+ def LD_i8 : LD<Int16Regs>;
+ def LD_i16 : LD<Int16Regs>;
+ def LD_i32 : LD<Int32Regs>;
+ def LD_i64 : LD<Int64Regs>;
+ def LD_f32 : LD<Float32Regs>;
+ def LD_f64 : LD<Float64Regs>;
}
-multiclass ST<NVPTXRegClass regclass> {
- def _ari : NVPTXInst<
+class ST<NVPTXRegClass regclass>
+ : NVPTXInst<
(outs),
(ins regclass:$src, LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp,
- LdStCode:$Vec, LdStCode:$Sign, i32imm:$toWidth, Int32Regs:$addr,
- Offseti32imm:$offset),
+ LdStCode:$Vec, LdStCode:$Sign, i32imm:$toWidth, ADDR:$addr),
"st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth"
- " \t[$addr$offset], $src;", []>;
- def _ari_64 : NVPTXInst<
- (outs),
- (ins regclass:$src, LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp,
- LdStCode:$Vec, LdStCode:$Sign, i32imm:$toWidth, Int64Regs:$addr,
- Offseti32imm:$offset),
- "st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth"
- " \t[$addr$offset], $src;", []>;
- def _asi : NVPTXInst<
- (outs),
- (ins regclass:$src, LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp,
- LdStCode:$Vec, LdStCode:$Sign, i32imm:$toWidth, imem:$addr,
- Offseti32imm:$offset),
- "st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth"
- " \t[$addr$offset], $src;", []>;
-}
+ " \t[$addr], $src;", []>;
let mayStore=1, hasSideEffects=0 in {
- defm ST_i8 : ST<Int16Regs>;
- defm ST_i16 : ST<Int16Regs>;
- defm ST_i32 : ST<Int32Regs>;
- defm ST_i64 : ST<Int64Regs>;
- defm ST_f32 : ST<Float32Regs>;
- defm ST_f64 : ST<Float64Regs>;
+ def ST_i8 : ST<Int16Regs>;
+ def ST_i16 : ST<Int16Regs>;
+ def ST_i32 : ST<Int32Regs>;
+ def ST_i64 : ST<Int64Regs>;
+ def ST_f32 : ST<Float32Regs>;
+ def ST_f64 : ST<Float64Regs>;
}
// The following is used only in and after vector elementizations. Vector
// elementization happens at the machine instruction level, so the following
// instructions never appear in the DAG.
multiclass LD_VEC<NVPTXRegClass regclass> {
- def _v2_ari : NVPTXInst<
- (outs regclass:$dst1, regclass:$dst2),
- (ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
- LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr, Offseti32imm:$offset),
- "ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
- "\t{{$dst1, $dst2}}, [$addr$offset];", []>;
- def _v2_ari_64 : NVPTXInst<
+ def _v2 : NVPTXInst<
(outs regclass:$dst1, regclass:$dst2),
(ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
- LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr, Offseti32imm:$offset),
+ LdStCode:$Sign, i32imm:$fromWidth, ADDR:$addr),
"ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
- "\t{{$dst1, $dst2}}, [$addr$offset];", []>;
- def _v2_asi : NVPTXInst<
- (outs regclass:$dst1, regclass:$dst2),
- (ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
- LdStCode:$Sign, i32imm:$fromWidth, imem:$addr, Offseti32imm:$offset),
- "ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
- "\t{{$dst1, $dst2}}, [$addr$offset];", []>;
- def _v4_ari : NVPTXInst<
+ "\t{{$dst1, $dst2}}, [$addr];", []>;
+ def _v4 : NVPTXInst<
(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4),
(ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
- LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr, Offseti32imm:$offset),
+ LdStCode:$Sign, i32imm:$fromWidth, ADDR:$addr),
"ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
- "\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr$offset];", []>;
- def _v4_ari_64 : NVPTXInst<
- (outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4),
- (ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
- LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr, Offseti32imm:$offset),
- "ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
- "\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr$offset];", []>;
- def _v4_asi : NVPTXInst<
- (outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4),
- (ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
- LdStCode:$Sign, i32imm:$fromWidth, imem:$addr, Offseti32imm:$offset),
- "ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
- "\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr$offset];", []>;
+ "\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];", []>;
}
let mayLoad=1, hasSideEffects=0 in {
defm LDV_i8 : LD_VEC<Int16Regs>;
@@ -2867,48 +2807,20 @@ let mayLoad=1, hasSideEffects=0 in {
}
multiclass ST_VEC<NVPTXRegClass regclass> {
- def _v2_ari : NVPTXInst<
- (outs),
- (ins regclass:$src1, regclass:$src2, LdStCode:$sem, LdStCode:$scope,
- LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth,
- Int32Regs:$addr, Offseti32imm:$offset),
- "st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
- "\t[$addr$offset], {{$src1, $src2}};", []>;
- def _v2_ari_64 : NVPTXInst<
+ def _v2 : NVPTXInst<
(outs),
(ins regclass:$src1, regclass:$src2, LdStCode:$sem, LdStCode:$scope,
LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth,
- Int64Regs:$addr, Offseti32imm:$offset),
+ ADDR:$addr),
"st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
- "\t[$addr$offset], {{$src1, $src2}};", []>;
- def _v2_asi : NVPTXInst<
- (outs),
- (ins regclass:$src1, regclass:$src2, LdStCode:$sem, LdStCode:$scope,
- LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth,
- imem:$addr, Offseti32imm:$offset),
- "st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
- "\t[$addr$offset], {{$src1, $src2}};", []>;
- def _v4_ari : NVPTXInst<
+ "\t[$addr], {{$src1, $src2}};", []>;
+ def _v4 : NVPTXInst<
(outs),
(ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
- LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr, Offseti32imm:$offset),
+ LdStCode:$Sign, i32imm:$fromWidth, ADDR:$addr),
"st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
- "\t[$addr$offset], {{$src1, $src2, $src3, $src4}};", []>;
- def _v4_ari_64 : NVPTXInst<
- (outs),
- (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
- LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
- LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr, Offseti32imm:$offset),
- "st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
- "\t[$addr$offset], {{$src1, $src2, $src3, $src4}};", []>;
- def _v4_asi : NVPTXInst<
- (outs),
- (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
- LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
- LdStCode:$Sign, i32imm:$fromWidth, imem:$addr, Offseti32imm:$offset),
- "st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}"
- "$fromWidth \t[$addr$offset], {{$src1, $src2, $src3, $src4}};", []>;
+ "\t[$addr], {{$src1, $src2, $src3, $src4}};", []>;
}
let mayStore=1, hasSideEffects=0 in {
diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
index 3373f9e90524f..d339afa39d891 100644
--- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
+++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
@@ -2717,65 +2717,46 @@ defm INT_PTX_SATOM_XOR : ATOM2_bitwise_impl<"xor">;
// Scalar
-multiclass LDU_G<string TyStr, NVPTXRegClass regclass> {
- def asi: NVPTXInst<(outs regclass:$result), (ins imemAny:$src, Offseti32imm:$offset),
- "ldu.global." # TyStr # " \t$result, [$src$offset];",
- []>, Requires<[hasLDU]>;
- def ari : NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
+class LDU_G<string TyStr, NVPTXRegClass regclass>
+ : NVPTXInst<(outs regclass:$result), (ins ADDR:$src),
"ldu.global." # TyStr # " \t$result, [$src];",
[]>, Requires<[hasLDU]>;
- def ari64 : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
- "ldu.global." # TyStr # " \t$result, [$src];",
- []>, Requires<[hasLDU]>;
-}
-defm INT_PTX_LDU_GLOBAL_i8 : LDU_G<"u8", Int16Regs>;
-defm INT_PTX_LDU_GLOBAL_i16 : LDU_G<"u16", Int16Regs>;
-defm INT_PTX_LDU_GLOBAL_i32 : LDU_G<"u32", Int32Regs>;
-defm INT_PTX_LDU_GLOBAL_i64 : LDU_G<"u64", Int64Regs>;
-defm INT_PTX_LDU_GLOBAL_f32 : LDU_G<"f32", Float32Regs>;
-defm INT_PTX_LDU_GLOBAL_f64 : LDU_G<"f64", Float64Regs>;
+def INT_PTX_LDU_GLOBAL_i8 : LDU_G<"u8", Int16Regs>;
+def INT_PTX_LDU_GLOBAL_i16 : LDU_G<"u16", Int16Regs>;
+def INT_PTX_LDU_GLOBAL_i32 : LDU_G<"u32", Int32Regs>;
+def INT_PTX_LDU_GLOBAL_i64 : LDU_G<"u64", Int64Regs>;
+def INT_PTX_LDU_GLOBAL_f32 : LDU_G<"f32", Float32Regs>;
+def INT_PTX_LDU_GLOBAL_f64 : LDU_G<"f64", Float64Regs>;
// vector
// Elementized vector ldu
-multiclass VLDU_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
- def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
- (ins MEMri:$src),
- "ldu.global.v2." # TyStr # " \t{{$dst1, $dst2}}, [$src];", []>;
- def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
- (ins MEMri64:$src),
+class VLDU_G_ELE_V2<string TyStr, NVPTXRegClass regclass>
+ : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
+ (ins ADDR:$src),
"ldu.global.v2." # TyStr # " \t{{$dst1, $dst2}}, [$src];", []>;
- def _asi: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
- (ins imemAny:$src, Offseti32imm:$offset),
- "ldu.global.v2." # TyStr # " \t{{$dst1, $dst2}}, [$src$offset];", []>;
-}
-multiclass VLDU_G_ELE_V4<string TyStr, NVPTXRegClass regclass> {
- def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
- regclass:$dst4), (ins MEMri:$src),
- "ldu.global.v4." # TyStr # " \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", []>;
- def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
- regclass:$dst4), (ins MEMri64:$src),
+
+class VLDU_G_ELE_V4<string TyStr, NVPTXRegClass regclass>
+ : NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
+ regclass:$dst4), (ins ADDR:$src),
"ldu.global.v4." # TyStr # " \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", []>;
- def _asi: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
- regclass:$dst4), (ins imemAny:$src, Offseti32imm:$offset),
- "ldu.global.v4." # TyStr # " \t{{$dst1, $dst2, $dst3, $dst4}}, [$src$offset];", []>;
-}
-defm INT_PTX_LDU_G_v2i8_ELE : VLDU_G_ELE_V2<"u8", Int16Regs>;
-defm INT_PTX_LDU_G_v2i16_ELE : VLDU_G_ELE_V2<"u16", Int16Regs>;
-defm INT_PTX_LDU_G_v2i32_ELE : VLDU_G_ELE_V2<"u32", Int32Regs>;
-defm INT_PTX_LDU_G_v2f32_ELE : VLDU_G_ELE_V2<"f32", Float32Regs>;
-defm INT_PTX_LDU_G_v2i64_ELE : VLDU_G_ELE_V2<"u64", Int64Regs>;
-defm INT_PTX_LDU_G_v2f64_ELE : VLDU_G_ELE_V2<"f64", Float64Regs>;
-defm INT_PTX_LDU_G_v4i8_ELE : VLDU_G_ELE_V4<"u8", Int16Regs>;
-defm INT_PTX_LDU_G_v4i16_ELE : VLDU_G_ELE_V4<"u16", Int16Regs>;
-defm INT_PTX_LDU_G_v4i32_ELE : VLDU_G_ELE_V4<"u32", Int32Regs>;
-defm INT_PTX_LDU_G_v4f16_ELE : VLDU_G_ELE_V4<"b16", Int16Regs>;
-defm INT_PTX_LDU_G_v4f16x2_ELE : VLDU_G_ELE_V4<"b32", Int32Regs>;
-defm INT_PTX_LDU_G_v4f32_ELE : VLDU_G_ELE_V4<"f32", Float32Regs>;
+def INT_PTX_LDU_G_v2i8_ELE : VLDU_G_ELE_V2<"u8", Int16Regs>;
+def INT_PTX_LDU_G_v2i16_ELE : VLDU_G_ELE_V2<"u16", Int16Regs>;
+def INT_PTX_LDU_G_v2i32_ELE : VLDU_G_ELE_V2<"u32", Int32Regs>;
+def INT_PTX_LDU_G_v2f32_ELE : VLDU_G_ELE_V2<"f32", Float32Regs>;
+def INT_PTX_LDU_G_v2i64_ELE : VLDU_G_ELE_V2<"u64", Int64Regs>;
+def INT_PTX_LDU_G_v2f64_ELE : VLDU_G_ELE_V2<"f64", Float64Regs>;
+
+def INT_PTX_LDU_G_v4i8_ELE : VLDU_G_ELE_V4<"u8", Int16Regs>;
+def INT_PTX_LDU_G_v4i16_ELE : VLDU_G_ELE_V4<"u16", Int16Regs>;
+def INT_PTX_LDU_G_v4i32_ELE : VLDU_G_ELE_V4<"u32", Int32Regs>;
+def INT_PTX_LDU_G_v4f16_ELE : VLDU_G_ELE_V4<"b16", Int16Regs>;
+def INT_PTX_LDU_G_v4f16x2_ELE : VLDU_G_ELE_V4<"b32", Int32Regs>;
+def INT_PTX_LDU_G_v4f32_ELE : VLDU_G_ELE_V4<"f32", Float32Regs>;
//-----------------------------------
@@ -2786,64 +2767,44 @@ defm INT_PTX_LDU_G_v4f32_ELE : VLDU_G_ELE_V4<"f32", Float32Regs>;
// non-coherent texture cache, and therefore the values read must be read-only
// during the lifetime of the kernel.
-multiclass LDG_G<string TyStr, NVPTXRegClass regclass> {
- def asi: NVPTXInst<(outs regclass:$result), (ins imemAny:$src, Offseti32imm:$offset),
- "ld.global.nc." # TyStr # " \t$result, [$src$offset];",
- []>, Requires<[hasLDG]>;
- def ari : NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
- "ld.global.nc." # TyStr # " \t$result, [$src];",
- []>, Requires<[hasLDG]>;
- def ari64 : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
+class LDG_G<string TyStr, NVPTXRegClass regclass>
+ : NVPTXInst<(outs regclass:$result), (ins ADDR:$src),
"ld.global.nc." # TyStr # " \t$result, [$src];",
[]>, Requires<[hasLDG]>;
-}
-defm INT_PTX_LDG_GLOBAL_i8 : LDG_G<"u8", Int16Regs>;
-defm INT_PTX_LDG_GLOBAL_i16 : LDG_G<"u16", Int16Regs>;
-defm INT_PTX_LDG_GLOBAL_i32 : LDG_G<"u32", Int32Regs>;
-defm INT_PTX_LDG_GLOBAL_i64 : LDG_G<"u64", Int64Regs>;
-defm INT_PTX_LDG_GLOBAL_f32 : LDG_G<"f32", Float32Regs>;
-defm INT_PTX_LDG_GLOBAL_f64 : LDG_G<"f64", Float64Regs>;
+def INT_PTX_LDG_GLOBAL_i8 : LDG_G<"u8", Int16Regs>;
+def INT_PTX_LDG_GLOBAL_i16 : LDG_G<"u16", Int16Regs>;
+def INT_PTX_LDG_GLOBAL_i32 : LDG_G<"u32", Int32Regs>;
+def INT_PTX_LDG_GLOBAL_i64 : LDG_G<"u64", Int64Regs>;
+def INT_PTX_LDG_GLOBAL_f32 : LDG_G<"f32", Float32Regs>;
+def INT_PTX_LDG_GLOBAL_f64 : LDG_G<"f64", Float64Regs>;
// vector
// Elementized vector ldg
-multiclass VLDG_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
- def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
- (ins MEMri:$src),
- "ld.global.nc.v2." # TyStr # " \t{{$dst1, $dst2}}, [$src];", []>;
- def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
- (ins MEMri64:$src),
- "ld.global.nc.v2." # TyStr # " \t{{$dst1, $dst2}}, [$src];", []>;
- def _asi: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
- (ins imemAny:$src, Offseti32imm:$offset),
- "ld.global.nc.v2." # TyStr # " \t{{$dst1, $dst2}}, [$src$offset];", []>;
-}
-
-multiclass VLDG_G_ELE_V4<string TyStr, NVPTXRegClass regclass> {
- def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
- regclass:$dst4), (ins MEMri:$src),
- "ld.global.nc.v4." # TyStr # " \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", []>;
- def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
- regclass:$dst4), (ins MEMri64:$src),
- "ld.global.nc.v4." # TyStr # " \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", []>;
- def _asi: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
- regclass:$dst4), (ins imemAny:$src, Offseti32imm:$offset),
- "ld.global.nc.v4." # TyStr # " \t{{$dst1, $dst2, $dst3, $dst4}}, [$src$offset];", []>;
-}
+class VLDG_G_ELE_V2<string TyStr, NVPTXRegClass regclass> :
+ NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
+ (ins ADDR:$src),
+ "ld.global.nc.v2." # TyStr # " \t{{$dst1, $dst2}}, [$src];", []>;
+
+
+class VLDG_G_ELE_V4<string TyStr, NVPTXRegClass regclass> :
+ NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4),
+ (ins ADDR:$src),
+ "ld.global.nc.v4." # TyStr # " \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", []>;
// FIXME: 8-bit LDG should be fixed once LDG/LDU nodes are made into proper loads.
-defm INT_PTX_LDG_G_v2i8_ELE : VLDG_G_ELE_V2<"u8", Int16Regs>;
-defm INT_PTX_LDG_G_v2i16_ELE : VLDG_G_ELE_V2<"u16", Int16Regs>;
-defm INT_PTX_LDG_G_v2i32_ELE : VLDG_G_ELE_V2<"u32", Int32Regs>;
-defm INT_PTX_LDG_G_v2f32_ELE : VLDG_G_ELE_V2<"f32", Float32Regs>;
-defm INT_PTX_LDG_G_v2i64_ELE : VLDG_G_ELE_V2<"u64", Int64Regs>;
-defm INT_PTX_LDG_G_v2f64_ELE : VLDG_G_ELE_V2<"f64", Float64Regs>;
+def INT_PTX_LDG_G_v2i8_ELE : VLDG_G_ELE_V2<"u8", Int16Regs>;
+def INT_PTX_LDG_G_v2i16_ELE : VLDG_G_ELE_V2<"u16", Int16Regs>;
+def INT_PTX_LDG_G_v2i32_ELE : VLDG_G_ELE_V2<"u32", Int32Regs>;
+def INT_PTX_LDG_G_v2f32_ELE : VLDG_G_ELE_V2<"f32", Float32Regs>;
+def INT_PTX_LDG_G_v2i64_ELE : VLDG_G_ELE_V2<"u64", Int64Regs>;
+def INT_PTX_LDG_G_v2f64_ELE : VLDG_G_ELE_V2<"f64", Float64Regs>;
-defm INT_PTX_LDG_G_v4i8_ELE : VLDG_G_ELE_V4<"u8", Int16Regs>;
-defm INT_PTX_LDG_G_v4i16_ELE : VLDG_G_ELE_V4<"u16", Int16Regs>;
-defm INT_PTX_LDG_G_v4i32_ELE : VLDG_G_ELE_V4<"u32", Int32Regs>;
-defm INT_PTX_LDG_G_v4f32_ELE : VLDG_G_ELE_V4<"f32", Float32Regs>;
+def INT_PTX_LDG_G_v4i8_ELE : VLDG_G_ELE_V4<"u8", Int16Regs>;
+def INT_PTX_LDG_G_v4i16_ELE : VLDG_G_ELE_V4<"u16", Int16Regs>;
+def INT_PTX_LDG_G_v4i32_ELE : VLDG_G_ELE_V4<"u32", Int32Regs>;
+def INT_PTX_LDG_G_v4f32_ELE : VLDG_G_ELE_V4<"f32", Float32Regs>;
multiclass NG_TO_G<string Str> {
@@ -2918,17 +2879,17 @@ def nvvm_move_ptr64 : NVPTXInst<(outs Int64Regs:$r), (ins Int64Regs:$s),
// @TODO: Are these actually needed, or will we always just see symbols
// copied to registers first?
-/*def nvvm_move_sym32 : NVPTXInst<(outs Int32Regs:$r), (ins imem:$s),
+/*def nvvm_move_sym32 : NVPTXInst<(outs Int32Regs:$r), (ins ADDR_base:$s),
"mov.u32 \t$r, $s;",
[(set Int32Regs:$r,
(int_nvvm_move_ptr texternalsym:$s))]>;
-def nvvm_move_sym64 : NVPTXInst<(outs Int64Regs:$r), (ins imem:$s),
+def nvvm_move_sym64 : NVPTXInst<(outs Int64Regs:$r), (ins ADDR_base:$s),
"mov.u64 \t$r, $s;",
[(set Int64Regs:$r,
(int_nvvm_move_ptr texternalsym:$s))]>;*/
def texsurf_handles
- : NVPTXInst<(outs Int64Regs:$result), (ins imem:$src),
+ : NVPTXInst<(outs Int64Regs:$result), (ins ADDR_base:$src),
"mov.u64 \t$result, $src;", []>;
//-----------------------------------
@@ -7212,20 +7173,16 @@ class WMMA_REGINFO<WMMA_REGS r, string op>
class BuildPatternI<Intrinsic Intr, dag Ins> {
// Build a dag pattern that matches the intrinsic call.
dag ret = !foreach(tmp, Ins,
- !subst(imem, ADDRvar,
- !subst(MEMri64, ADDRri64,
- !subst(MEMri, ADDRri,
- !subst(ins, Intr, tmp)))));
+ !subst(ADDR, addr,
+ !subst(ins, Intr, tmp)));
}
// Same as above, but uses PatFrag instead of an Intrinsic.
class BuildPatternPF<PatFrag Intr, dag Ins> {
// Build a dag pattern that matches the intrinsic call.
dag ret = !foreach(tmp, Ins,
- !subst(imem, ADDRvar,
- !subst(MEMri64, ADDRri64,
- !subst(MEMri, ADDRri,
- !subst(ins, Intr, tmp)))));
+ !subst(ADDR, addr,
+ !subst(ins, Intr, tmp)));
}
// Common WMMA-related fields used for building patterns for all MMA instructions.
@@ -7242,10 +7199,9 @@ class WMMA_INSTR<string _Intr, list<dag> _Args>
// wmma.load.[a|b|c].sync.[row|col].m16n16k16[|.global|.shared].[f16|f32]
//
-class WMMA_LOAD<WMMA_REGINFO Frag, string Layout, string Space, bit WithStride,
- DAGOperand SrcOp>
+class WMMA_LOAD<WMMA_REGINFO Frag, string Layout, string Space, bit WithStride>
: WMMA_INSTR<WMMA_NAME_LDST<"load", Frag, Layout, WithStride>.record,
- [!con((ins SrcOp:$src),
+ [!con((ins ADDR:$src),
!if(WithStride, (ins Int32Regs:$ldm), (ins)))]>,
Requires<Frag.Predicates> {
// Load/store intrinsics are overloaded on pointer's address space.
@@ -7282,9 +7238,9 @@ class WMMA_LOAD<WMMA_REGINFO Frag, string Layout, string Space, bit WithStride,
// wmma.store.d.sync.[row|col].m16n16k16[|.global|.shared].[f16|f32]
//
class WMMA_STORE_D<WMMA_REGINFO Frag, string Layout, string Space,
- bit WithStride, DAGOperand DstOp>
+ bit WithStride>
: WMMA_INSTR<WMMA_NAME_LDST<"store", Frag, Layout, WithStride>.record,
- [!con((ins DstOp:$dst),
+ [!con((ins ADDR:$dst),
Frag.Ins,
!if(WithStride, (ins Int32Regs:$ldm), (ins)))]>,
Requires<Frag.Predicates> {
@@ -7323,14 +7279,12 @@ defset list<WMMA_INSTR> MMA_LDSTs = {
foreach layout = ["row", "col"] in {
foreach stride = [false, true] in {
foreach space = [".global", ".shared", ""] in {
- foreach addr = [imem, Int32Regs, Int64Regs, MEMri, MEMri64] in {
- foreach frag = NVVM_MMA_OPS.all_ld_ops in
- if NVVM_WMMA_LDST_SUPPORTED<frag, layout>.ret then
- def : WMMA_LOAD<WMMA_REGINFO<frag, "load">, layout, space, stride, addr>;
- foreach frag = NVVM_MMA_OPS.all_st_ops in
- if NVVM_WMMA_LDST_SUPPORTED<frag, layout>.ret then
- def : WMMA_STORE_D<WMMA_REGINFO<frag, "store">, layout, space, stride, addr>;
- } // addr
+ foreach frag = NVVM_MMA_OPS.all_ld_ops in
+ if NVVM_WMMA_LDST_SUPPORTED<frag, layout>.ret then
+ def : WMMA_LOAD<WMMA_REGINFO<frag, "load">, layout, space, stride>;
+ foreach frag = NVVM_MMA_OPS.all_st_ops in
+ if NVVM_WMMA_LDST_SUPPORTED<frag, layout>.ret then
+ def : WMMA_STORE_D<WMMA_REGINFO<frag, "store">, layout, space, stride>;
} // space
} // stride
} // layout
@@ -7457,9 +7411,8 @@ defset list<WMMA_INSTR> MMAs = {
//
// ldmatrix.sync.aligned.m8n8[|.trans][|.shared].b16
//
-class LDMATRIX<WMMA_REGINFO Frag, bit Transposed, string Space,
- DAGOperand SrcOp>
- : WMMA_INSTR<LDMATRIX_NAME<Frag, Transposed>.record, [(ins SrcOp:$src)]>,
+class LDMATRIX<WMMA_REGINFO Frag, bit Transposed, string Space>
+ : WMMA_INSTR<LDMATRIX_NAME<Frag, Transposed>.record, [(ins ADDR:$src)]>,
Requires<Frag.Predicates> {
// Build PatFrag that only matches particular address space.
PatFrag IntrFrag = PatFrag<(ops node:$src), (Intr node:$src),
@@ -7483,12 +7436,9 @@ class LDMATRIX<WMMA_REGINFO Frag, bit Transposed, string Space,
defset list<WMMA_INSTR> LDMATRIXs = {
foreach transposed = [false, true] in {
foreach space = [".shared", ""] in {
- foreach addr = [imem, Int32Regs, Int64Regs, MEMri, MEMri64] in {
- foreach frag = NVVM_MMA_OPS.all_ldmatrix_ops in
- if NVVM_LDMATRIX_SUPPORTED<frag>.ret then
- def : LDMATRIX<WMMA_REGINFO<frag, "ldmatrix">, transposed, space,
- addr>;
- } // addr
+ foreach frag = NVVM_MMA_OPS.all_ldmatrix_ops in
+ if NVVM_LDMATRIX_SUPPORTED<frag>.ret then
+ def : LDMATRIX<WMMA_REGINFO<frag, "ldmatrix">, transposed, space>;
} // space
} // transposed
} // defset
diff --git a/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp b/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp
index 4971d31691c54..46e4a905aa09a 100644
--- a/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp
@@ -1800,7 +1800,7 @@ bool NVPTXReplaceImageHandles::replaceImageHandle(MachineOperand &Op,
MachineInstr &TexHandleDef = *MRI.getVRegDef(Op.getReg());
switch (TexHandleDef.getOpcode()) {
- case NVPTX::LD_i64_asi: {
+ case NVPTX::LD_i64: {
// The handle is a parameter value being loaded, replace with the
// parameter symbol
const auto &TM = static_cast<const NVPTXTargetMachine &>(MF.getTarget());
diff --git a/llvm/test/CodeGen/MIR/NVPTX/expected-floating-point-literal.mir b/llvm/test/CodeGen/MIR/NVPTX/expected-floating-point-literal.mir
index 62ede3b9eef3b..400bff47c8f2e 100644
--- a/llvm/test/CodeGen/MIR/NVPTX/expected-floating-point-literal.mir
+++ b/llvm/test/CodeGen/MIR/NVPTX/expected-floating-point-literal.mir
@@ -16,7 +16,7 @@ registers:
- { id: 1, class: float32regs }
body: |
bb.0.entry:
- %0 = LD_f32_asi 0, 4, 1, 2, 32, &test_param_0, 0
+ %0 = LD_f32 0, 4, 1, 2, 32, &test_param_0, 0
; CHECK: [[@LINE+1]]:33: expected a floating point literal
%1 = FADD_rnf32ri %0, float 3
StoreRetvalF32 %1, 0
diff --git a/llvm/test/CodeGen/MIR/NVPTX/floating-point-immediate-operands.mir b/llvm/test/CodeGen/MIR/NVPTX/floating-point-immediate-operands.mir
index 69c1e25a06024..486c6ca16a531 100644
--- a/llvm/test/CodeGen/MIR/NVPTX/floating-point-immediate-operands.mir
+++ b/llvm/test/CodeGen/MIR/NVPTX/floating-point-immediate-operands.mir
@@ -40,9 +40,9 @@ registers:
- { id: 7, class: float32regs }
body: |
bb.0.entry:
- %0 = LD_f32_asi 0, 0, 4, 1, 2, 32, &test_param_0, 0
+ %0 = LD_f32 0, 0, 4, 1, 2, 32, &test_param_0, 0
%1 = CVT_f64_f32 %0, 0
- %2 = LD_i32_asi 0, 0, 4, 1, 0, 32, &test_param_1, 0
+ %2 = LD_i32 0, 0, 4, 1, 0, 32, &test_param_1, 0
; CHECK: %3:float64regs = FADD_rnf64ri %1, double 3.250000e+00
%3 = FADD_rnf64ri %1, double 3.250000e+00
%4 = CVT_f32_f64 %3, 5
@@ -66,9 +66,9 @@ registers:
- { id: 7, class: float32regs }
body: |
bb.0.entry:
- %0 = LD_f32_asi 0, 0, 4, 1, 2, 32, &test2_param_0, 0
+ %0 = LD_f32 0, 0, 4, 1, 2, 32, &test2_param_0, 0
%1 = CVT_f64_f32 %0, 0
- %2 = LD_i32_asi 0, 0, 4, 1, 0, 32, &test2_param_1, 0
+ %2 = LD_i32 0, 0, 4, 1, 0, 32, &test2_param_1, 0
; CHECK: %3:float64regs = FADD_rnf64ri %1, double 0x7FF8000000000000
%3 = FADD_rnf64ri %1, double 0x7FF8000000000000
%4 = CVT_f32_f64 %3, 5
diff --git a/llvm/test/CodeGen/MIR/NVPTX/floating-point-invalid-type-error.mir b/llvm/test/CodeGen/MIR/NVPTX/floating-point-invalid-type-error.mir
index cc9a36509db33..114b0f9702033 100644
--- a/llvm/test/CodeGen/MIR/NVPTX/floating-point-invalid-type-error.mir
+++ b/llvm/test/CodeGen/MIR/NVPTX/floating-point-invalid-type-error.mir
@@ -16,7 +16,7 @@ registers:
- { id: 1, class: float32regs }
body: |
bb.0.entry:
- %0 = LD_f32_asi 0, 4, 1, 2, 32, &test_param_0, 0
+ %0 = LD_f32 0, 4, 1, 2, 32, &test_param_0, 0
; CHECK: [[@LINE+1]]:33: floating point constant does not have type 'float'
%1 = FADD_rnf32ri %0, float 0xH3C00
StoreRetvalF32 %1, 0
More information about the llvm-commits
mailing list