[clang] [llvm] [llvm][NVPTX] Strip unneeded '+0' in PTX load/store (PR #113017)
via cfe-commits
cfe-commits at lists.llvm.org
Fri Oct 18 20:41:10 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-nvptx
Author: Youngsuk Kim (JOE1994)
<details>
<summary>Changes</summary>
Remove the extraneous '+0' immediate offset part in PTX load/stores, to improve readability of output PTX code.
---
Patch is 474.34 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/113017.diff
66 Files Affected:
- (modified) clang/test/CodeGenCUDA/bf16.cu (+4-4)
- (modified) llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp (+8)
- (modified) llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.h (+2)
- (modified) llvm/lib/Target/NVPTX/NVPTXInstrInfo.td (+59-55)
- (modified) llvm/test/CodeGen/NVPTX/LoadStoreVectorizer.ll (+6-6)
- (modified) llvm/test/CodeGen/NVPTX/activemask.ll (+2-2)
- (modified) llvm/test/CodeGen/NVPTX/addr-mode.ll (+5-5)
- (modified) llvm/test/CodeGen/NVPTX/aggregate-return.ll (+4-4)
- (modified) llvm/test/CodeGen/NVPTX/bf16-instructions.ll (+95-95)
- (modified) llvm/test/CodeGen/NVPTX/bf16x2-instructions-approx.ll (+2-2)
- (modified) llvm/test/CodeGen/NVPTX/bf16x2-instructions.ll (+30-30)
- (modified) llvm/test/CodeGen/NVPTX/bswap.ll (+4-4)
- (modified) llvm/test/CodeGen/NVPTX/call-with-alloca-buffer.ll (+2-2)
- (modified) llvm/test/CodeGen/NVPTX/call_bitcast_byval.ll (+2-2)
- (modified) llvm/test/CodeGen/NVPTX/chain-different-as.ll (+1-1)
- (modified) llvm/test/CodeGen/NVPTX/cmpxchg.ll (+8-8)
- (modified) llvm/test/CodeGen/NVPTX/combine-mad.ll (+8-8)
- (modified) llvm/test/CodeGen/NVPTX/compute-ptx-value-vts.ll (+4-4)
- (modified) llvm/test/CodeGen/NVPTX/convert-int-sm20.ll (+6-6)
- (modified) llvm/test/CodeGen/NVPTX/copysign.ll (+6-6)
- (modified) llvm/test/CodeGen/NVPTX/dot-product.ll (+13-13)
- (modified) llvm/test/CodeGen/NVPTX/dynamic_stackalloc.ll (+2-2)
- (modified) llvm/test/CodeGen/NVPTX/elect.ll (+3-3)
- (modified) llvm/test/CodeGen/NVPTX/extractelement.ll (+6-6)
- (modified) llvm/test/CodeGen/NVPTX/f16-instructions.ll (+79-79)
- (modified) llvm/test/CodeGen/NVPTX/f16x2-instructions.ll (+79-79)
- (modified) llvm/test/CodeGen/NVPTX/i128-param.ll (+4-4)
- (modified) llvm/test/CodeGen/NVPTX/i128-retval.ll (+2-2)
- (modified) llvm/test/CodeGen/NVPTX/i128-struct.ll (+1-1)
- (modified) llvm/test/CodeGen/NVPTX/i128.ll (+9-9)
- (modified) llvm/test/CodeGen/NVPTX/i16x2-instructions.ll (+50-50)
- (modified) llvm/test/CodeGen/NVPTX/i8x4-instructions.ll (+59-59)
- (modified) llvm/test/CodeGen/NVPTX/indirect_byval.ll (+8-8)
- (modified) llvm/test/CodeGen/NVPTX/jump-table.ll (+7-7)
- (modified) llvm/test/CodeGen/NVPTX/ldparam-v4.ll (+1-1)
- (modified) llvm/test/CodeGen/NVPTX/local-stack-frame.ll (+2-2)
- (modified) llvm/test/CodeGen/NVPTX/lower-alloca.ll (+1-1)
- (modified) llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll (+14-14)
- (modified) llvm/test/CodeGen/NVPTX/lower-args.ll (+3-3)
- (modified) llvm/test/CodeGen/NVPTX/math-intrins.ll (+87-87)
- (modified) llvm/test/CodeGen/NVPTX/mulhi-intrins.ll (+6-6)
- (modified) llvm/test/CodeGen/NVPTX/nvvm-reflect-arch-O0.ll (+15-15)
- (modified) llvm/test/CodeGen/NVPTX/param-load-store.ll (+172-172)
- (modified) llvm/test/CodeGen/NVPTX/param-overalign.ll (+8-8)
- (modified) llvm/test/CodeGen/NVPTX/param-vectorize-device.ll (+38-38)
- (modified) llvm/test/CodeGen/NVPTX/proxy-reg-erasure-ptx.ll (+31-31)
- (modified) llvm/test/CodeGen/NVPTX/rcp-opt.ll (+3-3)
- (modified) llvm/test/CodeGen/NVPTX/rotate.ll (+24-24)
- (modified) llvm/test/CodeGen/NVPTX/rotate_64.ll (+2-2)
- (modified) llvm/test/CodeGen/NVPTX/sad-intrins.ll (+6-6)
- (modified) llvm/test/CodeGen/NVPTX/sext-setcc.ll (+2-2)
- (modified) llvm/test/CodeGen/NVPTX/st-param-imm.ll (+83-83)
- (modified) llvm/test/CodeGen/NVPTX/store-undef.ll (+1-1)
- (modified) llvm/test/CodeGen/NVPTX/tex-read-cuda.ll (+2-2)
- (modified) llvm/test/CodeGen/NVPTX/tid-range.ll (+1-1)
- (modified) llvm/test/CodeGen/NVPTX/unaligned-param-load-store.ll (+21-21)
- (modified) llvm/test/CodeGen/NVPTX/unfold-masked-merge-vector-variablemask.ll (+29-29)
- (modified) llvm/test/CodeGen/NVPTX/vaargs.ll (+11-11)
- (modified) llvm/test/CodeGen/NVPTX/variadics-backend.ll (+24-24)
- (modified) llvm/test/CodeGen/NVPTX/vec-param-load.ll (+7-7)
- (modified) llvm/test/CodeGen/NVPTX/vector-args.ll (+1-1)
- (modified) llvm/test/CodeGen/NVPTX/vector-call.ll (+2-2)
- (modified) llvm/test/CodeGen/NVPTX/vector-returns.ll (+38-38)
- (modified) llvm/test/DebugInfo/NVPTX/dbg-declare-alloca.ll (+1-1)
- (modified) llvm/test/Transforms/NaryReassociate/NVPTX/nary-slsr.ll (+3-3)
- (modified) llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/nvptx-basic.ll.expected (+4-4)
``````````diff
diff --git a/clang/test/CodeGenCUDA/bf16.cu b/clang/test/CodeGenCUDA/bf16.cu
index 3c443420dbd36a..f794b83239f14a 100644
--- a/clang/test/CodeGenCUDA/bf16.cu
+++ b/clang/test/CodeGenCUDA/bf16.cu
@@ -25,7 +25,7 @@ __device__ void test_arg(__bf16 *out, __bf16 in) {
__device__ __bf16 test_ret( __bf16 in) {
// CHECK: ld.param.b16 %[[R:rs[0-9]+]], [_Z8test_retDF16b_param_0];
return in;
-// CHECK: st.param.b16 [func_retval0+0], %[[R]]
+// CHECK: st.param.b16 [func_retval0], %[[R]]
// CHECK: ret;
}
@@ -35,15 +35,15 @@ __device__ __bf16 external_func( __bf16 in);
// CHECK: .param .align 2 .b8 _Z9test_callDF16b_param_0[2]
__device__ __bf16 test_call( __bf16 in) {
// CHECK: ld.param.b16 %[[R:rs[0-9]+]], [_Z9test_callDF16b_param_0];
-// CHECK: st.param.b16 [param0+0], %[[R]];
+// CHECK: st.param.b16 [param0], %[[R]];
// CHECK: .param .align 2 .b8 retval0[2];
// CHECK: call.uni (retval0),
// CHECK-NEXT: _Z13external_funcDF16b,
// CHECK-NEXT: (
// CHECK-NEXT: param0
// CHECK-NEXT );
-// CHECK: ld.param.b16 %[[RET:rs[0-9]+]], [retval0+0];
+// CHECK: ld.param.b16 %[[RET:rs[0-9]+]], [retval0];
return external_func(in);
-// CHECK: st.param.b16 [func_retval0+0], %[[RET]]
+// CHECK: st.param.b16 [func_retval0], %[[RET]]
// CHECK: ret;
}
diff --git a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp
index 7d6442a611125f..3bda3b72674276 100644
--- a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp
+++ b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp
@@ -363,6 +363,14 @@ void NVPTXInstPrinter::printMemOperand(const MCInst *MI, int OpNum,
}
}
+void NVPTXInstPrinter::printOffseti32imm(const MCInst *MI, int OpNum,
+ raw_ostream &O, const char *Modifier) {
+ if (auto &Op = MI->getOperand(OpNum); Op.isImm() && Op.getImm() == 0)
+ return; // don't print '+0'
+ O << "+";
+ printOperand(MI, OpNum, O);
+}
+
void NVPTXInstPrinter::printProtoIdent(const MCInst *MI, int OpNum,
raw_ostream &O, const char *Modifier) {
const MCOperand &Op = MI->getOperand(OpNum);
diff --git a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.h b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.h
index e6954f861cd10e..e8a4a6dbdd5324 100644
--- a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.h
+++ b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.h
@@ -45,6 +45,8 @@ class NVPTXInstPrinter : public MCInstPrinter {
const char *Modifier = nullptr);
void printMemOperand(const MCInst *MI, int OpNum,
raw_ostream &O, const char *Modifier = nullptr);
+ void printOffseti32imm(const MCInst *MI, int OpNum, raw_ostream &O,
+ const char *Modifier = nullptr);
void printProtoIdent(const MCInst *MI, int OpNum,
raw_ostream &O, const char *Modifier = nullptr);
void printPrmtMode(const MCInst *MI, int OpNum, raw_ostream &O,
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index 8b34ce4f1001c1..b5478b8f09ceb4 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -1934,6 +1934,10 @@ def MmaCode : Operand<i32> {
let PrintMethod = "printMmaCode";
}
+def Offseti32imm : Operand<i32> {
+ let PrintMethod = "printOffseti32imm";
+}
+
def SDTWrapper : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>;
def Wrapper : SDNode<"NVPTXISD::Wrapper", SDTWrapper>;
@@ -2482,21 +2486,21 @@ def ProxyReg :
let mayLoad = true in {
class LoadParamMemInst<NVPTXRegClass regclass, string opstr> :
- NVPTXInst<(outs regclass:$dst), (ins i32imm:$b),
- !strconcat("ld.param", opstr, " \t$dst, [retval0+$b];"),
+ NVPTXInst<(outs regclass:$dst), (ins Offseti32imm:$b),
+ !strconcat("ld.param", opstr, " \t$dst, [retval0$b];"),
[]>;
class LoadParamV2MemInst<NVPTXRegClass regclass, string opstr> :
- NVPTXInst<(outs regclass:$dst, regclass:$dst2), (ins i32imm:$b),
+ NVPTXInst<(outs regclass:$dst, regclass:$dst2), (ins Offseti32imm:$b),
!strconcat("ld.param.v2", opstr,
- " \t{{$dst, $dst2}}, [retval0+$b];"), []>;
+ " \t{{$dst, $dst2}}, [retval0$b];"), []>;
class LoadParamV4MemInst<NVPTXRegClass regclass, string opstr> :
NVPTXInst<(outs regclass:$dst, regclass:$dst2, regclass:$dst3,
regclass:$dst4),
- (ins i32imm:$b),
+ (ins Offseti32imm:$b),
!strconcat("ld.param.v4", opstr,
- " \t{{$dst, $dst2, $dst3, $dst4}}, [retval0+$b];"),
+ " \t{{$dst, $dst2, $dst3, $dst4}}, [retval0$b];"),
[]>;
}
@@ -2512,8 +2516,8 @@ let mayStore = true in {
if !or(support_imm, !isa<NVPTXRegClass>(op)) then
def _ # !if(!isa<NVPTXRegClass>(op), "r", "i")
: NVPTXInst<(outs),
- (ins op:$val, i32imm:$a, i32imm:$b),
- "st.param" # opstr # " \t[param$a+$b], $val;",
+ (ins op:$val, i32imm:$a, Offseti32imm:$b),
+ "st.param" # opstr # " \t[param$a$b], $val;",
[]>;
}
@@ -2524,8 +2528,8 @@ let mayStore = true in {
# !if(!isa<NVPTXRegClass>(op2), "r", "i")
: NVPTXInst<(outs),
(ins op1:$val1, op2:$val2,
- i32imm:$a, i32imm:$b),
- "st.param.v2" # opstr # " \t[param$a+$b], {{$val1, $val2}};",
+ i32imm:$a, Offseti32imm:$b),
+ "st.param.v2" # opstr # " \t[param$a$b], {{$val1, $val2}};",
[]>;
}
@@ -2541,29 +2545,29 @@ let mayStore = true in {
: NVPTXInst<(outs),
(ins op1:$val1, op2:$val2, op3:$val3, op4:$val4,
- i32imm:$a, i32imm:$b),
+ i32imm:$a, Offseti32imm:$b),
"st.param.v4" # opstr #
- " \t[param$a+$b], {{$val1, $val2, $val3, $val4}};",
+ " \t[param$a$b], {{$val1, $val2, $val3, $val4}};",
[]>;
}
class StoreRetvalInst<NVPTXRegClass regclass, string opstr> :
- NVPTXInst<(outs), (ins regclass:$val, i32imm:$a),
- !strconcat("st.param", opstr, " \t[func_retval0+$a], $val;"),
+ NVPTXInst<(outs), (ins regclass:$val, Offseti32imm:$a),
+ !strconcat("st.param", opstr, " \t[func_retval0$a], $val;"),
[]>;
class StoreRetvalV2Inst<NVPTXRegClass regclass, string opstr> :
- NVPTXInst<(outs), (ins regclass:$val, regclass:$val2, i32imm:$a),
+ NVPTXInst<(outs), (ins regclass:$val, regclass:$val2, Offseti32imm:$a),
!strconcat("st.param.v2", opstr,
- " \t[func_retval0+$a], {{$val, $val2}};"),
+ " \t[func_retval0$a], {{$val, $val2}};"),
[]>;
class StoreRetvalV4Inst<NVPTXRegClass regclass, string opstr> :
NVPTXInst<(outs),
(ins regclass:$val, regclass:$val2, regclass:$val3,
- regclass:$val4, i32imm:$a),
+ regclass:$val4, Offseti32imm:$a),
!strconcat("st.param.v4", opstr,
- " \t[func_retval0+$a], {{$val, $val2, $val3, $val4}};"),
+ " \t[func_retval0$a], {{$val, $val2, $val3, $val4}};"),
[]>;
}
@@ -2827,21 +2831,21 @@ multiclass LD<NVPTXRegClass regclass> {
def _ari : NVPTXInst<
(outs regclass:$dst),
(ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
- i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
+ i32imm:$fromWidth, Int32Regs:$addr, Offseti32imm:$offset),
"ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
- "\t$dst, [$addr+$offset];", []>;
+ "\t$dst, [$addr$offset];", []>;
def _ari_64 : NVPTXInst<
(outs regclass:$dst),
(ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
- LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
+ LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr, Offseti32imm:$offset),
"ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
- "\t$dst, [$addr+$offset];", []>;
+ "\t$dst, [$addr$offset];", []>;
def _asi : NVPTXInst<
(outs regclass:$dst),
(ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
- LdStCode:$Sign, i32imm:$fromWidth, imem:$addr, i32imm:$offset),
+ LdStCode:$Sign, i32imm:$fromWidth, imem:$addr, Offseti32imm:$offset),
"ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
- "\t$dst, [$addr+$offset];", []>;
+ "\t$dst, [$addr$offset];", []>;
}
let mayLoad=1, hasSideEffects=0 in {
@@ -2876,23 +2880,23 @@ multiclass ST<NVPTXRegClass regclass> {
(outs),
(ins regclass:$src, LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp,
LdStCode:$Vec, LdStCode:$Sign, i32imm:$toWidth, Int32Regs:$addr,
- i32imm:$offset),
+ Offseti32imm:$offset),
"st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth"
- " \t[$addr+$offset], $src;", []>;
+ " \t[$addr$offset], $src;", []>;
def _ari_64 : NVPTXInst<
(outs),
(ins regclass:$src, LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp,
LdStCode:$Vec, LdStCode:$Sign, i32imm:$toWidth, Int64Regs:$addr,
- i32imm:$offset),
+ Offseti32imm:$offset),
"st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth"
- " \t[$addr+$offset], $src;", []>;
+ " \t[$addr$offset], $src;", []>;
def _asi : NVPTXInst<
(outs),
(ins regclass:$src, LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp,
LdStCode:$Vec, LdStCode:$Sign, i32imm:$toWidth, imem:$addr,
- i32imm:$offset),
+ Offseti32imm:$offset),
"st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth"
- " \t[$addr+$offset], $src;", []>;
+ " \t[$addr$offset], $src;", []>;
}
let mayStore=1, hasSideEffects=0 in {
@@ -2929,21 +2933,21 @@ multiclass LD_VEC<NVPTXRegClass regclass> {
def _v2_ari : NVPTXInst<
(outs regclass:$dst1, regclass:$dst2),
(ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
- LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
+ LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr, Offseti32imm:$offset),
"ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
- "\t{{$dst1, $dst2}}, [$addr+$offset];", []>;
+ "\t{{$dst1, $dst2}}, [$addr$offset];", []>;
def _v2_ari_64 : NVPTXInst<
(outs regclass:$dst1, regclass:$dst2),
(ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
- LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
+ LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr, Offseti32imm:$offset),
"ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
- "\t{{$dst1, $dst2}}, [$addr+$offset];", []>;
+ "\t{{$dst1, $dst2}}, [$addr$offset];", []>;
def _v2_asi : NVPTXInst<
(outs regclass:$dst1, regclass:$dst2),
(ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
- LdStCode:$Sign, i32imm:$fromWidth, imem:$addr, i32imm:$offset),
+ LdStCode:$Sign, i32imm:$fromWidth, imem:$addr, Offseti32imm:$offset),
"ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
- "\t{{$dst1, $dst2}}, [$addr+$offset];", []>;
+ "\t{{$dst1, $dst2}}, [$addr$offset];", []>;
def _v4_avar : NVPTXInst<
(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4),
(ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
@@ -2965,21 +2969,21 @@ multiclass LD_VEC<NVPTXRegClass regclass> {
def _v4_ari : NVPTXInst<
(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4),
(ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
- LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
+ LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr, Offseti32imm:$offset),
"ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
- "\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];", []>;
+ "\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr$offset];", []>;
def _v4_ari_64 : NVPTXInst<
(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4),
(ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
- LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
+ LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr, Offseti32imm:$offset),
"ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
- "\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];", []>;
+ "\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr$offset];", []>;
def _v4_asi : NVPTXInst<
(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4),
(ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
- LdStCode:$Sign, i32imm:$fromWidth, imem:$addr, i32imm:$offset),
+ LdStCode:$Sign, i32imm:$fromWidth, imem:$addr, Offseti32imm:$offset),
"ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
- "\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];", []>;
+ "\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr$offset];", []>;
}
let mayLoad=1, hasSideEffects=0 in {
defm LDV_i8 : LD_VEC<Int16Regs>;
@@ -3016,23 +3020,23 @@ multiclass ST_VEC<NVPTXRegClass regclass> {
(outs),
(ins regclass:$src1, regclass:$src2, LdStCode:$sem, LdStCode:$scope,
LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth,
- Int32Regs:$addr, i32imm:$offset),
+ Int32Regs:$addr, Offseti32imm:$offset),
"st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
- "\t[$addr+$offset], {{$src1, $src2}};", []>;
+ "\t[$addr$offset], {{$src1, $src2}};", []>;
def _v2_ari_64 : NVPTXInst<
(outs),
(ins regclass:$src1, regclass:$src2, LdStCode:$sem, LdStCode:$scope,
LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth,
- Int64Regs:$addr, i32imm:$offset),
+ Int64Regs:$addr, Offseti32imm:$offset),
"st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
- "\t[$addr+$offset], {{$src1, $src2}};", []>;
+ "\t[$addr$offset], {{$src1, $src2}};", []>;
def _v2_asi : NVPTXInst<
(outs),
(ins regclass:$src1, regclass:$src2, LdStCode:$sem, LdStCode:$scope,
LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth,
- imem:$addr, i32imm:$offset),
+ imem:$addr, Offseti32imm:$offset),
"st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
- "\t[$addr+$offset], {{$src1, $src2}};", []>;
+ "\t[$addr$offset], {{$src1, $src2}};", []>;
def _v4_avar : NVPTXInst<
(outs),
(ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
@@ -3058,23 +3062,23 @@ multiclass ST_VEC<NVPTXRegClass regclass> {
(outs),
(ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
- LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
+ LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr, Offseti32imm:$offset),
"st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
- "\t[$addr+$offset], {{$src1, $src2, $src3, $src4}};", []>;
+ "\t[$addr$offset], {{$src1, $src2, $src3, $src4}};", []>;
def _v4_ari_64 : NVPTXInst<
(outs),
(ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
- LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
+ LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr, Offseti32imm:$offset),
"st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
- "\t[$addr+$offset], {{$src1, $src2, $src3, $src4}};", []>;
+ "\t[$addr$offset], {{$src1, $src2, $src3, $src4}};", []>;
def _v4_asi : NVPTXInst<
(outs),
(ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
- LdStCode:$Sign, i32imm:$fromWidth, imem:$addr, i32imm:$offset),
+ LdStCode:$Sign, i32imm:$fromWidth, imem:$addr, Offseti32imm:$offset),
"st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}"
- "$fromWidth \t[$addr+$offset], {{$src1, $src2, $src3, $src4}};", []>;
+ "$fromWidth \t[$addr$offset], {{$src1, $src2, $src3, $src4}};", []>;
}
let mayStore=1, hasSideEffects=0 in {
@@ -3903,4 +3907,4 @@ def atomic_thread_fence_seq_cst_cta :
Requires<[hasPTX<60>, hasSM<70>]>;
def atomic_thread_fence_acq_rel_cta :
NVPTXInst<(outs), (ins), "fence.acq_rel.cta;", []>,
- Requires<[hasPTX<60>, hasSM<70>]>;
\ No newline at end of file
+ Requires<[hasPTX<60>, hasSM<70>]>;
diff --git a/llvm/test/CodeGen/NVPTX/LoadStoreVectorizer.ll b/llvm/test/CodeGen/NVPTX/LoadStoreVectorizer.ll
index bc58a700cb9828..028fab7ae54d6a 100644
--- a/llvm/test/CodeGen/NVPTX/LoadStoreVectorizer.ll
+++ b/llvm/test/CodeGen/NVPTX/LoadStoreVectorizer.ll
@@ -19,7 +19,7 @@ define i32 @f(ptr %p) {
; ENABLED-NEXT: ld.param.u64 %rd1, [f_param_0];
; ENABLED-NEXT: ld.v2.u32 {%r1, %r2}, [%rd1];
; ENABLED-NEXT: add.s32 %r3, %r1, %r2;
-; ENABLED-NEXT: st.param.b32 [func_retval0+0], %r3;
+; ENABLED-NEXT: st.param.b32 [func_retval0], %r3;
; ENABLED-NEXT: ret;
;
; DISABLED-LABEL: f(
@@ -32,7 +32,7 @@ define i32 @f(ptr %p) {
; DISABLED-NEXT: ld.u32 %r1, [%rd1];
; DISABLED-NEXT: ld.u32 %r2, [%rd1+4];
; DISABLED-NEXT: add.s32 %r3, %r1, %r2;
-; DISABLED-NEXT: st.param.b32 [func_retval0+0], %r3;
+; DISABLED-NEXT: st.param.b32 [func_retval0], %r3;
; DISABLED-NEXT: ret;
%p.1 = getelementptr i32, ptr %p, i32 1
%v0 = load i32, ptr %p, align 8
@@ -68,7 +68,7 @@ define half @fh(ptr %p) {
; ENABLED-NEXT: cvt.f32.f16 %f11, %rs5;
; ENABLED-NEXT: add.rn.f32 %f12, %f10, %f11;
; ENABLED-NEXT: cvt.rn.f16.f32 %rs9, %f12;
-; ENABLED-NEXT: st.param.b16 [func_retval0+0], %rs9;
+; ENABLED-NEXT: st.param.b16 [func_retval0], %rs9;
; ENABLED-NEXT: ret;
;
; DISABLED-LABEL: fh(
@@ -100,7 +100,7 @@ define half @fh(ptr %p) {
; DISABLED-NEXT: cvt.f32.f16 %f11, %rs5;
; DISABLED-NEXT: add.rn.f32 %f12, %f10, %f11;
; DISABLED-NEXT: cvt.rn.f16.f32 %rs9, %f12;
-; DISABLED-NEXT: st.param.b16 [func_retval0+0], %rs9;
+; DISABLED-NEXT: st.param.b16 [func_retval0], %rs9;
; DISABLED-NEXT: ret;
%p.1 = getelementptr half, ptr %p, i32 1
%p.2 = getelementptr half, ptr %p, i32 2
@@ -132,7 +132,7 @@ define float @ff(ptr %p) {
; ENABLED-NEXT: add.rn.f32 %f7, %f3, %f4;
; ENABLED-NEXT: add.rn.f32 %f8, %f6, %f7;
; ENABLED-NEXT: add.rn.f32 %f9, %f8, %f5;
-; ENABLED-NEXT: st.param.f32 [func_retval0+0], %f9;
+; ENABLED-NEXT: st.param.f32 [func_retval0], %f9;
; ENABLED-NEXT: ret;
;
; DISABLED-LABEL: ff(
@@ -151,7 +151,7 @@ define float @ff(ptr %p) {
; DISABLED-NEXT: add.rn.f32 %f7, %f3, %f4;
; DISABLED-NEXT: add.rn.f32 %f8, %f6, %f7;
; DISABLED-NEXT: add.rn.f32 %f9, %f8, %f5;
-; DISABLED-NEXT: st.param.f32 [func_retval0+0], %f9;
+; DISABLED-NEXT: st.param.f32 [func_retval0], %f9;
; DISABLED-NEXT: ret;
%p.1 = getelementptr float, ptr %p, i32 1
%p.2 = getelementptr float, ptr %p, i32 2
diff --git a/llvm/test/CodeGen/NVPTX/activemask.ll b/llvm/test/CodeGen/NVPTX/activemask.ll
index 1496b2ebdd4427..e1d169d17c60e9 100644
--- a/llvm/test/CodeGen/NVPTX/activemask.ll
+++ b/llvm/test/CodeGen/NVPTX/activemask.ll
@@ -6,7 +6,7 @@ declare i32 @llvm.nvvm.activemask()
; CHECK-LABEL: activemask(
;
; CHECK: ac...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/113017
More information about the cfe-commits
mailing list