[clang] [llvm] [NVPTX] Support i256 load/store with 256-bit vector load (PR #155198)
Alex MacLean via llvm-commits
llvm-commits at lists.llvm.org
Mon Aug 25 11:04:25 PDT 2025
================
@@ -1506,3 +1506,69 @@ define void @local_volatile_4xdouble(ptr addrspace(5) %a, ptr addrspace(5) %b) {
store volatile <4 x double> %a.load, ptr addrspace(5) %b
ret void
}
+
+define void @test_i256_global(ptr addrspace(1) %a, ptr addrspace(1) %b) {
+; SM90-LABEL: test_i256_global(
+; SM90: {
+; SM90-NEXT: .reg .b64 %rd<7>;
+; SM90-EMPTY:
+; SM90-NEXT: // %bb.0:
+; SM90-NEXT: ld.param.b64 %rd1, [test_i256_global_param_0];
+; SM90-NEXT: ld.global.v2.b64 {%rd2, %rd3}, [%rd1];
+; SM90-NEXT: ld.global.v2.b64 {%rd4, %rd5}, [%rd1+16];
+; SM90-NEXT: ld.param.b64 %rd6, [test_i256_global_param_1];
+; SM90-NEXT: st.global.v2.b64 [%rd6+16], {%rd4, %rd5};
+; SM90-NEXT: st.global.v2.b64 [%rd6], {%rd2, %rd3};
+; SM90-NEXT: ret;
+;
+; SM100-LABEL: test_i256_global(
+; SM100: {
+; SM100-NEXT: .reg .b64 %rd<7>;
+; SM100-EMPTY:
+; SM100-NEXT: // %bb.0:
+; SM100-NEXT: ld.param.b64 %rd1, [test_i256_global_param_0];
+; SM100-NEXT: ld.global.v4.b64 {%rd2, %rd3, %rd4, %rd5}, [%rd1];
+; SM100-NEXT: ld.param.b64 %rd6, [test_i256_global_param_1];
+; SM100-NEXT: st.global.v4.b64 [%rd6], {%rd2, %rd3, %rd4, %rd5};
+; SM100-NEXT: ret;
+ %a.load = load i256, ptr addrspace(1) %a, align 32
+ store i256 %a.load, ptr addrspace(1) %b, align 32
+ ret void
+}
+
+
+define void @test_i256_global_unaligned(ptr addrspace(1) %a, ptr addrspace(1) %b) {
+; CHECK-LABEL: test_i256_global_unaligned(
+; CHECK: {
+; CHECK-NEXT: .reg .b64 %rd<7>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b64 %rd1, [test_i256_global_unaligned_param_0];
+; CHECK-NEXT: ld.global.v2.b64 {%rd2, %rd3}, [%rd1];
+; CHECK-NEXT: ld.global.v2.b64 {%rd4, %rd5}, [%rd1+16];
+; CHECK-NEXT: ld.param.b64 %rd6, [test_i256_global_unaligned_param_1];
+; CHECK-NEXT: st.global.v2.b64 [%rd6+16], {%rd4, %rd5};
+; CHECK-NEXT: st.global.v2.b64 [%rd6], {%rd2, %rd3};
+; CHECK-NEXT: ret;
+ %a.load = load i256, ptr addrspace(1) %a, align 16
+ store i256 %a.load, ptr addrspace(1) %b, align 16
+ ret void
+}
+
+define void @test_i256_generic(ptr %a, ptr %b) {
+; CHECK-LABEL: test_i256_generic(
+; CHECK: {
+; CHECK-NEXT: .reg .b64 %rd<7>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b64 %rd1, [test_i256_generic_param_0];
+; CHECK-NEXT: ld.v2.b64 {%rd2, %rd3}, [%rd1];
+; CHECK-NEXT: ld.v2.b64 {%rd4, %rd5}, [%rd1+16];
+; CHECK-NEXT: ld.param.b64 %rd6, [test_i256_generic_param_1];
+; CHECK-NEXT: st.v2.b64 [%rd6+16], {%rd4, %rd5};
+; CHECK-NEXT: st.v2.b64 [%rd6], {%rd2, %rd3};
+; CHECK-NEXT: ret;
+ %a.load = load i256, ptr %a, align 32
+ store i256 %a.load, ptr %b, align 32
+ ret void
+}
----------------
AlexMaclean wrote:
I've added tests for both `atomic` and `volatile`. We don't support any `atomic` loads/stores of size greater than 64 bits.
https://github.com/llvm/llvm-project/pull/155198
More information about the llvm-commits
mailing list