[llvm] [NVPTX] Use PRMT instruction to lower i16 bswap (PR #168968)

Alex MacLean via llvm-commits llvm-commits at lists.llvm.org
Fri Nov 21 15:03:25 PST 2025


================
@@ -10,16 +10,12 @@ target triple = "nvptx64-nvidia-cuda"
 define i16 @bswap16(i16 %a) {
 ; CHECK-LABEL: bswap16(
 ; CHECK:       {
-; CHECK-NEXT:    .reg .b16 %rs<5>;
-; CHECK-NEXT:    .reg .b32 %r<2>;
+; CHECK-NEXT:    .reg .b32 %r<3>;
 ; CHECK-EMPTY:
 ; CHECK-NEXT:  // %bb.0:
-; CHECK-NEXT:    ld.param.b16 %rs1, [bswap16_param_0];
-; CHECK-NEXT:    shr.u16 %rs2, %rs1, 8;
-; CHECK-NEXT:    shl.b16 %rs3, %rs1, 8;
-; CHECK-NEXT:    or.b16 %rs4, %rs3, %rs2;
-; CHECK-NEXT:    cvt.u32.u16 %r1, %rs4;
-; CHECK-NEXT:    st.param.b32 [func_retval0], %r1;
+; CHECK-NEXT:    ld.param.b16 %r1, [bswap16_param_0];
+; CHECK-NEXT:    prmt.b32 %r2, %r1, 0, 0x7701U;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r2;
----------------
AlexMaclean wrote:

Nice

https://github.com/llvm/llvm-project/pull/168968


More information about the llvm-commits mailing list