[llvm] [NVPTX] Use PRMT more widely, and improve folding around this instruction (PR #148261)

Kevin McAfee via llvm-commits llvm-commits at lists.llvm.org
Fri Jul 11 11:15:48 PDT 2025


================
@@ -1474,19 +1460,26 @@ def : Pat<(fshr i32:$hi, i32:$lo, (shl i32:$amt, (i32 3))),
           (PRMT_B32rrr $lo, $hi, $amt, PrmtF4E)>;
 
 
+def byte_extract_prmt : ImmLeaf<i32, [{
+  return (Imm == 0x7770) || (Imm == 0x7771) || (Imm == 0x7772) || (Imm == 0x7773);
+}]>;
+
+def to_sign_extend_selector : SDNodeXForm<imm, [{
+  const APInt &V = N->getAPIntValue();
+  const APInt B = V.trunc(4);
+  const APInt BSext = B | 8;
+  const APInt R = BSext.concat(BSext).concat(BSext).concat(B).zext(32);
+  return CurDAG->getTargetConstant(R, SDLoc(N), MVT::i32);
+}]>;
+
+
 // byte extraction + signed/unsigned extension to i32.
-def : Pat<(i32 (sext_inreg (bfe i32:$s, i32:$o, 8), i8)),
-          (BFE_S32rri $s, $o, 8)>;
-def : Pat<(i32 (sext_inreg (bfe i32:$s, imm:$o, 8), i8)),
-          (BFE_S32rii $s, imm:$o, 8)>;
-def : Pat<(i32 (and (bfe i32:$s, i32:$o, 8), 255)),
-          (BFE_U32rri $s, $o, 8)>;
-def : Pat<(i32 (and (bfe i32:$s, imm:$o, 8), 255)),
-          (BFE_U32rii $s, imm:$o, 8)>;
+def : Pat<(i32 (sext_inreg (prmt i32:$s, 0, byte_extract_prmt:$sel, PrmtNONE), i8)),
+          (PRMT_B32rii $s, 0, (to_sign_extend_selector $sel), PrmtNONE)>;
 
 // byte extraction + signed extension to i16
-def : Pat<(i16 (sext_inreg (trunc (bfe i32:$s, imm:$o, 8)), i8)),
-          (CVT_s8_s32 (BFE_S32rii $s, imm:$o, 8), CvtNONE)>;
+def : Pat<(i16 (sext_inreg (trunc (prmt i32:$s, 0, byte_extract_prmt:$sel, PrmtNONE)), i8)),
+          (CVT_u16_u32 (PRMT_B32rii $s, 0, (to_sign_extend_selector $sel), PrmtNONE), CvtNONE)>;
----------------
kalxr wrote:

What's the reason for using an unsigned CVT (`CVT_u16_u32`) here instead of a signed one, as the replaced pattern did with `CVT_s8_s32`?

https://github.com/llvm/llvm-project/pull/148261


More information about the llvm-commits mailing list