[llvm] [DAG] Combine (sext (sext_in_reg x)) to (sext_in_reg (any_extend x)) (PR #132386)
Pierre van Houtryve via llvm-commits
llvm-commits at lists.llvm.org
Fri Mar 21 05:16:04 PDT 2025
https://github.com/Pierre-vh created https://github.com/llvm/llvm-project/pull/132386
(No description provided.)
>From fbb014793375b140426fc993b2cfd1b8efb5618b Mon Sep 17 00:00:00 2001
From: pvanhout <pierre.vanhoutryve at amd.com>
Date: Fri, 21 Mar 2025 12:48:34 +0100
Subject: [PATCH] [DAG] Combine (sext (sext_in_reg x)) to (sext_in_reg
(any_extend x))
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 18 +++++--
llvm/test/CodeGen/AMDGPU/permute_i8.ll | 54 ++++++++-----------
llvm/test/CodeGen/AMDGPU/sint_to_fp.f64.ll | 3 +-
3 files changed, 37 insertions(+), 38 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index a54857e1037e2..4c53801f0af3e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -13936,14 +13936,22 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, SDLoc(N), VT,
N0.getOperand(0));
- // fold (sext (sext_inreg x)) -> (sext (trunc x))
if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) {
SDValue N00 = N0.getOperand(0);
EVT ExtVT = cast<VTSDNode>(N0->getOperand(1))->getVT();
- if ((N00.getOpcode() == ISD::TRUNCATE || TLI.isTruncateFree(N00, ExtVT)) &&
- (!LegalTypes || TLI.isTypeLegal(ExtVT))) {
- SDValue T = DAG.getNode(ISD::TRUNCATE, DL, ExtVT, N00);
- return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, T);
+ if (N00.getOpcode() == ISD::TRUNCATE || TLI.isTruncateFree(N00, ExtVT)) {
+ // fold (sext (sext_inreg x)) -> (sext (trunc x))
+ if ((!LegalTypes || TLI.isTypeLegal(ExtVT))) {
+ SDValue T = DAG.getNode(ISD::TRUNCATE, DL, ExtVT, N00);
+ return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, T);
+ }
+
+ // If ExtVT is not a legal type, try to fold to (sext_inreg (anyext x))
+ if ((!LegalTypes || TLI.isTypeLegal(VT))) {
+ SDValue ExtSrc = DAG.getAnyExtOrTrunc(N00, DL, VT);
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT,
+ {ExtSrc, N0->getOperand(1)});
+ }
}
}
diff --git a/llvm/test/CodeGen/AMDGPU/permute_i8.ll b/llvm/test/CodeGen/AMDGPU/permute_i8.ll
index d8bdb0c45e7ca..120aebf2bf7c8 100644
--- a/llvm/test/CodeGen/AMDGPU/permute_i8.ll
+++ b/llvm/test/CodeGen/AMDGPU/permute_i8.ll
@@ -1791,25 +1791,21 @@ define hidden void @sitofp_store_div(ptr addrspace(1) %in0, ptr addrspace(1) %in
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_and_b32_e32 v4, 0x3ff, v31
; GFX10-NEXT: v_lshlrev_b32_e32 v4, 2, v4
-; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, v2, v4
-; GFX10-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, 0, v3, vcc_lo
; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v4
; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
-; GFX10-NEXT: global_load_dword v4, v[2:3], off
-; GFX10-NEXT: global_load_dword v9, v[0:1], off
+; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, v2, v4
+; GFX10-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, 0, v3, vcc_lo
+; GFX10-NEXT: global_load_dword v4, v[0:1], off
+; GFX10-NEXT: global_load_dword v9, v[2:3], off
; GFX10-NEXT: s_waitcnt vmcnt(1)
-; GFX10-NEXT: v_lshrrev_b32_e32 v0, 16, v4
-; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: v_lshrrev_b32_e32 v1, 16, v9
-; GFX10-NEXT: v_ashrrev_i16 v2, 8, v9
-; GFX10-NEXT: v_ashrrev_i16 v3, 8, v4
-; GFX10-NEXT: v_perm_b32 v4, v4, v9, 0x6010205
-; GFX10-NEXT: v_bfe_i32 v10, v0, 0, 8
-; GFX10-NEXT: v_bfe_i32 v1, v1, 0, 8
-; GFX10-NEXT: v_cvt_f32_i32_sdwa v2, sext(v2) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0
-; GFX10-NEXT: v_cvt_f32_i32_sdwa v0, sext(v3) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0
-; GFX10-NEXT: v_cvt_f32_i32_sdwa v3, sext(v10) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0
-; GFX10-NEXT: v_cvt_f32_i32_sdwa v1, sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0
+; GFX10-NEXT: v_ashrrev_i16 v0, 8, v4
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: v_ashrrev_i16 v10, 8, v9
+; GFX10-NEXT: v_cvt_f32_i32_sdwa v3, sext(v9) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_2
+; GFX10-NEXT: v_cvt_f32_i32_sdwa v1, sext(v4) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_2
+; GFX10-NEXT: v_perm_b32 v4, v9, v4, 0x6010205
+; GFX10-NEXT: v_cvt_f32_i32_sdwa v2, sext(v0) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0
+; GFX10-NEXT: v_cvt_f32_i32_sdwa v0, sext(v10) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0
; GFX10-NEXT: global_store_dwordx4 v[7:8], v[0:3], off
; GFX10-NEXT: global_store_dword v[5:6], v4, off
; GFX10-NEXT: s_setpc_b64 s[30:31]
@@ -1821,24 +1817,20 @@ define hidden void @sitofp_store_div(ptr addrspace(1) %in0, ptr addrspace(1) %in
; GFX9-NEXT: v_lshlrev_b32_e32 v4, 2, v4
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v4
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
-; GFX9-NEXT: global_load_dword v9, v[0:1], off
-; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v2, v4
-; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v3, vcc
+; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v4
+; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc
; GFX9-NEXT: global_load_dword v4, v[0:1], off
+; GFX9-NEXT: global_load_dword v9, v[2:3], off
; GFX9-NEXT: s_mov_b32 s4, 0x6010205
; GFX9-NEXT: s_waitcnt vmcnt(1)
-; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v9
-; GFX9-NEXT: v_ashrrev_i16_e32 v1, 8, v9
-; GFX9-NEXT: v_bfe_i32 v10, v0, 0, 8
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v4
-; GFX9-NEXT: v_ashrrev_i16_e32 v3, 8, v4
-; GFX9-NEXT: v_bfe_i32 v11, v2, 0, 8
-; GFX9-NEXT: v_cvt_f32_i32_sdwa v2, sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0
-; GFX9-NEXT: v_cvt_f32_i32_sdwa v0, sext(v3) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0
-; GFX9-NEXT: v_cvt_f32_i32_sdwa v3, sext(v11) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0
-; GFX9-NEXT: v_cvt_f32_i32_sdwa v1, sext(v10) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0
-; GFX9-NEXT: v_perm_b32 v4, v4, v9, s4
+; GFX9-NEXT: v_ashrrev_i16_e32 v0, 8, v4
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: v_ashrrev_i16_e32 v10, 8, v9
+; GFX9-NEXT: v_cvt_f32_i32_sdwa v3, sext(v9) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_2
+; GFX9-NEXT: v_cvt_f32_i32_sdwa v1, sext(v4) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_2
+; GFX9-NEXT: v_cvt_f32_i32_sdwa v2, sext(v0) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0
+; GFX9-NEXT: v_cvt_f32_i32_sdwa v0, sext(v10) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0
+; GFX9-NEXT: v_perm_b32 v4, v9, v4, s4
; GFX9-NEXT: global_store_dwordx4 v[7:8], v[0:3], off
; GFX9-NEXT: global_store_dword v[5:6], v4, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
diff --git a/llvm/test/CodeGen/AMDGPU/sint_to_fp.f64.ll b/llvm/test/CodeGen/AMDGPU/sint_to_fp.f64.ll
index 6b40df0345ebe..8129a7ac51df9 100644
--- a/llvm/test/CodeGen/AMDGPU/sint_to_fp.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/sint_to_fp.f64.ll
@@ -196,8 +196,7 @@ define amdgpu_kernel void @s_sint_to_fp_i8_to_f64(ptr addrspace(1) %out, i8 %in)
; VI-NEXT: s_load_dword s2, s[8:9], 0x8
; VI-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
; VI-NEXT: s_waitcnt lgkmcnt(0)
-; VI-NEXT: s_bfe_i32 s2, s2, 0x80000
-; VI-NEXT: s_sext_i32_i16 s2, s2
+; VI-NEXT: s_sext_i32_i8 s2, s2
; VI-NEXT: v_cvt_f64_i32_e32 v[0:1], s2
; VI-NEXT: v_mov_b32_e32 v3, s1
; VI-NEXT: v_mov_b32_e32 v2, s0
More information about the llvm-commits
mailing list