[llvm] 7ca3e23 - [SDAG] narrow truncated sign_extend_inreg
Sanjay Patel via llvm-commits
llvm-commits at lists.llvm.org
Sat Jul 16 13:29:25 PDT 2022
Author: Sanjay Patel
Date: 2022-07-16T16:29:15-04:00
New Revision: 7ca3e23f250dc679bdd6660fd6877e1e5c275871
URL: https://github.com/llvm/llvm-project/commit/7ca3e23f250dc679bdd6660fd6877e1e5c275871
DIFF: https://github.com/llvm/llvm-project/commit/7ca3e23f250dc679bdd6660fd6877e1e5c275871.diff
LOG: [SDAG] narrow truncated sign_extend_inreg
trunc (sign_ext_inreg X, iM) to iN --> sign_ext_inreg (trunc X to iN), iM
This improves several existing tests, and D127115 shows a pair of large
Thumb2 regressions caused by not folding this pattern.
Differential Revision: https://reviews.llvm.org/D129890
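As a quick illustration (not part of the commit), here is a minimal standalone C++ sketch that checks the scalar equivalence behind the fold. The signExtInReg64/signExtInReg32 helpers are hypothetical stand-ins for SIGN_EXTEND_INREG, and ExtVT = i16, VT = i32 are example types chosen to satisfy the ExtVT < VT requirement in the combine:

```cpp
#include <cassert>
#include <cstdint>

// Hypothetical scalar model of SIGN_EXTEND_INREG: sign-extend the low
// `Bits` bits of Val within a 64-bit container.
static int64_t signExtInReg64(int64_t Val, unsigned Bits) {
  unsigned Shift = 64 - Bits;
  return (int64_t)((uint64_t)Val << Shift) >> Shift;
}

// Same model within a 32-bit container.
static int32_t signExtInReg32(int32_t Val, unsigned Bits) {
  unsigned Shift = 32 - Bits;
  return (int32_t)((uint32_t)Val << Shift) >> Shift;
}

int main() {
  // ExtVT = i16 is strictly narrower than the destination VT = i32, so:
  //   trunc (sign_ext_inreg X, i16) to i32
  //     == sign_ext_inreg (trunc X to i32), i16
  // (Narrowing casts below assume the usual two's-complement wrap.)
  for (int64_t X : {int64_t(0x7FFF), int64_t(0x123456789ABCDEF0),
                    int64_t(-32768), int64_t(42)}) {
    int32_t Narrowed = (int32_t)signExtInReg64(X, 16); // original form
    int32_t Folded = signExtInReg32((int32_t)X, 16);   // folded form
    assert(Narrowed == Folded);
  }
  return 0;
}
```

The equivalence only holds when the extension width is strictly smaller than the destination type, which is why the combine below guards on ExtVT.bitsLT(VT): the replicated sign bit must survive the truncation.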
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/test/CodeGen/AMDGPU/mul_int24.ll
llvm/test/CodeGen/X86/pmulh.ll
llvm/test/CodeGen/X86/sar_fold64.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 915aca11ec709..03384ce867705 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -13153,6 +13153,19 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
return N0.getOperand(0);
}
+ // Try to narrow a truncate-of-sext_in_reg to the destination type:
+ // trunc (sign_ext_inreg X, iM) to iN --> sign_ext_inreg (trunc X to iN), iM
+ if (!LegalTypes && N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
+ N0.hasOneUse()) {
+ SDValue X = N0.getOperand(0);
+ SDValue ExtVal = N0.getOperand(1);
+ EVT ExtVT = cast<VTSDNode>(ExtVal)->getVT();
+ if (ExtVT.bitsLT(VT)) {
+ SDValue TrX = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, X);
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, TrX, ExtVal);
+ }
+ }
+
// If this is anyext(trunc), don't fold it, allow ourselves to be folded.
if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ANY_EXTEND))
return SDValue();
diff --git a/llvm/test/CodeGen/AMDGPU/mul_int24.ll b/llvm/test/CodeGen/AMDGPU/mul_int24.ll
index 478701cc97486..c5778464ba76f 100644
--- a/llvm/test/CodeGen/AMDGPU/mul_int24.ll
+++ b/llvm/test/CodeGen/AMDGPU/mul_int24.ll
@@ -181,34 +181,25 @@ define i64 @test_smul48_i64(i64 %lhs, i64 %rhs) {
; SI-LABEL: test_smul48_i64:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-NEXT: v_lshlrev_b32_e32 v1, 8, v0
-; SI-NEXT: v_lshlrev_b32_e32 v2, 8, v2
-; SI-NEXT: v_ashr_i64 v[3:4], v[0:1], 40
-; SI-NEXT: v_ashr_i64 v[1:2], v[1:2], 40
-; SI-NEXT: v_mul_i32_i24_e32 v0, v3, v1
-; SI-NEXT: v_mul_hi_i32_i24_e32 v1, v3, v1
+; SI-NEXT: v_mul_i32_i24_e32 v3, v0, v2
+; SI-NEXT: v_mul_hi_i32_i24_e32 v1, v0, v2
+; SI-NEXT: v_mov_b32_e32 v0, v3
; SI-NEXT: s_setpc_b64 s[30:31]
;
; VI-LABEL: test_smul48_i64:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-NEXT: v_lshlrev_b32_e32 v1, 8, v0
-; VI-NEXT: v_ashrrev_i64 v[3:4], 40, v[0:1]
-; VI-NEXT: v_lshlrev_b32_e32 v1, 8, v2
-; VI-NEXT: v_ashrrev_i64 v[1:2], 40, v[0:1]
-; VI-NEXT: v_mul_i32_i24_e32 v0, v3, v1
-; VI-NEXT: v_mul_hi_i32_i24_e32 v1, v3, v1
+; VI-NEXT: v_mul_i32_i24_e32 v3, v0, v2
+; VI-NEXT: v_mul_hi_i32_i24_e32 v1, v0, v2
+; VI-NEXT: v_mov_b32_e32 v0, v3
; VI-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: test_smul48_i64:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_lshlrev_b32_e32 v1, 8, v0
-; GFX9-NEXT: v_ashrrev_i64 v[3:4], 40, v[0:1]
-; GFX9-NEXT: v_lshlrev_b32_e32 v1, 8, v2
-; GFX9-NEXT: v_ashrrev_i64 v[1:2], 40, v[0:1]
-; GFX9-NEXT: v_mul_i32_i24_e32 v0, v3, v1
-; GFX9-NEXT: v_mul_hi_i32_i24_e32 v1, v3, v1
+; GFX9-NEXT: v_mul_i32_i24_e32 v3, v0, v2
+; GFX9-NEXT: v_mul_hi_i32_i24_e32 v1, v0, v2
+; GFX9-NEXT: v_mov_b32_e32 v0, v3
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; EG-LABEL: test_smul48_i64:
diff --git a/llvm/test/CodeGen/X86/pmulh.ll b/llvm/test/CodeGen/X86/pmulh.ll
index 7eb192dca1211..cac800de1a6f5 100644
--- a/llvm/test/CodeGen/X86/pmulh.ll
+++ b/llvm/test/CodeGen/X86/pmulh.ll
@@ -261,12 +261,14 @@ define <8 x i16> @sextinreg_mulhw_v8i16(<8 x i32> %a, <8 x i32> %b) {
;
; AVX512-LABEL: sextinreg_mulhw_v8i16:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpslld $24, %ymm0, %ymm0
-; AVX512-NEXT: vpsrad $24, %ymm0, %ymm0
-; AVX512-NEXT: vpslld $25, %ymm1, %ymm1
-; AVX512-NEXT: vpsrad $25, %ymm1, %ymm1
+; AVX512-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
+; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512-NEXT: vpmovdw %zmm1, %ymm1
+; AVX512-NEXT: vpsllw $9, %xmm1, %xmm1
+; AVX512-NEXT: vpsraw $9, %xmm1, %xmm1
; AVX512-NEXT: vpmovdw %zmm0, %ymm0
+; AVX512-NEXT: vpsllw $8, %xmm0, %xmm0
+; AVX512-NEXT: vpsraw $8, %xmm0, %xmm0
; AVX512-NEXT: vpmulhw %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/sar_fold64.ll b/llvm/test/CodeGen/X86/sar_fold64.ll
index a23d0cb4962f1..245af74c23891 100644
--- a/llvm/test/CodeGen/X86/sar_fold64.ll
+++ b/llvm/test/CodeGen/X86/sar_fold64.ll
@@ -6,9 +6,8 @@
define i32 @shl48sar47(i64 %a) #0 {
; CHECK-LABEL: shl48sar47:
; CHECK: # %bb.0:
-; CHECK-NEXT: movswq %di, %rax
+; CHECK-NEXT: movswl %di, %eax
; CHECK-NEXT: addl %eax, %eax
-; CHECK-NEXT: # kill: def $eax killed $eax killed $rax
; CHECK-NEXT: retq
%1 = shl i64 %a, 48
%2 = ashr exact i64 %1, 47
@@ -32,9 +31,8 @@ define i32 @shl48sar49(i64 %a) #0 {
define i32 @shl56sar55(i64 %a) #0 {
; CHECK-LABEL: shl56sar55:
; CHECK: # %bb.0:
-; CHECK-NEXT: movsbq %dil, %rax
+; CHECK-NEXT: movsbl %dil, %eax
; CHECK-NEXT: addl %eax, %eax
-; CHECK-NEXT: # kill: def $eax killed $eax killed $rax
; CHECK-NEXT: retq
%1 = shl i64 %a, 56
%2 = ashr exact i64 %1, 55