[llvm] abc1acf - [TargetLowering][AMDGPU][ARM][RISCV][X86] Teach SimplifyDemandedBits to combine (srl (sra X, C1), ShAmt) -> sra(X, C1+ShAmt) (#101751)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Aug 14 08:45:00 PDT 2024
Author: Craig Topper
Date: 2024-08-14T08:44:57-07:00
New Revision: abc1acf8df3b212a03650c314b7832b3aa7ccd42
URL: https://github.com/llvm/llvm-project/commit/abc1acf8df3b212a03650c314b7832b3aa7ccd42
DIFF: https://github.com/llvm/llvm-project/commit/abc1acf8df3b212a03650c314b7832b3aa7ccd42.diff
LOG: [TargetLowering][AMDGPU][ARM][RISCV][X86] Teach SimplifyDemandedBits to combine (srl (sra X, C1), ShAmt) -> sra(X, C1+ShAmt) (#101751)
This is done only if the upper bits of the srl aren't demanded.
This helps with cases where the outer srl was originally an sra and was
converted to a srl by SimplifyDemandedBits before it had a chance to
combine with the inner sra. This can occur when the inner sra was part
of a sign_extend_inreg expansion.
There are some regressions in ARM and Thumb2.
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
llvm/test/CodeGen/AArch64/srem-seteq-illegal-types.ll
llvm/test/CodeGen/AMDGPU/permute_i8.ll
llvm/test/CodeGen/AMDGPU/srem-seteq-illegal-types.ll
llvm/test/CodeGen/ARM/srem-seteq-illegal-types.ll
llvm/test/CodeGen/Mips/srem-seteq-illegal-types.ll
llvm/test/CodeGen/PowerPC/srem-seteq-illegal-types.ll
llvm/test/CodeGen/RISCV/div.ll
llvm/test/CodeGen/RISCV/rv64zba.ll
llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
llvm/test/CodeGen/Thumb2/srem-seteq-illegal-types.ll
llvm/test/CodeGen/X86/srem-seteq-illegal-types.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index c4f4261a708fda..b5bca5937477be 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -1958,6 +1958,22 @@ bool TargetLowering::SimplifyDemandedBits(
}
}
+ // If this is (srl (sra X, C1), ShAmt), see if we can combine this into a
+ // single sra. We can do this if the top bits are never demanded.
+ if (Op0.getOpcode() == ISD::SRA && Op0.hasOneUse()) {
+ if (!DemandedBits.intersects(APInt::getHighBitsSet(BitWidth, ShAmt))) {
+ if (std::optional<uint64_t> InnerSA =
+ TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
+ unsigned C1 = *InnerSA;
+ // Clamp the combined shift amount if it exceeds the bit width.
+ unsigned Combined = std::min(C1 + ShAmt, BitWidth - 1);
+ SDValue NewSA = TLO.DAG.getConstant(Combined, dl, ShiftVT);
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRA, dl, VT,
+ Op0.getOperand(0), NewSA));
+ }
+ }
+ }
+
APInt InDemandedMask = (DemandedBits << ShAmt);
// If the shift is exact, then it does demand the low bits (and knows that
diff --git a/llvm/test/CodeGen/AArch64/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/AArch64/srem-seteq-illegal-types.ll
index 595991e86a91c7..9fbce05eee1775 100644
--- a/llvm/test/CodeGen/AArch64/srem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/AArch64/srem-seteq-illegal-types.ll
@@ -41,8 +41,8 @@ define i1 @test_srem_even(i4 %X) nounwind {
define i1 @test_srem_pow2_setne(i6 %X) nounwind {
; CHECK-LABEL: test_srem_pow2_setne:
; CHECK: // %bb.0:
-; CHECK-NEXT: sbfx w8, w0, #0, #6
-; CHECK-NEXT: ubfx w8, w8, #9, #2
+; CHECK-NEXT: sbfx w8, w0, #5, #1
+; CHECK-NEXT: and w8, w8, #0x3
; CHECK-NEXT: add w8, w0, w8
; CHECK-NEXT: and w8, w8, #0x3c
; CHECK-NEXT: sub w8, w0, w8
diff --git a/llvm/test/CodeGen/AMDGPU/permute_i8.ll b/llvm/test/CodeGen/AMDGPU/permute_i8.ll
index bf98af33dc7b08..050300a69c46bb 100644
--- a/llvm/test/CodeGen/AMDGPU/permute_i8.ll
+++ b/llvm/test/CodeGen/AMDGPU/permute_i8.ll
@@ -1049,15 +1049,14 @@ define hidden void @ashr_store_div(ptr addrspace(1) %in0, ptr addrspace(1) %in1,
; GFX10-NEXT: global_load_dword v0, v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(1)
; GFX10-NEXT: v_bfe_i32 v1, v9, 0, 8
-; GFX10-NEXT: v_ashrrev_i32_e32 v3, 24, v9
; GFX10-NEXT: v_ashrrev_i32_sdwa v2, v2, v9 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX10-NEXT: v_ashrrev_i32_e32 v3, 25, v9
; GFX10-NEXT: v_lshlrev_b16 v1, 7, v1
-; GFX10-NEXT: v_lshrrev_b16 v3, 1, v3
+; GFX10-NEXT: v_or_b32_sdwa v2, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_ashrrev_i16 v4, 10, v0
; GFX10-NEXT: v_perm_b32 v0, v9, v0, 0x4010707
; GFX10-NEXT: v_and_b32_e32 v1, 0xffffff00, v1
-; GFX10-NEXT: v_or_b32_sdwa v2, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX10-NEXT: v_or_b32_sdwa v1, v4, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX10-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX10-NEXT: global_store_dword v[5:6], v1, off
@@ -1075,23 +1074,22 @@ define hidden void @ashr_store_div(ptr addrspace(1) %in0, ptr addrspace(1) %in1,
; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc
; GFX9-NEXT: global_load_dword v4, v[0:1], off
; GFX9-NEXT: global_load_dword v9, v[2:3], off
-; GFX9-NEXT: v_mov_b32_e32 v0, 26
-; GFX9-NEXT: v_mov_b32_e32 v1, 1
-; GFX9-NEXT: v_mov_b32_e32 v2, 7
+; GFX9-NEXT: v_mov_b32_e32 v1, 7
; GFX9-NEXT: s_mov_b32 s4, 0x4010707
+; GFX9-NEXT: v_mov_b32_e32 v0, 26
; GFX9-NEXT: s_waitcnt vmcnt(1)
-; GFX9-NEXT: v_ashrrev_i32_sdwa v0, v0, v4 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX9-NEXT: v_lshrrev_b16_sdwa v1, v1, sext(v4) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_3
-; GFX9-NEXT: v_lshlrev_b16_sdwa v2, v2, sext(v4) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
+; GFX9-NEXT: v_lshlrev_b16_sdwa v1, v1, sext(v4) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_perm_b32 v3, v4, v9, s4
+; GFX9-NEXT: v_perm_b32 v2, v4, v9, s4
+; GFX9-NEXT: v_ashrrev_i32_sdwa v0, v0, v4 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX9-NEXT: v_ashrrev_i32_e32 v3, 25, v4
; GFX9-NEXT: v_ashrrev_i16_e32 v9, 10, v9
-; GFX9-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
-; GFX9-NEXT: v_and_b32_e32 v1, 0xffffff00, v2
+; GFX9-NEXT: v_and_b32_e32 v1, 0xffffff00, v1
+; GFX9-NEXT: v_or_b32_sdwa v0, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX9-NEXT: v_or_b32_sdwa v1, v9, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX9-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX9-NEXT: global_store_dword v[5:6], v0, off
-; GFX9-NEXT: global_store_dword v[7:8], v3, off
+; GFX9-NEXT: global_store_dword v[7:8], v2, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
%tid = call i32 @llvm.amdgcn.workitem.id.x()
diff --git a/llvm/test/CodeGen/AMDGPU/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/AMDGPU/srem-seteq-illegal-types.ll
index 126b17e718b59f..2efe27df2d10d1 100644
--- a/llvm/test/CodeGen/AMDGPU/srem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/AMDGPU/srem-seteq-illegal-types.ll
@@ -43,8 +43,8 @@ define i1 @test_srem_pow2_setne(i6 %X) nounwind {
; CHECK-LABEL: test_srem_pow2_setne:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT: v_bfe_i32 v1, v0, 0, 6
-; CHECK-NEXT: v_bfe_u32 v1, v1, 9, 2
+; CHECK-NEXT: v_bfe_i32 v1, v0, 5, 1
+; CHECK-NEXT: v_and_b32_e32 v1, 3, v1
; CHECK-NEXT: v_add_i32_e32 v1, vcc, v0, v1
; CHECK-NEXT: v_and_b32_e32 v1, 60, v1
; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v1
diff --git a/llvm/test/CodeGen/ARM/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/ARM/srem-seteq-illegal-types.ll
index a4e081d5384e5e..7f56215b9b4123 100644
--- a/llvm/test/CodeGen/ARM/srem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/ARM/srem-seteq-illegal-types.ll
@@ -209,8 +209,7 @@ define i1 @test_srem_pow2_setne(i6 %X) nounwind {
; ARM5: @ %bb.0:
; ARM5-NEXT: lsl r1, r0, #26
; ARM5-NEXT: mov r2, #3
-; ARM5-NEXT: asr r1, r1, #26
-; ARM5-NEXT: and r1, r2, r1, lsr #9
+; ARM5-NEXT: and r1, r2, r1, asr #31
; ARM5-NEXT: add r1, r0, r1
; ARM5-NEXT: and r1, r1, #60
; ARM5-NEXT: sub r0, r0, r1
@@ -222,8 +221,7 @@ define i1 @test_srem_pow2_setne(i6 %X) nounwind {
; ARM6: @ %bb.0:
; ARM6-NEXT: lsl r1, r0, #26
; ARM6-NEXT: mov r2, #3
-; ARM6-NEXT: asr r1, r1, #26
-; ARM6-NEXT: and r1, r2, r1, lsr #9
+; ARM6-NEXT: and r1, r2, r1, asr #31
; ARM6-NEXT: add r1, r0, r1
; ARM6-NEXT: and r1, r1, #60
; ARM6-NEXT: sub r0, r0, r1
@@ -233,8 +231,9 @@ define i1 @test_srem_pow2_setne(i6 %X) nounwind {
;
; ARM7-LABEL: test_srem_pow2_setne:
; ARM7: @ %bb.0:
-; ARM7-NEXT: sbfx r1, r0, #0, #6
-; ARM7-NEXT: ubfx r1, r1, #9, #2
+; ARM7-NEXT: lsl r1, r0, #26
+; ARM7-NEXT: mov r2, #3
+; ARM7-NEXT: and r1, r2, r1, asr #31
; ARM7-NEXT: add r1, r0, r1
; ARM7-NEXT: and r1, r1, #60
; ARM7-NEXT: sub r0, r0, r1
@@ -244,8 +243,9 @@ define i1 @test_srem_pow2_setne(i6 %X) nounwind {
;
; ARM8-LABEL: test_srem_pow2_setne:
; ARM8: @ %bb.0:
-; ARM8-NEXT: sbfx r1, r0, #0, #6
-; ARM8-NEXT: ubfx r1, r1, #9, #2
+; ARM8-NEXT: lsl r1, r0, #26
+; ARM8-NEXT: mov r2, #3
+; ARM8-NEXT: and r1, r2, r1, asr #31
; ARM8-NEXT: add r1, r0, r1
; ARM8-NEXT: and r1, r1, #60
; ARM8-NEXT: sub r0, r0, r1
@@ -255,8 +255,9 @@ define i1 @test_srem_pow2_setne(i6 %X) nounwind {
;
; NEON7-LABEL: test_srem_pow2_setne:
; NEON7: @ %bb.0:
-; NEON7-NEXT: sbfx r1, r0, #0, #6
-; NEON7-NEXT: ubfx r1, r1, #9, #2
+; NEON7-NEXT: lsl r1, r0, #26
+; NEON7-NEXT: mov r2, #3
+; NEON7-NEXT: and r1, r2, r1, asr #31
; NEON7-NEXT: add r1, r0, r1
; NEON7-NEXT: and r1, r1, #60
; NEON7-NEXT: sub r0, r0, r1
@@ -266,8 +267,9 @@ define i1 @test_srem_pow2_setne(i6 %X) nounwind {
;
; NEON8-LABEL: test_srem_pow2_setne:
; NEON8: @ %bb.0:
-; NEON8-NEXT: sbfx r1, r0, #0, #6
-; NEON8-NEXT: ubfx r1, r1, #9, #2
+; NEON8-NEXT: lsl r1, r0, #26
+; NEON8-NEXT: mov r2, #3
+; NEON8-NEXT: and r1, r2, r1, asr #31
; NEON8-NEXT: add r1, r0, r1
; NEON8-NEXT: and r1, r1, #60
; NEON8-NEXT: sub r0, r0, r1
diff --git a/llvm/test/CodeGen/Mips/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/Mips/srem-seteq-illegal-types.ll
index 1a9fa27c263deb..37cca8687890a6 100644
--- a/llvm/test/CodeGen/Mips/srem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/Mips/srem-seteq-illegal-types.ll
@@ -90,8 +90,7 @@ define i1 @test_srem_pow2_setne(i6 %X) nounwind {
; MIPSEL-LABEL: test_srem_pow2_setne:
; MIPSEL: # %bb.0:
; MIPSEL-NEXT: sll $1, $4, 26
-; MIPSEL-NEXT: sra $1, $1, 26
-; MIPSEL-NEXT: srl $1, $1, 9
+; MIPSEL-NEXT: sra $1, $1, 31
; MIPSEL-NEXT: andi $1, $1, 3
; MIPSEL-NEXT: addu $1, $4, $1
; MIPSEL-NEXT: andi $1, $1, 60
@@ -104,8 +103,7 @@ define i1 @test_srem_pow2_setne(i6 %X) nounwind {
; MIPS64EL: # %bb.0:
; MIPS64EL-NEXT: sll $1, $4, 0
; MIPS64EL-NEXT: sll $2, $1, 26
-; MIPS64EL-NEXT: sra $2, $2, 26
-; MIPS64EL-NEXT: srl $2, $2, 9
+; MIPS64EL-NEXT: sra $2, $2, 31
; MIPS64EL-NEXT: andi $2, $2, 3
; MIPS64EL-NEXT: addu $2, $1, $2
; MIPS64EL-NEXT: andi $2, $2, 60
diff --git a/llvm/test/CodeGen/PowerPC/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/PowerPC/srem-seteq-illegal-types.ll
index b0cc89d1828eda..2b07f27be021b1 100644
--- a/llvm/test/CodeGen/PowerPC/srem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/PowerPC/srem-seteq-illegal-types.ll
@@ -85,8 +85,8 @@ define i1 @test_srem_pow2_setne(i6 %X) nounwind {
; PPC-LABEL: test_srem_pow2_setne:
; PPC: # %bb.0:
; PPC-NEXT: slwi 4, 3, 26
-; PPC-NEXT: srawi 4, 4, 26
-; PPC-NEXT: rlwinm 4, 4, 23, 30, 31
+; PPC-NEXT: srawi 4, 4, 31
+; PPC-NEXT: clrlwi 4, 4, 30
; PPC-NEXT: add 4, 3, 4
; PPC-NEXT: rlwinm 4, 4, 0, 26, 29
; PPC-NEXT: sub 3, 3, 4
@@ -99,8 +99,8 @@ define i1 @test_srem_pow2_setne(i6 %X) nounwind {
; PPC64LE-LABEL: test_srem_pow2_setne:
; PPC64LE: # %bb.0:
; PPC64LE-NEXT: slwi 4, 3, 26
-; PPC64LE-NEXT: srawi 4, 4, 26
-; PPC64LE-NEXT: rlwinm 4, 4, 23, 30, 31
+; PPC64LE-NEXT: srawi 4, 4, 31
+; PPC64LE-NEXT: clrlwi 4, 4, 30
; PPC64LE-NEXT: add 4, 3, 4
; PPC64LE-NEXT: rlwinm 4, 4, 0, 26, 29
; PPC64LE-NEXT: sub 3, 3, 4
diff --git a/llvm/test/CodeGen/RISCV/div.ll b/llvm/test/CodeGen/RISCV/div.ll
index 99c83b99497dd3..f4e67698473151 100644
--- a/llvm/test/CodeGen/RISCV/div.ll
+++ b/llvm/test/CodeGen/RISCV/div.ll
@@ -1017,8 +1017,7 @@ define i8 @sdiv8_pow2(i8 %a) nounwind {
; RV32I-LABEL: sdiv8_pow2:
; RV32I: # %bb.0:
; RV32I-NEXT: slli a1, a0, 24
-; RV32I-NEXT: srai a1, a1, 24
-; RV32I-NEXT: slli a1, a1, 17
+; RV32I-NEXT: srai a1, a1, 2
; RV32I-NEXT: srli a1, a1, 29
; RV32I-NEXT: add a0, a0, a1
; RV32I-NEXT: slli a0, a0, 24
@@ -1028,8 +1027,7 @@ define i8 @sdiv8_pow2(i8 %a) nounwind {
; RV32IM-LABEL: sdiv8_pow2:
; RV32IM: # %bb.0:
; RV32IM-NEXT: slli a1, a0, 24
-; RV32IM-NEXT: srai a1, a1, 24
-; RV32IM-NEXT: slli a1, a1, 17
+; RV32IM-NEXT: srai a1, a1, 2
; RV32IM-NEXT: srli a1, a1, 29
; RV32IM-NEXT: add a0, a0, a1
; RV32IM-NEXT: slli a0, a0, 24
@@ -1039,8 +1037,7 @@ define i8 @sdiv8_pow2(i8 %a) nounwind {
; RV64I-LABEL: sdiv8_pow2:
; RV64I: # %bb.0:
; RV64I-NEXT: slli a1, a0, 56
-; RV64I-NEXT: srai a1, a1, 56
-; RV64I-NEXT: slli a1, a1, 49
+; RV64I-NEXT: srai a1, a1, 2
; RV64I-NEXT: srli a1, a1, 61
; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: slli a0, a0, 56
@@ -1050,8 +1047,7 @@ define i8 @sdiv8_pow2(i8 %a) nounwind {
; RV64IM-LABEL: sdiv8_pow2:
; RV64IM: # %bb.0:
; RV64IM-NEXT: slli a1, a0, 56
-; RV64IM-NEXT: srai a1, a1, 56
-; RV64IM-NEXT: slli a1, a1, 49
+; RV64IM-NEXT: srai a1, a1, 2
; RV64IM-NEXT: srli a1, a1, 61
; RV64IM-NEXT: add a0, a0, a1
; RV64IM-NEXT: slli a0, a0, 56
@@ -1209,8 +1205,7 @@ define i16 @sdiv16_pow2(i16 %a) nounwind {
; RV32I-LABEL: sdiv16_pow2:
; RV32I: # %bb.0:
; RV32I-NEXT: slli a1, a0, 16
-; RV32I-NEXT: srai a1, a1, 16
-; RV32I-NEXT: slli a1, a1, 1
+; RV32I-NEXT: srai a1, a1, 2
; RV32I-NEXT: srli a1, a1, 29
; RV32I-NEXT: add a0, a0, a1
; RV32I-NEXT: slli a0, a0, 16
@@ -1220,8 +1215,7 @@ define i16 @sdiv16_pow2(i16 %a) nounwind {
; RV32IM-LABEL: sdiv16_pow2:
; RV32IM: # %bb.0:
; RV32IM-NEXT: slli a1, a0, 16
-; RV32IM-NEXT: srai a1, a1, 16
-; RV32IM-NEXT: slli a1, a1, 1
+; RV32IM-NEXT: srai a1, a1, 2
; RV32IM-NEXT: srli a1, a1, 29
; RV32IM-NEXT: add a0, a0, a1
; RV32IM-NEXT: slli a0, a0, 16
@@ -1231,8 +1225,7 @@ define i16 @sdiv16_pow2(i16 %a) nounwind {
; RV64I-LABEL: sdiv16_pow2:
; RV64I: # %bb.0:
; RV64I-NEXT: slli a1, a0, 48
-; RV64I-NEXT: srai a1, a1, 48
-; RV64I-NEXT: slli a1, a1, 33
+; RV64I-NEXT: srai a1, a1, 2
; RV64I-NEXT: srli a1, a1, 61
; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: slli a0, a0, 48
@@ -1242,8 +1235,7 @@ define i16 @sdiv16_pow2(i16 %a) nounwind {
; RV64IM-LABEL: sdiv16_pow2:
; RV64IM: # %bb.0:
; RV64IM-NEXT: slli a1, a0, 48
-; RV64IM-NEXT: srai a1, a1, 48
-; RV64IM-NEXT: slli a1, a1, 33
+; RV64IM-NEXT: srai a1, a1, 2
; RV64IM-NEXT: srli a1, a1, 61
; RV64IM-NEXT: add a0, a0, a1
; RV64IM-NEXT: slli a0, a0, 48
diff --git a/llvm/test/CodeGen/RISCV/rv64zba.ll b/llvm/test/CodeGen/RISCV/rv64zba.ll
index 20a04844640188..87796e2c7b72e9 100644
--- a/llvm/test/CodeGen/RISCV/rv64zba.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zba.ll
@@ -1555,16 +1555,14 @@ define zeroext i32 @sext_ashr_zext_i8(i8 %a) nounwind {
; RV64I-LABEL: sext_ashr_zext_i8:
; RV64I: # %bb.0:
; RV64I-NEXT: slli a0, a0, 56
-; RV64I-NEXT: srai a0, a0, 56
-; RV64I-NEXT: slli a0, a0, 23
+; RV64I-NEXT: srai a0, a0, 31
; RV64I-NEXT: srli a0, a0, 32
; RV64I-NEXT: ret
;
; RV64ZBANOZBB-LABEL: sext_ashr_zext_i8:
; RV64ZBANOZBB: # %bb.0:
; RV64ZBANOZBB-NEXT: slli a0, a0, 56
-; RV64ZBANOZBB-NEXT: srai a0, a0, 56
-; RV64ZBANOZBB-NEXT: slli a0, a0, 23
+; RV64ZBANOZBB-NEXT: srai a0, a0, 31
; RV64ZBANOZBB-NEXT: srli a0, a0, 32
; RV64ZBANOZBB-NEXT: ret
;
@@ -1674,16 +1672,14 @@ define zeroext i32 @sext_ashr_zext_i16(i16 %a) nounwind {
; RV64I-LABEL: sext_ashr_zext_i16:
; RV64I: # %bb.0:
; RV64I-NEXT: slli a0, a0, 48
-; RV64I-NEXT: srai a0, a0, 48
-; RV64I-NEXT: slli a0, a0, 23
+; RV64I-NEXT: srai a0, a0, 25
; RV64I-NEXT: srli a0, a0, 32
; RV64I-NEXT: ret
;
; RV64ZBANOZBB-LABEL: sext_ashr_zext_i16:
; RV64ZBANOZBB: # %bb.0:
; RV64ZBANOZBB-NEXT: slli a0, a0, 48
-; RV64ZBANOZBB-NEXT: srai a0, a0, 48
-; RV64ZBANOZBB-NEXT: slli a0, a0, 23
+; RV64ZBANOZBB-NEXT: srai a0, a0, 25
; RV64ZBANOZBB-NEXT: srli a0, a0, 32
; RV64ZBANOZBB-NEXT: ret
;
diff --git a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
index 457d0380ca8a83..dc27158cfb31f3 100644
--- a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
@@ -222,8 +222,7 @@ define i1 @test_srem_pow2_setne(i6 %X) nounwind {
; RV32-LABEL: test_srem_pow2_setne:
; RV32: # %bb.0:
; RV32-NEXT: slli a1, a0, 26
-; RV32-NEXT: srai a1, a1, 26
-; RV32-NEXT: slli a1, a1, 21
+; RV32-NEXT: srai a1, a1, 1
; RV32-NEXT: srli a1, a1, 30
; RV32-NEXT: add a1, a0, a1
; RV32-NEXT: andi a1, a1, 60
@@ -235,8 +234,7 @@ define i1 @test_srem_pow2_setne(i6 %X) nounwind {
; RV64-LABEL: test_srem_pow2_setne:
; RV64: # %bb.0:
; RV64-NEXT: slli a1, a0, 58
-; RV64-NEXT: srai a1, a1, 58
-; RV64-NEXT: slli a1, a1, 53
+; RV64-NEXT: srai a1, a1, 1
; RV64-NEXT: srli a1, a1, 62
; RV64-NEXT: add a1, a0, a1
; RV64-NEXT: andi a1, a1, 60
@@ -248,8 +246,7 @@ define i1 @test_srem_pow2_setne(i6 %X) nounwind {
; RV32M-LABEL: test_srem_pow2_setne:
; RV32M: # %bb.0:
; RV32M-NEXT: slli a1, a0, 26
-; RV32M-NEXT: srai a1, a1, 26
-; RV32M-NEXT: slli a1, a1, 21
+; RV32M-NEXT: srai a1, a1, 1
; RV32M-NEXT: srli a1, a1, 30
; RV32M-NEXT: add a1, a0, a1
; RV32M-NEXT: andi a1, a1, 60
@@ -261,8 +258,7 @@ define i1 @test_srem_pow2_setne(i6 %X) nounwind {
; RV64M-LABEL: test_srem_pow2_setne:
; RV64M: # %bb.0:
; RV64M-NEXT: slli a1, a0, 58
-; RV64M-NEXT: srai a1, a1, 58
-; RV64M-NEXT: slli a1, a1, 53
+; RV64M-NEXT: srai a1, a1, 1
; RV64M-NEXT: srli a1, a1, 62
; RV64M-NEXT: add a1, a0, a1
; RV64M-NEXT: andi a1, a1, 60
@@ -274,8 +270,7 @@ define i1 @test_srem_pow2_setne(i6 %X) nounwind {
; RV32MV-LABEL: test_srem_pow2_setne:
; RV32MV: # %bb.0:
; RV32MV-NEXT: slli a1, a0, 26
-; RV32MV-NEXT: srai a1, a1, 26
-; RV32MV-NEXT: slli a1, a1, 21
+; RV32MV-NEXT: srai a1, a1, 1
; RV32MV-NEXT: srli a1, a1, 30
; RV32MV-NEXT: add a1, a0, a1
; RV32MV-NEXT: andi a1, a1, 60
@@ -287,8 +282,7 @@ define i1 @test_srem_pow2_setne(i6 %X) nounwind {
; RV64MV-LABEL: test_srem_pow2_setne:
; RV64MV: # %bb.0:
; RV64MV-NEXT: slli a1, a0, 58
-; RV64MV-NEXT: srai a1, a1, 58
-; RV64MV-NEXT: slli a1, a1, 53
+; RV64MV-NEXT: srai a1, a1, 1
; RV64MV-NEXT: srli a1, a1, 62
; RV64MV-NEXT: add a1, a0, a1
; RV64MV-NEXT: andi a1, a1, 60
diff --git a/llvm/test/CodeGen/Thumb2/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/Thumb2/srem-seteq-illegal-types.ll
index 58bafebd5b702f..e3d65a336978b3 100644
--- a/llvm/test/CodeGen/Thumb2/srem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/Thumb2/srem-seteq-illegal-types.ll
@@ -44,8 +44,9 @@ define i1 @test_srem_even(i4 %X) nounwind {
define i1 @test_srem_pow2_setne(i6 %X) nounwind {
; CHECK-LABEL: test_srem_pow2_setne:
; CHECK: @ %bb.0:
-; CHECK-NEXT: sbfx r1, r0, #0, #6
-; CHECK-NEXT: ubfx r1, r1, #9, #2
+; CHECK-NEXT: lsls r1, r0, #26
+; CHECK-NEXT: movs r2, #3
+; CHECK-NEXT: and.w r1, r2, r1, asr #31
; CHECK-NEXT: add r1, r0
; CHECK-NEXT: and r1, r1, #60
; CHECK-NEXT: subs r0, r0, r1
diff --git a/llvm/test/CodeGen/X86/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/X86/srem-seteq-illegal-types.ll
index d644ed87c3c108..cc4bda81bef527 100644
--- a/llvm/test/CodeGen/X86/srem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/X86/srem-seteq-illegal-types.ll
@@ -82,8 +82,7 @@ define i1 @test_srem_pow2_setne(i6 %X) nounwind {
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: shlb $2, %cl
-; X86-NEXT: sarb $5, %cl
-; X86-NEXT: shrb $4, %cl
+; X86-NEXT: sarb $7, %cl
; X86-NEXT: andb $3, %cl
; X86-NEXT: addb %al, %cl
; X86-NEXT: andb $60, %cl
@@ -96,8 +95,7 @@ define i1 @test_srem_pow2_setne(i6 %X) nounwind {
; X64: # %bb.0:
; X64-NEXT: # kill: def $edi killed $edi def $rdi
; X64-NEXT: leal (,%rdi,4), %eax
-; X64-NEXT: sarb $5, %al
-; X64-NEXT: shrb $4, %al
+; X64-NEXT: sarb $7, %al
; X64-NEXT: andb $3, %al
; X64-NEXT: addb %dil, %al
; X64-NEXT: andb $60, %al
More information about the llvm-commits
mailing list