[llvm] abc1acf - [TargetLowering][AMDGPU][ARM][RISCV][X86] Teach SimplifyDemandedBits to combine (srl (sra X, C1), ShAmt) -> sra(X, C1+ShAmt) (#101751)

via llvm-commits llvm-commits at lists.llvm.org
Wed Aug 14 08:45:00 PDT 2024


Author: Craig Topper
Date: 2024-08-14T08:44:57-07:00
New Revision: abc1acf8df3b212a03650c314b7832b3aa7ccd42

URL: https://github.com/llvm/llvm-project/commit/abc1acf8df3b212a03650c314b7832b3aa7ccd42
DIFF: https://github.com/llvm/llvm-project/commit/abc1acf8df3b212a03650c314b7832b3aa7ccd42.diff

LOG: [TargetLowering][AMDGPU][ARM][RISCV][X86] Teach SimplifyDemandedBits to combine (srl (sra X, C1), ShAmt) -> sra(X, C1+ShAmt) (#101751)

If the upper bits of the shr aren't demanded.

This helps with cases where the outer srl was originally an sra and was
converted to a srl by SimplifyDemandedBits before it had a chance to
combine with the inner sra. This can occur when the inner sra was part
of a sign_extend_inreg expansion.

There are some regressions in ARM and Thumb2.

Added: 
    

Modified: 
    llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
    llvm/test/CodeGen/AArch64/srem-seteq-illegal-types.ll
    llvm/test/CodeGen/AMDGPU/permute_i8.ll
    llvm/test/CodeGen/AMDGPU/srem-seteq-illegal-types.ll
    llvm/test/CodeGen/ARM/srem-seteq-illegal-types.ll
    llvm/test/CodeGen/Mips/srem-seteq-illegal-types.ll
    llvm/test/CodeGen/PowerPC/srem-seteq-illegal-types.ll
    llvm/test/CodeGen/RISCV/div.ll
    llvm/test/CodeGen/RISCV/rv64zba.ll
    llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
    llvm/test/CodeGen/Thumb2/srem-seteq-illegal-types.ll
    llvm/test/CodeGen/X86/srem-seteq-illegal-types.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index c4f4261a708fda..b5bca5937477be 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -1958,6 +1958,22 @@ bool TargetLowering::SimplifyDemandedBits(
         }
       }
 
+      // If this is (srl (sra X, C1), ShAmt), see if we can combine this into a
+      // single sra. We can do this if the top bits are never demanded.
+      if (Op0.getOpcode() == ISD::SRA && Op0.hasOneUse()) {
+        if (!DemandedBits.intersects(APInt::getHighBitsSet(BitWidth, ShAmt))) {
+          if (std::optional<uint64_t> InnerSA =
+                  TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
+            unsigned C1 = *InnerSA;
+            // Clamp the combined shift amount if it exceeds the bit width.
+            unsigned Combined = std::min(C1 + ShAmt, BitWidth - 1);
+            SDValue NewSA = TLO.DAG.getConstant(Combined, dl, ShiftVT);
+            return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRA, dl, VT,
+                                                     Op0.getOperand(0), NewSA));
+          }
+        }
+      }
+
       APInt InDemandedMask = (DemandedBits << ShAmt);
 
       // If the shift is exact, then it does demand the low bits (and knows that

diff  --git a/llvm/test/CodeGen/AArch64/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/AArch64/srem-seteq-illegal-types.ll
index 595991e86a91c7..9fbce05eee1775 100644
--- a/llvm/test/CodeGen/AArch64/srem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/AArch64/srem-seteq-illegal-types.ll
@@ -41,8 +41,8 @@ define i1 @test_srem_even(i4 %X) nounwind {
 define i1 @test_srem_pow2_setne(i6 %X) nounwind {
 ; CHECK-LABEL: test_srem_pow2_setne:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sbfx w8, w0, #0, #6
-; CHECK-NEXT:    ubfx w8, w8, #9, #2
+; CHECK-NEXT:    sbfx w8, w0, #5, #1
+; CHECK-NEXT:    and w8, w8, #0x3
 ; CHECK-NEXT:    add w8, w0, w8
 ; CHECK-NEXT:    and w8, w8, #0x3c
 ; CHECK-NEXT:    sub w8, w0, w8

diff  --git a/llvm/test/CodeGen/AMDGPU/permute_i8.ll b/llvm/test/CodeGen/AMDGPU/permute_i8.ll
index bf98af33dc7b08..050300a69c46bb 100644
--- a/llvm/test/CodeGen/AMDGPU/permute_i8.ll
+++ b/llvm/test/CodeGen/AMDGPU/permute_i8.ll
@@ -1049,15 +1049,14 @@ define hidden void @ashr_store_div(ptr addrspace(1) %in0, ptr addrspace(1) %in1,
 ; GFX10-NEXT:    global_load_dword v0, v[0:1], off
 ; GFX10-NEXT:    s_waitcnt vmcnt(1)
 ; GFX10-NEXT:    v_bfe_i32 v1, v9, 0, 8
-; GFX10-NEXT:    v_ashrrev_i32_e32 v3, 24, v9
 ; GFX10-NEXT:    v_ashrrev_i32_sdwa v2, v2, v9 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX10-NEXT:    v_ashrrev_i32_e32 v3, 25, v9
 ; GFX10-NEXT:    v_lshlrev_b16 v1, 7, v1
-; GFX10-NEXT:    v_lshrrev_b16 v3, 1, v3
+; GFX10-NEXT:    v_or_b32_sdwa v2, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    v_ashrrev_i16 v4, 10, v0
 ; GFX10-NEXT:    v_perm_b32 v0, v9, v0, 0x4010707
 ; GFX10-NEXT:    v_and_b32_e32 v1, 0xffffff00, v1
-; GFX10-NEXT:    v_or_b32_sdwa v2, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
 ; GFX10-NEXT:    v_or_b32_sdwa v1, v4, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
 ; GFX10-NEXT:    v_or_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
 ; GFX10-NEXT:    global_store_dword v[5:6], v1, off
@@ -1075,23 +1074,22 @@ define hidden void @ashr_store_div(ptr addrspace(1) %in0, ptr addrspace(1) %in1,
 ; GFX9-NEXT:    v_addc_co_u32_e32 v3, vcc, 0, v3, vcc
 ; GFX9-NEXT:    global_load_dword v4, v[0:1], off
 ; GFX9-NEXT:    global_load_dword v9, v[2:3], off
-; GFX9-NEXT:    v_mov_b32_e32 v0, 26
-; GFX9-NEXT:    v_mov_b32_e32 v1, 1
-; GFX9-NEXT:    v_mov_b32_e32 v2, 7
+; GFX9-NEXT:    v_mov_b32_e32 v1, 7
 ; GFX9-NEXT:    s_mov_b32 s4, 0x4010707
+; GFX9-NEXT:    v_mov_b32_e32 v0, 26
 ; GFX9-NEXT:    s_waitcnt vmcnt(1)
-; GFX9-NEXT:    v_ashrrev_i32_sdwa v0, v0, v4 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX9-NEXT:    v_lshrrev_b16_sdwa v1, v1, sext(v4) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_3
-; GFX9-NEXT:    v_lshlrev_b16_sdwa v2, v2, sext(v4) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
+; GFX9-NEXT:    v_lshlrev_b16_sdwa v1, v1, sext(v4) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    v_perm_b32 v3, v4, v9, s4
+; GFX9-NEXT:    v_perm_b32 v2, v4, v9, s4
+; GFX9-NEXT:    v_ashrrev_i32_sdwa v0, v0, v4 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX9-NEXT:    v_ashrrev_i32_e32 v3, 25, v4
 ; GFX9-NEXT:    v_ashrrev_i16_e32 v9, 10, v9
-; GFX9-NEXT:    v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
-; GFX9-NEXT:    v_and_b32_e32 v1, 0xffffff00, v2
+; GFX9-NEXT:    v_and_b32_e32 v1, 0xffffff00, v1
+; GFX9-NEXT:    v_or_b32_sdwa v0, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
 ; GFX9-NEXT:    v_or_b32_sdwa v1, v9, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
 ; GFX9-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
 ; GFX9-NEXT:    global_store_dword v[5:6], v0, off
-; GFX9-NEXT:    global_store_dword v[7:8], v3, off
+; GFX9-NEXT:    global_store_dword v[7:8], v2, off
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
   %tid = call i32 @llvm.amdgcn.workitem.id.x()

diff  --git a/llvm/test/CodeGen/AMDGPU/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/AMDGPU/srem-seteq-illegal-types.ll
index 126b17e718b59f..2efe27df2d10d1 100644
--- a/llvm/test/CodeGen/AMDGPU/srem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/AMDGPU/srem-seteq-illegal-types.ll
@@ -43,8 +43,8 @@ define i1 @test_srem_pow2_setne(i6 %X) nounwind {
 ; CHECK-LABEL: test_srem_pow2_setne:
 ; CHECK:       ; %bb.0:
 ; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT:    v_bfe_i32 v1, v0, 0, 6
-; CHECK-NEXT:    v_bfe_u32 v1, v1, 9, 2
+; CHECK-NEXT:    v_bfe_i32 v1, v0, 5, 1
+; CHECK-NEXT:    v_and_b32_e32 v1, 3, v1
 ; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v0, v1
 ; CHECK-NEXT:    v_and_b32_e32 v1, 60, v1
 ; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v1

diff  --git a/llvm/test/CodeGen/ARM/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/ARM/srem-seteq-illegal-types.ll
index a4e081d5384e5e..7f56215b9b4123 100644
--- a/llvm/test/CodeGen/ARM/srem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/ARM/srem-seteq-illegal-types.ll
@@ -209,8 +209,7 @@ define i1 @test_srem_pow2_setne(i6 %X) nounwind {
 ; ARM5:       @ %bb.0:
 ; ARM5-NEXT:    lsl r1, r0, #26
 ; ARM5-NEXT:    mov r2, #3
-; ARM5-NEXT:    asr r1, r1, #26
-; ARM5-NEXT:    and r1, r2, r1, lsr #9
+; ARM5-NEXT:    and r1, r2, r1, asr #31
 ; ARM5-NEXT:    add r1, r0, r1
 ; ARM5-NEXT:    and r1, r1, #60
 ; ARM5-NEXT:    sub r0, r0, r1
@@ -222,8 +221,7 @@ define i1 @test_srem_pow2_setne(i6 %X) nounwind {
 ; ARM6:       @ %bb.0:
 ; ARM6-NEXT:    lsl r1, r0, #26
 ; ARM6-NEXT:    mov r2, #3
-; ARM6-NEXT:    asr r1, r1, #26
-; ARM6-NEXT:    and r1, r2, r1, lsr #9
+; ARM6-NEXT:    and r1, r2, r1, asr #31
 ; ARM6-NEXT:    add r1, r0, r1
 ; ARM6-NEXT:    and r1, r1, #60
 ; ARM6-NEXT:    sub r0, r0, r1
@@ -233,8 +231,9 @@ define i1 @test_srem_pow2_setne(i6 %X) nounwind {
 ;
 ; ARM7-LABEL: test_srem_pow2_setne:
 ; ARM7:       @ %bb.0:
-; ARM7-NEXT:    sbfx r1, r0, #0, #6
-; ARM7-NEXT:    ubfx r1, r1, #9, #2
+; ARM7-NEXT:    lsl r1, r0, #26
+; ARM7-NEXT:    mov r2, #3
+; ARM7-NEXT:    and r1, r2, r1, asr #31
 ; ARM7-NEXT:    add r1, r0, r1
 ; ARM7-NEXT:    and r1, r1, #60
 ; ARM7-NEXT:    sub r0, r0, r1
@@ -244,8 +243,9 @@ define i1 @test_srem_pow2_setne(i6 %X) nounwind {
 ;
 ; ARM8-LABEL: test_srem_pow2_setne:
 ; ARM8:       @ %bb.0:
-; ARM8-NEXT:    sbfx r1, r0, #0, #6
-; ARM8-NEXT:    ubfx r1, r1, #9, #2
+; ARM8-NEXT:    lsl r1, r0, #26
+; ARM8-NEXT:    mov r2, #3
+; ARM8-NEXT:    and r1, r2, r1, asr #31
 ; ARM8-NEXT:    add r1, r0, r1
 ; ARM8-NEXT:    and r1, r1, #60
 ; ARM8-NEXT:    sub r0, r0, r1
@@ -255,8 +255,9 @@ define i1 @test_srem_pow2_setne(i6 %X) nounwind {
 ;
 ; NEON7-LABEL: test_srem_pow2_setne:
 ; NEON7:       @ %bb.0:
-; NEON7-NEXT:    sbfx r1, r0, #0, #6
-; NEON7-NEXT:    ubfx r1, r1, #9, #2
+; NEON7-NEXT:    lsl r1, r0, #26
+; NEON7-NEXT:    mov r2, #3
+; NEON7-NEXT:    and r1, r2, r1, asr #31
 ; NEON7-NEXT:    add r1, r0, r1
 ; NEON7-NEXT:    and r1, r1, #60
 ; NEON7-NEXT:    sub r0, r0, r1
@@ -266,8 +267,9 @@ define i1 @test_srem_pow2_setne(i6 %X) nounwind {
 ;
 ; NEON8-LABEL: test_srem_pow2_setne:
 ; NEON8:       @ %bb.0:
-; NEON8-NEXT:    sbfx r1, r0, #0, #6
-; NEON8-NEXT:    ubfx r1, r1, #9, #2
+; NEON8-NEXT:    lsl r1, r0, #26
+; NEON8-NEXT:    mov r2, #3
+; NEON8-NEXT:    and r1, r2, r1, asr #31
 ; NEON8-NEXT:    add r1, r0, r1
 ; NEON8-NEXT:    and r1, r1, #60
 ; NEON8-NEXT:    sub r0, r0, r1

diff  --git a/llvm/test/CodeGen/Mips/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/Mips/srem-seteq-illegal-types.ll
index 1a9fa27c263deb..37cca8687890a6 100644
--- a/llvm/test/CodeGen/Mips/srem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/Mips/srem-seteq-illegal-types.ll
@@ -90,8 +90,7 @@ define i1 @test_srem_pow2_setne(i6 %X) nounwind {
 ; MIPSEL-LABEL: test_srem_pow2_setne:
 ; MIPSEL:       # %bb.0:
 ; MIPSEL-NEXT:    sll $1, $4, 26
-; MIPSEL-NEXT:    sra $1, $1, 26
-; MIPSEL-NEXT:    srl $1, $1, 9
+; MIPSEL-NEXT:    sra $1, $1, 31
 ; MIPSEL-NEXT:    andi $1, $1, 3
 ; MIPSEL-NEXT:    addu $1, $4, $1
 ; MIPSEL-NEXT:    andi $1, $1, 60
@@ -104,8 +103,7 @@ define i1 @test_srem_pow2_setne(i6 %X) nounwind {
 ; MIPS64EL:       # %bb.0:
 ; MIPS64EL-NEXT:    sll $1, $4, 0
 ; MIPS64EL-NEXT:    sll $2, $1, 26
-; MIPS64EL-NEXT:    sra $2, $2, 26
-; MIPS64EL-NEXT:    srl $2, $2, 9
+; MIPS64EL-NEXT:    sra $2, $2, 31
 ; MIPS64EL-NEXT:    andi $2, $2, 3
 ; MIPS64EL-NEXT:    addu $2, $1, $2
 ; MIPS64EL-NEXT:    andi $2, $2, 60

diff  --git a/llvm/test/CodeGen/PowerPC/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/PowerPC/srem-seteq-illegal-types.ll
index b0cc89d1828eda..2b07f27be021b1 100644
--- a/llvm/test/CodeGen/PowerPC/srem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/PowerPC/srem-seteq-illegal-types.ll
@@ -85,8 +85,8 @@ define i1 @test_srem_pow2_setne(i6 %X) nounwind {
 ; PPC-LABEL: test_srem_pow2_setne:
 ; PPC:       # %bb.0:
 ; PPC-NEXT:    slwi 4, 3, 26
-; PPC-NEXT:    srawi 4, 4, 26
-; PPC-NEXT:    rlwinm 4, 4, 23, 30, 31
+; PPC-NEXT:    srawi 4, 4, 31
+; PPC-NEXT:    clrlwi 4, 4, 30
 ; PPC-NEXT:    add 4, 3, 4
 ; PPC-NEXT:    rlwinm 4, 4, 0, 26, 29
 ; PPC-NEXT:    sub 3, 3, 4
@@ -99,8 +99,8 @@ define i1 @test_srem_pow2_setne(i6 %X) nounwind {
 ; PPC64LE-LABEL: test_srem_pow2_setne:
 ; PPC64LE:       # %bb.0:
 ; PPC64LE-NEXT:    slwi 4, 3, 26
-; PPC64LE-NEXT:    srawi 4, 4, 26
-; PPC64LE-NEXT:    rlwinm 4, 4, 23, 30, 31
+; PPC64LE-NEXT:    srawi 4, 4, 31
+; PPC64LE-NEXT:    clrlwi 4, 4, 30
 ; PPC64LE-NEXT:    add 4, 3, 4
 ; PPC64LE-NEXT:    rlwinm 4, 4, 0, 26, 29
 ; PPC64LE-NEXT:    sub 3, 3, 4

diff  --git a/llvm/test/CodeGen/RISCV/div.ll b/llvm/test/CodeGen/RISCV/div.ll
index 99c83b99497dd3..f4e67698473151 100644
--- a/llvm/test/CodeGen/RISCV/div.ll
+++ b/llvm/test/CodeGen/RISCV/div.ll
@@ -1017,8 +1017,7 @@ define i8 @sdiv8_pow2(i8 %a) nounwind {
 ; RV32I-LABEL: sdiv8_pow2:
 ; RV32I:       # %bb.0:
 ; RV32I-NEXT:    slli a1, a0, 24
-; RV32I-NEXT:    srai a1, a1, 24
-; RV32I-NEXT:    slli a1, a1, 17
+; RV32I-NEXT:    srai a1, a1, 2
 ; RV32I-NEXT:    srli a1, a1, 29
 ; RV32I-NEXT:    add a0, a0, a1
 ; RV32I-NEXT:    slli a0, a0, 24
@@ -1028,8 +1027,7 @@ define i8 @sdiv8_pow2(i8 %a) nounwind {
 ; RV32IM-LABEL: sdiv8_pow2:
 ; RV32IM:       # %bb.0:
 ; RV32IM-NEXT:    slli a1, a0, 24
-; RV32IM-NEXT:    srai a1, a1, 24
-; RV32IM-NEXT:    slli a1, a1, 17
+; RV32IM-NEXT:    srai a1, a1, 2
 ; RV32IM-NEXT:    srli a1, a1, 29
 ; RV32IM-NEXT:    add a0, a0, a1
 ; RV32IM-NEXT:    slli a0, a0, 24
@@ -1039,8 +1037,7 @@ define i8 @sdiv8_pow2(i8 %a) nounwind {
 ; RV64I-LABEL: sdiv8_pow2:
 ; RV64I:       # %bb.0:
 ; RV64I-NEXT:    slli a1, a0, 56
-; RV64I-NEXT:    srai a1, a1, 56
-; RV64I-NEXT:    slli a1, a1, 49
+; RV64I-NEXT:    srai a1, a1, 2
 ; RV64I-NEXT:    srli a1, a1, 61
 ; RV64I-NEXT:    add a0, a0, a1
 ; RV64I-NEXT:    slli a0, a0, 56
@@ -1050,8 +1047,7 @@ define i8 @sdiv8_pow2(i8 %a) nounwind {
 ; RV64IM-LABEL: sdiv8_pow2:
 ; RV64IM:       # %bb.0:
 ; RV64IM-NEXT:    slli a1, a0, 56
-; RV64IM-NEXT:    srai a1, a1, 56
-; RV64IM-NEXT:    slli a1, a1, 49
+; RV64IM-NEXT:    srai a1, a1, 2
 ; RV64IM-NEXT:    srli a1, a1, 61
 ; RV64IM-NEXT:    add a0, a0, a1
 ; RV64IM-NEXT:    slli a0, a0, 56
@@ -1209,8 +1205,7 @@ define i16 @sdiv16_pow2(i16 %a) nounwind {
 ; RV32I-LABEL: sdiv16_pow2:
 ; RV32I:       # %bb.0:
 ; RV32I-NEXT:    slli a1, a0, 16
-; RV32I-NEXT:    srai a1, a1, 16
-; RV32I-NEXT:    slli a1, a1, 1
+; RV32I-NEXT:    srai a1, a1, 2
 ; RV32I-NEXT:    srli a1, a1, 29
 ; RV32I-NEXT:    add a0, a0, a1
 ; RV32I-NEXT:    slli a0, a0, 16
@@ -1220,8 +1215,7 @@ define i16 @sdiv16_pow2(i16 %a) nounwind {
 ; RV32IM-LABEL: sdiv16_pow2:
 ; RV32IM:       # %bb.0:
 ; RV32IM-NEXT:    slli a1, a0, 16
-; RV32IM-NEXT:    srai a1, a1, 16
-; RV32IM-NEXT:    slli a1, a1, 1
+; RV32IM-NEXT:    srai a1, a1, 2
 ; RV32IM-NEXT:    srli a1, a1, 29
 ; RV32IM-NEXT:    add a0, a0, a1
 ; RV32IM-NEXT:    slli a0, a0, 16
@@ -1231,8 +1225,7 @@ define i16 @sdiv16_pow2(i16 %a) nounwind {
 ; RV64I-LABEL: sdiv16_pow2:
 ; RV64I:       # %bb.0:
 ; RV64I-NEXT:    slli a1, a0, 48
-; RV64I-NEXT:    srai a1, a1, 48
-; RV64I-NEXT:    slli a1, a1, 33
+; RV64I-NEXT:    srai a1, a1, 2
 ; RV64I-NEXT:    srli a1, a1, 61
 ; RV64I-NEXT:    add a0, a0, a1
 ; RV64I-NEXT:    slli a0, a0, 48
@@ -1242,8 +1235,7 @@ define i16 @sdiv16_pow2(i16 %a) nounwind {
 ; RV64IM-LABEL: sdiv16_pow2:
 ; RV64IM:       # %bb.0:
 ; RV64IM-NEXT:    slli a1, a0, 48
-; RV64IM-NEXT:    srai a1, a1, 48
-; RV64IM-NEXT:    slli a1, a1, 33
+; RV64IM-NEXT:    srai a1, a1, 2
 ; RV64IM-NEXT:    srli a1, a1, 61
 ; RV64IM-NEXT:    add a0, a0, a1
 ; RV64IM-NEXT:    slli a0, a0, 48

diff  --git a/llvm/test/CodeGen/RISCV/rv64zba.ll b/llvm/test/CodeGen/RISCV/rv64zba.ll
index 20a04844640188..87796e2c7b72e9 100644
--- a/llvm/test/CodeGen/RISCV/rv64zba.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zba.ll
@@ -1555,16 +1555,14 @@ define zeroext i32 @sext_ashr_zext_i8(i8 %a) nounwind {
 ; RV64I-LABEL: sext_ashr_zext_i8:
 ; RV64I:       # %bb.0:
 ; RV64I-NEXT:    slli a0, a0, 56
-; RV64I-NEXT:    srai a0, a0, 56
-; RV64I-NEXT:    slli a0, a0, 23
+; RV64I-NEXT:    srai a0, a0, 31
 ; RV64I-NEXT:    srli a0, a0, 32
 ; RV64I-NEXT:    ret
 ;
 ; RV64ZBANOZBB-LABEL: sext_ashr_zext_i8:
 ; RV64ZBANOZBB:       # %bb.0:
 ; RV64ZBANOZBB-NEXT:    slli a0, a0, 56
-; RV64ZBANOZBB-NEXT:    srai a0, a0, 56
-; RV64ZBANOZBB-NEXT:    slli a0, a0, 23
+; RV64ZBANOZBB-NEXT:    srai a0, a0, 31
 ; RV64ZBANOZBB-NEXT:    srli a0, a0, 32
 ; RV64ZBANOZBB-NEXT:    ret
 ;
@@ -1674,16 +1672,14 @@ define zeroext i32 @sext_ashr_zext_i16(i16 %a) nounwind {
 ; RV64I-LABEL: sext_ashr_zext_i16:
 ; RV64I:       # %bb.0:
 ; RV64I-NEXT:    slli a0, a0, 48
-; RV64I-NEXT:    srai a0, a0, 48
-; RV64I-NEXT:    slli a0, a0, 23
+; RV64I-NEXT:    srai a0, a0, 25
 ; RV64I-NEXT:    srli a0, a0, 32
 ; RV64I-NEXT:    ret
 ;
 ; RV64ZBANOZBB-LABEL: sext_ashr_zext_i16:
 ; RV64ZBANOZBB:       # %bb.0:
 ; RV64ZBANOZBB-NEXT:    slli a0, a0, 48
-; RV64ZBANOZBB-NEXT:    srai a0, a0, 48
-; RV64ZBANOZBB-NEXT:    slli a0, a0, 23
+; RV64ZBANOZBB-NEXT:    srai a0, a0, 25
 ; RV64ZBANOZBB-NEXT:    srli a0, a0, 32
 ; RV64ZBANOZBB-NEXT:    ret
 ;

diff  --git a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
index 457d0380ca8a83..dc27158cfb31f3 100644
--- a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
@@ -222,8 +222,7 @@ define i1 @test_srem_pow2_setne(i6 %X) nounwind {
 ; RV32-LABEL: test_srem_pow2_setne:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    slli a1, a0, 26
-; RV32-NEXT:    srai a1, a1, 26
-; RV32-NEXT:    slli a1, a1, 21
+; RV32-NEXT:    srai a1, a1, 1
 ; RV32-NEXT:    srli a1, a1, 30
 ; RV32-NEXT:    add a1, a0, a1
 ; RV32-NEXT:    andi a1, a1, 60
@@ -235,8 +234,7 @@ define i1 @test_srem_pow2_setne(i6 %X) nounwind {
 ; RV64-LABEL: test_srem_pow2_setne:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    slli a1, a0, 58
-; RV64-NEXT:    srai a1, a1, 58
-; RV64-NEXT:    slli a1, a1, 53
+; RV64-NEXT:    srai a1, a1, 1
 ; RV64-NEXT:    srli a1, a1, 62
 ; RV64-NEXT:    add a1, a0, a1
 ; RV64-NEXT:    andi a1, a1, 60
@@ -248,8 +246,7 @@ define i1 @test_srem_pow2_setne(i6 %X) nounwind {
 ; RV32M-LABEL: test_srem_pow2_setne:
 ; RV32M:       # %bb.0:
 ; RV32M-NEXT:    slli a1, a0, 26
-; RV32M-NEXT:    srai a1, a1, 26
-; RV32M-NEXT:    slli a1, a1, 21
+; RV32M-NEXT:    srai a1, a1, 1
 ; RV32M-NEXT:    srli a1, a1, 30
 ; RV32M-NEXT:    add a1, a0, a1
 ; RV32M-NEXT:    andi a1, a1, 60
@@ -261,8 +258,7 @@ define i1 @test_srem_pow2_setne(i6 %X) nounwind {
 ; RV64M-LABEL: test_srem_pow2_setne:
 ; RV64M:       # %bb.0:
 ; RV64M-NEXT:    slli a1, a0, 58
-; RV64M-NEXT:    srai a1, a1, 58
-; RV64M-NEXT:    slli a1, a1, 53
+; RV64M-NEXT:    srai a1, a1, 1
 ; RV64M-NEXT:    srli a1, a1, 62
 ; RV64M-NEXT:    add a1, a0, a1
 ; RV64M-NEXT:    andi a1, a1, 60
@@ -274,8 +270,7 @@ define i1 @test_srem_pow2_setne(i6 %X) nounwind {
 ; RV32MV-LABEL: test_srem_pow2_setne:
 ; RV32MV:       # %bb.0:
 ; RV32MV-NEXT:    slli a1, a0, 26
-; RV32MV-NEXT:    srai a1, a1, 26
-; RV32MV-NEXT:    slli a1, a1, 21
+; RV32MV-NEXT:    srai a1, a1, 1
 ; RV32MV-NEXT:    srli a1, a1, 30
 ; RV32MV-NEXT:    add a1, a0, a1
 ; RV32MV-NEXT:    andi a1, a1, 60
@@ -287,8 +282,7 @@ define i1 @test_srem_pow2_setne(i6 %X) nounwind {
 ; RV64MV-LABEL: test_srem_pow2_setne:
 ; RV64MV:       # %bb.0:
 ; RV64MV-NEXT:    slli a1, a0, 58
-; RV64MV-NEXT:    srai a1, a1, 58
-; RV64MV-NEXT:    slli a1, a1, 53
+; RV64MV-NEXT:    srai a1, a1, 1
 ; RV64MV-NEXT:    srli a1, a1, 62
 ; RV64MV-NEXT:    add a1, a0, a1
 ; RV64MV-NEXT:    andi a1, a1, 60

diff  --git a/llvm/test/CodeGen/Thumb2/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/Thumb2/srem-seteq-illegal-types.ll
index 58bafebd5b702f..e3d65a336978b3 100644
--- a/llvm/test/CodeGen/Thumb2/srem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/Thumb2/srem-seteq-illegal-types.ll
@@ -44,8 +44,9 @@ define i1 @test_srem_even(i4 %X) nounwind {
 define i1 @test_srem_pow2_setne(i6 %X) nounwind {
 ; CHECK-LABEL: test_srem_pow2_setne:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    sbfx r1, r0, #0, #6
-; CHECK-NEXT:    ubfx r1, r1, #9, #2
+; CHECK-NEXT:    lsls r1, r0, #26
+; CHECK-NEXT:    movs r2, #3
+; CHECK-NEXT:    and.w r1, r2, r1, asr #31
 ; CHECK-NEXT:    add r1, r0
 ; CHECK-NEXT:    and r1, r1, #60
 ; CHECK-NEXT:    subs r0, r0, r1

diff  --git a/llvm/test/CodeGen/X86/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/X86/srem-seteq-illegal-types.ll
index d644ed87c3c108..cc4bda81bef527 100644
--- a/llvm/test/CodeGen/X86/srem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/X86/srem-seteq-illegal-types.ll
@@ -82,8 +82,7 @@ define i1 @test_srem_pow2_setne(i6 %X) nounwind {
 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movl %eax, %ecx
 ; X86-NEXT:    shlb $2, %cl
-; X86-NEXT:    sarb $5, %cl
-; X86-NEXT:    shrb $4, %cl
+; X86-NEXT:    sarb $7, %cl
 ; X86-NEXT:    andb $3, %cl
 ; X86-NEXT:    addb %al, %cl
 ; X86-NEXT:    andb $60, %cl
@@ -96,8 +95,7 @@ define i1 @test_srem_pow2_setne(i6 %X) nounwind {
 ; X64:       # %bb.0:
 ; X64-NEXT:    # kill: def $edi killed $edi def $rdi
 ; X64-NEXT:    leal (,%rdi,4), %eax
-; X64-NEXT:    sarb $5, %al
-; X64-NEXT:    shrb $4, %al
+; X64-NEXT:    sarb $7, %al
 ; X64-NEXT:    andb $3, %al
 ; X64-NEXT:    addb %dil, %al
 ; X64-NEXT:    andb $60, %al


        


More information about the llvm-commits mailing list