[llvm] 76f90a9 - [SelectionDAG] Clear promoted bits before UREM on shift amount in PromoteIntRes_FunnelShift.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Fri May 6 09:26:45 PDT 2022
Author: Craig Topper
Date: 2022-05-06T09:26:30-07:00
New Revision: 76f90a9d71ee0e6d7ad1f9d67a66d97112328f82
URL: https://github.com/llvm/llvm-project/commit/76f90a9d71ee0e6d7ad1f9d67a66d97112328f82
DIFF: https://github.com/llvm/llvm-project/commit/76f90a9d71ee0e6d7ad1f9d67a66d97112328f82.diff
LOG: [SelectionDAG] Clear promoted bits before UREM on shift amount in PromoteIntRes_FunnelShift.
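Use ZExtPromotedInteger instead of GetPromotedInteger for the shift amount
so that the promoted (upper) bits are cleared before the UREM. Otherwise we
have garbage in the upper bits that can affect the results of the UREM.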
Fixes PR55296.
Differential Revision: https://reviews.llvm.org/D125076
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
llvm/test/CodeGen/AArch64/funnel-shift.ll
llvm/test/CodeGen/AMDGPU/fshr.ll
llvm/test/CodeGen/ARM/funnel-shift.ll
llvm/test/CodeGen/Mips/funnel-shift.ll
llvm/test/CodeGen/PowerPC/funnel-shift.ll
llvm/test/CodeGen/X86/funnel-shift.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index a1ddb02563e3b..ba76a46961467 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -1277,7 +1277,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Rotate(SDNode *N) {
SDValue DAGTypeLegalizer::PromoteIntRes_FunnelShift(SDNode *N) {
SDValue Hi = GetPromotedInteger(N->getOperand(0));
SDValue Lo = GetPromotedInteger(N->getOperand(1));
- SDValue Amt = GetPromotedInteger(N->getOperand(2));
+ SDValue Amt = ZExtPromotedInteger(N->getOperand(2));
SDLoc DL(N);
EVT OldVT = N->getOperand(0).getValueType();
diff --git a/llvm/test/CodeGen/AArch64/funnel-shift.ll b/llvm/test/CodeGen/AArch64/funnel-shift.ll
index fbf00a59f3cf1..6dfc61046c5e8 100644
--- a/llvm/test/CodeGen/AArch64/funnel-shift.ll
+++ b/llvm/test/CodeGen/AArch64/funnel-shift.ll
@@ -69,13 +69,14 @@ declare i37 @llvm.fshl.i37(i37, i37, i37)
define i37 @fshl_i37(i37 %x, i37 %y, i37 %z) {
; CHECK-LABEL: fshl_i37:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov x8, #31883
-; CHECK-NEXT: mov w9, #37
-; CHECK-NEXT: movk x8, #3542, lsl #16
+; CHECK-NEXT: mov x9, #31883
+; CHECK-NEXT: and x8, x2, #0x1fffffffff
+; CHECK-NEXT: movk x9, #3542, lsl #16
; CHECK-NEXT: ubfiz x10, x1, #26, #37
-; CHECK-NEXT: movk x8, #51366, lsl #32
-; CHECK-NEXT: movk x8, #56679, lsl #48
-; CHECK-NEXT: umulh x8, x2, x8
+; CHECK-NEXT: movk x9, #51366, lsl #32
+; CHECK-NEXT: movk x9, #56679, lsl #48
+; CHECK-NEXT: umulh x8, x8, x9
+; CHECK-NEXT: mov w9, #37
; CHECK-NEXT: ubfx x8, x8, #5, #27
; CHECK-NEXT: msub w8, w8, w9, w2
; CHECK-NEXT: mvn w9, w8
@@ -206,14 +207,15 @@ declare i37 @llvm.fshr.i37(i37, i37, i37)
define i37 @fshr_i37(i37 %x, i37 %y, i37 %z) {
; CHECK-LABEL: fshr_i37:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov x8, #31883
-; CHECK-NEXT: mov w9, #37
-; CHECK-NEXT: movk x8, #3542, lsl #16
+; CHECK-NEXT: mov x9, #31883
+; CHECK-NEXT: and x8, x2, #0x1fffffffff
+; CHECK-NEXT: movk x9, #3542, lsl #16
; CHECK-NEXT: lsl x10, x1, #27
-; CHECK-NEXT: movk x8, #51366, lsl #32
+; CHECK-NEXT: movk x9, #51366, lsl #32
; CHECK-NEXT: lsl x11, x0, #1
-; CHECK-NEXT: movk x8, #56679, lsl #48
-; CHECK-NEXT: umulh x8, x2, x8
+; CHECK-NEXT: movk x9, #56679, lsl #48
+; CHECK-NEXT: umulh x8, x8, x9
+; CHECK-NEXT: mov w9, #37
; CHECK-NEXT: lsr x8, x8, #5
; CHECK-NEXT: msub w8, w8, w9, w2
; CHECK-NEXT: add w8, w8, #27
diff --git a/llvm/test/CodeGen/AMDGPU/fshr.ll b/llvm/test/CodeGen/AMDGPU/fshr.ll
index 3eeec671fbc4b..994cc78ec7917 100644
--- a/llvm/test/CodeGen/AMDGPU/fshr.ll
+++ b/llvm/test/CodeGen/AMDGPU/fshr.ll
@@ -1101,11 +1101,12 @@ define i24 @v_fshr_i24(i24 %src0, i24 %src1, i24 %src2) {
; SI-LABEL: v_fshr_i24:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-NEXT: v_and_b32_e32 v3, 0xffffff, v2
; SI-NEXT: s_mov_b32 s4, 0xaaaaaaab
-; SI-NEXT: v_mul_hi_u32 v3, v2, s4
+; SI-NEXT: v_mul_hi_u32 v3, v3, s4
; SI-NEXT: v_lshlrev_b32_e32 v1, 8, v1
; SI-NEXT: v_lshrrev_b32_e32 v3, 4, v3
-; SI-NEXT: v_mul_lo_u32 v3, v3, 24
+; SI-NEXT: v_mul_u32_u24_e32 v3, 24, v3
; SI-NEXT: v_sub_i32_e32 v2, vcc, v2, v3
; SI-NEXT: v_add_i32_e32 v2, vcc, 8, v2
; SI-NEXT: v_alignbit_b32 v0, v0, v1, v2
@@ -1114,11 +1115,12 @@ define i24 @v_fshr_i24(i24 %src0, i24 %src1, i24 %src2) {
; VI-LABEL: v_fshr_i24:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT: v_and_b32_e32 v3, 0xffffff, v2
; VI-NEXT: s_mov_b32 s4, 0xaaaaaaab
-; VI-NEXT: v_mul_hi_u32 v3, v2, s4
+; VI-NEXT: v_mul_hi_u32 v3, v3, s4
; VI-NEXT: v_lshlrev_b32_e32 v1, 8, v1
; VI-NEXT: v_lshrrev_b32_e32 v3, 4, v3
-; VI-NEXT: v_mul_lo_u32 v3, v3, 24
+; VI-NEXT: v_mul_u32_u24_e32 v3, 24, v3
; VI-NEXT: v_sub_u32_e32 v2, vcc, v2, v3
; VI-NEXT: v_add_u32_e32 v2, vcc, 8, v2
; VI-NEXT: v_alignbit_b32 v0, v0, v1, v2
@@ -1127,11 +1129,12 @@ define i24 @v_fshr_i24(i24 %src0, i24 %src1, i24 %src2) {
; GFX9-LABEL: v_fshr_i24:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_and_b32_e32 v3, 0xffffff, v2
; GFX9-NEXT: s_mov_b32 s4, 0xaaaaaaab
-; GFX9-NEXT: v_mul_hi_u32 v3, v2, s4
+; GFX9-NEXT: v_mul_hi_u32 v3, v3, s4
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 8, v1
; GFX9-NEXT: v_lshrrev_b32_e32 v3, 4, v3
-; GFX9-NEXT: v_mul_lo_u32 v3, v3, 24
+; GFX9-NEXT: v_mul_u32_u24_e32 v3, 24, v3
; GFX9-NEXT: v_sub_u32_e32 v2, v2, v3
; GFX9-NEXT: v_add_u32_e32 v2, 8, v2
; GFX9-NEXT: v_alignbit_b32 v0, v0, v1, v2
@@ -1146,10 +1149,11 @@ define i24 @v_fshr_i24(i24 %src0, i24 %src1, i24 %src2) {
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: v_mul_hi_u32 v3, 0xaaaaaaab, v2
+; GFX10-NEXT: v_and_b32_e32 v3, 0xffffff, v2
; GFX10-NEXT: v_lshlrev_b32_e32 v1, 8, v1
+; GFX10-NEXT: v_mul_hi_u32 v3, 0xaaaaaaab, v3
; GFX10-NEXT: v_lshrrev_b32_e32 v3, 4, v3
-; GFX10-NEXT: v_mul_lo_u32 v3, v3, 24
+; GFX10-NEXT: v_mul_u32_u24_e32 v3, 24, v3
; GFX10-NEXT: v_sub_nc_u32_e32 v2, v2, v3
; GFX10-NEXT: v_add_nc_u32_e32 v2, 8, v2
; GFX10-NEXT: v_alignbit_b32 v0, v0, v1, v2
@@ -1162,19 +1166,22 @@ define <2 x i24> @v_fshr_v2i24(<2 x i24> %src0, <2 x i24> %src1, <2 x i24> %src2
; SI-LABEL: v_fshr_v2i24:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-NEXT: s_mov_b32 s4, 0xaaaaaaab
-; SI-NEXT: v_mul_hi_u32 v6, v4, s4
-; SI-NEXT: v_mul_hi_u32 v7, v5, s4
+; SI-NEXT: s_mov_b32 s4, 0xffffff
+; SI-NEXT: v_and_b32_e32 v6, s4, v4
+; SI-NEXT: s_mov_b32 s5, 0xaaaaaaab
+; SI-NEXT: v_mul_hi_u32 v6, v6, s5
+; SI-NEXT: v_and_b32_e32 v7, s4, v5
; SI-NEXT: v_lshlrev_b32_e32 v2, 8, v2
; SI-NEXT: v_lshrrev_b32_e32 v6, 4, v6
-; SI-NEXT: v_mul_lo_u32 v6, v6, 24
+; SI-NEXT: v_mul_u32_u24_e32 v6, 24, v6
; SI-NEXT: v_sub_i32_e32 v4, vcc, v4, v6
-; SI-NEXT: v_lshrrev_b32_e32 v6, 4, v7
-; SI-NEXT: v_mul_lo_u32 v6, v6, 24
+; SI-NEXT: v_mul_hi_u32 v6, v7, s5
; SI-NEXT: v_add_i32_e32 v4, vcc, 8, v4
; SI-NEXT: v_alignbit_b32 v0, v0, v2, v4
; SI-NEXT: v_lshlrev_b32_e32 v2, 8, v3
-; SI-NEXT: v_sub_i32_e32 v3, vcc, v5, v6
+; SI-NEXT: v_lshrrev_b32_e32 v3, 4, v6
+; SI-NEXT: v_mul_u32_u24_e32 v3, 24, v3
+; SI-NEXT: v_sub_i32_e32 v3, vcc, v5, v3
; SI-NEXT: v_add_i32_e32 v3, vcc, 8, v3
; SI-NEXT: v_alignbit_b32 v1, v1, v2, v3
; SI-NEXT: s_setpc_b64 s[30:31]
@@ -1182,19 +1189,22 @@ define <2 x i24> @v_fshr_v2i24(<2 x i24> %src0, <2 x i24> %src1, <2 x i24> %src2
; VI-LABEL: v_fshr_v2i24:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-NEXT: s_mov_b32 s4, 0xaaaaaaab
-; VI-NEXT: v_mul_hi_u32 v6, v4, s4
-; VI-NEXT: v_mul_hi_u32 v7, v5, s4
+; VI-NEXT: s_mov_b32 s4, 0xffffff
+; VI-NEXT: v_and_b32_e32 v6, s4, v4
+; VI-NEXT: s_mov_b32 s5, 0xaaaaaaab
+; VI-NEXT: v_mul_hi_u32 v6, v6, s5
+; VI-NEXT: v_and_b32_e32 v7, s4, v5
; VI-NEXT: v_lshlrev_b32_e32 v2, 8, v2
; VI-NEXT: v_lshrrev_b32_e32 v6, 4, v6
-; VI-NEXT: v_mul_lo_u32 v6, v6, 24
+; VI-NEXT: v_mul_u32_u24_e32 v6, 24, v6
; VI-NEXT: v_sub_u32_e32 v4, vcc, v4, v6
-; VI-NEXT: v_lshrrev_b32_e32 v6, 4, v7
-; VI-NEXT: v_mul_lo_u32 v6, v6, 24
+; VI-NEXT: v_mul_hi_u32 v6, v7, s5
; VI-NEXT: v_add_u32_e32 v4, vcc, 8, v4
; VI-NEXT: v_alignbit_b32 v0, v0, v2, v4
; VI-NEXT: v_lshlrev_b32_e32 v2, 8, v3
-; VI-NEXT: v_sub_u32_e32 v3, vcc, v5, v6
+; VI-NEXT: v_lshrrev_b32_e32 v3, 4, v6
+; VI-NEXT: v_mul_u32_u24_e32 v3, 24, v3
+; VI-NEXT: v_sub_u32_e32 v3, vcc, v5, v3
; VI-NEXT: v_add_u32_e32 v3, vcc, 8, v3
; VI-NEXT: v_alignbit_b32 v1, v1, v2, v3
; VI-NEXT: s_setpc_b64 s[30:31]
@@ -1202,19 +1212,22 @@ define <2 x i24> @v_fshr_v2i24(<2 x i24> %src0, <2 x i24> %src1, <2 x i24> %src2
; GFX9-LABEL: v_fshr_v2i24:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_mov_b32 s4, 0xaaaaaaab
-; GFX9-NEXT: v_mul_hi_u32 v6, v4, s4
-; GFX9-NEXT: v_mul_hi_u32 v7, v5, s4
+; GFX9-NEXT: s_mov_b32 s4, 0xffffff
+; GFX9-NEXT: v_and_b32_e32 v6, s4, v4
+; GFX9-NEXT: s_mov_b32 s5, 0xaaaaaaab
+; GFX9-NEXT: v_mul_hi_u32 v6, v6, s5
+; GFX9-NEXT: v_and_b32_e32 v7, s4, v5
; GFX9-NEXT: v_lshlrev_b32_e32 v2, 8, v2
; GFX9-NEXT: v_lshrrev_b32_e32 v6, 4, v6
-; GFX9-NEXT: v_mul_lo_u32 v6, v6, 24
+; GFX9-NEXT: v_mul_u32_u24_e32 v6, 24, v6
; GFX9-NEXT: v_sub_u32_e32 v4, v4, v6
-; GFX9-NEXT: v_lshrrev_b32_e32 v6, 4, v7
-; GFX9-NEXT: v_mul_lo_u32 v6, v6, 24
+; GFX9-NEXT: v_mul_hi_u32 v6, v7, s5
; GFX9-NEXT: v_add_u32_e32 v4, 8, v4
; GFX9-NEXT: v_alignbit_b32 v0, v0, v2, v4
; GFX9-NEXT: v_lshlrev_b32_e32 v2, 8, v3
-; GFX9-NEXT: v_sub_u32_e32 v3, v5, v6
+; GFX9-NEXT: v_lshrrev_b32_e32 v3, 4, v6
+; GFX9-NEXT: v_mul_u32_u24_e32 v3, 24, v3
+; GFX9-NEXT: v_sub_u32_e32 v3, v5, v3
; GFX9-NEXT: v_add_u32_e32 v3, 8, v3
; GFX9-NEXT: v_alignbit_b32 v1, v1, v2, v3
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -1228,15 +1241,18 @@ define <2 x i24> @v_fshr_v2i24(<2 x i24> %src0, <2 x i24> %src1, <2 x i24> %src2
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: s_mov_b32 s4, 0xaaaaaaab
+; GFX10-NEXT: s_mov_b32 s4, 0xffffff
; GFX10-NEXT: v_lshlrev_b32_e32 v2, 8, v2
-; GFX10-NEXT: v_mul_hi_u32 v6, v4, s4
-; GFX10-NEXT: v_mul_hi_u32 v7, v5, s4
+; GFX10-NEXT: v_and_b32_e32 v6, s4, v4
+; GFX10-NEXT: v_and_b32_e32 v7, s4, v5
+; GFX10-NEXT: s_mov_b32 s4, 0xaaaaaaab
; GFX10-NEXT: v_lshlrev_b32_e32 v3, 8, v3
+; GFX10-NEXT: v_mul_hi_u32 v6, v6, s4
+; GFX10-NEXT: v_mul_hi_u32 v7, v7, s4
; GFX10-NEXT: v_lshrrev_b32_e32 v6, 4, v6
; GFX10-NEXT: v_lshrrev_b32_e32 v7, 4, v7
-; GFX10-NEXT: v_mul_lo_u32 v6, v6, 24
-; GFX10-NEXT: v_mul_lo_u32 v7, v7, 24
+; GFX10-NEXT: v_mul_u32_u24_e32 v6, 24, v6
+; GFX10-NEXT: v_mul_u32_u24_e32 v7, 24, v7
; GFX10-NEXT: v_sub_nc_u32_e32 v4, v4, v6
; GFX10-NEXT: v_sub_nc_u32_e32 v5, v5, v7
; GFX10-NEXT: v_add_nc_u32_e32 v4, 8, v4
diff --git a/llvm/test/CodeGen/ARM/funnel-shift.ll b/llvm/test/CodeGen/ARM/funnel-shift.ll
index 25e210d819147..5a7c4384428e1 100644
--- a/llvm/test/CodeGen/ARM/funnel-shift.ll
+++ b/llvm/test/CodeGen/ARM/funnel-shift.ll
@@ -47,64 +47,66 @@ declare i37 @llvm.fshl.i37(i37, i37, i37)
define i37 @fshl_i37(i37 %x, i37 %y, i37 %z) {
; SCALAR-LABEL: fshl_i37:
; SCALAR: @ %bb.0:
-; SCALAR-NEXT: .save {r4, r5, r6, r7, r8, lr}
-; SCALAR-NEXT: push {r4, r5, r6, r7, r8, lr}
-; SCALAR-NEXT: mov r4, r1
+; SCALAR-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr}
+; SCALAR-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr}
; SCALAR-NEXT: mov r8, r0
-; SCALAR-NEXT: ldr r0, [sp, #24]
-; SCALAR-NEXT: mov r5, r3
-; SCALAR-NEXT: ldr r1, [sp, #28]
-; SCALAR-NEXT: mov r6, r2
+; SCALAR-NEXT: ldr r0, [sp, #36]
+; SCALAR-NEXT: mov r4, r1
+; SCALAR-NEXT: mov r6, r3
+; SCALAR-NEXT: and r1, r0, #31
+; SCALAR-NEXT: ldr r0, [sp, #32]
+; SCALAR-NEXT: mov r9, r2
; SCALAR-NEXT: mov r2, #37
; SCALAR-NEXT: mov r3, #0
; SCALAR-NEXT: bl __aeabi_uldivmod
-; SCALAR-NEXT: lsl r1, r5, #27
-; SCALAR-NEXT: ands r12, r2, #32
-; SCALAR-NEXT: orr r1, r1, r6, lsr #5
+; SCALAR-NEXT: lsl r1, r6, #27
+; SCALAR-NEXT: ands r0, r2, #32
+; SCALAR-NEXT: orr r1, r1, r9, lsr #5
; SCALAR-NEXT: mov r3, r8
-; SCALAR-NEXT: and r5, r2, #31
-; SCALAR-NEXT: mov r0, #31
+; SCALAR-NEXT: and r6, r2, #31
+; SCALAR-NEXT: mov r7, #31
; SCALAR-NEXT: movne r3, r1
-; SCALAR-NEXT: cmp r12, #0
-; SCALAR-NEXT: bic r2, r0, r2
-; SCALAR-NEXT: lslne r1, r6, #27
+; SCALAR-NEXT: cmp r0, #0
+; SCALAR-NEXT: lslne r1, r9, #27
+; SCALAR-NEXT: bic r2, r7, r2
; SCALAR-NEXT: movne r4, r8
-; SCALAR-NEXT: lsl r7, r3, r5
+; SCALAR-NEXT: lsl r5, r3, r6
; SCALAR-NEXT: lsr r0, r1, #1
-; SCALAR-NEXT: lsl r1, r4, r5
+; SCALAR-NEXT: lsl r1, r4, r6
; SCALAR-NEXT: lsr r3, r3, #1
-; SCALAR-NEXT: orr r0, r7, r0, lsr r2
+; SCALAR-NEXT: orr r0, r5, r0, lsr r2
; SCALAR-NEXT: orr r1, r1, r3, lsr r2
-; SCALAR-NEXT: pop {r4, r5, r6, r7, r8, pc}
+; SCALAR-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc}
;
; NEON-LABEL: fshl_i37:
; NEON: @ %bb.0:
; NEON-NEXT: .save {r4, r5, r6, r7, r11, lr}
; NEON-NEXT: push {r4, r5, r6, r7, r11, lr}
; NEON-NEXT: mov r4, r1
-; NEON-NEXT: mov r5, r0
-; NEON-NEXT: ldr r0, [sp, #24]
-; NEON-NEXT: mov r7, r3
; NEON-NEXT: ldr r1, [sp, #28]
-; NEON-NEXT: mov r6, r2
+; NEON-NEXT: mov r6, r0
+; NEON-NEXT: ldr r0, [sp, #24]
+; NEON-NEXT: and r1, r1, #31
+; NEON-NEXT: mov r5, r3
+; NEON-NEXT: mov r7, r2
; NEON-NEXT: mov r2, #37
; NEON-NEXT: mov r3, #0
; NEON-NEXT: bl __aeabi_uldivmod
; NEON-NEXT: mov r0, #31
; NEON-NEXT: bic r1, r0, r2
-; NEON-NEXT: lsl r0, r7, #27
+; NEON-NEXT: lsl r0, r5, #27
; NEON-NEXT: ands r12, r2, #32
-; NEON-NEXT: orr r0, r0, r6, lsr #5
-; NEON-NEXT: mov r7, r5
+; NEON-NEXT: orr r0, r0, r7, lsr #5
+; NEON-NEXT: mov r5, r6
; NEON-NEXT: and r2, r2, #31
-; NEON-NEXT: movne r7, r0
-; NEON-NEXT: lslne r0, r6, #27
+; NEON-NEXT: movne r5, r0
+; NEON-NEXT: lslne r0, r7, #27
; NEON-NEXT: cmp r12, #0
-; NEON-NEXT: lsl r3, r7, r2
+; NEON-NEXT: lsl r3, r5, r2
; NEON-NEXT: lsr r0, r0, #1
-; NEON-NEXT: movne r4, r5
+; NEON-NEXT: movne r4, r6
; NEON-NEXT: orr r0, r3, r0, lsr r1
-; NEON-NEXT: lsr r3, r7, #1
+; NEON-NEXT: lsr r3, r5, #1
; NEON-NEXT: lsl r2, r4, r2
; NEON-NEXT: orr r1, r2, r3, lsr r1
; NEON-NEXT: pop {r4, r5, r6, r7, r11, pc}
@@ -233,38 +235,73 @@ define i32 @fshr_i32(i32 %x, i32 %y, i32 %z) {
; Verify that weird types are minimally supported.
declare i37 @llvm.fshr.i37(i37, i37, i37)
define i37 @fshr_i37(i37 %x, i37 %y, i37 %z) {
-; CHECK-LABEL: fshr_i37:
-; CHECK: @ %bb.0:
-; CHECK-NEXT: .save {r4, r5, r6, r7, r11, lr}
-; CHECK-NEXT: push {r4, r5, r6, r7, r11, lr}
-; CHECK-NEXT: mov r4, r1
-; CHECK-NEXT: mov r6, r0
-; CHECK-NEXT: ldr r0, [sp, #24]
-; CHECK-NEXT: mov r5, r3
-; CHECK-NEXT: ldr r1, [sp, #28]
-; CHECK-NEXT: mov r7, r2
-; CHECK-NEXT: mov r2, #37
-; CHECK-NEXT: mov r3, #0
-; CHECK-NEXT: bl __aeabi_uldivmod
-; CHECK-NEXT: lsl r3, r5, #27
-; CHECK-NEXT: add r0, r2, #27
-; CHECK-NEXT: orr r3, r3, r7, lsr #5
-; CHECK-NEXT: mov r1, #31
-; CHECK-NEXT: ands r12, r0, #32
-; CHECK-NEXT: mov r5, r6
-; CHECK-NEXT: moveq r5, r3
-; CHECK-NEXT: bic r1, r1, r0
-; CHECK-NEXT: lsl r2, r5, #1
-; CHECK-NEXT: lsleq r3, r7, #27
-; CHECK-NEXT: cmp r12, #0
-; CHECK-NEXT: and r7, r0, #31
-; CHECK-NEXT: lsl r2, r2, r1
-; CHECK-NEXT: moveq r4, r6
-; CHECK-NEXT: orr r0, r2, r3, lsr r7
-; CHECK-NEXT: lsl r2, r4, #1
-; CHECK-NEXT: lsl r1, r2, r1
-; CHECK-NEXT: orr r1, r1, r5, lsr r7
-; CHECK-NEXT: pop {r4, r5, r6, r7, r11, pc}
+; SCALAR-LABEL: fshr_i37:
+; SCALAR: @ %bb.0:
+; SCALAR-NEXT: .save {r4, r5, r6, r7, r8, lr}
+; SCALAR-NEXT: push {r4, r5, r6, r7, r8, lr}
+; SCALAR-NEXT: mov r8, r0
+; SCALAR-NEXT: ldr r0, [sp, #28]
+; SCALAR-NEXT: mov r4, r1
+; SCALAR-NEXT: mov r5, r3
+; SCALAR-NEXT: and r1, r0, #31
+; SCALAR-NEXT: ldr r0, [sp, #24]
+; SCALAR-NEXT: mov r7, r2
+; SCALAR-NEXT: mov r2, #37
+; SCALAR-NEXT: mov r3, #0
+; SCALAR-NEXT: bl __aeabi_uldivmod
+; SCALAR-NEXT: lsl r3, r5, #27
+; SCALAR-NEXT: add r0, r2, #27
+; SCALAR-NEXT: orr r3, r3, r7, lsr #5
+; SCALAR-NEXT: ands r2, r0, #32
+; SCALAR-NEXT: mov r5, r8
+; SCALAR-NEXT: mov r1, #31
+; SCALAR-NEXT: moveq r5, r3
+; SCALAR-NEXT: lsleq r3, r7, #27
+; SCALAR-NEXT: cmp r2, #0
+; SCALAR-NEXT: bic r1, r1, r0
+; SCALAR-NEXT: moveq r4, r8
+; SCALAR-NEXT: lsl r6, r5, #1
+; SCALAR-NEXT: and r7, r0, #31
+; SCALAR-NEXT: lsl r2, r4, #1
+; SCALAR-NEXT: lsl r6, r6, r1
+; SCALAR-NEXT: lsl r1, r2, r1
+; SCALAR-NEXT: orr r0, r6, r3, lsr r7
+; SCALAR-NEXT: orr r1, r1, r5, lsr r7
+; SCALAR-NEXT: pop {r4, r5, r6, r7, r8, pc}
+;
+; NEON-LABEL: fshr_i37:
+; NEON: @ %bb.0:
+; NEON-NEXT: .save {r4, r5, r6, r7, r8, lr}
+; NEON-NEXT: push {r4, r5, r6, r7, r8, lr}
+; NEON-NEXT: mov r4, r1
+; NEON-NEXT: ldr r1, [sp, #28]
+; NEON-NEXT: mov r8, r0
+; NEON-NEXT: ldr r0, [sp, #24]
+; NEON-NEXT: and r1, r1, #31
+; NEON-NEXT: mov r5, r3
+; NEON-NEXT: mov r7, r2
+; NEON-NEXT: mov r2, #37
+; NEON-NEXT: mov r3, #0
+; NEON-NEXT: bl __aeabi_uldivmod
+; NEON-NEXT: lsl r3, r5, #27
+; NEON-NEXT: add r0, r2, #27
+; NEON-NEXT: orr r3, r3, r7, lsr #5
+; NEON-NEXT: ands r2, r0, #32
+; NEON-NEXT: mov r5, r8
+; NEON-NEXT: mov r1, #31
+; NEON-NEXT: moveq r5, r3
+; NEON-NEXT: lsleq r3, r7, #27
+; NEON-NEXT: cmp r2, #0
+; NEON-NEXT: bic r1, r1, r0
+; NEON-NEXT: moveq r4, r8
+; NEON-NEXT: lsl r6, r5, #1
+; NEON-NEXT: and r7, r0, #31
+; NEON-NEXT: lsl r2, r4, #1
+; NEON-NEXT: lsl r6, r6, r1
+; NEON-NEXT: lsl r1, r2, r1
+; NEON-NEXT: orr r0, r6, r3, lsr r7
+; NEON-NEXT: orr r1, r1, r5, lsr r7
+; NEON-NEXT: pop {r4, r5, r6, r7, r8, pc}
%f = call i37 @llvm.fshr.i37(i37 %x, i37 %y, i37 %z)
ret i37 %f
}
diff --git a/llvm/test/CodeGen/Mips/funnel-shift.ll b/llvm/test/CodeGen/Mips/funnel-shift.ll
index d4f47318ebb18..737e95c8262a3 100644
--- a/llvm/test/CodeGen/Mips/funnel-shift.ll
+++ b/llvm/test/CodeGen/Mips/funnel-shift.ll
@@ -66,7 +66,8 @@ define i37 @fshl_i37(i37 %x, i37 %y, i37 %z) {
; CHECK-BE-NEXT: move $17, $6
; CHECK-BE-NEXT: move $18, $5
; CHECK-BE-NEXT: move $19, $4
-; CHECK-BE-NEXT: lw $4, 56($sp)
+; CHECK-BE-NEXT: lw $1, 56($sp)
+; CHECK-BE-NEXT: andi $4, $1, 31
; CHECK-BE-NEXT: lw $5, 60($sp)
; CHECK-BE-NEXT: addiu $6, $zero, 0
; CHECK-BE-NEXT: jal __umoddi3
@@ -117,8 +118,9 @@ define i37 @fshl_i37(i37 %x, i37 %y, i37 %z) {
; CHECK-LE-NEXT: move $17, $6
; CHECK-LE-NEXT: move $18, $5
; CHECK-LE-NEXT: move $19, $4
+; CHECK-LE-NEXT: lw $1, 60($sp)
+; CHECK-LE-NEXT: andi $5, $1, 31
; CHECK-LE-NEXT: lw $4, 56($sp)
-; CHECK-LE-NEXT: lw $5, 60($sp)
; CHECK-LE-NEXT: addiu $6, $zero, 37
; CHECK-LE-NEXT: jal __umoddi3
; CHECK-LE-NEXT: addiu $7, $zero, 0
@@ -309,7 +311,8 @@ define i37 @fshr_i37(i37 %x, i37 %y, i37 %z) {
; CHECK-BE-NEXT: move $17, $6
; CHECK-BE-NEXT: move $18, $5
; CHECK-BE-NEXT: move $19, $4
-; CHECK-BE-NEXT: lw $4, 56($sp)
+; CHECK-BE-NEXT: lw $1, 56($sp)
+; CHECK-BE-NEXT: andi $4, $1, 31
; CHECK-BE-NEXT: lw $5, 60($sp)
; CHECK-BE-NEXT: addiu $6, $zero, 0
; CHECK-BE-NEXT: jal __umoddi3
@@ -327,9 +330,9 @@ define i37 @fshr_i37(i37 %x, i37 %y, i37 %z) {
; CHECK-BE-NEXT: andi $1, $1, 31
; CHECK-BE-NEXT: sll $6, $19, 1
; CHECK-BE-NEXT: sllv $6, $6, $1
+; CHECK-BE-NEXT: sll $7, $16, 27
; CHECK-BE-NEXT: or $2, $6, $2
-; CHECK-BE-NEXT: sll $6, $16, 27
-; CHECK-BE-NEXT: movz $4, $6, $3
+; CHECK-BE-NEXT: movz $4, $7, $3
; CHECK-BE-NEXT: srlv $3, $4, $5
; CHECK-BE-NEXT: sll $4, $18, 1
; CHECK-BE-NEXT: sllv $1, $4, $1
@@ -360,8 +363,9 @@ define i37 @fshr_i37(i37 %x, i37 %y, i37 %z) {
; CHECK-LE-NEXT: move $17, $6
; CHECK-LE-NEXT: move $18, $5
; CHECK-LE-NEXT: move $19, $4
+; CHECK-LE-NEXT: lw $1, 60($sp)
+; CHECK-LE-NEXT: andi $5, $1, 31
; CHECK-LE-NEXT: lw $4, 56($sp)
-; CHECK-LE-NEXT: lw $5, 60($sp)
; CHECK-LE-NEXT: addiu $6, $zero, 37
; CHECK-LE-NEXT: jal __umoddi3
; CHECK-LE-NEXT: addiu $7, $zero, 0
diff --git a/llvm/test/CodeGen/PowerPC/funnel-shift.ll b/llvm/test/CodeGen/PowerPC/funnel-shift.ll
index 62b68e0b2cadd..10e2fc0326f95 100644
--- a/llvm/test/CodeGen/PowerPC/funnel-shift.ll
+++ b/llvm/test/CodeGen/PowerPC/funnel-shift.ll
@@ -250,7 +250,7 @@ define i37 @fshl_i37(i37 %x, i37 %y, i37 %z) {
; CHECK32_32-NEXT: mr 29, 5
; CHECK32_32-NEXT: stw 30, 24(1) # 4-byte Folded Spill
; CHECK32_32-NEXT: mr 30, 6
-; CHECK32_32-NEXT: mr 3, 7
+; CHECK32_32-NEXT: clrlwi 3, 7, 27
; CHECK32_32-NEXT: mr 4, 8
; CHECK32_32-NEXT: li 5, 0
; CHECK32_32-NEXT: li 6, 37
@@ -299,7 +299,7 @@ define i37 @fshl_i37(i37 %x, i37 %y, i37 %z) {
; CHECK32_64-NEXT: .cfi_offset r30, -8
; CHECK32_64-NEXT: stw 27, 12(1) # 4-byte Folded Spill
; CHECK32_64-NEXT: mr 27, 3
-; CHECK32_64-NEXT: mr 3, 7
+; CHECK32_64-NEXT: clrlwi 3, 7, 27
; CHECK32_64-NEXT: stw 28, 16(1) # 4-byte Folded Spill
; CHECK32_64-NEXT: mr 28, 4
; CHECK32_64-NEXT: mr 4, 8
@@ -353,12 +353,13 @@ define i37 @fshl_i37(i37 %x, i37 %y, i37 %z) {
; CHECK64-LABEL: fshl_i37:
; CHECK64: # %bb.0:
; CHECK64-NEXT: lis 6, 28339
-; CHECK64-NEXT: sldi 4, 4, 27
+; CHECK64-NEXT: clrldi 7, 5, 27
; CHECK64-NEXT: ori 6, 6, 58451
+; CHECK64-NEXT: sldi 4, 4, 27
; CHECK64-NEXT: rldic 6, 6, 33, 0
; CHECK64-NEXT: oris 6, 6, 3542
; CHECK64-NEXT: ori 6, 6, 31883
-; CHECK64-NEXT: mulhdu 6, 5, 6
+; CHECK64-NEXT: mulhdu 6, 7, 6
; CHECK64-NEXT: rldicl 6, 6, 59, 5
; CHECK64-NEXT: mulli 6, 6, 37
; CHECK64-NEXT: sub 5, 5, 6
@@ -549,7 +550,7 @@ define i37 @fshr_i37(i37 %x, i37 %y, i37 %z) {
; CHECK32_32-NEXT: mr 29, 5
; CHECK32_32-NEXT: stw 30, 24(1) # 4-byte Folded Spill
; CHECK32_32-NEXT: mr 30, 6
-; CHECK32_32-NEXT: mr 3, 7
+; CHECK32_32-NEXT: clrlwi 3, 7, 27
; CHECK32_32-NEXT: mr 4, 8
; CHECK32_32-NEXT: li 5, 0
; CHECK32_32-NEXT: li 6, 37
@@ -599,7 +600,7 @@ define i37 @fshr_i37(i37 %x, i37 %y, i37 %z) {
; CHECK32_64-NEXT: .cfi_offset r30, -8
; CHECK32_64-NEXT: stw 27, 12(1) # 4-byte Folded Spill
; CHECK32_64-NEXT: mr 27, 3
-; CHECK32_64-NEXT: mr 3, 7
+; CHECK32_64-NEXT: clrlwi 3, 7, 27
; CHECK32_64-NEXT: stw 28, 16(1) # 4-byte Folded Spill
; CHECK32_64-NEXT: mr 28, 4
; CHECK32_64-NEXT: mr 4, 8
@@ -649,12 +650,13 @@ define i37 @fshr_i37(i37 %x, i37 %y, i37 %z) {
; CHECK64-LABEL: fshr_i37:
; CHECK64: # %bb.0:
; CHECK64-NEXT: lis 6, 28339
-; CHECK64-NEXT: sldi 4, 4, 27
+; CHECK64-NEXT: clrldi 7, 5, 27
; CHECK64-NEXT: ori 6, 6, 58451
+; CHECK64-NEXT: sldi 4, 4, 27
; CHECK64-NEXT: rldic 6, 6, 33, 0
; CHECK64-NEXT: oris 6, 6, 3542
; CHECK64-NEXT: ori 6, 6, 31883
-; CHECK64-NEXT: mulhdu 6, 5, 6
+; CHECK64-NEXT: mulhdu 6, 7, 6
; CHECK64-NEXT: rldicl 6, 6, 59, 5
; CHECK64-NEXT: mulli 6, 6, 37
; CHECK64-NEXT: sub 5, 5, 6
diff --git a/llvm/test/CodeGen/X86/funnel-shift.ll b/llvm/test/CodeGen/X86/funnel-shift.ll
index 2e9deb51dbe07..820b66f6179c2 100644
--- a/llvm/test/CodeGen/X86/funnel-shift.ll
+++ b/llvm/test/CodeGen/X86/funnel-shift.ll
@@ -140,13 +140,15 @@ define i37 @fshl_i37(i37 %x, i37 %y, i37 %z) nounwind {
; X86-SSE2-NEXT: pushl %ebx
; X86-SSE2-NEXT: pushl %edi
; X86-SSE2-NEXT: pushl %esi
+; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SSE2-NEXT: andl $31, %eax
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-SSE2-NEXT: shldl $27, %ebx, %edi
; X86-SSE2-NEXT: pushl $0
; X86-SSE2-NEXT: pushl $37
-; X86-SSE2-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-SSE2-NEXT: pushl %eax
; X86-SSE2-NEXT: pushl {{[0-9]+}}(%esp)
; X86-SSE2-NEXT: calll __umoddi3
; X86-SSE2-NEXT: addl $16, %esp
@@ -174,8 +176,9 @@ define i37 @fshl_i37(i37 %x, i37 %y, i37 %z) nounwind {
; X64-AVX2-LABEL: fshl_i37:
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: movq %rdx, %rcx
+; X64-AVX2-NEXT: movabsq $137438953471, %rax # imm = 0x1FFFFFFFFF
+; X64-AVX2-NEXT: andq %rdx, %rax
; X64-AVX2-NEXT: movabsq $-2492803253203993461, %rdx # imm = 0xDD67C8A60DD67C8B
-; X64-AVX2-NEXT: movq %rcx, %rax
; X64-AVX2-NEXT: mulq %rdx
; X64-AVX2-NEXT: shrq $5, %rdx
; X64-AVX2-NEXT: leal (%rdx,%rdx,8), %eax
@@ -304,13 +307,15 @@ define i37 @fshr_i37(i37 %x, i37 %y, i37 %z) nounwind {
; X86-SSE2-NEXT: pushl %ebx
; X86-SSE2-NEXT: pushl %edi
; X86-SSE2-NEXT: pushl %esi
+; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SSE2-NEXT: andl $31, %eax
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-SSE2-NEXT: shldl $27, %ebx, %esi
; X86-SSE2-NEXT: pushl $0
; X86-SSE2-NEXT: pushl $37
-; X86-SSE2-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-SSE2-NEXT: pushl %eax
; X86-SSE2-NEXT: pushl {{[0-9]+}}(%esp)
; X86-SSE2-NEXT: calll __umoddi3
; X86-SSE2-NEXT: addl $16, %esp
@@ -339,8 +344,9 @@ define i37 @fshr_i37(i37 %x, i37 %y, i37 %z) nounwind {
; X64-AVX2-LABEL: fshr_i37:
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: movq %rdx, %rcx
+; X64-AVX2-NEXT: movabsq $137438953471, %rax # imm = 0x1FFFFFFFFF
+; X64-AVX2-NEXT: andq %rdx, %rax
; X64-AVX2-NEXT: movabsq $-2492803253203993461, %rdx # imm = 0xDD67C8A60DD67C8B
-; X64-AVX2-NEXT: movq %rcx, %rax
; X64-AVX2-NEXT: mulq %rdx
; X64-AVX2-NEXT: shrq $5, %rdx
; X64-AVX2-NEXT: leal (%rdx,%rdx,8), %eax
More information about the llvm-commits
mailing list