[llvm] 2b3b453 - [TargetLowering] Only demand a funnelshift's modulo amount bits
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 16 06:52:37 PDT 2020
Author: Simon Pilgrim
Date: 2020-03-16T13:52:17Z
New Revision: 2b3b453a827b2867b38b47acd17edafa314c9d7e
URL: https://github.com/llvm/llvm-project/commit/2b3b453a827b2867b38b47acd17edafa314c9d7e
DIFF: https://github.com/llvm/llvm-project/commit/2b3b453a827b2867b38b47acd17edafa314c9d7e.diff
LOG: [TargetLowering] Only demand a funnelshift's modulo amount bits
ISD::FSHL/FSHR shift amounts are always interpreted modulo the bitwidth, so for power-of-2 bitwidths we only need to demand the lowest log2(bitwidth) bits of the shift amount operand.
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
llvm/test/CodeGen/AMDGPU/fshl.ll
llvm/test/CodeGen/X86/shift-by-signext.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 7b27eccbfc6c..a6d9bfde61bd 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -1634,6 +1634,14 @@ bool TargetLowering::SimplifyDemandedBits(
Known.One |= Known2.One;
Known.Zero |= Known2.Zero;
}
+
+ // For pow-2 bitwidths we only demand the bottom modulo amt bits.
+ if (isPowerOf2_32(BitWidth)) {
+ APInt DemandedAmtBits(Op2.getScalarValueSizeInBits(), BitWidth - 1);
+ if (SimplifyDemandedBits(Op2, DemandedAmtBits, DemandedElts,
+ Known2, TLO, Depth + 1))
+ return true;
+ }
break;
}
case ISD::ROTL:
diff --git a/llvm/test/CodeGen/AMDGPU/fshl.ll b/llvm/test/CodeGen/AMDGPU/fshl.ll
index f91472967a1d..0f2d16ebcc2c 100644
--- a/llvm/test/CodeGen/AMDGPU/fshl.ll
+++ b/llvm/test/CodeGen/AMDGPU/fshl.ll
@@ -16,13 +16,13 @@ define amdgpu_kernel void @fshl_i32(i32 addrspace(1)* %in, i32 %x, i32 %y, i32 %
; SI-NEXT: s_mov_b32 s7, 0xf000
; SI-NEXT: s_mov_b32 s6, -1
; SI-NEXT: s_waitcnt lgkmcnt(0)
-; SI-NEXT: s_and_b32 s2, s2, 31
+; SI-NEXT: s_sub_i32 s3, 32, s2
; SI-NEXT: v_mov_b32_e32 v0, s1
-; SI-NEXT: s_sub_i32 s1, 32, s2
-; SI-NEXT: v_mov_b32_e32 v1, s1
+; SI-NEXT: v_mov_b32_e32 v1, s3
+; SI-NEXT: s_and_b32 s1, s2, 31
; SI-NEXT: v_alignbit_b32 v0, s0, v0, v1
; SI-NEXT: v_mov_b32_e32 v1, s0
-; SI-NEXT: v_cmp_eq_u32_e64 vcc, s2, 0
+; SI-NEXT: v_cmp_eq_u32_e64 vcc, s1, 0
; SI-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT: s_endpgm
@@ -32,13 +32,13 @@ define amdgpu_kernel void @fshl_i32(i32 addrspace(1)* %in, i32 %x, i32 %y, i32 %
; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24
; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x2c
; VI-NEXT: s_waitcnt lgkmcnt(0)
-; VI-NEXT: s_and_b32 s2, s2, 31
+; VI-NEXT: s_sub_i32 s3, 32, s2
; VI-NEXT: v_mov_b32_e32 v0, s1
-; VI-NEXT: s_sub_i32 s1, 32, s2
-; VI-NEXT: v_mov_b32_e32 v2, s1
+; VI-NEXT: s_and_b32 s1, s2, 31
+; VI-NEXT: v_mov_b32_e32 v2, s3
; VI-NEXT: v_mov_b32_e32 v1, s0
; VI-NEXT: v_alignbit_b32 v0, s0, v0, v2
-; VI-NEXT: v_cmp_eq_u32_e64 vcc, s2, 0
+; VI-NEXT: v_cmp_eq_u32_e64 vcc, s1, 0
; VI-NEXT: v_cndmask_b32_e32 v2, v0, v1, vcc
; VI-NEXT: v_mov_b32_e32 v0, s4
; VI-NEXT: v_mov_b32_e32 v1, s5
@@ -50,13 +50,13 @@ define amdgpu_kernel void @fshl_i32(i32 addrspace(1)* %in, i32 %x, i32 %y, i32 %
; GFX9-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x2c
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: s_and_b32 s2, s2, 31
+; GFX9-NEXT: s_sub_i32 s3, 32, s2
; GFX9-NEXT: v_mov_b32_e32 v0, s1
-; GFX9-NEXT: s_sub_i32 s1, 32, s2
-; GFX9-NEXT: v_mov_b32_e32 v2, s1
+; GFX9-NEXT: s_and_b32 s1, s2, 31
+; GFX9-NEXT: v_mov_b32_e32 v2, s3
; GFX9-NEXT: v_mov_b32_e32 v1, s0
; GFX9-NEXT: v_alignbit_b32 v0, s0, v0, v2
-; GFX9-NEXT: v_cmp_eq_u32_e64 vcc, s2, 0
+; GFX9-NEXT: v_cmp_eq_u32_e64 vcc, s1, 0
; GFX9-NEXT: v_cndmask_b32_e32 v2, v0, v1, vcc
; GFX9-NEXT: v_mov_b32_e32 v0, s4
; GFX9-NEXT: v_mov_b32_e32 v1, s5
@@ -70,12 +70,12 @@ define amdgpu_kernel void @fshl_i32(i32 addrspace(1)* %in, i32 %x, i32 %y, i32 %
; R600-NEXT: CF_END
; R600-NEXT: PAD
; R600-NEXT: ALU clause starting at 4:
-; R600-NEXT: AND_INT * T0.W, KC0[3].X, literal.x,
-; R600-NEXT: 31(4.344025e-44), 0(0.000000e+00)
-; R600-NEXT: SUB_INT * T1.W, literal.x, PV.W,
+; R600-NEXT: SUB_INT * T0.W, literal.x, KC0[3].X,
; R600-NEXT: 32(4.484155e-44), 0(0.000000e+00)
-; R600-NEXT: BIT_ALIGN_INT * T1.W, KC0[2].Z, KC0[2].W, PV.W,
-; R600-NEXT: CNDE_INT T0.X, T0.W, KC0[2].Z, PV.W,
+; R600-NEXT: BIT_ALIGN_INT T0.W, KC0[2].Z, KC0[2].W, PV.W,
+; R600-NEXT: AND_INT * T1.W, KC0[3].X, literal.x,
+; R600-NEXT: 31(4.344025e-44), 0(0.000000e+00)
+; R600-NEXT: CNDE_INT T0.X, PS, KC0[2].Z, PV.W,
; R600-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00)
entry:
@@ -149,15 +149,15 @@ define amdgpu_kernel void @fshl_v2i32(<2 x i32> addrspace(1)* %in, <2 x i32> %x,
; SI-NEXT: s_mov_b32 s6, -1
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: v_mov_b32_e32 v0, s9
-; SI-NEXT: s_and_b32 s1, s1, 31
; SI-NEXT: s_sub_i32 s10, 32, s1
; SI-NEXT: v_mov_b32_e32 v1, s10
-; SI-NEXT: s_and_b32 s0, s0, 31
+; SI-NEXT: s_and_b32 s1, s1, 31
; SI-NEXT: v_alignbit_b32 v0, s3, v0, v1
; SI-NEXT: v_cmp_eq_u32_e64 vcc, s1, 0
; SI-NEXT: v_mov_b32_e32 v1, s3
; SI-NEXT: s_sub_i32 s1, 32, s0
; SI-NEXT: v_cndmask_b32_e32 v1, v0, v1, vcc
+; SI-NEXT: s_and_b32 s0, s0, 31
; SI-NEXT: v_mov_b32_e32 v0, s8
; SI-NEXT: v_mov_b32_e32 v2, s1
; SI-NEXT: v_alignbit_b32 v0, s2, v0, v2
@@ -175,15 +175,15 @@ define amdgpu_kernel void @fshl_v2i32(<2 x i32> addrspace(1)* %in, <2 x i32> %x,
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x3c
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: v_mov_b32_e32 v0, s7
+; VI-NEXT: s_sub_i32 s8, 32, s1
+; VI-NEXT: v_mov_b32_e32 v1, s8
; VI-NEXT: s_and_b32 s1, s1, 31
-; VI-NEXT: s_sub_i32 s7, 32, s1
-; VI-NEXT: v_mov_b32_e32 v1, s7
-; VI-NEXT: s_and_b32 s0, s0, 31
; VI-NEXT: v_alignbit_b32 v0, s5, v0, v1
; VI-NEXT: v_cmp_eq_u32_e64 vcc, s1, 0
; VI-NEXT: v_mov_b32_e32 v1, s5
; VI-NEXT: s_sub_i32 s1, 32, s0
; VI-NEXT: v_cndmask_b32_e32 v1, v0, v1, vcc
+; VI-NEXT: s_and_b32 s0, s0, 31
; VI-NEXT: v_mov_b32_e32 v0, s6
; VI-NEXT: v_mov_b32_e32 v2, s1
; VI-NEXT: v_alignbit_b32 v0, s4, v0, v2
@@ -203,15 +203,15 @@ define amdgpu_kernel void @fshl_v2i32(<2 x i32> addrspace(1)* %in, <2 x i32> %x,
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x3c
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s7
+; GFX9-NEXT: s_sub_i32 s8, 32, s1
+; GFX9-NEXT: v_mov_b32_e32 v1, s8
; GFX9-NEXT: s_and_b32 s1, s1, 31
-; GFX9-NEXT: s_sub_i32 s7, 32, s1
-; GFX9-NEXT: v_mov_b32_e32 v1, s7
-; GFX9-NEXT: s_and_b32 s0, s0, 31
; GFX9-NEXT: v_alignbit_b32 v0, s5, v0, v1
; GFX9-NEXT: v_cmp_eq_u32_e64 vcc, s1, 0
; GFX9-NEXT: v_mov_b32_e32 v1, s5
; GFX9-NEXT: s_sub_i32 s1, 32, s0
; GFX9-NEXT: v_cndmask_b32_e32 v1, v0, v1, vcc
+; GFX9-NEXT: s_and_b32 s0, s0, 31
; GFX9-NEXT: v_mov_b32_e32 v0, s6
; GFX9-NEXT: v_mov_b32_e32 v2, s1
; GFX9-NEXT: v_alignbit_b32 v0, s4, v0, v2
@@ -225,25 +225,24 @@ define amdgpu_kernel void @fshl_v2i32(<2 x i32> addrspace(1)* %in, <2 x i32> %x,
;
; R600-LABEL: fshl_v2i32:
; R600: ; %bb.0: ; %entry
-; R600-NEXT: ALU 14, @4, KC0[CB0:0-32], KC1[]
-; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
+; R600-NEXT: ALU 13, @4, KC0[CB0:0-32], KC1[]
+; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.XY, T0.X, 1
; R600-NEXT: CF_END
; R600-NEXT: PAD
; R600-NEXT: ALU clause starting at 4:
-; R600-NEXT: AND_INT * T0.W, KC0[4].X, literal.x,
-; R600-NEXT: 31(4.344025e-44), 0(0.000000e+00)
-; R600-NEXT: SUB_INT T1.W, literal.x, PV.W,
-; R600-NEXT: AND_INT * T2.W, KC0[3].W, literal.y,
-; R600-NEXT: 32(4.484155e-44), 31(4.344025e-44)
-; R600-NEXT: SUB_INT T0.Z, literal.x, PS,
-; R600-NEXT: BIT_ALIGN_INT T1.W, KC0[3].X, KC0[3].Z, PV.W,
-; R600-NEXT: SETE_INT * T0.W, T0.W, 0.0,
-; R600-NEXT: 32(4.484155e-44), 0(0.000000e+00)
-; R600-NEXT: CNDE_INT T0.Y, PS, PV.W, KC0[3].X,
+; R600-NEXT: AND_INT T0.W, KC0[4].X, literal.x,
+; R600-NEXT: SUB_INT * T1.W, literal.y, KC0[4].X,
+; R600-NEXT: 31(4.344025e-44), 32(4.484155e-44)
+; R600-NEXT: AND_INT T0.Y, KC0[3].W, literal.x,
+; R600-NEXT: SUB_INT T0.Z, literal.y, KC0[3].W,
+; R600-NEXT: BIT_ALIGN_INT T1.W, KC0[3].X, KC0[3].Z, PS,
+; R600-NEXT: SETE_INT * T0.W, PV.W, 0.0,
+; R600-NEXT: 31(4.344025e-44), 32(4.484155e-44)
+; R600-NEXT: CNDE_INT T1.Y, PS, PV.W, KC0[3].X,
; R600-NEXT: BIT_ALIGN_INT T0.W, KC0[2].W, KC0[3].Y, PV.Z,
-; R600-NEXT: SETE_INT * T1.W, T2.W, 0.0,
-; R600-NEXT: CNDE_INT T0.X, PS, PV.W, KC0[2].W,
-; R600-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
+; R600-NEXT: SETE_INT * T1.W, PV.Y, 0.0,
+; R600-NEXT: CNDE_INT T1.X, PS, PV.W, KC0[2].W,
+; R600-NEXT: LSHR * T0.X, KC0[2].Y, literal.x,
; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00)
entry:
%0 = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> %z)
@@ -327,31 +326,31 @@ define amdgpu_kernel void @fshl_v4i32(<4 x i32> addrspace(1)* %in, <4 x i32> %x,
; SI-NEXT: s_mov_b32 s6, -1
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: v_mov_b32_e32 v0, s15
-; SI-NEXT: s_and_b32 s3, s3, 31
; SI-NEXT: s_sub_i32 s16, 32, s3
; SI-NEXT: v_mov_b32_e32 v1, s16
-; SI-NEXT: s_and_b32 s2, s2, 31
+; SI-NEXT: s_and_b32 s3, s3, 31
; SI-NEXT: v_alignbit_b32 v0, s11, v0, v1
; SI-NEXT: v_cmp_eq_u32_e64 vcc, s3, 0
; SI-NEXT: v_mov_b32_e32 v1, s11
; SI-NEXT: s_sub_i32 s3, 32, s2
; SI-NEXT: v_cndmask_b32_e32 v3, v0, v1, vcc
+; SI-NEXT: s_and_b32 s2, s2, 31
; SI-NEXT: v_mov_b32_e32 v0, s14
; SI-NEXT: v_mov_b32_e32 v1, s3
-; SI-NEXT: s_and_b32 s1, s1, 31
; SI-NEXT: v_alignbit_b32 v0, s10, v0, v1
; SI-NEXT: v_cmp_eq_u32_e64 vcc, s2, 0
; SI-NEXT: v_mov_b32_e32 v1, s10
; SI-NEXT: s_sub_i32 s2, 32, s1
; SI-NEXT: v_cndmask_b32_e32 v2, v0, v1, vcc
+; SI-NEXT: s_and_b32 s1, s1, 31
; SI-NEXT: v_mov_b32_e32 v0, s13
; SI-NEXT: v_mov_b32_e32 v1, s2
-; SI-NEXT: s_and_b32 s0, s0, 31
; SI-NEXT: v_alignbit_b32 v0, s9, v0, v1
; SI-NEXT: v_cmp_eq_u32_e64 vcc, s1, 0
; SI-NEXT: v_mov_b32_e32 v1, s9
; SI-NEXT: s_sub_i32 s1, 32, s0
; SI-NEXT: v_cndmask_b32_e32 v1, v0, v1, vcc
+; SI-NEXT: s_and_b32 s0, s0, 31
; SI-NEXT: v_mov_b32_e32 v0, s12
; SI-NEXT: v_mov_b32_e32 v4, s1
; SI-NEXT: v_alignbit_b32 v0, s8, v0, v4
@@ -369,31 +368,31 @@ define amdgpu_kernel void @fshl_v4i32(<4 x i32> addrspace(1)* %in, <4 x i32> %x,
; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x54
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: v_mov_b32_e32 v0, s11
+; VI-NEXT: s_sub_i32 s14, 32, s3
+; VI-NEXT: v_mov_b32_e32 v1, s14
; VI-NEXT: s_and_b32 s3, s3, 31
-; VI-NEXT: s_sub_i32 s11, 32, s3
-; VI-NEXT: v_mov_b32_e32 v1, s11
-; VI-NEXT: s_and_b32 s2, s2, 31
; VI-NEXT: v_alignbit_b32 v0, s7, v0, v1
; VI-NEXT: v_cmp_eq_u32_e64 vcc, s3, 0
; VI-NEXT: v_mov_b32_e32 v1, s7
; VI-NEXT: s_sub_i32 s3, 32, s2
; VI-NEXT: v_cndmask_b32_e32 v3, v0, v1, vcc
+; VI-NEXT: s_and_b32 s2, s2, 31
; VI-NEXT: v_mov_b32_e32 v0, s10
; VI-NEXT: v_mov_b32_e32 v1, s3
-; VI-NEXT: s_and_b32 s1, s1, 31
; VI-NEXT: v_alignbit_b32 v0, s6, v0, v1
; VI-NEXT: v_cmp_eq_u32_e64 vcc, s2, 0
; VI-NEXT: v_mov_b32_e32 v1, s6
; VI-NEXT: s_sub_i32 s2, 32, s1
; VI-NEXT: v_cndmask_b32_e32 v2, v0, v1, vcc
+; VI-NEXT: s_and_b32 s1, s1, 31
; VI-NEXT: v_mov_b32_e32 v0, s9
; VI-NEXT: v_mov_b32_e32 v1, s2
-; VI-NEXT: s_and_b32 s0, s0, 31
; VI-NEXT: v_alignbit_b32 v0, s5, v0, v1
; VI-NEXT: v_cmp_eq_u32_e64 vcc, s1, 0
; VI-NEXT: v_mov_b32_e32 v1, s5
; VI-NEXT: s_sub_i32 s1, 32, s0
; VI-NEXT: v_cndmask_b32_e32 v1, v0, v1, vcc
+; VI-NEXT: s_and_b32 s0, s0, 31
; VI-NEXT: v_mov_b32_e32 v0, s8
; VI-NEXT: v_mov_b32_e32 v4, s1
; VI-NEXT: v_alignbit_b32 v0, s4, v0, v4
@@ -413,31 +412,31 @@ define amdgpu_kernel void @fshl_v4i32(<4 x i32> addrspace(1)* %in, <4 x i32> %x,
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x54
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s11
+; GFX9-NEXT: s_sub_i32 s14, 32, s3
+; GFX9-NEXT: v_mov_b32_e32 v1, s14
; GFX9-NEXT: s_and_b32 s3, s3, 31
-; GFX9-NEXT: s_sub_i32 s11, 32, s3
-; GFX9-NEXT: v_mov_b32_e32 v1, s11
-; GFX9-NEXT: s_and_b32 s2, s2, 31
; GFX9-NEXT: v_alignbit_b32 v0, s7, v0, v1
; GFX9-NEXT: v_cmp_eq_u32_e64 vcc, s3, 0
; GFX9-NEXT: v_mov_b32_e32 v1, s7
; GFX9-NEXT: s_sub_i32 s3, 32, s2
; GFX9-NEXT: v_cndmask_b32_e32 v3, v0, v1, vcc
+; GFX9-NEXT: s_and_b32 s2, s2, 31
; GFX9-NEXT: v_mov_b32_e32 v0, s10
; GFX9-NEXT: v_mov_b32_e32 v1, s3
-; GFX9-NEXT: s_and_b32 s1, s1, 31
; GFX9-NEXT: v_alignbit_b32 v0, s6, v0, v1
; GFX9-NEXT: v_cmp_eq_u32_e64 vcc, s2, 0
; GFX9-NEXT: v_mov_b32_e32 v1, s6
; GFX9-NEXT: s_sub_i32 s2, 32, s1
; GFX9-NEXT: v_cndmask_b32_e32 v2, v0, v1, vcc
+; GFX9-NEXT: s_and_b32 s1, s1, 31
; GFX9-NEXT: v_mov_b32_e32 v0, s9
; GFX9-NEXT: v_mov_b32_e32 v1, s2
-; GFX9-NEXT: s_and_b32 s0, s0, 31
; GFX9-NEXT: v_alignbit_b32 v0, s5, v0, v1
; GFX9-NEXT: v_cmp_eq_u32_e64 vcc, s1, 0
; GFX9-NEXT: v_mov_b32_e32 v1, s5
; GFX9-NEXT: s_sub_i32 s1, 32, s0
; GFX9-NEXT: v_cndmask_b32_e32 v1, v0, v1, vcc
+; GFX9-NEXT: s_and_b32 s0, s0, 31
; GFX9-NEXT: v_mov_b32_e32 v0, s8
; GFX9-NEXT: v_mov_b32_e32 v4, s1
; GFX9-NEXT: v_alignbit_b32 v0, s4, v0, v4
@@ -451,38 +450,36 @@ define amdgpu_kernel void @fshl_v4i32(<4 x i32> addrspace(1)* %in, <4 x i32> %x,
;
; R600-LABEL: fshl_v4i32:
; R600: ; %bb.0: ; %entry
-; R600-NEXT: ALU 27, @4, KC0[CB0:0-32], KC1[]
-; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T0.X, 1
+; R600-NEXT: ALU 25, @4, KC0[CB0:0-32], KC1[]
+; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T1.X, 1
; R600-NEXT: CF_END
; R600-NEXT: PAD
; R600-NEXT: ALU clause starting at 4:
-; R600-NEXT: AND_INT T0.W, KC0[6].X, literal.x,
-; R600-NEXT: AND_INT * T1.W, KC0[5].W, literal.x,
-; R600-NEXT: 31(4.344025e-44), 0(0.000000e+00)
-; R600-NEXT: SUB_INT * T2.W, literal.x, PV.W,
+; R600-NEXT: SUB_INT * T0.W, literal.x, KC0[6].X,
; R600-NEXT: 32(4.484155e-44), 0(0.000000e+00)
-; R600-NEXT: AND_INT T0.X, KC0[5].Y, literal.x,
-; R600-NEXT: BIT_ALIGN_INT T0.Y, KC0[4].X, KC0[5].X, PV.W,
-; R600-NEXT: SETE_INT T0.Z, T0.W, 0.0,
-; R600-NEXT: SUB_INT * T0.W, literal.y, T1.W,
-; R600-NEXT: 31(4.344025e-44), 32(4.484155e-44)
-; R600-NEXT: AND_INT * T2.W, KC0[5].Z, literal.x,
+; R600-NEXT: BIT_ALIGN_INT * T0.W, KC0[4].X, KC0[5].X, PV.W,
+; R600-NEXT: AND_INT * T1.W, KC0[6].X, literal.x,
; R600-NEXT: 31(4.344025e-44), 0(0.000000e+00)
-; R600-NEXT: SUB_INT T1.Y, literal.x, PV.W,
-; R600-NEXT: BIT_ALIGN_INT T1.Z, KC0[3].W, KC0[4].W, T0.W,
-; R600-NEXT: SETE_INT * T0.W, T1.W, 0.0,
-; R600-NEXT: 32(4.484155e-44), 0(0.000000e+00)
-; R600-NEXT: CNDE_INT * T1.W, T0.Z, T0.Y, KC0[4].X,
-; R600-NEXT: CNDE_INT T1.Z, T0.W, T1.Z, KC0[3].W,
-; R600-NEXT: BIT_ALIGN_INT T0.W, KC0[3].Z, KC0[4].Z, T1.Y,
-; R600-NEXT: SETE_INT * T2.W, T2.W, 0.0,
-; R600-NEXT: CNDE_INT T1.Y, PS, PV.W, KC0[3].Z,
-; R600-NEXT: SUB_INT * T0.W, literal.x, T0.X,
-; R600-NEXT: 32(4.484155e-44), 0(0.000000e+00)
-; R600-NEXT: BIT_ALIGN_INT T0.W, KC0[3].Y, KC0[4].Y, PV.W,
+; R600-NEXT: AND_INT T0.X, KC0[5].Z, literal.x,
+; R600-NEXT: SUB_INT T0.Y, literal.y, KC0[5].Z,
+; R600-NEXT: SETE_INT T0.Z, PV.W, 0.0,
+; R600-NEXT: SUB_INT T1.W, literal.y, KC0[5].W,
+; R600-NEXT: AND_INT * T2.W, KC0[5].W, literal.x,
+; R600-NEXT: 31(4.344025e-44), 32(4.484155e-44)
+; R600-NEXT: SETE_INT T1.Z, PS, 0.0,
+; R600-NEXT: BIT_ALIGN_INT * T1.W, KC0[3].W, KC0[4].W, PV.W,
+; R600-NEXT: CNDE_INT * T0.W, T0.Z, T0.W, KC0[4].X,
+; R600-NEXT: CNDE_INT T0.Z, T1.Z, T1.W, KC0[3].W,
+; R600-NEXT: BIT_ALIGN_INT T1.W, KC0[3].Z, KC0[4].Z, T0.Y,
; R600-NEXT: SETE_INT * T2.W, T0.X, 0.0,
-; R600-NEXT: CNDE_INT T1.X, PS, PV.W, KC0[3].Y,
-; R600-NEXT: LSHR * T0.X, KC0[2].Y, literal.x,
+; R600-NEXT: CNDE_INT T0.Y, PS, PV.W, KC0[3].Z,
+; R600-NEXT: AND_INT T1.W, KC0[5].Y, literal.x,
+; R600-NEXT: SUB_INT * T2.W, literal.y, KC0[5].Y,
+; R600-NEXT: 31(4.344025e-44), 32(4.484155e-44)
+; R600-NEXT: BIT_ALIGN_INT T2.W, KC0[3].Y, KC0[4].Y, PS,
+; R600-NEXT: SETE_INT * T1.W, PV.W, 0.0,
+; R600-NEXT: CNDE_INT T0.X, PS, PV.W, KC0[3].Y,
+; R600-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00)
entry:
%0 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z)
diff --git a/llvm/test/CodeGen/X86/shift-by-signext.ll b/llvm/test/CodeGen/X86/shift-by-signext.ll
index 85ca2bd6e1d3..e315e6aa6be4 100644
--- a/llvm/test/CodeGen/X86/shift-by-signext.ll
+++ b/llvm/test/CodeGen/X86/shift-by-signext.ll
@@ -88,9 +88,9 @@ declare i32 @llvm.fshr.i32(i32 %a, i32 %b, i32 %c)
define i32 @n6_fshl(i32 %x, i32 %y, i8 %shamt) nounwind {
; X86-LABEL: n6_fshl:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-NEXT: shldl %cl, %edx, %eax
; X86-NEXT: retl
;
@@ -108,9 +108,9 @@ define i32 @n6_fshl(i32 %x, i32 %y, i8 %shamt) nounwind {
define i32 @n7_fshr(i32 %x, i32 %y, i8 %shamt) nounwind {
; X86-LABEL: n7_fshr:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-NEXT: shrdl %cl, %edx, %eax
; X86-NEXT: retl
;
More information about the llvm-commits mailing list