[llvm] [DAGCombiner][AArch64] Fix incorrect cast VT in `takeInexpensiveLog2` (PR #67161)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Sep 22 09:15:50 PDT 2023
https://github.com/goldsteinn created https://github.com/llvm/llvm-project/pull/67161
Previously, we were taking `CurVT` before finalizing `ToCast` which
meant potentially returning an `SDValue` with an illegal `ValueType`
for the operation.
Fix is to just take `CurVT` after we have finalized `ToCast` with
`PeekThroughCastsAndTrunc`.
>From a1c8acef0c1430d9d0b624e46af226ff2f7cf3b4 Mon Sep 17 00:00:00 2001
From: Noah Goldstein <goldstein.w.n at gmail.com>
Date: Fri, 22 Sep 2023 11:08:17 -0500
Subject: [PATCH] [DAGCombiner][AArch64] Fix incorrect cast VT in
`takeInexpensiveLog2`
Previously, we were taking `CurVT` before finalizing `ToCast` which
meant potentially returning an `SDValue` with an illegal `ValueType`
for the operation.
Fix is to just take `CurVT` after we have finalized `ToCast` with
`PeekThroughCastsAndTrunc`.
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 2 +-
.../fold-int-pow2-with-fmul-or-fdiv.ll | 444 ++++++++++++++++++
2 files changed, 445 insertions(+), 1 deletion(-)
create mode 100644 llvm/test/CodeGen/AArch64/fold-int-pow2-with-fmul-or-fdiv.ll
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 693523e737acf66..7d5dc96bd0e2c94 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -27323,8 +27323,8 @@ static SDValue takeInexpensiveLog2(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
return SDValue();
auto CastToVT = [&](EVT NewVT, SDValue ToCast) {
- EVT CurVT = ToCast.getValueType();
ToCast = PeekThroughCastsAndTrunc(ToCast);
+ EVT CurVT = ToCast.getValueType();
if (NewVT == CurVT)
return ToCast;
diff --git a/llvm/test/CodeGen/AArch64/fold-int-pow2-with-fmul-or-fdiv.ll b/llvm/test/CodeGen/AArch64/fold-int-pow2-with-fmul-or-fdiv.ll
new file mode 100644
index 000000000000000..b433e61cd18732e
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/fold-int-pow2-with-fmul-or-fdiv.ll
@@ -0,0 +1,444 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefixes=CHECK,CHECK-NEON
+; RUN: llc < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s --check-prefixes=CHECK,CHECK-NO-NEON
+
+declare i16 @llvm.umax.i16(i16, i16)
+declare i64 @llvm.umin.i64(i64, i64)
+
+declare <4 x float> @llvm.ldexp.v4f32.v4i32(<4 x float>, <4 x i32>)
+
+define <4 x float> @fmul_pow2_4xfloat(<4 x i32> %i) {
+; CHECK-LABEL: fmul_pow2_4xfloat:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi v1.4s, #1
+; CHECK-NEXT: ushl v0.4s, v1.4s, v0.4s
+; CHECK-NEXT: fmov v1.4s, #9.00000000
+; CHECK-NEXT: ucvtf v0.4s, v0.4s
+; CHECK-NEXT: fmul v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: ret
+ %p2 = shl <4 x i32> <i32 1, i32 1, i32 1, i32 1>, %i
+ %p2_f = uitofp <4 x i32> %p2 to <4 x float>
+ %r = fmul <4 x float> <float 9.000000e+00, float 9.000000e+00, float 9.000000e+00, float 9.000000e+00>, %p2_f
+ ret <4 x float> %r
+}
+
+define <4 x float> @fmul_pow2_ldexp_4xfloat(<4 x i32> %i) {
+; CHECK-LABEL: fmul_pow2_ldexp_4xfloat:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sub sp, sp, #48
+; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: mov w0, v0.s[1]
+; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: fmov s0, #9.00000000
+; CHECK-NEXT: bl ldexpf
+; CHECK-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: str d0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: fmov s0, #9.00000000
+; CHECK-NEXT: fmov w0, s1
+; CHECK-NEXT: bl ldexpf
+; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0
+; CHECK-NEXT: mov v0.s[1], v1.s[0]
+; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: mov w0, v0.s[2]
+; CHECK-NEXT: fmov s0, #9.00000000
+; CHECK-NEXT: bl ldexpf
+; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0
+; CHECK-NEXT: mov v1.s[2], v0.s[0]
+; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: mov w0, v0.s[3]
+; CHECK-NEXT: fmov s0, #9.00000000
+; CHECK-NEXT: str q1, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: bl ldexpf
+; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0
+; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
+; CHECK-NEXT: mov v1.s[3], v0.s[0]
+; CHECK-NEXT: mov v0.16b, v1.16b
+; CHECK-NEXT: add sp, sp, #48
+; CHECK-NEXT: ret
+ %r = call <4 x float> @llvm.ldexp.v4f32.v4i32(<4 x float> <float 9.000000e+00, float 9.000000e+00, float 9.000000e+00, float 9.000000e+00>, <4 x i32> %i)
+ ret <4 x float> %r
+}
+
+define <4 x float> @fdiv_pow2_4xfloat(<4 x i32> %i) {
+; CHECK-LABEL: fdiv_pow2_4xfloat:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fmov v1.4s, #9.00000000
+; CHECK-NEXT: shl v0.4s, v0.4s, #23
+; CHECK-NEXT: sub v0.4s, v1.4s, v0.4s
+; CHECK-NEXT: ret
+ %p2 = shl <4 x i32> <i32 1, i32 1, i32 1, i32 1>, %i
+ %p2_f = uitofp <4 x i32> %p2 to <4 x float>
+ %r = fdiv <4 x float> <float 9.000000e+00, float 9.000000e+00, float 9.000000e+00, float 9.000000e+00>, %p2_f
+ ret <4 x float> %r
+}
+
+define double @fmul_pow_shl_cnt(i64 %cnt) nounwind {
+; CHECK-LABEL: fmul_pow_shl_cnt:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #1 // =0x1
+; CHECK-NEXT: fmov d1, #9.00000000
+; CHECK-NEXT: lsl x8, x8, x0
+; CHECK-NEXT: ucvtf d0, x8
+; CHECK-NEXT: fmul d0, d0, d1
+; CHECK-NEXT: ret
+ %shl = shl nuw i64 1, %cnt
+ %conv = uitofp i64 %shl to double
+ %mul = fmul double 9.000000e+00, %conv
+ ret double %mul
+}
+
+define double @fmul_pow_shl_cnt2(i64 %cnt) nounwind {
+; CHECK-LABEL: fmul_pow_shl_cnt2:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #2 // =0x2
+; CHECK-NEXT: fmov d1, #-9.00000000
+; CHECK-NEXT: lsl x8, x8, x0
+; CHECK-NEXT: ucvtf d0, x8
+; CHECK-NEXT: fmul d0, d0, d1
+; CHECK-NEXT: ret
+ %shl = shl nuw i64 2, %cnt
+ %conv = uitofp i64 %shl to double
+ %mul = fmul double -9.000000e+00, %conv
+ ret double %mul
+}
+
+define float @fmul_pow_select(i32 %cnt, i1 %c) nounwind {
+; CHECK-LABEL: fmul_pow_select:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #1 // =0x1
+; CHECK-NEXT: tst w1, #0x1
+; CHECK-NEXT: fmov s1, #9.00000000
+; CHECK-NEXT: cinc w8, w8, eq
+; CHECK-NEXT: lsl w8, w8, w0
+; CHECK-NEXT: ucvtf s0, w8
+; CHECK-NEXT: fmul s0, s0, s1
+; CHECK-NEXT: ret
+ %shl2 = shl nuw i32 2, %cnt
+ %shl1 = shl nuw i32 1, %cnt
+ %shl = select i1 %c, i32 %shl1, i32 %shl2
+ %conv = uitofp i32 %shl to float
+ %mul = fmul float 9.000000e+00, %conv
+ ret float %mul
+}
+
+define float @fmul_fly_pow_mul_min_pow2(i64 %cnt) nounwind {
+; CHECK-LABEL: fmul_fly_pow_mul_min_pow2:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #8 // =0x8
+; CHECK-NEXT: mov w9, #8192 // =0x2000
+; CHECK-NEXT: fmov s1, #9.00000000
+; CHECK-NEXT: lsl x8, x8, x0
+; CHECK-NEXT: cmp x8, #2, lsl #12 // =8192
+; CHECK-NEXT: csel x8, x8, x9, lo
+; CHECK-NEXT: ucvtf s0, x8
+; CHECK-NEXT: fmul s0, s0, s1
+; CHECK-NEXT: ret
+ %shl8 = shl nuw i64 8, %cnt
+ %shl = call i64 @llvm.umin.i64(i64 %shl8, i64 8192)
+ %conv = uitofp i64 %shl to float
+ %mul = fmul float 9.000000e+00, %conv
+ ret float %mul
+}
+
+define double @fmul_pow_mul_max_pow2(i16 %cnt) nounwind {
+; CHECK-LABEL: fmul_pow_mul_max_pow2:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #2 // =0x2
+; CHECK-NEXT: mov w9, #1 // =0x1
+; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT: fmov d1, #3.00000000
+; CHECK-NEXT: lsl w8, w8, w0
+; CHECK-NEXT: lsl w9, w9, w0
+; CHECK-NEXT: and w8, w8, #0xfffe
+; CHECK-NEXT: and w9, w9, #0xffff
+; CHECK-NEXT: cmp w9, w8
+; CHECK-NEXT: csel w8, w9, w8, hi
+; CHECK-NEXT: ucvtf d0, w8
+; CHECK-NEXT: fmul d0, d0, d1
+; CHECK-NEXT: ret
+ %shl2 = shl nuw i16 2, %cnt
+ %shl1 = shl nuw i16 1, %cnt
+ %shl = call i16 @llvm.umax.i16(i16 %shl1, i16 %shl2)
+ %conv = uitofp i16 %shl to double
+ %mul = fmul double 3.000000e+00, %conv
+ ret double %mul
+}
+
+define double @fmul_pow_shl_cnt_fail_maybe_non_pow2(i64 %v, i64 %cnt) nounwind {
+; CHECK-LABEL: fmul_pow_shl_cnt_fail_maybe_non_pow2:
+; CHECK: // %bb.0:
+; CHECK-NEXT: lsl x8, x0, x1
+; CHECK-NEXT: fmov d1, #9.00000000
+; CHECK-NEXT: ucvtf d0, x8
+; CHECK-NEXT: fmul d0, d0, d1
+; CHECK-NEXT: ret
+ %shl = shl nuw i64 %v, %cnt
+ %conv = uitofp i64 %shl to double
+ %mul = fmul double 9.000000e+00, %conv
+ ret double %mul
+}
+
+define <2 x float> @fmul_pow_shl_cnt_vec_fail_expensive_cast(<2 x i64> %cnt) nounwind {
+; CHECK-LABEL: fmul_pow_shl_cnt_vec_fail_expensive_cast:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #2 // =0x2
+; CHECK-NEXT: dup v1.2d, x8
+; CHECK-NEXT: ushl v0.2d, v1.2d, v0.2d
+; CHECK-NEXT: fmov v1.2s, #15.00000000
+; CHECK-NEXT: ucvtf v0.2d, v0.2d
+; CHECK-NEXT: fcvtn v0.2s, v0.2d
+; CHECK-NEXT: fmul v0.2s, v0.2s, v1.2s
+; CHECK-NEXT: ret
+ %shl = shl nsw nuw <2 x i64> <i64 2, i64 2>, %cnt
+ %conv = uitofp <2 x i64> %shl to <2 x float>
+ %mul = fmul <2 x float> <float 15.000000e+00, float 15.000000e+00>, %conv
+ ret <2 x float> %mul
+}
+
+define <2 x double> @fmul_pow_shl_cnt_vec(<2 x i64> %cnt) nounwind {
+; CHECK-LABEL: fmul_pow_shl_cnt_vec:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #2 // =0x2
+; CHECK-NEXT: dup v1.2d, x8
+; CHECK-NEXT: ushl v0.2d, v1.2d, v0.2d
+; CHECK-NEXT: fmov v1.2d, #15.00000000
+; CHECK-NEXT: ucvtf v0.2d, v0.2d
+; CHECK-NEXT: fmul v0.2d, v0.2d, v1.2d
+; CHECK-NEXT: ret
+ %shl = shl nsw nuw <2 x i64> <i64 2, i64 2>, %cnt
+ %conv = uitofp <2 x i64> %shl to <2 x double>
+ %mul = fmul <2 x double> <double 15.000000e+00, double 15.000000e+00>, %conv
+ ret <2 x double> %mul
+}
+
+define <4 x float> @fmul_pow_shl_cnt_vec_preserve_fma(<4 x i32> %cnt, <4 x float> %add) nounwind {
+; CHECK-LABEL: fmul_pow_shl_cnt_vec_preserve_fma:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi v2.4s, #2
+; CHECK-NEXT: ushl v0.4s, v2.4s, v0.4s
+; CHECK-NEXT: fmov v2.4s, #5.00000000
+; CHECK-NEXT: ucvtf v0.4s, v0.4s
+; CHECK-NEXT: fmul v0.4s, v0.4s, v2.4s
+; CHECK-NEXT: fadd v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: ret
+ %shl = shl nsw nuw <4 x i32> <i32 2, i32 2, i32 2, i32 2>, %cnt
+ %conv = uitofp <4 x i32> %shl to <4 x float>
+ %mul = fmul <4 x float> <float 5.000000e+00, float 5.000000e+00, float 5.000000e+00, float 5.000000e+00>, %conv
+ %res = fadd <4 x float> %mul, %add
+ ret <4 x float> %res
+}
+
+define <2 x double> @fmul_pow_shl_cnt_vec_non_splat_todo(<2 x i64> %cnt) nounwind {
+; CHECK-LABEL: fmul_pow_shl_cnt_vec_non_splat_todo:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #2 // =0x2
+; CHECK-NEXT: dup v1.2d, x8
+; CHECK-NEXT: adrp x8, .LCPI12_0
+; CHECK-NEXT: ushl v0.2d, v1.2d, v0.2d
+; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI12_0]
+; CHECK-NEXT: ucvtf v0.2d, v0.2d
+; CHECK-NEXT: fmul v0.2d, v0.2d, v1.2d
+; CHECK-NEXT: ret
+ %shl = shl nsw nuw <2 x i64> <i64 2, i64 2>, %cnt
+ %conv = uitofp <2 x i64> %shl to <2 x double>
+ %mul = fmul <2 x double> <double 15.000000e+00, double 14.000000e+00>, %conv
+ ret <2 x double> %mul
+}
+
+define <2 x double> @fmul_pow_shl_cnt_vec_non_splat2_todo(<2 x i64> %cnt) nounwind {
+; CHECK-LABEL: fmul_pow_shl_cnt_vec_non_splat2_todo:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, .LCPI13_0
+; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI13_0]
+; CHECK-NEXT: ushl v0.2d, v1.2d, v0.2d
+; CHECK-NEXT: fmov v1.2d, #15.00000000
+; CHECK-NEXT: ucvtf v0.2d, v0.2d
+; CHECK-NEXT: fmul v0.2d, v0.2d, v1.2d
+; CHECK-NEXT: ret
+ %shl = shl nsw nuw <2 x i64> <i64 2, i64 1>, %cnt
+ %conv = uitofp <2 x i64> %shl to <2 x double>
+ %mul = fmul <2 x double> <double 15.000000e+00, double 15.000000e+00>, %conv
+ ret <2 x double> %mul
+}
+
+
+define double @fmul_pow_shl_cnt_fail_maybe_bad_exp(i64 %cnt) nounwind {
+; CHECK-LABEL: fmul_pow_shl_cnt_fail_maybe_bad_exp:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #1 // =0x1
+; CHECK-NEXT: lsl x8, x8, x0
+; CHECK-NEXT: ucvtf d0, x8
+; CHECK-NEXT: adrp x8, .LCPI14_0
+; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI14_0]
+; CHECK-NEXT: fmul d0, d0, d1
+; CHECK-NEXT: ret
+ %shl = shl nuw i64 1, %cnt
+ %conv = uitofp i64 %shl to double
+ %mul = fmul double 9.745314e+288, %conv
+ ret double %mul
+}
+
+define double @fmul_pow_shl_cnt_safe(i16 %cnt) nounwind {
+; CHECK-LABEL: fmul_pow_shl_cnt_safe:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #1 // =0x1
+; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT: lsl w8, w8, w0
+; CHECK-NEXT: and w8, w8, #0xffff
+; CHECK-NEXT: ucvtf d0, w8
+; CHECK-NEXT: adrp x8, .LCPI15_0
+; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI15_0]
+; CHECK-NEXT: fmul d0, d0, d1
+; CHECK-NEXT: ret
+ %shl = shl nuw i16 1, %cnt
+ %conv = uitofp i16 %shl to double
+ %mul = fmul double 9.745314e+288, %conv
+ ret double %mul
+}
+
+define <2 x double> @fdiv_pow_shl_cnt_vec(<2 x i64> %cnt) nounwind {
+; CHECK-LABEL: fdiv_pow_shl_cnt_vec:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fmov v1.2d, #1.00000000
+; CHECK-NEXT: shl v0.2d, v0.2d, #52
+; CHECK-NEXT: sub v0.2d, v1.2d, v0.2d
+; CHECK-NEXT: ret
+ %shl = shl nuw <2 x i64> <i64 1, i64 1>, %cnt
+ %conv = uitofp <2 x i64> %shl to <2 x double>
+ %mul = fdiv <2 x double> <double 1.000000e+00, double 1.000000e+00>, %conv
+ ret <2 x double> %mul
+}
+
+define <2 x float> @fdiv_pow_shl_cnt_vec_with_expensive_cast(<2 x i64> %cnt) nounwind {
+; CHECK-LABEL: fdiv_pow_shl_cnt_vec_with_expensive_cast:
+; CHECK: // %bb.0:
+; CHECK-NEXT: xtn v0.2s, v0.2d
+; CHECK-NEXT: fmov v1.2s, #1.00000000
+; CHECK-NEXT: shl v0.2s, v0.2s, #23
+; CHECK-NEXT: sub v0.2s, v1.2s, v0.2s
+; CHECK-NEXT: ret
+ %shl = shl nuw <2 x i64> <i64 1, i64 1>, %cnt
+ %conv = uitofp <2 x i64> %shl to <2 x float>
+ %mul = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, %conv
+ ret <2 x float> %mul
+}
+
+define float @fdiv_pow_shl_cnt_fail_maybe_z(i64 %cnt) nounwind {
+; CHECK-LABEL: fdiv_pow_shl_cnt_fail_maybe_z:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #8 // =0x8
+; CHECK-NEXT: fmov s1, #-9.00000000
+; CHECK-NEXT: lsl x8, x8, x0
+; CHECK-NEXT: ucvtf s0, x8
+; CHECK-NEXT: fdiv s0, s1, s0
+; CHECK-NEXT: ret
+ %shl = shl i64 8, %cnt
+ %conv = uitofp i64 %shl to float
+ %mul = fdiv float -9.000000e+00, %conv
+ ret float %mul
+}
+
+define float @fdiv_pow_shl_cnt_fail_neg_int(i64 %cnt) nounwind {
+; CHECK-LABEL: fdiv_pow_shl_cnt_fail_neg_int:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #8 // =0x8
+; CHECK-NEXT: fmov s1, #-9.00000000
+; CHECK-NEXT: lsl x8, x8, x0
+; CHECK-NEXT: scvtf s0, x8
+; CHECK-NEXT: fdiv s0, s1, s0
+; CHECK-NEXT: ret
+ %shl = shl i64 8, %cnt
+ %conv = sitofp i64 %shl to float
+ %mul = fdiv float -9.000000e+00, %conv
+ ret float %mul
+}
+
+define float @fdiv_pow_shl_cnt(i64 %cnt_in) nounwind {
+; CHECK-LABEL: fdiv_pow_shl_cnt:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #8 // =0x8
+; CHECK-NEXT: and x9, x0, #0x1f
+; CHECK-NEXT: fmov s1, #-0.50000000
+; CHECK-NEXT: lsl x8, x8, x9
+; CHECK-NEXT: scvtf s0, x8
+; CHECK-NEXT: fdiv s0, s1, s0
+; CHECK-NEXT: ret
+ %cnt = and i64 %cnt_in, 31
+ %shl = shl i64 8, %cnt
+ %conv = sitofp i64 %shl to float
+ %mul = fdiv float -0.500000e+00, %conv
+ ret float %mul
+}
+
+define double @fdiv_pow_shl_cnt32_to_dbl_okay(i32 %cnt) nounwind {
+; CHECK-LABEL: fdiv_pow_shl_cnt32_to_dbl_okay:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov x8, #3936146074321813504 // =0x36a0000000000000
+; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT: sub x8, x8, x0, lsl #52
+; CHECK-NEXT: fmov d0, x8
+; CHECK-NEXT: ret
+ %shl = shl nuw i32 1, %cnt
+ %conv = uitofp i32 %shl to double
+ %mul = fdiv double 0x36A0000000000000, %conv
+ ret double %mul
+}
+
+define float @fdiv_pow_shl_cnt32_out_of_bounds2(i32 %cnt) nounwind {
+; CHECK-LABEL: fdiv_pow_shl_cnt32_out_of_bounds2:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #1 // =0x1
+; CHECK-NEXT: lsl w8, w8, w0
+; CHECK-NEXT: ucvtf s0, w8
+; CHECK-NEXT: mov w8, #65528 // =0xfff8
+; CHECK-NEXT: movk w8, #4351, lsl #16
+; CHECK-NEXT: fmov s1, w8
+; CHECK-NEXT: fdiv s0, s1, s0
+; CHECK-NEXT: ret
+ %shl = shl nuw i32 1, %cnt
+ %conv = uitofp i32 %shl to float
+ %mul = fdiv float 0x3a1fffff00000000, %conv
+ ret float %mul
+}
+
+define float @fdiv_pow_shl_cnt32_okay(i32 %cnt) nounwind {
+; CHECK-LABEL: fdiv_pow_shl_cnt32_okay:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #285212672 // =0x11000000
+; CHECK-NEXT: sub w8, w8, w0, lsl #23
+; CHECK-NEXT: fmov s0, w8
+; CHECK-NEXT: ret
+ %shl = shl nuw i32 1, %cnt
+ %conv = uitofp i32 %shl to float
+ %mul = fdiv float 0x3a20000000000000, %conv
+ ret float %mul
+}
+
+define fastcc i1 @quantum_hadamard(i32 %0) {
+; CHECK-LABEL: quantum_hadamard:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov x8, #4607182418800017408 // =0x3ff0000000000000
+; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT: sub x8, x8, x0, lsl #52
+; CHECK-NEXT: fmov d0, x8
+; CHECK-NEXT: fcvt s0, d0
+; CHECK-NEXT: fcmp s0, #0.0
+; CHECK-NEXT: cset w0, gt
+; CHECK-NEXT: ret
+ %2 = zext i32 %0 to i64
+ %3 = shl i64 1, %2
+ %4 = uitofp i64 %3 to double
+ %5 = fdiv double 1.000000e+00, %4
+ %6 = fptrunc double %5 to float
+ %7 = fcmp olt float 0.000000e+00, %6
+ ret i1 %7
+}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK-NEON: {{.*}}
+; CHECK-NO-NEON: {{.*}}
More information about the llvm-commits
mailing list