[llvm] [GlobalISel] replace right identity X * -1.0 with fneg(x) (PR #80526)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Feb 2 20:28:11 PST 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-globalisel
@llvm/pr-subscribers-backend-amdgpu
Author: Nick Anderson (nickleus27)
<details>
<summary>Changes</summary>
Follow-up patch to #<!-- -->78673.
@<!-- -->Pierre-vh @<!-- -->jayfoad @<!-- -->arsenm Could you review this when you have a chance?
---
Full diff: https://github.com/llvm/llvm-project/pull/80526.diff
7 Files Affected:
- (modified) llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h (+2)
- (modified) llvm/include/llvm/Target/GlobalISel/Combine.td (+9-1)
- (modified) llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp (+7)
- (added) llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fpneg-one-fneg.mir (+109)
- (modified) llvm/test/CodeGen/AMDGPU/fmul-to-ldexp.ll (+20-140)
- (modified) llvm/test/CodeGen/AMDGPU/rsq.f64.ll (+8-12)
- (modified) llvm/test/CodeGen/X86/popcnt.ll (+1)
``````````diff
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 90428a622b417..b6d40573812ef 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -468,6 +468,8 @@ class CombinerHelper {
/// equal to \p C.
bool matchConstantFPOp(const MachineOperand &MOP, double C);
+ void replaceNegOneWithFNeg(MachineInstr &MI) const;
+
/// @brief Checks if constant at \p ConstIdx is larger than \p MI 's bitwidth
/// @param ConstIdx Index of the constant
bool matchConstantLargerBitWidth(MachineInstr &MI, unsigned ConstIdx);
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 6bda80681432a..21063263455a9 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -493,6 +493,13 @@ def right_identity_one_fp: GICombineRule<
(apply (GIReplaceReg $dst, $x))
>;
+def right_identity_neg_one_fp: GICombineRule<
+ (defs root:$dst),
+ (match (G_FMUL $dst, $x, $y):$root,
+ [{ return Helper.matchConstantFPOp(${y}, -1.0); }]),
+ (apply [{ Helper.replaceNegOneWithFNeg(*${root}); }])
+>;
+
def right_identity_one : GICombineGroup<[right_identity_one_int, right_identity_one_fp]>;
// Fold (x op x) - > x
@@ -1263,7 +1270,8 @@ def identity_combines : GICombineGroup<[select_same_val, right_identity_zero,
trunc_buildvector_fold,
trunc_lshr_buildvector_fold,
bitcast_bitcast_fold, fptrunc_fpext_fold,
- right_identity_neg_zero_fp]>;
+ right_identity_neg_zero_fp,
+ right_identity_neg_one_fp]>;
def const_combines : GICombineGroup<[constant_fold_fp_ops, const_ptradd_to_i2p,
overlapping_and, mulo_by_2, mulo_by_0,
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 772229215e798..c725f9ce4b260 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -6651,3 +6651,10 @@ bool CombinerHelper::matchSelect(MachineInstr &MI, BuildFnTy &MatchInfo) {
return false;
}
+
+void CombinerHelper::replaceNegOneWithFNeg(MachineInstr &MI) const {
+ Register Dst = MI.getOperand(0).getReg();
+ Register Src = MI.getOperand(1).getReg();
+ Builder.buildFNeg(Dst, Src, MI.getFlags());
+ MI.eraseFromParent();
+}
\ No newline at end of file
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fpneg-one-fneg.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fpneg-one-fneg.mir
new file mode 100644
index 0000000000000..45b63fcb58361
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fpneg-one-fneg.mir
@@ -0,0 +1,109 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
+# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=amdgpu-postlegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK
+
+---
+name: test_neg_one_f16
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $sgpr0
+
+ ; CHECK-LABEL: name: test_neg_one_f16
+ ; CHECK: liveins: $sgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0
+ ; CHECK-NEXT: %x:_(s16) = G_TRUNC [[COPY]](s32)
+ ; CHECK-NEXT: %d:_(s16) = G_FNEG %x
+ ; CHECK-NEXT: %ext:_(s32) = G_ANYEXT %d(s16)
+ ; CHECK-NEXT: $sgpr0 = COPY %ext(s32)
+ %0:_(s32) = COPY $sgpr0
+ %x:_(s16) = G_TRUNC %0:_(s32)
+ %y:_(s16) = G_FCONSTANT half -1.0
+ %d:_(s16) = G_FMUL %x, %y
+ %ext:_(s32) = G_ANYEXT %d:_(s16)
+ $sgpr0 = COPY %ext
+
+...
+
+---
+name: test_neg_one_f32
+body: |
+ bb.0:
+ liveins: $sgpr0
+
+ ; CHECK-LABEL: name: test_neg_one_f32
+ ; CHECK: liveins: $sgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0
+ ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[COPY]]
+ ; CHECK-NEXT: $sgpr0 = COPY [[FNEG]](s32)
+ %0:_(s32) = COPY $sgpr0
+ %1:_(s32) = G_FCONSTANT float -1.0
+ %2:_(s32) = G_FMUL %0, %1
+ $sgpr0 = COPY %2(s32)
+
+...
+
+---
+name: test_neg_one_f64
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $sgpr0
+
+ ; CHECK-LABEL: name: test_neg_one_f64
+ ; CHECK: liveins: $sgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0
+ ; CHECK-NEXT: %x:_(s64) = G_ANYEXT [[COPY]](s32)
+ ; CHECK-NEXT: %d:_(s64) = G_FNEG %x
+ ; CHECK-NEXT: %ext:_(s32) = G_TRUNC %d(s64)
+ ; CHECK-NEXT: $sgpr0 = COPY %ext(s32)
+ %0:_(s32) = COPY $sgpr0
+ %x:_(s64) = G_ANYEXT %0:_(s32)
+ %y:_(s64) = G_FCONSTANT double -1.0
+ %d:_(s64) = G_FMUL %x, %y
+ %ext:_(s32) = G_TRUNC %d:_(s64)
+ $sgpr0 = COPY %ext
+
+...
+
+---
+name: test_neg_ten_f32
+body: |
+ bb.0:
+ liveins: $sgpr0
+
+ ; CHECK-LABEL: name: test_neg_ten_f32
+ ; CHECK: liveins: $sgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float -1.000000e+01
+ ; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[C]]
+ ; CHECK-NEXT: $sgpr0 = COPY [[FMUL]](s32)
+ %0:_(s32) = COPY $sgpr0
+ %1:_(s32) = G_FCONSTANT float -10.0
+ %2:_(s32) = G_FMUL %0, %1
+ $sgpr0 = COPY %2(s32)
+
+...
+
+---
+name: test_neg_fract_f32
+body: |
+ bb.0:
+ liveins: $sgpr0
+
+ ; CHECK-LABEL: name: test_neg_fract_f32
+ ; CHECK: liveins: $sgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float -5.000000e-01
+ ; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[C]]
+ ; CHECK-NEXT: $sgpr0 = COPY [[FMUL]](s32)
+ %0:_(s32) = COPY $sgpr0
+ %1:_(s32) = G_FCONSTANT float -0.5
+ %2:_(s32) = G_FMUL %0, %1
+ $sgpr0 = COPY %2(s32)
+
+...
diff --git a/llvm/test/CodeGen/AMDGPU/fmul-to-ldexp.ll b/llvm/test/CodeGen/AMDGPU/fmul-to-ldexp.ll
index 3e658c6f38532..711a5fff1a063 100644
--- a/llvm/test/CodeGen/AMDGPU/fmul-to-ldexp.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmul-to-ldexp.ll
@@ -218,41 +218,11 @@ define float @v_mul_neg2_f32(float %x) {
}
define float @v_mul_neg1_f32(float %x) {
-; GFX9-SDAG-LABEL: v_mul_neg1_f32:
-; GFX9-SDAG: ; %bb.0:
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
-; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-GISEL-LABEL: v_mul_neg1_f32:
-; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_mul_f32_e32 v0, -1.0, v0
-; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-SDAG-LABEL: v_mul_neg1_f32:
-; GFX10-SDAG: ; %bb.0:
-; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
-; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-GISEL-LABEL: v_mul_neg1_f32:
-; GFX10-GISEL: ; %bb.0:
-; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: v_mul_f32_e32 v0, -1.0, v0
-; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SDAG-LABEL: v_mul_neg1_f32:
-; GFX11-SDAG: ; %bb.0:
-; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
-; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-GISEL-LABEL: v_mul_neg1_f32:
-; GFX11-GISEL: ; %bb.0:
-; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: v_mul_f32_e32 v0, -1.0, v0
-; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GCN-LABEL: v_mul_neg1_f32:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
+; GCN-NEXT: s_setpc_b64 s[30:31]
%mul = fmul float %x, -1.0
ret float %mul
}
@@ -1356,41 +1326,11 @@ define double @v_mul_0_f64(double %x) {
}
define double @v_mul_neg1_f64(double %x) {
-; GFX9-SDAG-LABEL: v_mul_neg1_f64:
-; GFX9-SDAG: ; %bb.0:
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
-; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-GISEL-LABEL: v_mul_neg1_f64:
-; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], -1.0
-; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-SDAG-LABEL: v_mul_neg1_f64:
-; GFX10-SDAG: ; %bb.0:
-; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
-; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-GISEL-LABEL: v_mul_neg1_f64:
-; GFX10-GISEL: ; %bb.0:
-; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], -1.0
-; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SDAG-LABEL: v_mul_neg1_f64:
-; GFX11-SDAG: ; %bb.0:
-; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
-; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-GISEL-LABEL: v_mul_neg1_f64:
-; GFX11-GISEL: ; %bb.0:
-; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], -1.0
-; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GCN-LABEL: v_mul_neg1_f64:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
+; GCN-NEXT: s_setpc_b64 s[30:31]
%mul = fmul double %x, -1.0
ret double %mul
}
@@ -2848,41 +2788,11 @@ define half @v_mul_neg2_f16(half %x) {
}
define half @v_mul_neg1_f16(half %x) {
-; GFX9-SDAG-LABEL: v_mul_neg1_f16:
-; GFX9-SDAG: ; %bb.0:
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT: v_xor_b32_e32 v0, 0x8000, v0
-; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-GISEL-LABEL: v_mul_neg1_f16:
-; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_mul_f16_e32 v0, -1.0, v0
-; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-SDAG-LABEL: v_mul_neg1_f16:
-; GFX10-SDAG: ; %bb.0:
-; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: v_xor_b32_e32 v0, 0x8000, v0
-; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-GISEL-LABEL: v_mul_neg1_f16:
-; GFX10-GISEL: ; %bb.0:
-; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: v_mul_f16_e32 v0, -1.0, v0
-; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SDAG-LABEL: v_mul_neg1_f16:
-; GFX11-SDAG: ; %bb.0:
-; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT: v_xor_b32_e32 v0, 0x8000, v0
-; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-GISEL-LABEL: v_mul_neg1_f16:
-; GFX11-GISEL: ; %bb.0:
-; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: v_mul_f16_e32 v0, -1.0, v0
-; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GCN-LABEL: v_mul_neg1_f16:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_xor_b32_e32 v0, 0x8000, v0
+; GCN-NEXT: s_setpc_b64 s[30:31]
%mul = fmul half %x, -1.0
ret half %mul
}
@@ -7112,41 +7022,11 @@ define double @v_mul_fabs_neg2_f64(double %x) {
}
define double @v_mul_fabs_neg1_f64(double %x) {
-; GFX9-SDAG-LABEL: v_mul_fabs_neg1_f64:
-; GFX9-SDAG: ; %bb.0:
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT: v_or_b32_e32 v1, 0x80000000, v1
-; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-GISEL-LABEL: v_mul_fabs_neg1_f64:
-; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], |v[0:1]|, -1.0
-; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-SDAG-LABEL: v_mul_fabs_neg1_f64:
-; GFX10-SDAG: ; %bb.0:
-; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: v_or_b32_e32 v1, 0x80000000, v1
-; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-GISEL-LABEL: v_mul_fabs_neg1_f64:
-; GFX10-GISEL: ; %bb.0:
-; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], |v[0:1]|, -1.0
-; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SDAG-LABEL: v_mul_fabs_neg1_f64:
-; GFX11-SDAG: ; %bb.0:
-; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT: v_or_b32_e32 v1, 0x80000000, v1
-; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-GISEL-LABEL: v_mul_fabs_neg1_f64:
-; GFX11-GISEL: ; %bb.0:
-; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], |v[0:1]|, -1.0
-; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GCN-LABEL: v_mul_fabs_neg1_f64:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_or_b32_e32 v1, 0x80000000, v1
+; GCN-NEXT: s_setpc_b64 s[30:31]
%fabs.x = call double @llvm.fabs.f64(double %x)
%mul = fmul double %fabs.x, -1.0
ret double %mul
diff --git a/llvm/test/CodeGen/AMDGPU/rsq.f64.ll b/llvm/test/CodeGen/AMDGPU/rsq.f64.ll
index 90175298a99ac..bd6e1f54e636d 100644
--- a/llvm/test/CodeGen/AMDGPU/rsq.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/rsq.f64.ll
@@ -3431,9 +3431,8 @@ define double @v_neg_rsq_f64__afn(double %x) {
; SI-GISEL-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
; SI-GISEL-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
; SI-GISEL-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; SI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], -1.0
-; SI-GISEL-NEXT: v_fma_f64 v[0:1], -v[0:1], v[4:5], -1.0
-; SI-GISEL-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
+; SI-GISEL-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], -1.0
+; SI-GISEL-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], -v[2:3]
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; VI-SDAG-LABEL: v_neg_rsq_f64__afn:
@@ -3503,9 +3502,8 @@ define double @v_neg_rsq_f64__afn(double %x) {
; VI-GISEL-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
; VI-GISEL-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
; VI-GISEL-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; VI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], -1.0
-; VI-GISEL-NEXT: v_fma_f64 v[0:1], -v[0:1], v[4:5], -1.0
-; VI-GISEL-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
+; VI-GISEL-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], -1.0
+; VI-GISEL-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], -v[2:3]
; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
%sqrt = call contract afn double @llvm.sqrt.f64(double %x)
%rsq = fdiv contract afn double -1.0, %sqrt
@@ -4015,9 +4013,8 @@ define double @v_neg_rsq_f64__afn_nnan_ninf(double %x) {
; SI-GISEL-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
; SI-GISEL-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
; SI-GISEL-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; SI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], -1.0
-; SI-GISEL-NEXT: v_fma_f64 v[0:1], -v[0:1], v[4:5], -1.0
-; SI-GISEL-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
+; SI-GISEL-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], -1.0
+; SI-GISEL-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], -v[2:3]
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; VI-SDAG-LABEL: v_neg_rsq_f64__afn_nnan_ninf:
@@ -4087,9 +4084,8 @@ define double @v_neg_rsq_f64__afn_nnan_ninf(double %x) {
; VI-GISEL-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
; VI-GISEL-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
; VI-GISEL-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; VI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], -1.0
-; VI-GISEL-NEXT: v_fma_f64 v[0:1], -v[0:1], v[4:5], -1.0
-; VI-GISEL-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
+; VI-GISEL-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], -1.0
+; VI-GISEL-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], -v[2:3]
; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
%sqrt = call contract afn nnan ninf double @llvm.sqrt.f64(double %x)
%rsq = fdiv contract afn nnan ninf double -1.0, %sqrt
diff --git a/llvm/test/CodeGen/X86/popcnt.ll b/llvm/test/CodeGen/X86/popcnt.ll
index 37c7b051de7b1..77167df9834ef 100644
--- a/llvm/test/CodeGen/X86/popcnt.ll
+++ b/llvm/test/CodeGen/X86/popcnt.ll
@@ -43,6 +43,7 @@ define i8 @cnt8(i8 %x) nounwind readnone {
; X64-POPCNT-NEXT: popcntl %eax, %eax
; X64-POPCNT-NEXT: # kill: def $al killed $al killed $eax
; X64-POPCNT-NEXT: retq
+
%cnt = tail call i8 @llvm.ctpop.i8(i8 %x)
ret i8 %cnt
}
``````````
</details>
https://github.com/llvm/llvm-project/pull/80526
More information about the llvm-commits mailing list