[PATCH] D125234: [AArch64] Remove redundant f{min,max}nm intrinsics.

Mon May 9 07:31:45 PDT 2022

fhahn created this revision.
fhahn added reviewers: aemerson, t.p.northover, dmgreen.
Herald added subscribers: hiraditya, kristof.beyls.
Herald added a project: All.
fhahn requested review of this revision.
Herald added a project: LLVM.

The patch extends AArch64TTIImpl::instCombineIntrinsic to fold calls to
llvm.aarch64.neon.f{min,max}nm with identical operands, i.e.
f{min,max}nm(a, a) -> a.

This helps simplify code written using the ACLE; for an example,
see https://godbolt.org/z/jYxsoc89c


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D125234

Files:
  llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
  llvm/test/Transforms/InstCombine/AArch64/neon-min-max-intrinsics.ll


Index: llvm/test/Transforms/InstCombine/AArch64/neon-min-max-intrinsics.ll
===================================================================

--- llvm/test/Transforms/InstCombine/AArch64/neon-min-max-intrinsics.ll
+++ llvm/test/Transforms/InstCombine/AArch64/neon-min-max-intrinsics.ll
@@ -7,8 +7,7 @@
 
 define <4 x half> @fmaxnm_v4f16_same_args(<4 x half> %a) {
 ; CHECK-LABEL: @fmaxnm_v4f16_same_args(
-; CHECK-NEXT:    [[R:%.*]] = call <4 x half> @llvm.aarch64.neon.fmaxnm.v4f16(<4 x half> [[A:%.*]], <4 x half> [[A]])
-; CHECK-NEXT:    ret <4 x half> [[R]]
+; CHECK-NEXT:    ret <4 x half> [[A:%.*]]
 ;
   %r = call <4 x half> @llvm.aarch64.neon.fmaxnm.v4f16(<4 x half> %a, <4 x half> %a)
   ret <4 x half> %r
@@ -25,8 +24,7 @@
 
 define <4 x float> @fmaxnm_v4f32_same_args(<4 x float> %a) {
 ; CHECK-LABEL: @fmaxnm_v4f32_same_args(
-; CHECK-NEXT:    [[R:%.*]] = call <4 x float> @llvm.aarch64.neon.fmaxnm.v4f32(<4 x float> [[A:%.*]], <4 x float> [[A]])
-; CHECK-NEXT:    ret <4 x float> [[R]]
+; CHECK-NEXT:    ret <4 x float> [[A:%.*]]
 ;
   %r = call <4 x float> @llvm.aarch64.neon.fmaxnm.v4f32(<4 x float> %a, <4 x float> %a)
   ret <4 x float> %r
@@ -43,8 +41,7 @@
 
 define <2 x double> @fmaxnm_v2f64_same_args(<2 x double> %a) {
 ; CHECK-LABEL: @fmaxnm_v2f64_same_args(
-; CHECK-NEXT:    [[R:%.*]] = call <2 x double> @llvm.aarch64.neon.fmaxnm.v2f64(<2 x double> [[A:%.*]], <2 x double> [[A]])
-; CHECK-NEXT:    ret <2 x double> [[R]]
+; CHECK-NEXT:    ret <2 x double> [[A:%.*]]
 ;
   %r = call <2 x double> @llvm.aarch64.neon.fmaxnm.v2f64(<2 x double> %a, <2 x double> %a)
   ret <2 x double> %r
@@ -65,8 +62,7 @@
 
 define <4 x half> @fminnm_v4f16_same_args(<4 x half> %a) {
 ; CHECK-LABEL: @fminnm_v4f16_same_args(
-; CHECK-NEXT:    [[R:%.*]] = call <4 x half> @llvm.aarch64.neon.fminnm.v4f16(<4 x half> [[A:%.*]], <4 x half> [[A]])
-; CHECK-NEXT:    ret <4 x half> [[R]]
+; CHECK-NEXT:    ret <4 x half> [[A:%.*]]
 ;
   %r = call <4 x half> @llvm.aarch64.neon.fminnm.v4f16(<4 x half> %a, <4 x half> %a)
   ret <4 x half> %r
@@ -83,8 +79,7 @@
 
 define <4 x float> @fminnm_v4f32_same_args(<4 x float> %a) {
 ; CHECK-LABEL: @fminnm_v4f32_same_args(
-; CHECK-NEXT:    [[R:%.*]] = call <4 x float> @llvm.aarch64.neon.fminnm.v4f32(<4 x float> [[A:%.*]], <4 x float> [[A]])
-; CHECK-NEXT:    ret <4 x float> [[R]]
+; CHECK-NEXT:    ret <4 x float> [[A:%.*]]
 ;
   %r = call <4 x float> @llvm.aarch64.neon.fminnm.v4f32(<4 x float> %a, <4 x float> %a)
   ret <4 x float> %r
@@ -101,8 +96,7 @@
 
 define <2 x double> @fminnm_v2f64_same_args(<2 x double> %a) {
 ; CHECK-LABEL: @fminnm_v2f64_same_args(
-; CHECK-NEXT:    [[R:%.*]] = call <2 x double> @llvm.aarch64.neon.fminnm.v2f64(<2 x double> [[A:%.*]], <2 x double> [[A]])
-; CHECK-NEXT:    ret <2 x double> [[R]]
+; CHECK-NEXT:    ret <2 x double> [[A:%.*]]
 ;
   %r = call <2 x double> @llvm.aarch64.neon.fminnm.v2f64(<2 x double> %a, <2 x double> %a)
   ret <2 x double> %r
Index: llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -1219,6 +1219,16 @@
   return None;
 }
 
+static Optional<Instruction *> instCombineMaxMinNM(InstCombiner &IC,
+                                                   IntrinsicInst &II) {
+  Value *A = II.getArgOperand(0);
+  Value *B = II.getArgOperand(1);
+  if (A == B) // Same operand twice: f{min,max}nm(a, a) simplifies to a.
+    return IC.replaceInstUsesWith(II, A);
+
+  return None; // Operands differ; leave the intrinsic call untouched.
+}
+
 Optional<Instruction *>
 AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
                                      IntrinsicInst &II) const {
@@ -1226,6 +1236,9 @@
   switch (IID) {
   default:
     break;
+  case Intrinsic::aarch64_neon_fmaxnm:
+  case Intrinsic::aarch64_neon_fminnm:
+    return instCombineMaxMinNM(IC, II);
   case Intrinsic::aarch64_sve_convert_from_svbool:
     return instCombineConvertFromSVBool(IC, II);
   case Intrinsic::aarch64_sve_dup:


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D125234.428081.patch
Type: text/x-patch
Size: 3993 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20220509/64d7bf1f/attachment.bin>