[llvm] [LLVM][InstCombine] Enable constant folding for SVE sdiv & udiv intrinsics. (PR #137966)
Paul Walker via llvm-commits
llvm-commits at lists.llvm.org
Wed Apr 30 07:03:30 PDT 2025
https://github.com/paulwalker-arm created https://github.com/llvm/llvm-project/pull/137966
(No pull request description was provided.)
From 734014ded1b986ae0b4a6b3d4a2724897af46d39 Mon Sep 17 00:00:00 2001
From: Paul Walker <paul.walker at arm.com>
Date: Mon, 28 Apr 2025 17:42:19 +0100
Subject: [PATCH] [LLVM][InstCombine] Enable constant folding for SVE sdiv & udiv intrinsics
---
.../Target/AArch64/AArch64TargetTransformInfo.cpp | 12 ++++++++++++
.../AArch64/sve-intrinsic-comb-no-active-lanes.ll | 6 ++----
.../AArch64/sve-intrinsic-simplify-binop.ll | 10 ++++------
.../AArch64/sve-intrinsic-simplify-to-u-form.ll | 4 ++--
4 files changed, 20 insertions(+), 12 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 7b1d203560a27..634ea544a41e5 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -1309,6 +1309,9 @@ static SVEIntrinsicInfo constructSVEIntrinsicInfo(IntrinsicInst &II) {
.setMatchingIROpcode(Instruction::Mul);
case Intrinsic::aarch64_sve_sabd:
return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_sabd_u);
+ case Intrinsic::aarch64_sve_sdiv:
+ return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_sdiv_u)
+ .setMatchingIROpcode(Instruction::SDiv);
case Intrinsic::aarch64_sve_smax:
return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_smax_u);
case Intrinsic::aarch64_sve_smin:
@@ -1320,6 +1323,9 @@ static SVEIntrinsicInfo constructSVEIntrinsicInfo(IntrinsicInst &II) {
.setMatchingIROpcode(Instruction::Sub);
case Intrinsic::aarch64_sve_uabd:
return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_uabd_u);
+ case Intrinsic::aarch64_sve_udiv:
+ return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_udiv_u)
+ .setMatchingIROpcode(Instruction::UDiv);
case Intrinsic::aarch64_sve_umax:
return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_umax_u);
case Intrinsic::aarch64_sve_umin:
@@ -1387,9 +1393,15 @@ static SVEIntrinsicInfo constructSVEIntrinsicInfo(IntrinsicInst &II) {
case Intrinsic::aarch64_sve_orr_u:
return SVEIntrinsicInfo::defaultUndefOp().setMatchingIROpcode(
Instruction::Or);
+ case Intrinsic::aarch64_sve_sdiv_u:
+ return SVEIntrinsicInfo::defaultUndefOp().setMatchingIROpcode(
+ Instruction::SDiv);
case Intrinsic::aarch64_sve_sub_u:
return SVEIntrinsicInfo::defaultUndefOp().setMatchingIROpcode(
Instruction::Sub);
+ case Intrinsic::aarch64_sve_udiv_u:
+ return SVEIntrinsicInfo::defaultUndefOp().setMatchingIROpcode(
+ Instruction::UDiv);
case Intrinsic::aarch64_sve_addqv:
case Intrinsic::aarch64_sve_and_z:
diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-no-active-lanes.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-no-active-lanes.ll
index 1b5c6a2d4c3fd..ddcaeaf44592e 100644
--- a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-no-active-lanes.ll
+++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-no-active-lanes.ll
@@ -709,8 +709,7 @@ declare <vscale x 4 x i32> @llvm.aarch64.sve.sdiv.nxv4i32(<vscale x 4 x i1>, <vs
define <vscale x 4 x i32> @simplify_sdiv_intrinsic(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
; CHECK-LABEL: define <vscale x 4 x i32> @simplify_sdiv_intrinsic
; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[R:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sdiv.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
-; CHECK-NEXT: ret <vscale x 4 x i32> [[R]]
+; CHECK-NEXT: ret <vscale x 4 x i32> [[A]]
;
%r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sdiv.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
ret <vscale x 4 x i32> %r
@@ -989,8 +988,7 @@ declare <vscale x 4 x i32> @llvm.aarch64.sve.udiv.nxv4i32(<vscale x 4 x i1>, <vs
define <vscale x 4 x i32> @simplify_udiv_intrinsic(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
; CHECK-LABEL: define <vscale x 4 x i32> @simplify_udiv_intrinsic
; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[R:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.udiv.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
-; CHECK-NEXT: ret <vscale x 4 x i32> [[R]]
+; CHECK-NEXT: ret <vscale x 4 x i32> [[A]]
;
%r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.udiv.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
ret <vscale x 4 x i32> %r
diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-simplify-binop.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-simplify-binop.ll
index 3aea04b702f4d..a8c9b2c90912e 100644
--- a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-simplify-binop.ll
+++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-simplify-binop.ll
@@ -312,7 +312,7 @@ define <vscale x 4 x i32> @constant_orr_u(<vscale x 4 x i1> %pg) #0 {
define <vscale x 4 x i32> @constant_sdiv(<vscale x 4 x i1> %pg) #0 {
; CHECK-LABEL: define <vscale x 4 x i32> @constant_sdiv(
; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[R:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.sdiv.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> splat (i32 -7), <vscale x 4 x i32> splat (i32 3))
+; CHECK-NEXT: [[R:%.*]] = select <vscale x 4 x i1> [[PG]], <vscale x 4 x i32> splat (i32 -2), <vscale x 4 x i32> splat (i32 -7)
; CHECK-NEXT: ret <vscale x 4 x i32> [[R]]
;
%r = call <vscale x 4 x i32> @llvm.aarch64.sve.sdiv.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> splat (i32 -7), <vscale x 4 x i32> splat (i32 3))
@@ -344,8 +344,7 @@ define <vscale x 4 x i32> @constant_sdiv_with_overflow(<vscale x 4 x i1> %pg) #0
define <vscale x 4 x i32> @constant_sdiv_u(<vscale x 4 x i1> %pg) #0 {
; CHECK-LABEL: define <vscale x 4 x i32> @constant_sdiv_u(
; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[R:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.sdiv.u.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> splat (i32 -7), <vscale x 4 x i32> splat (i32 -3))
-; CHECK-NEXT: ret <vscale x 4 x i32> [[R]]
+; CHECK-NEXT: ret <vscale x 4 x i32> splat (i32 2)
;
%r = call <vscale x 4 x i32> @llvm.aarch64.sve.sdiv.u.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> splat (i32 -7), <vscale x 4 x i32> splat (i32 -3))
ret <vscale x 4 x i32> %r
@@ -437,7 +436,7 @@ define <vscale x 4 x i32> @constant_subr(<vscale x 4 x i1> %pg) #0 {
define <vscale x 4 x i32> @constant_udiv(<vscale x 4 x i1> %pg) #0 {
; CHECK-LABEL: define <vscale x 4 x i32> @constant_udiv(
; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[R:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.udiv.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> splat (i32 7), <vscale x 4 x i32> splat (i32 3))
+; CHECK-NEXT: [[R:%.*]] = select <vscale x 4 x i1> [[PG]], <vscale x 4 x i32> splat (i32 2), <vscale x 4 x i32> splat (i32 7)
; CHECK-NEXT: ret <vscale x 4 x i32> [[R]]
;
%r = call <vscale x 4 x i32> @llvm.aarch64.sve.udiv.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> splat (i32 7), <vscale x 4 x i32> splat (i32 3))
@@ -458,8 +457,7 @@ define <vscale x 4 x i32> @constant_udiv_by_zero(<vscale x 4 x i1> %pg) #0 {
define <vscale x 4 x i32> @constant_udiv_u(<vscale x 4 x i1> %pg) #0 {
; CHECK-LABEL: define <vscale x 4 x i32> @constant_udiv_u(
; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[R:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.udiv.u.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> splat (i32 9), <vscale x 4 x i32> splat (i32 3))
-; CHECK-NEXT: ret <vscale x 4 x i32> [[R]]
+; CHECK-NEXT: ret <vscale x 4 x i32> splat (i32 3)
;
%r = call <vscale x 4 x i32> @llvm.aarch64.sve.udiv.u.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> splat (i32 9), <vscale x 4 x i32> splat (i32 3))
ret <vscale x 4 x i32> %r
diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-simplify-to-u-form.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-simplify-to-u-form.ll
index 59f5e56cfd38a..8072b3f8f5394 100644
--- a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-simplify-to-u-form.ll
+++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-simplify-to-u-form.ll
@@ -317,7 +317,7 @@ declare <vscale x 4 x i32> @llvm.aarch64.sve.sdiv.nxv4i32(<vscale x 4 x i1>, <vs
define <vscale x 4 x i32> @replace_sdiv_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
; CHECK-LABEL: define <vscale x 4 x i32> @replace_sdiv_intrinsic_i32
; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[R:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sdiv.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT: [[R:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sdiv.u.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
; CHECK-NEXT: ret <vscale x 4 x i32> [[R]]
;
%r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sdiv.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
@@ -394,7 +394,7 @@ declare <vscale x 4 x i32> @llvm.aarch64.sve.udiv.nxv4i32(<vscale x 4 x i1>, <vs
define <vscale x 4 x i32> @replace_udiv_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
; CHECK-LABEL: define <vscale x 4 x i32> @replace_udiv_intrinsic_i32
; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[R:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.udiv.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT: [[R:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.udiv.u.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
; CHECK-NEXT: ret <vscale x 4 x i32> [[R]]
;
%r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.udiv.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
More information about the llvm-commits mailing list