[llvm] [AArch64] SLP can vectorize frem (PR #82488)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Feb 21 04:48:41 PST 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-analysis
@llvm/pr-subscribers-llvm-transforms
@llvm/pr-subscribers-backend-aarch64
Author: Paschalis Mpeis (paschalis-mpeis)
<details>
<summary>Changes</summary>
When vector library calls are available for frem, given its type and
vector length, the SLP vectorizer uses updated costs that amount to a
call, matching LoopVectorizer's functionality.
This allows 'superword-level' vectorization, which can be converted to
a vector lib call by later passes.
Add tests that vectorize code that contains 2x double and 4x float frem
instructions.
# Stacked PR:
- Parent PR: #<!-- -->80423
- Review commits >= TBA 982d28b6
---
Full diff: https://github.com/llvm/llvm-project/pull/82488.diff
5 Files Affected:
- (modified) llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp (+7)
- (modified) llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp (+14-3)
- (modified) llvm/test/Analysis/CostModel/AArch64/arith-fp-frem.ll (+34-34)
- (modified) llvm/test/Analysis/CostModel/AArch64/arith-fp.ll (+11-11)
- (added) llvm/test/Transforms/SLPVectorizer/AArch64/slp-frem.ll (+55)
``````````diff
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 6655931181c2d5..ce8cd629bf501d 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -2972,6 +2972,13 @@ InstructionCost AArch64TTIImpl::getArithmeticInstrCost(
return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info,
Op2Info);
+ case ISD::FREM:
+ // Pass nullptr as fmod/fmodf calls are emitted by the backend even when
+ // those functions are not delcared in the module.
+ if (!Ty->isVectorTy())
+ return getCallInstrCost(/*Function*/ nullptr, Ty, {Ty, Ty}, CostKind);
+ return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info,
+ Op2Info);
}
}
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 4e334748c95934..effe52fe2c4e31 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -8362,9 +8362,20 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
unsigned OpIdx = isa<UnaryOperator>(VL0) ? 0 : 1;
TTI::OperandValueInfo Op1Info = getOperandInfo(E->getOperand(0));
TTI::OperandValueInfo Op2Info = getOperandInfo(E->getOperand(OpIdx));
- return TTI->getArithmeticInstrCost(ShuffleOrOp, VecTy, CostKind, Op1Info,
- Op2Info) +
- CommonCost;
+ auto VecCost = TTI->getArithmeticInstrCost(ShuffleOrOp, VecTy, CostKind,
+ Op1Info, Op2Info);
+ // Some targets can replace frem with vector library calls.
+ if (ShuffleOrOp == Instruction::FRem) {
+ LibFunc Func;
+ if (TLI->getLibFunc(ShuffleOrOp, ScalarTy, Func) &&
+ TLI->isFunctionVectorizable(TLI->getName(Func),
+ VecTy->getElementCount())) {
+ auto VecCallCost = TTI->getCallInstrCost(
+ nullptr, VecTy, {ScalarTy, ScalarTy}, CostKind);
+ VecCost = std::min(VecCost, VecCallCost);
+ }
+ }
+ return VecCost + CommonCost;
};
return GetCostDiff(GetScalarCost, GetVectorCost);
}
diff --git a/llvm/test/Analysis/CostModel/AArch64/arith-fp-frem.ll b/llvm/test/Analysis/CostModel/AArch64/arith-fp-frem.ll
index 20e0ef7ea34281..63149adfa21587 100644
--- a/llvm/test/Analysis/CostModel/AArch64/arith-fp-frem.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/arith-fp-frem.ll
@@ -22,44 +22,44 @@ target triple = "aarch64-unknown-linux-gnu"
define void @frem_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
; NEON-NO-VECLIB-LABEL: 'frem_f64'
-; NEON-NO-VECLIB: LV: Found an estimated cost of 2 for VF 1 For instruction: %res = frem double %in, %in
-; NEON-NO-VECLIB: LV: Found an estimated cost of 8 for VF 2 For instruction: %res = frem double %in, %in
+; NEON-NO-VECLIB: LV: Found an estimated cost of 10 for VF 1 For instruction: %res = frem double %in, %in
+; NEON-NO-VECLIB: LV: Found an estimated cost of 24 for VF 2 For instruction: %res = frem double %in, %in
;
; SVE-NO-VECLIB-LABEL: 'frem_f64'
-; SVE-NO-VECLIB: LV: Found an estimated cost of 2 for VF 1 For instruction: %res = frem double %in, %in
-; SVE-NO-VECLIB: LV: Found an estimated cost of 8 for VF 2 For instruction: %res = frem double %in, %in
+; SVE-NO-VECLIB: LV: Found an estimated cost of 10 for VF 1 For instruction: %res = frem double %in, %in
+; SVE-NO-VECLIB: LV: Found an estimated cost of 24 for VF 2 For instruction: %res = frem double %in, %in
; SVE-NO-VECLIB: LV: Found an estimated cost of Invalid for VF vscale x 1 For instruction: %res = frem double %in, %in
; SVE-NO-VECLIB: LV: Found an estimated cost of Invalid for VF vscale x 2 For instruction: %res = frem double %in, %in
;
; NEON-ARMPL-LABEL: 'frem_f64'
-; NEON-ARMPL: LV: Found an estimated cost of 2 for VF 1 For instruction: %res = frem double %in, %in
-; NEON-ARMPL: LV: Found an estimated cost of 8 for VF 2 For instruction: %res = frem double %in, %in
+; NEON-ARMPL: LV: Found an estimated cost of 10 for VF 1 For instruction: %res = frem double %in, %in
+; NEON-ARMPL: LV: Found an estimated cost of 10 for VF 2 For instruction: %res = frem double %in, %in
;
; NEON-SLEEF-LABEL: 'frem_f64'
-; NEON-SLEEF: LV: Found an estimated cost of 2 for VF 1 For instruction: %res = frem double %in, %in
-; NEON-SLEEF: LV: Found an estimated cost of 8 for VF 2 For instruction: %res = frem double %in, %in
+; NEON-SLEEF: LV: Found an estimated cost of 10 for VF 1 For instruction: %res = frem double %in, %in
+; NEON-SLEEF: LV: Found an estimated cost of 10 for VF 2 For instruction: %res = frem double %in, %in
;
; SVE-ARMPL-LABEL: 'frem_f64'
-; SVE-ARMPL: LV: Found an estimated cost of 2 for VF 1 For instruction: %res = frem double %in, %in
-; SVE-ARMPL: LV: Found an estimated cost of 8 for VF 2 For instruction: %res = frem double %in, %in
+; SVE-ARMPL: LV: Found an estimated cost of 10 for VF 1 For instruction: %res = frem double %in, %in
+; SVE-ARMPL: LV: Found an estimated cost of 10 for VF 2 For instruction: %res = frem double %in, %in
; SVE-ARMPL: LV: Found an estimated cost of Invalid for VF vscale x 1 For instruction: %res = frem double %in, %in
; SVE-ARMPL: LV: Found an estimated cost of 10 for VF vscale x 2 For instruction: %res = frem double %in, %in
;
; SVE-SLEEF-LABEL: 'frem_f64'
-; SVE-SLEEF: LV: Found an estimated cost of 2 for VF 1 For instruction: %res = frem double %in, %in
-; SVE-SLEEF: LV: Found an estimated cost of 8 for VF 2 For instruction: %res = frem double %in, %in
+; SVE-SLEEF: LV: Found an estimated cost of 10 for VF 1 For instruction: %res = frem double %in, %in
+; SVE-SLEEF: LV: Found an estimated cost of 10 for VF 2 For instruction: %res = frem double %in, %in
; SVE-SLEEF: LV: Found an estimated cost of Invalid for VF vscale x 1 For instruction: %res = frem double %in, %in
; SVE-SLEEF: LV: Found an estimated cost of 10 for VF vscale x 2 For instruction: %res = frem double %in, %in
;
; SVE-ARMPL-TAILFOLD-LABEL: 'frem_f64'
-; SVE-ARMPL-TAILFOLD: LV: Found an estimated cost of 2 for VF 1 For instruction: %res = frem double %in, %in
-; SVE-ARMPL-TAILFOLD: LV: Found an estimated cost of 8 for VF 2 For instruction: %res = frem double %in, %in
+; SVE-ARMPL-TAILFOLD: LV: Found an estimated cost of 10 for VF 1 For instruction: %res = frem double %in, %in
+; SVE-ARMPL-TAILFOLD: LV: Found an estimated cost of 10 for VF 2 For instruction: %res = frem double %in, %in
; SVE-ARMPL-TAILFOLD: LV: Found an estimated cost of Invalid for VF vscale x 1 For instruction: %res = frem double %in, %in
; SVE-ARMPL-TAILFOLD: LV: Found an estimated cost of 10 for VF vscale x 2 For instruction: %res = frem double %in, %in
;
; SVE-SLEEF-TAILFOLD-LABEL: 'frem_f64'
-; SVE-SLEEF-TAILFOLD: LV: Found an estimated cost of 2 for VF 1 For instruction: %res = frem double %in, %in
-; SVE-SLEEF-TAILFOLD: LV: Found an estimated cost of 8 for VF 2 For instruction: %res = frem double %in, %in
+; SVE-SLEEF-TAILFOLD: LV: Found an estimated cost of 10 for VF 1 For instruction: %res = frem double %in, %in
+; SVE-SLEEF-TAILFOLD: LV: Found an estimated cost of 10 for VF 2 For instruction: %res = frem double %in, %in
; SVE-SLEEF-TAILFOLD: LV: Found an estimated cost of Invalid for VF vscale x 1 For instruction: %res = frem double %in, %in
; SVE-SLEEF-TAILFOLD: LV: Found an estimated cost of 10 for VF vscale x 2 For instruction: %res = frem double %in, %in
;
@@ -83,55 +83,55 @@ define void @frem_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
define void @frem_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
; NEON-NO-VECLIB-LABEL: 'frem_f32'
-; NEON-NO-VECLIB: LV: Found an estimated cost of 2 for VF 1 For instruction: %res = frem float %in, %in
-; NEON-NO-VECLIB: LV: Found an estimated cost of 8 for VF 2 For instruction: %res = frem float %in, %in
-; NEON-NO-VECLIB: LV: Found an estimated cost of 20 for VF 4 For instruction: %res = frem float %in, %in
+; NEON-NO-VECLIB: LV: Found an estimated cost of 10 for VF 1 For instruction: %res = frem float %in, %in
+; NEON-NO-VECLIB: LV: Found an estimated cost of 24 for VF 2 For instruction: %res = frem float %in, %in
+; NEON-NO-VECLIB: LV: Found an estimated cost of 52 for VF 4 For instruction: %res = frem float %in, %in
;
; SVE-NO-VECLIB-LABEL: 'frem_f32'
-; SVE-NO-VECLIB: LV: Found an estimated cost of 2 for VF 1 For instruction: %res = frem float %in, %in
-; SVE-NO-VECLIB: LV: Found an estimated cost of 8 for VF 2 For instruction: %res = frem float %in, %in
-; SVE-NO-VECLIB: LV: Found an estimated cost of 20 for VF 4 For instruction: %res = frem float %in, %in
+; SVE-NO-VECLIB: LV: Found an estimated cost of 10 for VF 1 For instruction: %res = frem float %in, %in
+; SVE-NO-VECLIB: LV: Found an estimated cost of 24 for VF 2 For instruction: %res = frem float %in, %in
+; SVE-NO-VECLIB: LV: Found an estimated cost of 52 for VF 4 For instruction: %res = frem float %in, %in
; SVE-NO-VECLIB: LV: Found an estimated cost of Invalid for VF vscale x 1 For instruction: %res = frem float %in, %in
; SVE-NO-VECLIB: LV: Found an estimated cost of Invalid for VF vscale x 2 For instruction: %res = frem float %in, %in
; SVE-NO-VECLIB: LV: Found an estimated cost of Invalid for VF vscale x 4 For instruction: %res = frem float %in, %in
;
; NEON-ARMPL-LABEL: 'frem_f32'
-; NEON-ARMPL: LV: Found an estimated cost of 2 for VF 1 For instruction: %res = frem float %in, %in
-; NEON-ARMPL: LV: Found an estimated cost of 8 for VF 2 For instruction: %res = frem float %in, %in
+; NEON-ARMPL: LV: Found an estimated cost of 10 for VF 1 For instruction: %res = frem float %in, %in
+; NEON-ARMPL: LV: Found an estimated cost of 24 for VF 2 For instruction: %res = frem float %in, %in
; NEON-ARMPL: LV: Found an estimated cost of 10 for VF 4 For instruction: %res = frem float %in, %in
;
; NEON-SLEEF-LABEL: 'frem_f32'
-; NEON-SLEEF: LV: Found an estimated cost of 2 for VF 1 For instruction: %res = frem float %in, %in
-; NEON-SLEEF: LV: Found an estimated cost of 8 for VF 2 For instruction: %res = frem float %in, %in
+; NEON-SLEEF: LV: Found an estimated cost of 10 for VF 1 For instruction: %res = frem float %in, %in
+; NEON-SLEEF: LV: Found an estimated cost of 24 for VF 2 For instruction: %res = frem float %in, %in
; NEON-SLEEF: LV: Found an estimated cost of 10 for VF 4 For instruction: %res = frem float %in, %in
;
; SVE-ARMPL-LABEL: 'frem_f32'
-; SVE-ARMPL: LV: Found an estimated cost of 2 for VF 1 For instruction: %res = frem float %in, %in
-; SVE-ARMPL: LV: Found an estimated cost of 8 for VF 2 For instruction: %res = frem float %in, %in
+; SVE-ARMPL: LV: Found an estimated cost of 10 for VF 1 For instruction: %res = frem float %in, %in
+; SVE-ARMPL: LV: Found an estimated cost of 24 for VF 2 For instruction: %res = frem float %in, %in
; SVE-ARMPL: LV: Found an estimated cost of 10 for VF 4 For instruction: %res = frem float %in, %in
; SVE-ARMPL: LV: Found an estimated cost of Invalid for VF vscale x 1 For instruction: %res = frem float %in, %in
; SVE-ARMPL: LV: Found an estimated cost of Invalid for VF vscale x 2 For instruction: %res = frem float %in, %in
; SVE-ARMPL: LV: Found an estimated cost of 10 for VF vscale x 4 For instruction: %res = frem float %in, %in
;
; SVE-SLEEF-LABEL: 'frem_f32'
-; SVE-SLEEF: LV: Found an estimated cost of 2 for VF 1 For instruction: %res = frem float %in, %in
-; SVE-SLEEF: LV: Found an estimated cost of 8 for VF 2 For instruction: %res = frem float %in, %in
+; SVE-SLEEF: LV: Found an estimated cost of 10 for VF 1 For instruction: %res = frem float %in, %in
+; SVE-SLEEF: LV: Found an estimated cost of 24 for VF 2 For instruction: %res = frem float %in, %in
; SVE-SLEEF: LV: Found an estimated cost of 10 for VF 4 For instruction: %res = frem float %in, %in
; SVE-SLEEF: LV: Found an estimated cost of Invalid for VF vscale x 1 For instruction: %res = frem float %in, %in
; SVE-SLEEF: LV: Found an estimated cost of Invalid for VF vscale x 2 For instruction: %res = frem float %in, %in
; SVE-SLEEF: LV: Found an estimated cost of 10 for VF vscale x 4 For instruction: %res = frem float %in, %in
;
; SVE-ARMPL-TAILFOLD-LABEL: 'frem_f32'
-; SVE-ARMPL-TAILFOLD: LV: Found an estimated cost of 2 for VF 1 For instruction: %res = frem float %in, %in
-; SVE-ARMPL-TAILFOLD: LV: Found an estimated cost of 8 for VF 2 For instruction: %res = frem float %in, %in
+; SVE-ARMPL-TAILFOLD: LV: Found an estimated cost of 10 for VF 1 For instruction: %res = frem float %in, %in
+; SVE-ARMPL-TAILFOLD: LV: Found an estimated cost of 24 for VF 2 For instruction: %res = frem float %in, %in
; SVE-ARMPL-TAILFOLD: LV: Found an estimated cost of 10 for VF 4 For instruction: %res = frem float %in, %in
; SVE-ARMPL-TAILFOLD: LV: Found an estimated cost of Invalid for VF vscale x 1 For instruction: %res = frem float %in, %in
; SVE-ARMPL-TAILFOLD: LV: Found an estimated cost of Invalid for VF vscale x 2 For instruction: %res = frem float %in, %in
; SVE-ARMPL-TAILFOLD: LV: Found an estimated cost of 10 for VF vscale x 4 For instruction: %res = frem float %in, %in
;
; SVE-SLEEF-TAILFOLD-LABEL: 'frem_f32'
-; SVE-SLEEF-TAILFOLD: LV: Found an estimated cost of 2 for VF 1 For instruction: %res = frem float %in, %in
-; SVE-SLEEF-TAILFOLD: LV: Found an estimated cost of 8 for VF 2 For instruction: %res = frem float %in, %in
+; SVE-SLEEF-TAILFOLD: LV: Found an estimated cost of 10 for VF 1 For instruction: %res = frem float %in, %in
+; SVE-SLEEF-TAILFOLD: LV: Found an estimated cost of 24 for VF 2 For instruction: %res = frem float %in, %in
; SVE-SLEEF-TAILFOLD: LV: Found an estimated cost of 10 for VF 4 For instruction: %res = frem float %in, %in
; SVE-SLEEF-TAILFOLD: LV: Found an estimated cost of Invalid for VF vscale x 1 For instruction: %res = frem float %in, %in
; SVE-SLEEF-TAILFOLD: LV: Found an estimated cost of Invalid for VF vscale x 2 For instruction: %res = frem float %in, %in
diff --git a/llvm/test/Analysis/CostModel/AArch64/arith-fp.ll b/llvm/test/Analysis/CostModel/AArch64/arith-fp.ll
index c352892354fc24..497ade4f2f613c 100644
--- a/llvm/test/Analysis/CostModel/AArch64/arith-fp.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/arith-fp.ll
@@ -197,17 +197,17 @@ define i32 @fdiv(i32 %arg) {
define i32 @frem(i32 %arg) {
; CHECK-LABEL: 'frem'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F16 = frem half undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4F16 = frem <4 x half> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V8F16 = frem <8 x half> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V16F16 = frem <16 x half> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = frem float undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2F32 = frem <2 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4F32 = frem <4 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8F32 = frem <8 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = frem double undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2F64 = frem <2 x double> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4F64 = frem <4 x double> undef, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F16 = frem half undef, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V4F16 = frem <4 x half> undef, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V8F16 = frem <8 x half> undef, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 216 for instruction: %V16F16 = frem <16 x half> undef, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F32 = frem float undef, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V2F32 = frem <2 x float> undef, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V4F32 = frem <4 x float> undef, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %V8F32 = frem <8 x float> undef, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F64 = frem double undef, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V2F64 = frem <2 x double> undef, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V4F64 = frem <4 x double> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
%F16 = frem half undef, undef
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/slp-frem.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/slp-frem.ll
new file mode 100644
index 00000000000000..a38f4bdc4640e9
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/slp-frem.ll
@@ -0,0 +1,55 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt < %s -S -mtriple=aarch64 -vector-library=ArmPL -passes=slp-vectorizer | FileCheck %s
+
+ at a = common global ptr null, align 8
+
+define void @frem_v2double() {
+; CHECK-LABEL: define void @frem_v2double() {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr @a, align 8
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr @a, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = frem <2 x double> [[TMP0]], [[TMP1]]
+; CHECK-NEXT: store <2 x double> [[TMP2]], ptr @a, align 8
+; CHECK-NEXT: ret void
+;
+entry:
+ %a0 = load double, ptr getelementptr inbounds (double, ptr @a, i64 0), align 8
+ %a1 = load double, ptr getelementptr inbounds (double, ptr @a, i64 1), align 8
+ %b0 = load double, ptr getelementptr inbounds (double, ptr @a, i64 0), align 8
+ %b1 = load double, ptr getelementptr inbounds (double, ptr @a, i64 1), align 8
+ %r0 = frem double %a0, %b0
+ %r1 = frem double %a1, %b1
+ store double %r0, ptr getelementptr inbounds (double, ptr @a, i64 0), align 8
+ store double %r1, ptr getelementptr inbounds (double, ptr @a, i64 1), align 8
+ ret void
+}
+
+define void @frem_v4float() {
+; CHECK-LABEL: define void @frem_v4float() {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr @a, align 8
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr @a, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = frem <4 x float> [[TMP0]], [[TMP1]]
+; CHECK-NEXT: store <4 x float> [[TMP2]], ptr @a, align 8
+; CHECK-NEXT: ret void
+;
+entry:
+ %a0 = load float, ptr getelementptr inbounds (float, ptr @a, i64 0), align 8
+ %a1 = load float, ptr getelementptr inbounds (float, ptr @a, i64 1), align 8
+ %a2 = load float, ptr getelementptr inbounds (float, ptr @a, i64 2), align 8
+ %a3 = load float, ptr getelementptr inbounds (float, ptr @a, i64 3), align 8
+ %b0 = load float, ptr getelementptr inbounds (float, ptr @a, i64 0), align 8
+ %b1 = load float, ptr getelementptr inbounds (float, ptr @a, i64 1), align 8
+ %b2 = load float, ptr getelementptr inbounds (float, ptr @a, i64 2), align 8
+ %b3 = load float, ptr getelementptr inbounds (float, ptr @a, i64 3), align 8
+ %r0 = frem float %a0, %b0
+ %r1 = frem float %a1, %b1
+ %r2 = frem float %a2, %b2
+ %r3 = frem float %a3, %b3
+ store float %r0, ptr getelementptr inbounds (float, ptr @a, i64 0), align 8
+ store float %r1, ptr getelementptr inbounds (float, ptr @a, i64 1), align 8
+ store float %r2, ptr getelementptr inbounds (float, ptr @a, i64 2), align 8
+ store float %r3, ptr getelementptr inbounds (float, ptr @a, i64 3), align 8
+ ret void
+}
+
``````````
</details>
https://github.com/llvm/llvm-project/pull/82488
More information about the llvm-commits
mailing list