[llvm] r356722 - [AArch64] Split the neon.addp intrinsic into integer and fp variants.
Amara Emerson via llvm-commits
llvm-commits at lists.llvm.org
Thu Mar 21 15:31:37 PDT 2019
Author: aemerson
Date: Thu Mar 21 15:31:37 2019
New Revision: 356722
URL: http://llvm.org/viewvc/llvm-project?rev=356722&view=rev
Log:
[AArch64] Split the neon.addp intrinsic into integer and fp variants.
This is the result of discussions on the list about how to deal with intrinsics
which require codegen to disambiguate them via only the integer/fp overloads.
It causes problems for GlobalISel as some of that information is lost during
translation, while with other operations like IR instructions the information is
encoded into the instruction opcode.
This patch changes clang to emit the new faddp intrinsic if the vector operands
to the builtin have FP element types. LLVM IR AutoUpgrade has been taught to
upgrade existing calls to aarch64.neon.addp with fp vector arguments, and
we remove the workarounds introduced for GlobalISel in r355865.
This is a more permanent solution to PR40968.
Differential Revision: https://reviews.llvm.org/D59655
Added:
llvm/trunk/test/CodeGen/AArch64/autoupgrade-aarch64-neon-addp-float.ll
Removed:
llvm/trunk/test/CodeGen/AArch64/GlobalISel/fallback-ambiguous-addp-intrinsic.mir
Modified:
llvm/trunk/include/llvm/IR/IntrinsicsAArch64.td
llvm/trunk/lib/IR/AutoUpgrade.cpp
llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.td
llvm/trunk/lib/Target/AArch64/AArch64LegalizerInfo.cpp
llvm/trunk/lib/Target/AArch64/AArch64LegalizerInfo.h
llvm/trunk/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
llvm/trunk/test/CodeGen/AArch64/arm64-neon-add-pairwise.ll
llvm/trunk/test/CodeGen/AArch64/arm64-vadd.ll
Modified: llvm/trunk/include/llvm/IR/IntrinsicsAArch64.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/IR/IntrinsicsAArch64.td?rev=356722&r1=356721&r2=356722&view=diff
==============================================================================
--- llvm/trunk/include/llvm/IR/IntrinsicsAArch64.td (original)
+++ llvm/trunk/include/llvm/IR/IntrinsicsAArch64.td Thu Mar 21 15:31:37 2019
@@ -289,6 +289,7 @@ let TargetPrefix = "aarch64", IntrProper
// Pairwise Add
def int_aarch64_neon_addp : AdvSIMD_2VectorArg_Intrinsic;
+ def int_aarch64_neon_faddp : AdvSIMD_2VectorArg_Intrinsic;
// Long Pairwise Add
// FIXME: In theory, we shouldn't need intrinsics for saddlp or
Modified: llvm/trunk/lib/IR/AutoUpgrade.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/IR/AutoUpgrade.cpp?rev=356722&r1=356721&r2=356722&view=diff
==============================================================================
--- llvm/trunk/lib/IR/AutoUpgrade.cpp (original)
+++ llvm/trunk/lib/IR/AutoUpgrade.cpp Thu Mar 21 15:31:37 2019
@@ -568,6 +568,17 @@ static bool UpgradeIntrinsicFunction1(Fu
NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::thread_pointer);
return true;
}
+ if (Name.startswith("aarch64.neon.addp")) {
+ if (F->arg_size() != 2)
+ break; // Invalid IR.
+ auto fArgs = F->getFunctionType()->params();
+ VectorType *ArgTy = dyn_cast<VectorType>(fArgs[0]);
+ if (ArgTy && ArgTy->getElementType()->isFloatingPointTy()) {
+ NewFn = Intrinsic::getDeclaration(F->getParent(),
+ Intrinsic::aarch64_neon_faddp, fArgs);
+ return true;
+ }
+ }
break;
}
Modified: llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.td?rev=356722&r1=356721&r2=356722&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.td (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.td Thu Mar 21 15:31:37 2019
@@ -3499,7 +3499,7 @@ def : Pat<(fabs (fsub VT:$Rn, VT:$Rm)),
}
defm FACGE : SIMDThreeSameVectorFPCmp<1,0,0b101,"facge",int_aarch64_neon_facge>;
defm FACGT : SIMDThreeSameVectorFPCmp<1,1,0b101,"facgt",int_aarch64_neon_facgt>;
-defm FADDP : SIMDThreeSameVectorFP<1,0,0b010,"faddp",int_aarch64_neon_addp>;
+defm FADDP : SIMDThreeSameVectorFP<1,0,0b010,"faddp",int_aarch64_neon_faddp>;
defm FADD : SIMDThreeSameVectorFP<0,0,0b010,"fadd", fadd>;
defm FCMEQ : SIMDThreeSameVectorFPCmp<0, 0, 0b100, "fcmeq", AArch64fcmeq>;
defm FCMGE : SIMDThreeSameVectorFPCmp<1, 0, 0b100, "fcmge", AArch64fcmge>;
Modified: llvm/trunk/lib/Target/AArch64/AArch64LegalizerInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64LegalizerInfo.cpp?rev=356722&r1=356721&r2=356722&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64LegalizerInfo.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64LegalizerInfo.cpp Thu Mar 21 15:31:37 2019
@@ -64,11 +64,6 @@ AArch64LegalizerInfo::AArch64LegalizerIn
return std::make_pair(0, EltTy);
});
- // HACK: Check that the intrinsic isn't ambiguous.
- // (See: https://bugs.llvm.org/show_bug.cgi?id=40968)
- getActionDefinitionsBuilder(G_INTRINSIC)
- .custom();
-
getActionDefinitionsBuilder(G_PHI)
.legalFor({p0, s16, s32, s64})
.clampScalar(0, s16, s64)
@@ -517,30 +512,11 @@ bool AArch64LegalizerInfo::legalizeCusto
return false;
case TargetOpcode::G_VAARG:
return legalizeVaArg(MI, MRI, MIRBuilder);
- case TargetOpcode::G_INTRINSIC:
- return legalizeIntrinsic(MI, MRI, MIRBuilder);
}
llvm_unreachable("expected switch to return");
}
-bool AArch64LegalizerInfo::legalizeIntrinsic(
- MachineInstr &MI, MachineRegisterInfo &MRI,
- MachineIRBuilder &MIRBuilder) const {
- // HACK: Don't allow faddp/addp for now. We don't pass down the type info
- // necessary to get this right today.
- //
- // It looks like addp/faddp is the only intrinsic that's impacted by this.
- // All other intrinsics fully describe the required types in their names.
- //
- // (See: https://bugs.llvm.org/show_bug.cgi?id=40968)
- const MachineOperand &IntrinOp = MI.getOperand(1);
- if (IntrinOp.isIntrinsicID() &&
- IntrinOp.getIntrinsicID() == Intrinsic::aarch64_neon_addp)
- return false;
- return true;
-}
-
bool AArch64LegalizerInfo::legalizeVaArg(MachineInstr &MI,
MachineRegisterInfo &MRI,
MachineIRBuilder &MIRBuilder) const {
Modified: llvm/trunk/lib/Target/AArch64/AArch64LegalizerInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64LegalizerInfo.h?rev=356722&r1=356721&r2=356722&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64LegalizerInfo.h (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64LegalizerInfo.h Thu Mar 21 15:31:37 2019
@@ -34,9 +34,6 @@ public:
private:
bool legalizeVaArg(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &MIRBuilder) const;
-
- bool legalizeIntrinsic(MachineInstr &MI, MachineRegisterInfo &MRI,
- MachineIRBuilder &MIRBuilder) const;
};
} // End llvm namespace.
#endif
Removed: llvm/trunk/test/CodeGen/AArch64/GlobalISel/fallback-ambiguous-addp-intrinsic.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/GlobalISel/fallback-ambiguous-addp-intrinsic.mir?rev=356721&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/GlobalISel/fallback-ambiguous-addp-intrinsic.mir (original)
+++ llvm/trunk/test/CodeGen/AArch64/GlobalISel/fallback-ambiguous-addp-intrinsic.mir (removed)
@@ -1,32 +0,0 @@
-# RUN: llc -mtriple aarch64-unknown-unknown -O0 -start-before=legalizer -pass-remarks-missed=gisel* %s -o - 2>&1 | FileCheck %s
-#
-# Check that we fall back on @llvm.aarch64.neon.addp and ensure that we get the
-# correct instruction.
-# https://bugs.llvm.org/show_bug.cgi?id=40968
-
---- |
- define <2 x float> @foo(<2 x float> %v1, <2 x float> %v2) {
- entry:
- %v3 = call <2 x float> @llvm.aarch64.neon.addp.v2f32(<2 x float> %v1, <2 x float> %v2)
- ret <2 x float> %v3
- }
- declare <2 x float> @llvm.aarch64.neon.addp.v2f32(<2 x float>, <2 x float>)
-...
----
-name: foo
-alignment: 2
-tracksRegLiveness: true
-body: |
- bb.1.entry:
- liveins: $d0, $d1
- ; CHECK: remark:
- ; CHECK-SAME: unable to legalize instruction: %2:_(<2 x s32>) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.addp), %0:_(<2 x s32>), %1:_(<2 x s32>)
- ; CHECK: faddp
- ; CHECK-NOT: addp
- %0:_(<2 x s32>) = COPY $d0
- %1:_(<2 x s32>) = COPY $d1
- %2:_(<2 x s32>) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.addp), %0(<2 x s32>), %1(<2 x s32>)
- $d0 = COPY %2(<2 x s32>)
- RET_ReallyLR implicit $d0
-
-...
Modified: llvm/trunk/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir?rev=356722&r1=356721&r2=356722&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir (original)
+++ llvm/trunk/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir Thu Mar 21 15:31:37 2019
@@ -151,7 +151,7 @@
# DEBUG: .. the first uncovered type index: 1, OK
#
# DEBUG-NEXT: G_INTRINSIC (opcode {{[0-9]+}}): 0 type indices
-# DEBUG: .. type index coverage check SKIPPED: user-defined predicate detected
+# DEBUG: .. type index coverage check SKIPPED: no rules defined
#
# DEBUG-NEXT: G_INTRINSIC_W_SIDE_EFFECTS (opcode {{[0-9]+}}): 0 type indices
# DEBUG: .. type index coverage check SKIPPED: no rules defined
Modified: llvm/trunk/test/CodeGen/AArch64/arm64-neon-add-pairwise.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/arm64-neon-add-pairwise.ll?rev=356722&r1=356721&r2=356722&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/arm64-neon-add-pairwise.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/arm64-neon-add-pairwise.ll Thu Mar 21 15:31:37 2019
@@ -65,27 +65,27 @@ define <2 x i64> @test_addp_v2i64(<2 x i
ret <2 x i64> %val
}
-declare <2 x float> @llvm.aarch64.neon.addp.v2f32(<2 x float>, <2 x float>)
-declare <4 x float> @llvm.aarch64.neon.addp.v4f32(<4 x float>, <4 x float>)
-declare <2 x double> @llvm.aarch64.neon.addp.v2f64(<2 x double>, <2 x double>)
+declare <2 x float> @llvm.aarch64.neon.faddp.v2f32(<2 x float>, <2 x float>)
+declare <4 x float> @llvm.aarch64.neon.faddp.v4f32(<4 x float>, <4 x float>)
+declare <2 x double> @llvm.aarch64.neon.faddp.v2f64(<2 x double>, <2 x double>)
define <2 x float> @test_faddp_v2f32(<2 x float> %lhs, <2 x float> %rhs) {
; CHECK: test_faddp_v2f32:
- %val = call <2 x float> @llvm.aarch64.neon.addp.v2f32(<2 x float> %lhs, <2 x float> %rhs)
+ %val = call <2 x float> @llvm.aarch64.neon.faddp.v2f32(<2 x float> %lhs, <2 x float> %rhs)
; CHECK: faddp v0.2s, v0.2s, v1.2s
ret <2 x float> %val
}
define <4 x float> @test_faddp_v4f32(<4 x float> %lhs, <4 x float> %rhs) {
; CHECK: test_faddp_v4f32:
- %val = call <4 x float> @llvm.aarch64.neon.addp.v4f32(<4 x float> %lhs, <4 x float> %rhs)
+ %val = call <4 x float> @llvm.aarch64.neon.faddp.v4f32(<4 x float> %lhs, <4 x float> %rhs)
; CHECK: faddp v0.4s, v0.4s, v1.4s
ret <4 x float> %val
}
define <2 x double> @test_faddp_v2f64(<2 x double> %lhs, <2 x double> %rhs) {
; CHECK: test_faddp_v2f64:
- %val = call <2 x double> @llvm.aarch64.neon.addp.v2f64(<2 x double> %lhs, <2 x double> %rhs)
+ %val = call <2 x double> @llvm.aarch64.neon.faddp.v2f64(<2 x double> %lhs, <2 x double> %rhs)
; CHECK: faddp v0.2d, v0.2d, v1.2d
ret <2 x double> %val
}
Modified: llvm/trunk/test/CodeGen/AArch64/arm64-vadd.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/arm64-vadd.ll?rev=356722&r1=356721&r2=356722&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/arm64-vadd.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/arm64-vadd.ll Thu Mar 21 15:31:37 2019
@@ -712,7 +712,7 @@ define <2 x float> @faddp_2s(<2 x float>
;CHECK: faddp.2s
%tmp1 = load <2 x float>, <2 x float>* %A
%tmp2 = load <2 x float>, <2 x float>* %B
- %tmp3 = call <2 x float> @llvm.aarch64.neon.addp.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
+ %tmp3 = call <2 x float> @llvm.aarch64.neon.faddp.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
ret <2 x float> %tmp3
}
@@ -721,7 +721,7 @@ define <4 x float> @faddp_4s(<4 x float>
;CHECK: faddp.4s
%tmp1 = load <4 x float>, <4 x float>* %A
%tmp2 = load <4 x float>, <4 x float>* %B
- %tmp3 = call <4 x float> @llvm.aarch64.neon.addp.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
+ %tmp3 = call <4 x float> @llvm.aarch64.neon.faddp.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
ret <4 x float> %tmp3
}
@@ -730,13 +730,13 @@ define <2 x double> @faddp_2d(<2 x doubl
;CHECK: faddp.2d
%tmp1 = load <2 x double>, <2 x double>* %A
%tmp2 = load <2 x double>, <2 x double>* %B
- %tmp3 = call <2 x double> @llvm.aarch64.neon.addp.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
+ %tmp3 = call <2 x double> @llvm.aarch64.neon.faddp.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
ret <2 x double> %tmp3
}
-declare <2 x float> @llvm.aarch64.neon.addp.v2f32(<2 x float>, <2 x float>) nounwind readnone
-declare <4 x float> @llvm.aarch64.neon.addp.v4f32(<4 x float>, <4 x float>) nounwind readnone
-declare <2 x double> @llvm.aarch64.neon.addp.v2f64(<2 x double>, <2 x double>) nounwind readnone
+declare <2 x float> @llvm.aarch64.neon.faddp.v2f32(<2 x float>, <2 x float>) nounwind readnone
+declare <4 x float> @llvm.aarch64.neon.faddp.v4f32(<4 x float>, <4 x float>) nounwind readnone
+declare <2 x double> @llvm.aarch64.neon.faddp.v2f64(<2 x double>, <2 x double>) nounwind readnone
define <2 x i64> @uaddl_duprhs(<4 x i32> %lhs, i32 %rhs) {
; CHECK-LABEL: uaddl_duprhs
Added: llvm/trunk/test/CodeGen/AArch64/autoupgrade-aarch64-neon-addp-float.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/autoupgrade-aarch64-neon-addp-float.ll?rev=356722&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/autoupgrade-aarch64-neon-addp-float.ll (added)
+++ llvm/trunk/test/CodeGen/AArch64/autoupgrade-aarch64-neon-addp-float.ll Thu Mar 21 15:31:37 2019
@@ -0,0 +1,9 @@
+; RUN: opt -S < %s -mtriple=arm64 | FileCheck %s
+declare <4 x float> @llvm.aarch64.neon.addp.v4f32(<4 x float>, <4 x float>)
+
+; CHECK: call <4 x float> @llvm.aarch64.neon.faddp.v4f32
+define <4 x float> @upgrade_aarch64_neon_addp_float(<4 x float> %a, <4 x float> %b) {
+ %res = call <4 x float> @llvm.aarch64.neon.addp.v4f32(<4 x float> %a, <4 x float> %b)
+ ret <4 x float> %res
+}
+
More information about the llvm-commits
mailing list