[cfe-commits] r172650 - in /cfe/trunk: lib/CodeGen/CGBuiltin.cpp test/CodeGen/arm-neon-fma.c
Tim Northover
Tim.Northover at arm.com
Wed Jan 16 12:13:15 PST 2013
Author: tnorthover
Date: Wed Jan 16 14:13:15 2013
New Revision: 172650
URL: http://llvm.org/viewvc/llvm-project?rev=172650&view=rev
Log:
Correct the order of operands when forwarding NEON vfma to LLVM fma
Added:
cfe/trunk/test/CodeGen/arm-neon-fma.c
Modified:
cfe/trunk/lib/CodeGen/CGBuiltin.cpp
Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=172650&r1=172649&r2=172650&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Wed Jan 16 14:13:15 2013
@@ -2132,7 +2132,9 @@
Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
- return Builder.CreateCall3(F, Ops[0], Ops[1], Ops[2]);
+
+ // NEON intrinsic puts accumulator first, unlike the LLVM fma.
+ return Builder.CreateCall3(F, Ops[1], Ops[2], Ops[0]);
}
case ARM::BI__builtin_neon_vpadal_v:
case ARM::BI__builtin_neon_vpadalq_v: {
Added: cfe/trunk/test/CodeGen/arm-neon-fma.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/arm-neon-fma.c?rev=172650&view=auto
==============================================================================
--- cfe/trunk/test/CodeGen/arm-neon-fma.c (added)
+++ cfe/trunk/test/CodeGen/arm-neon-fma.c Wed Jan 16 14:13:15 2013
@@ -0,0 +1,16 @@
+// REQUIRES: arm-registered-target
+// RUN: %clang -target thumbv7-none-linux-gnueabihf \
+// RUN: -mcpu=cortex-a8 -mfloat-abi=hard \
+// RUN: -O3 -S -emit-llvm -o - %s | FileCheck %s
+
+#include <arm_neon.h>
+
+float32x2_t test_fma_order(float32x2_t accum, float32x2_t lhs, float32x2_t rhs) {
+ return vfma_f32(accum, lhs, rhs);
+// CHECK: call <2 x float> @llvm.fma.v2f32(<2 x float> %lhs, <2 x float> %rhs, <2 x float> %accum)
+}
+
+float32x4_t test_fmaq_order(float32x4_t accum, float32x4_t lhs, float32x4_t rhs) {
+ return vfmaq_f32(accum, lhs, rhs);
+// CHECK: call <4 x float> @llvm.fma.v4f32(<4 x float> %lhs, <4 x float> %rhs, <4 x float> %accum)
+}
More information about the cfe-commits mailing list