[llvm] r243926 - [AArch64] Vector FCOPYSIGN supports Custom-lowering: mark it as such.
Ahmed Bougacha
ahmed.bougacha at gmail.com
Mon Aug 3 17:42:34 PDT 2015
Author: ab
Date: Mon Aug 3 19:42:34 2015
New Revision: 243926
URL: http://llvm.org/viewvc/llvm-project?rev=243926&view=rev
Log:
[AArch64] Vector FCOPYSIGN supports Custom-lowering: mark it as such.
There's a bunch of code in LowerFCOPYSIGN that does smart lowering, and
is actually already vector-aware; let's use it instead of scalarizing!
The only interesting change is that for v2f32, we previously always used
v4i32 as the integer vector type.
Use v2i32 instead, and mark FCOPYSIGN as Custom.
Modified:
llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/trunk/test/CodeGen/AArch64/vector-fcopysign.ll
Modified: llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp?rev=243926&r1=243925&r2=243926&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp Mon Aug 3 19:42:34 2015
@@ -632,6 +632,9 @@ void AArch64TargetLowering::addTypeForNE
setOperationAction(ISD::FLOG10, VT.getSimpleVT(), Expand);
setOperationAction(ISD::FEXP, VT.getSimpleVT(), Expand);
setOperationAction(ISD::FEXP2, VT.getSimpleVT(), Expand);
+
+ // But we do support custom-lowering for FCOPYSIGN.
+ setOperationAction(ISD::FCOPYSIGN, VT.getSimpleVT(), Custom);
}
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT.getSimpleVT(), Custom);
@@ -3651,7 +3654,7 @@ SDValue AArch64TargetLowering::LowerFCOP
SDValue VecVal1, VecVal2;
if (VT == MVT::f32 || VT == MVT::v2f32 || VT == MVT::v4f32) {
EltVT = MVT::i32;
- VecVT = MVT::v4i32;
+ VecVT = (VT == MVT::v2f32 ? MVT::v2i32 : MVT::v4i32);
EltMask = 0x80000000ULL;
if (!VT.isVector()) {
Modified: llvm/trunk/test/CodeGen/AArch64/vector-fcopysign.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/vector-fcopysign.ll?rev=243926&r1=243925&r2=243926&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/vector-fcopysign.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/vector-fcopysign.ll Mon Aug 3 19:42:34 2015
@@ -8,12 +8,8 @@ target datalayout = "e-m:o-i64:64-i128:1
define <1 x float> @test_copysign_v1f32_v1f32(<1 x float> %a, <1 x float> %b) #0 {
; CHECK-LABEL: test_copysign_v1f32_v1f32:
; CHECK: ; BB#0:
-; CHECK-NEXT: mov s2, v1[1]
-; CHECK-NEXT: mov s3, v0[1]
-; CHECK-NEXT: movi.4s v4, #0x80, lsl #24
-; CHECK-NEXT: bit.16b v3, v2, v4
-; CHECK-NEXT: bit.16b v0, v1, v4
-; CHECK-NEXT: ins.s v0[1], v3[0]
+; CHECK-NEXT: movi.2s v2, #0x80, lsl #24
+; CHECK-NEXT: bit.8b v0, v1, v2
; CHECK-NEXT: ret
%r = call <1 x float> @llvm.copysign.v1f32(<1 x float> %a, <1 x float> %b)
ret <1 x float> %r
@@ -68,12 +64,8 @@ declare <1 x double> @llvm.copysign.v1f6
define <2 x float> @test_copysign_v2f32_v2f32(<2 x float> %a, <2 x float> %b) #0 {
; CHECK-LABEL: test_copysign_v2f32_v2f32:
; CHECK: ; BB#0:
-; CHECK-NEXT: mov s2, v1[1]
-; CHECK-NEXT: mov s3, v0[1]
-; CHECK-NEXT: movi.4s v4, #0x80, lsl #24
-; CHECK-NEXT: bit.16b v3, v2, v4
-; CHECK-NEXT: bit.16b v0, v1, v4
-; CHECK-NEXT: ins.s v0[1], v3[0]
+; CHECK-NEXT: movi.2s v2, #0x80, lsl #24
+; CHECK-NEXT: bit.8b v0, v1, v2
; CHECK-NEXT: ret
%r = call <2 x float> @llvm.copysign.v2f32(<2 x float> %a, <2 x float> %b)
ret <2 x float> %r
@@ -103,20 +95,8 @@ declare <2 x float> @llvm.copysign.v2f32
define <4 x float> @test_copysign_v4f32_v4f32(<4 x float> %a, <4 x float> %b) #0 {
; CHECK-LABEL: test_copysign_v4f32_v4f32:
; CHECK: ; BB#0:
-; CHECK-NEXT: mov s2, v1[1]
-; CHECK-NEXT: mov s3, v0[1]
-; CHECK-NEXT: movi.4s v4, #0x80, lsl #24
-; CHECK-NEXT: mov s5, v0[2]
-; CHECK-NEXT: bit.16b v3, v2, v4
-; CHECK-NEXT: mov s2, v0[3]
-; CHECK-NEXT: mov s6, v1[2]
-; CHECK-NEXT: bit.16b v0, v1, v4
-; CHECK-NEXT: bit.16b v5, v6, v4
-; CHECK-NEXT: mov s1, v1[3]
-; CHECK-NEXT: ins.s v0[1], v3[0]
-; CHECK-NEXT: ins.s v0[2], v5[0]
-; CHECK-NEXT: bit.16b v2, v1, v4
-; CHECK-NEXT: ins.s v0[3], v2[0]
+; CHECK-NEXT: movi.4s v2, #0x80, lsl #24
+; CHECK-NEXT: bit.16b v0, v1, v2
; CHECK-NEXT: ret
%r = call <4 x float> @llvm.copysign.v4f32(<4 x float> %a, <4 x float> %b)
ret <4 x float> %r
@@ -174,13 +154,9 @@ define <2 x double> @test_copysign_v2f64
define <2 x double> @test_copysign_v2f64_v2f64(<2 x double> %a, <2 x double> %b) #0 {
; CHECK-LABEL: test_copysign_v2f64_v2f64:
; CHECK: ; BB#0:
-; CHECK-NEXT: mov d2, v1[1]
-; CHECK-NEXT: mov d3, v0[1]
-; CHECK-NEXT: movi.2d v4, #0000000000000000
-; CHECK-NEXT: fneg.2d v4, v4
-; CHECK-NEXT: bit.16b v3, v2, v4
-; CHECK-NEXT: bit.16b v0, v1, v4
-; CHECK-NEXT: ins.d v0[1], v3[0]
+; CHECK-NEXT: movi.2d v2, #0000000000000000
+; CHECK-NEXT: fneg.2d v2, v2
+; CHECK-NEXT: bit.16b v0, v1, v2
; CHECK-NEXT: ret
%r = call <2 x double> @llvm.copysign.v2f64(<2 x double> %a, <2 x double> %b)
ret <2 x double> %r
@@ -221,18 +197,10 @@ define <4 x double> @test_copysign_v4f64
define <4 x double> @test_copysign_v4f64_v4f64(<4 x double> %a, <4 x double> %b) #0 {
; CHECK-LABEL: test_copysign_v4f64_v4f64:
; CHECK: ; BB#0:
-; CHECK-NEXT: mov d4, v2[1]
-; CHECK-NEXT: mov d5, v0[1]
-; CHECK-NEXT: movi.2d v6, #0000000000000000
-; CHECK-NEXT: fneg.2d v6, v6
-; CHECK-NEXT: bit.16b v5, v4, v6
-; CHECK-NEXT: mov d4, v3[1]
-; CHECK-NEXT: bit.16b v0, v2, v6
-; CHECK-NEXT: mov d2, v1[1]
-; CHECK-NEXT: bit.16b v2, v4, v6
-; CHECK-NEXT: bit.16b v1, v3, v6
-; CHECK-NEXT: ins.d v0[1], v5[0]
-; CHECK-NEXT: ins.d v1[1], v2[0]
+; CHECK-NEXT: movi.2d v4, #0000000000000000
+; CHECK-NEXT: fneg.2d v4, v4
+; CHECK-NEXT: bit.16b v0, v2, v4
+; CHECK-NEXT: bit.16b v1, v3, v4
; CHECK-NEXT: ret
%r = call <4 x double> @llvm.copysign.v4f64(<4 x double> %a, <4 x double> %b)
ret <4 x double> %r
More information about the llvm-commits
mailing list