[clang] b7bf937 - [PowerPC] Provide XL-compatible vec_round implementation
Nemanja Ivanovic via cfe-commits
cfe-commits at lists.llvm.org
Wed Nov 24 04:44:11 PST 2021
Author: Nemanja Ivanovic
Date: 2021-11-24T06:43:56-06:00
New Revision: b7bf937bbee38c2db0c0640176ef618d9c746538
URL: https://github.com/llvm/llvm-project/commit/b7bf937bbee38c2db0c0640176ef618d9c746538
DIFF: https://github.com/llvm/llvm-project/commit/b7bf937bbee38c2db0c0640176ef618d9c746538.diff
LOG: [PowerPC] Provide XL-compatible vec_round implementation
The XL implementation of vec_round for vector double uses
"round-to-nearest, ties to even" just as the vector float
version does. However, clang and gcc use "round-to-nearest-away"
for vector double and "round-to-nearest, ties to even"
for vector float.
The XL behaviour is implemented under the __XL_COMPAT_ALTIVEC__
macro similarly to other instances of incompatibility.
Differential revision: https://reviews.llvm.org/D113642
Added:
Modified:
clang/lib/Headers/altivec.h
clang/test/CodeGen/builtins-ppc-vsx.c
clang/test/CodeGen/builtins-ppc-xlcompat.c
llvm/lib/Target/PowerPC/PPCISelLowering.cpp
llvm/test/CodeGen/PowerPC/read-set-flm.ll
Removed:
################################################################################
diff --git a/clang/lib/Headers/altivec.h b/clang/lib/Headers/altivec.h
index fb808d7b0a4f..3366e1fc94b1 100644
--- a/clang/lib/Headers/altivec.h
+++ b/clang/lib/Headers/altivec.h
@@ -8413,9 +8413,20 @@ static __inline__ vector float __ATTRS_o_ai vec_round(vector float __a) {
}
#ifdef __VSX__
+#ifdef __XL_COMPAT_ALTIVEC__
+static __inline__ vector double __ATTRS_o_ai vec_rint(vector double __a);
+static __inline__ vector double __ATTRS_o_ai vec_round(vector double __a) {
+ double __fpscr = __builtin_readflm();
+ __builtin_setrnd(0);
+ vector double __rounded = vec_rint(__a);
+ __builtin_setflm(__fpscr);
+ return __rounded;
+}
+#else
static __inline__ vector double __ATTRS_o_ai vec_round(vector double __a) {
return __builtin_vsx_xvrdpi(__a);
}
+#endif
/* vec_rint */
diff --git a/clang/test/CodeGen/builtins-ppc-vsx.c b/clang/test/CodeGen/builtins-ppc-vsx.c
index da16124b0cd5..b0028e973773 100644
--- a/clang/test/CodeGen/builtins-ppc-vsx.c
+++ b/clang/test/CodeGen/builtins-ppc-vsx.c
@@ -409,10 +409,6 @@ void test1() {
// CHECK: call <4 x float> @llvm.ppc.altivec.vrfin(<4 x float>
// CHECK-LE: call <4 x float> @llvm.ppc.altivec.vrfin(<4 x float>
- res_vd = vec_round(vd);
-// CHECK: call <2 x double> @llvm.round.v2f64(<2 x double>
-// CHECK-LE: call <2 x double> @llvm.round.v2f64(<2 x double>
-
res_vd = vec_perm(vd, vd, vuc);
// CHECK: @llvm.ppc.altivec.vperm
// CHECK-LE: @llvm.ppc.altivec.vperm
diff --git a/clang/test/CodeGen/builtins-ppc-xlcompat.c b/clang/test/CodeGen/builtins-ppc-xlcompat.c
index 7d350fb5cb1b..5c27b9d36db4 100644
--- a/clang/test/CodeGen/builtins-ppc-xlcompat.c
+++ b/clang/test/CodeGen/builtins-ppc-xlcompat.c
@@ -5,11 +5,16 @@
// RUN: %clang_cc1 -target-feature +altivec -target-feature +vsx \
// RUN: -triple powerpc64le-unknown-linux-gnu -emit-llvm %s -o - \
// RUN: -D__XL_COMPAT_ALTIVEC__ -target-cpu pwr8 | FileCheck %s
+// RUN: %clang_cc1 -target-feature +altivec -target-feature +vsx \
+// RUN: -triple powerpc64le-unknown-linux-gnu -emit-llvm %s -o - \
+// RUN: -U__XL_COMPAT_ALTIVEC__ -target-cpu pwr8 | FileCheck \
+// RUN: --check-prefix=NOCOMPAT %s
#include <altivec.h>
vector double vd = { 3.4e22, 1.8e-3 };
vector signed long long vsll = { -12345678999ll, 12345678999 };
vector unsigned long long vull = { 11547229456923630743llu, 18014402265226391llu };
vector float res_vf;
+vector double res_vd;
vector signed int res_vsi;
vector unsigned int res_vui;
@@ -38,4 +43,11 @@ void test() {
// CHECK: [[TMP8:%.*]] = load <2 x double>, <2 x double>* @vd, align 16
// CHECK-NEXT: fmul <2 x double> [[TMP8]], <double 1.600000e+01, double 1.600000e+01>
// CHECK: call <4 x i32> @llvm.ppc.vsx.xvcvdpuxws(<2 x double>
+
+ res_vd = vec_round(vd);
+// CHECK: call double @llvm.ppc.readflm()
+// CHECK: call double @llvm.ppc.setrnd(i32 0)
+// CHECK: call <2 x double> @llvm.rint.v2f64(<2 x double>
+// CHECK: call double @llvm.ppc.setflm(double
+// NOCOMPAT: call <2 x double> @llvm.round.v2f64(<2 x double>
}
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 541e50d8dcce..ec7e30d7e362 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -12116,6 +12116,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
MachineFunction::iterator It = ++BB->getIterator();
MachineFunction *F = BB->getParent();
+ MachineRegisterInfo &MRI = F->getRegInfo();
if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
MI.getOpcode() == PPC::SELECT_CC_I8 || MI.getOpcode() == PPC::SELECT_I4 ||
@@ -12721,7 +12722,10 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
Register OldFPSCRReg = MI.getOperand(0).getReg();
// Save FPSCR value.
- BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
+ if (MRI.use_empty(OldFPSCRReg))
+ BuildMI(*BB, MI, dl, TII->get(TargetOpcode::IMPLICIT_DEF), OldFPSCRReg);
+ else
+ BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
// The floating point rounding mode is in the bits 62:63 of FPCSR, and has
// the following settings:
@@ -12854,7 +12858,10 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
// Result of setflm is previous FPSCR content, so we need to save it first.
Register OldFPSCRReg = MI.getOperand(0).getReg();
- BuildMI(*BB, MI, Dl, TII->get(PPC::MFFS), OldFPSCRReg);
+ if (MRI.use_empty(OldFPSCRReg))
+ BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::IMPLICIT_DEF), OldFPSCRReg);
+ else
+ BuildMI(*BB, MI, Dl, TII->get(PPC::MFFS), OldFPSCRReg);
// Put bits in 32:63 to FPSCR.
Register NewFPSCRReg = MI.getOperand(1).getReg();
diff --git a/llvm/test/CodeGen/PowerPC/read-set-flm.ll b/llvm/test/CodeGen/PowerPC/read-set-flm.ll
index 1f4a905cd69e..83c8edf2f688 100644
--- a/llvm/test/CodeGen/PowerPC/read-set-flm.ll
+++ b/llvm/test/CodeGen/PowerPC/read-set-flm.ll
@@ -11,7 +11,6 @@ define double @in_nostrict(double %a, double %b, double %c, double %d) {
; CHECK-NEXT: xsdivdp 1, 1, 2
; CHECK-NEXT: xsadddp 1, 1, 3
; CHECK-NEXT: xsadddp 0, 1, 0
-; CHECK-NEXT: mffs 1
; CHECK-NEXT: mtfsf 255, 4
; CHECK-NEXT: xsdivdp 1, 3, 4
; CHECK-NEXT: xsadddp 1, 1, 2
@@ -47,7 +46,6 @@ define double @in_strict(double %a, double %b, double %c, double %d) #0 {
; CHECK-NEXT: xsdivdp 1, 1, 2
; CHECK-NEXT: xsadddp 1, 1, 3
; CHECK-NEXT: xsadddp 0, 1, 0
-; CHECK-NEXT: mffs 1
; CHECK-NEXT: mtfsf 255, 4
; CHECK-NEXT: xsdivdp 1, 3, 4
; CHECK-NEXT: xsadddp 1, 1, 2
@@ -96,7 +94,6 @@ define void @cse_nomerge(double* %f1, double* %f2, double %f3) #0 {
; CHECK-NEXT: nop
; CHECK-NEXT: mffs 0
; CHECK-NEXT: stfd 0, 0(30)
-; CHECK-NEXT: mffs 0
; CHECK-NEXT: mtfsf 255, 31
; CHECK-NEXT: addi 1, 1, 64
; CHECK-NEXT: ld 0, 16(1)
@@ -134,7 +131,6 @@ define void @cse_nomerge_readonly(double* %f1, double* %f2, double %f3) #0 {
; CHECK-NEXT: nop
; CHECK-NEXT: mffs 0
; CHECK-NEXT: stfd 0, 0(30)
-; CHECK-NEXT: mffs 0
; CHECK-NEXT: mtfsf 255, 31
; CHECK-NEXT: addi 1, 1, 64
; CHECK-NEXT: ld 0, 16(1)
More information about the cfe-commits
mailing list