[clang] [PowerPC] Fix use of FPSCR builtins in smmintrin.h (PR #67299)

Qiu Chaofan via cfe-commits cfe-commits at lists.llvm.org
Mon Oct 9 21:11:47 PDT 2023


https://github.com/ecnelises updated https://github.com/llvm/llvm-project/pull/67299

>From 2d628587b9cede36e7a93ecb1414cc0c16596934 Mon Sep 17 00:00:00 2001
From: Qiu Chaofan <qiucofan at cn.ibm.com>
Date: Mon, 25 Sep 2023 17:06:26 +0800
Subject: [PATCH 1/2] [PowerPC] Fix use of FPSCR builtins in smmintrin.h

smmintrin.h uses __builtin_mffs, __builtin_mffsl, __builtin_mtfsf and
__builtin_set_fpscr_rn. This patch replaces the uses with ppc prefix and
implement the missing ones.

This fixes issue #64664.
---
 clang/include/clang/Basic/BuiltinsPPC.def     |  2 +
 clang/lib/Basic/Targets/PPC.cpp               |  4 ++
 clang/lib/CodeGen/CGBuiltin.cpp               |  5 ++
 clang/lib/Headers/ppc_wrappers/smmintrin.h    | 50 +++++++++++++------
 clang/test/CodeGen/PowerPC/builtins-ppc.c     | 13 ++++-
 clang/test/CodeGen/PowerPC/ppc-emmintrin.c    |  5 ++
 clang/test/CodeGen/PowerPC/ppc-mmintrin.c     |  5 ++
 clang/test/CodeGen/PowerPC/ppc-pmmintrin.c    |  3 ++
 clang/test/CodeGen/PowerPC/ppc-smmintrin.c    | 37 ++++++++------
 clang/test/CodeGen/PowerPC/ppc-tmmintrin.c    |  3 ++
 clang/test/CodeGen/PowerPC/ppc-x86gprintrin.c |  3 ++
 11 files changed, 99 insertions(+), 31 deletions(-)

diff --git a/clang/include/clang/Basic/BuiltinsPPC.def b/clang/include/clang/Basic/BuiltinsPPC.def
index 18a1186053481ed..a35488ed3dfa565 100644
--- a/clang/include/clang/Basic/BuiltinsPPC.def
+++ b/clang/include/clang/Basic/BuiltinsPPC.def
@@ -151,9 +151,11 @@ TARGET_BUILTIN(__builtin_ppc_extract_exp, "Uid", "", "power9-vector")
 TARGET_BUILTIN(__builtin_ppc_extract_sig, "ULLid", "", "power9-vector")
 BUILTIN(__builtin_ppc_mtfsb0, "vUIi", "")
 BUILTIN(__builtin_ppc_mtfsb1, "vUIi", "")
+BUILTIN(__builtin_ppc_mffs, "d", "")
 TARGET_BUILTIN(__builtin_ppc_mffsl, "d", "", "isa-v30-instructions")
 BUILTIN(__builtin_ppc_mtfsf, "vUIiUi", "")
 BUILTIN(__builtin_ppc_mtfsfi, "vUIiUIi", "")
+BUILTIN(__builtin_ppc_set_fpscr_rn, "di", "")
 TARGET_BUILTIN(__builtin_ppc_insert_exp, "ddULLi", "", "power9-vector")
 BUILTIN(__builtin_ppc_fmsub, "dddd", "")
 BUILTIN(__builtin_ppc_fmsubs, "ffff", "")
diff --git a/clang/lib/Basic/Targets/PPC.cpp b/clang/lib/Basic/Targets/PPC.cpp
index 4e895cc7310c00e..b8bc920c45f40a2 100644
--- a/clang/lib/Basic/Targets/PPC.cpp
+++ b/clang/lib/Basic/Targets/PPC.cpp
@@ -264,6 +264,10 @@ static void defineXLCompatMacros(MacroBuilder &Builder) {
   Builder.defineMacro("__builtin_minfe", "__builtin_ppc_minfe");
   Builder.defineMacro("__builtin_minfl", "__builtin_ppc_minfl");
   Builder.defineMacro("__builtin_minfs", "__builtin_ppc_minfs");
+  Builder.defineMacro("__builtin_mffs", "__builtin_ppc_mffs");
+  Builder.defineMacro("__builtin_mffsl", "__builtin_ppc_mffsl");
+  Builder.defineMacro("__builtin_mtfsf", "__builtin_ppc_mtfsf");
+  Builder.defineMacro("__builtin_set_fpscr_rn", "__builtin_ppc_set_fpscr_rn");
 }
 
 /// PPCTargetInfo::getTargetDefines - Return a set of the PowerPC-specific
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index bf984861bccb5cc..b80c5d9e7c01dc0 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -17258,6 +17258,11 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
     Value *Op1 = EmitScalarExpr(E->getArg(1));
     return Builder.CreateFDiv(Op0, Op1, "swdiv");
   }
+  case PPC::BI__builtin_ppc_set_fpscr_rn:
+    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_setrnd),
+                              {EmitScalarExpr(E->getArg(0))});
+  case PPC::BI__builtin_ppc_mffs:
+    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_readflm));
   }
 }
 
diff --git a/clang/lib/Headers/ppc_wrappers/smmintrin.h b/clang/lib/Headers/ppc_wrappers/smmintrin.h
index 349b395c4f00b92..19cdecb18d2b83f 100644
--- a/clang/lib/Headers/ppc_wrappers/smmintrin.h
+++ b/clang/lib/Headers/ppc_wrappers/smmintrin.h
@@ -14,7 +14,7 @@
 
 #ifndef NO_WARN_X86_INTRINSICS
 /* This header is distributed to simplify porting x86_64 code that
-   makes explicit use of Intel intrinsics to powerp64/powerpc64le.
+   makes explicit use of Intel intrinsics to powerpc64/powerpc64le.
 
    It is the user's responsibility to determine if the results are
    acceptable and make additional changes as necessary.
@@ -68,10 +68,10 @@ extern __inline __m128d
     __asm__("mffsce %0" : "=f"(__fpscr_save.__fr));
     __enables_save.__fpscr = __fpscr_save.__fpscr & 0xf8;
 #else
-    __fpscr_save.__fr = __builtin_mffs();
+    __fpscr_save.__fr = __builtin_ppc_mffs();
     __enables_save.__fpscr = __fpscr_save.__fpscr & 0xf8;
     __fpscr_save.__fpscr &= ~0xf8;
-    __builtin_mtfsf(0b00000011, __fpscr_save.__fr);
+    __builtin_ppc_mtfsf(0b00000011, __fpscr_save.__fr);
 #endif
     /* Insert an artificial "read/write" reference to the variable
        read below, to ensure the compiler does not schedule
@@ -83,10 +83,15 @@ extern __inline __m128d
 
   switch (__rounding) {
   case _MM_FROUND_TO_NEAREST_INT:
-    __fpscr_save.__fr = __builtin_mffsl();
+#ifdef _ARCH_PWR9
+    __fpscr_save.__fr = __builtin_ppc_mffsl();
+#else
+    __fpscr_save.__fr = __builtin_ppc_mffs();
+    __fpscr_save.__fpscr &= 0x70007f0ffL;
+#endif
     __attribute__((fallthrough));
   case _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC:
-    __builtin_set_fpscr_rn(0b00);
+    __builtin_ppc_set_fpscr_rn(0b00);
     /* Insert an artificial "read/write" reference to the variable
        read below, to ensure the compiler does not schedule
        a read/use of the variable before the FPSCR is modified, above.
@@ -102,7 +107,7 @@ extern __inline __m128d
        This can be removed if and when GCC PR102783 is fixed.
      */
     __asm__("" : : "wa"(__r));
-    __builtin_set_fpscr_rn(__fpscr_save.__fpscr);
+    __builtin_ppc_set_fpscr_rn(__fpscr_save.__fpscr);
     break;
   case _MM_FROUND_TO_NEG_INF:
   case _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC:
@@ -128,9 +133,14 @@ extern __inline __m128d
      */
     __asm__("" : : "wa"(__r));
     /* Restore enabled exceptions.  */
-    __fpscr_save.__fr = __builtin_mffsl();
+#ifdef _ARCH_PWR9
+    __fpscr_save.__fr = __builtin_ppc_mffsl();
+#else
+    __fpscr_save.__fr = __builtin_ppc_mffs();
+    __fpscr_save.__fpscr &= 0x70007f0ffL;
+#endif
     __fpscr_save.__fpscr |= __enables_save.__fpscr;
-    __builtin_mtfsf(0b00000011, __fpscr_save.__fr);
+    __builtin_ppc_mtfsf(0b00000011, __fpscr_save.__fr);
   }
   return (__m128d)__r;
 }
@@ -159,10 +169,10 @@ extern __inline __m128
     __asm__("mffsce %0" : "=f"(__fpscr_save.__fr));
     __enables_save.__fpscr = __fpscr_save.__fpscr & 0xf8;
 #else
-    __fpscr_save.__fr = __builtin_mffs();
+    __fpscr_save.__fr = __builtin_ppc_mffs();
     __enables_save.__fpscr = __fpscr_save.__fpscr & 0xf8;
     __fpscr_save.__fpscr &= ~0xf8;
-    __builtin_mtfsf(0b00000011, __fpscr_save.__fr);
+    __builtin_ppc_mtfsf(0b00000011, __fpscr_save.__fr);
 #endif
     /* Insert an artificial "read/write" reference to the variable
        read below, to ensure the compiler does not schedule
@@ -174,10 +184,15 @@ extern __inline __m128
 
   switch (__rounding) {
   case _MM_FROUND_TO_NEAREST_INT:
-    __fpscr_save.__fr = __builtin_mffsl();
+#ifdef _ARCH_PWR9
+    __fpscr_save.__fr = __builtin_ppc_mffsl();
+#else
+    __fpscr_save.__fr = __builtin_ppc_mffs();
+    __fpscr_save.__fpscr &= 0x70007f0ffL;
+#endif
     __attribute__((fallthrough));
   case _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC:
-    __builtin_set_fpscr_rn(0b00);
+    __builtin_ppc_set_fpscr_rn(0b00);
     /* Insert an artificial "read/write" reference to the variable
        read below, to ensure the compiler does not schedule
        a read/use of the variable before the FPSCR is modified, above.
@@ -193,7 +208,7 @@ extern __inline __m128
        This can be removed if and when GCC PR102783 is fixed.
      */
     __asm__("" : : "wa"(__r));
-    __builtin_set_fpscr_rn(__fpscr_save.__fpscr);
+    __builtin_ppc_set_fpscr_rn(__fpscr_save.__fpscr);
     break;
   case _MM_FROUND_TO_NEG_INF:
   case _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC:
@@ -219,9 +234,14 @@ extern __inline __m128
      */
     __asm__("" : : "wa"(__r));
     /* Restore enabled exceptions.  */
-    __fpscr_save.__fr = __builtin_mffsl();
+#ifdef _ARCH_PWR9
+    __fpscr_save.__fr = __builtin_ppc_mffsl();
+#else
+    __fpscr_save.__fr = __builtin_ppc_mffs();
+    __fpscr_save.__fpscr &= 0x70007f0ffL;
+#endif
     __fpscr_save.__fpscr |= __enables_save.__fpscr;
-    __builtin_mtfsf(0b00000011, __fpscr_save.__fr);
+    __builtin_ppc_mtfsf(0b00000011, __fpscr_save.__fr);
   }
   return (__m128)__r;
 }
diff --git a/clang/test/CodeGen/PowerPC/builtins-ppc.c b/clang/test/CodeGen/PowerPC/builtins-ppc.c
index ccc91b6560845e2..c13edf44cdcbd2a 100644
--- a/clang/test/CodeGen/PowerPC/builtins-ppc.c
+++ b/clang/test/CodeGen/PowerPC/builtins-ppc.c
@@ -1,5 +1,8 @@
 // REQUIRES: powerpc-registered-target
-// RUN: %clang_cc1 -triple powerpc-unknown-unknown -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -triple powerpc-unknown-unknown -emit-llvm %s -o - \
+// RUN:   | FileCheck %s
+// RUN: %clang_cc1 -triple powerpc-unknown-unknown -emit-llvm %s -o - \
+// RUN:   -target-cpu pwr9 | FileCheck %s --check-prefixes=P9,CHECK
 
 void test_eh_return_data_regno()
 {
@@ -26,6 +29,9 @@ void test_builtin_ppc_setrnd() {
 
   // CHECK: call double @llvm.ppc.setrnd(i32 %2)
   res = __builtin_setrnd(x);
+
+  // CHECK: call double @llvm.ppc.setrnd(i32 %4)
+  res = __builtin_ppc_set_fpscr_rn(x);
 }
 
 void test_builtin_ppc_flm() {
@@ -33,7 +39,10 @@ void test_builtin_ppc_flm() {
   // CHECK: call double @llvm.ppc.readflm()
   res = __builtin_readflm();
 
-  // CHECK: call double @llvm.ppc.setflm(double %1)
+  // CHECK: call double @llvm.ppc.readflm()
+  res = __builtin_ppc_mffs();
+
+  // CHECK: call double @llvm.ppc.setflm(double %2)
   res = __builtin_setflm(res);
 
 #ifdef _ARCH_PWR9
diff --git a/clang/test/CodeGen/PowerPC/ppc-emmintrin.c b/clang/test/CodeGen/PowerPC/ppc-emmintrin.c
index e2d26e611ac81c4..15d291496c20a4a 100644
--- a/clang/test/CodeGen/PowerPC/ppc-emmintrin.c
+++ b/clang/test/CodeGen/PowerPC/ppc-emmintrin.c
@@ -8,6 +8,11 @@
 // RUN: %clang -S -emit-llvm -target powerpc64le-unknown-linux-gnu -mcpu=pwr10 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \
 // RUN:   -ffp-contract=off -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt -n | FileCheck %s --check-prefixes=CHECK-P10
 
+// RUN: %clang -x c++ -S -emit-llvm -target powerpc64le-unknown-linux-gnu -mcpu=pwr8 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \
+// RUN:   -ffp-contract=off -fno-discard-value-names -mllvm -disable-llvm-optzns -fsyntax-only
+// RUN: %clang -x c++ -S -emit-llvm -target powerpc64le-unknown-linux-gnu -mcpu=pwr10 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \
+// RUN:   -ffp-contract=off -fno-discard-value-names -mllvm -disable-llvm-optzns -fsyntax-only
+
 // RUN: %clang -S -emit-llvm -target powerpc64-ibm-aix -mcpu=pwr8 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \
 // RUN:   -ffp-contract=off -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt -n | FileCheck %s --check-prefixes=CHECK,CHECK-BE
 // RUN: %clang -S -emit-llvm -target powerpc64-ibm-aix -mcpu=pwr10 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \
diff --git a/clang/test/CodeGen/PowerPC/ppc-mmintrin.c b/clang/test/CodeGen/PowerPC/ppc-mmintrin.c
index 4cb5b8540092f9b..1dc6292ae3244c3 100644
--- a/clang/test/CodeGen/PowerPC/ppc-mmintrin.c
+++ b/clang/test/CodeGen/PowerPC/ppc-mmintrin.c
@@ -9,6 +9,11 @@
 // RUN: %clang -S -emit-llvm -target powerpc64le-unknown-linux-gnu -mcpu=pwr9 -DNO_WARN_X86_INTRINSICS %s \
 // RUN:   -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt -n| FileCheck %s --check-prefixes=CHECK-P9,CHECK,CHECK-LE
 
+// RUN: %clang -x c++ -S -emit-llvm -target powerpc64le-unknown-linux-gnu -mcpu=pwr8 -DNO_WARN_X86_INTRINSICS %s \
+// RUN:   -fno-discard-value-names -mllvm -disable-llvm-optzns -fsyntax-only
+// RUN: %clang -x c++ -S -emit-llvm -target powerpc64le-unknown-linux-gnu -mcpu=pwr9 -DNO_WARN_X86_INTRINSICS %s \
+// RUN:   -fno-discard-value-names -mllvm -disable-llvm-optzns -fsyntax-only
+
 // RUN: %clang -S -emit-llvm -target powerpc64-unknown-freebsd13.0 -mcpu=pwr8 -DNO_WARN_X86_INTRINSICS %s \
 // RUN:   -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt -n | FileCheck %s --check-prefixes=CHECK-P8,CHECK,CHECK-BE
 // RUN: %clang -S -emit-llvm -target powerpc64le-unknown-freebsd13.0 -mcpu=pwr8 -DNO_WARN_X86_INTRINSICS %s \
diff --git a/clang/test/CodeGen/PowerPC/ppc-pmmintrin.c b/clang/test/CodeGen/PowerPC/ppc-pmmintrin.c
index 39194427978ad42..6e152c549498d23 100644
--- a/clang/test/CodeGen/PowerPC/ppc-pmmintrin.c
+++ b/clang/test/CodeGen/PowerPC/ppc-pmmintrin.c
@@ -13,6 +13,9 @@
 // RUN: %clang -S -emit-llvm -target powerpc64-ibm-aix -mcpu=pwr8 -DNO_MM_MALLOC -ffreestanding -DNO_WARN_X86_INTRINSICS %s \
 // RUN:   -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt -n | FileCheck %s
 
+// RUN: %clang -x c++ -S -emit-llvm -target powerpc64le-gnu-linux -mcpu=pwr8 -DNO_MM_MALLOC -ffreestanding -DNO_WARN_X86_INTRINSICS %s \
+// RUN:   -fno-discard-value-names -mllvm -disable-llvm-optzns -fsyntax-only
+
 #include <pmmintrin.h>
 
 __m128d resd, md1, md2;
diff --git a/clang/test/CodeGen/PowerPC/ppc-smmintrin.c b/clang/test/CodeGen/PowerPC/ppc-smmintrin.c
index 220b65c1ce16495..7daef71a61c329e 100644
--- a/clang/test/CodeGen/PowerPC/ppc-smmintrin.c
+++ b/clang/test/CodeGen/PowerPC/ppc-smmintrin.c
@@ -15,6 +15,11 @@
 // RUN: %clang -S -emit-llvm -target powerpc64-unknown-linux-gnu -mcpu=pwr10 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \
 // RUN:   -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt -n | FileCheck %s --check-prefix=P10
 
+// RUN: %clang -x c++ -S -emit-llvm -target powerpc64le-unknown-linux-gnu -mcpu=pwr8 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \
+// RUN:   -fno-discard-value-names -mllvm -disable-llvm-optzns -fsyntax-only
+// RUN: %clang -x c++ -S -emit-llvm -target powerpc64le-unknown-linux-gnu -mcpu=pwr10 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \
+// RUN:   -fno-discard-value-names -mllvm -disable-llvm-optzns -fsyntax-only
+
 // RUN: %clang -S -emit-llvm -target powerpc64le-unknown-freebsd13.0 -mcpu=pwr8 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \
 // RUN:   -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt -n | FileCheck %s
 // RUN: %clang -S -emit-llvm -target powerpc64-unknown-freebsd13.0 -mcpu=pwr8 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \
@@ -239,44 +244,48 @@ test_round() {
 // CHECK-LABEL: @test_round
 
 // CHECK-LABEL: define available_externally <4 x float> @_mm_round_ps(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef signext %{{[0-9a-zA-Z_.]+}})
-// CHECK: call signext i32 @__builtin_mffs()
-// CHECK: call signext i32 @__builtin_mtfsf(i32 noundef signext 3, double noundef %{{[0-9a-zA-Z_.]+}})
+// CHECK: call double @llvm.ppc.readflm()
+// CHECK: call void @llvm.ppc.mtfsf(i32 3, double %{{[0-9a-zA-Z_.]+}})
 // CHECK: %{{[0-9a-zA-Z_.]+}} = call <4 x float> asm "", "=^wa,0"
-// CHECK: call signext i32 @__builtin_mffsl()
-// CHECK: call signext i32 @__builtin_set_fpscr_rn(i32 noundef signext 0)
+// CHECK: call double @llvm.ppc.readflm()
+// P10: call double @llvm.ppc.mffsl()
+// CHECK: call double @llvm.ppc.setrnd(i32 0)
 // CHECK: %{{[0-9a-zA-Z_.]+}} = call <4 x float> asm "", "=^wa,0"
 // CHECK: call <4 x float> @vec_rint(float vector[4])
 // CHECK: call void asm sideeffect "", "^wa"
-// CHECK: call signext i32 @__builtin_set_fpscr_rn(i64 noundef %{{[0-9a-zA-Z_.]+}})
+// CHECK: call double @llvm.ppc.setrnd(i32 %{{[0-9a-zA-Z_.]+}})
 // CHECK: call <4 x float> @vec_floor(float vector[4])
 // CHECK: call <4 x float> @vec_ceil(float vector[4])
 // CHECK: call <4 x float> @vec_trunc(float vector[4])
 // CHECK: call <4 x float> @vec_rint(float vector[4])
 // CHECK: call void asm sideeffect "", "^wa"
-// CHECK: call signext i32 @__builtin_mffsl()
-// CHECK: call signext i32 @__builtin_mtfsf(i32 noundef signext 3, double noundef %{{[0-9a-zA-Z_.]+}})
+// CHECK: call double @llvm.ppc.readflm()
+// P10: call double @llvm.ppc.mffsl()
+// CHECK: call void @llvm.ppc.mtfsf(i32 3, double %{{[0-9a-zA-Z_.]+}})
 
 // CHECK-LABEL: define available_externally <4 x float> @_mm_round_ss(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x float> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef signext %{{[0-9a-zA-Z_.]+}})
 // CHECK: call <4 x float> @_mm_round_ps(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef signext %{{[0-9a-zA-Z_.]+}})
 // CHECK: extractelement <4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0
 
 // CHECK-LABEL: define available_externally <2 x double> @_mm_round_pd(<2 x double> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef signext %{{[0-9a-zA-Z_.]+}})
-// CHECK: call signext i32 @__builtin_mffs()
-// CHECK: call signext i32 @__builtin_mtfsf(i32 noundef signext 3, double noundef %{{[0-9a-zA-Z_.]+}})
+// CHECK: call double @llvm.ppc.readflm()
+// CHECK: call void @llvm.ppc.mtfsf(i32 3, double %{{[0-9a-zA-Z_.]+}})
 // CHECK: %{{[0-9a-zA-Z_.]+}} = call <2 x double> asm "", "=^wa,0"
-// CHECK: call signext i32 @__builtin_mffsl()
-// CHECK: call signext i32 @__builtin_set_fpscr_rn(i32 noundef signext 0)
+// CHECK: call double @llvm.ppc.readflm()
+// P10: call double @llvm.ppc.mffsl()
+// CHECK: call double @llvm.ppc.setrnd(i32 0)
 // CHECK: %{{[0-9a-zA-Z_.]+}} = call <2 x double> asm "", "=^wa,0"
 // CHECK: call <2 x double> @vec_rint(double vector[2])
 // CHECK: call void asm sideeffect "", "^wa"
-// CHECK: call signext i32 @__builtin_set_fpscr_rn(i64 noundef %{{[0-9a-zA-Z_.]+}})
+// CHECK: call double @llvm.ppc.setrnd(i32 %{{[0-9a-zA-Z_.]+}})
 // CHECK: call <2 x double> @vec_floor(double vector[2])
 // CHECK: call <2 x double> @vec_ceil(double vector[2])
 // CHECK: call <2 x double> @vec_trunc(double vector[2])
 // CHECK: call <2 x double> @vec_rint(double vector[2])
 // CHECK: call void asm sideeffect "", "^wa"
-// CHECK: call signext i32 @__builtin_mffsl()
-// CHECK: call signext i32 @__builtin_mtfsf(i32 noundef signext 3, double noundef %{{[0-9a-zA-Z_.]+}})
+// CHECK: call double @llvm.ppc.readflm()
+// P10: call double @llvm.ppc.mffsl()
+// CHECK: call void @llvm.ppc.mtfsf(i32 3, double %{{[0-9a-zA-Z_.]+}})
 
 // CHECK-LABEL: define available_externally <2 x double> @_mm_round_sd(<2 x double> noundef %{{[0-9a-zA-Z_.]+}}, <2 x double> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef signext %{{[0-9a-zA-Z_.]+}})
 // CHECK: call <2 x double> @_mm_round_pd(<2 x double> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef signext %{{[0-9a-zA-Z_.]+}})
diff --git a/clang/test/CodeGen/PowerPC/ppc-tmmintrin.c b/clang/test/CodeGen/PowerPC/ppc-tmmintrin.c
index 60633e34b56b9a1..40d3839dcf026f7 100644
--- a/clang/test/CodeGen/PowerPC/ppc-tmmintrin.c
+++ b/clang/test/CodeGen/PowerPC/ppc-tmmintrin.c
@@ -13,6 +13,9 @@
 // RUN: %clang -S -emit-llvm -target powerpc64-ibm-aix -mcpu=pwr8 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \
 // RUN:   -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt -n | FileCheck %s --check-prefixes=CHECK,CHECK-BE
 
+// RUN: %clang -x c++ -S -emit-llvm -target powerpc64le-gnu-linux -mcpu=pwr8 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \
+// RUN:   -fno-discard-value-names -mllvm -disable-llvm-optzns -fsyntax-only
+
 #include <tmmintrin.h>
 
 __m64 res, m1, m2;
diff --git a/clang/test/CodeGen/PowerPC/ppc-x86gprintrin.c b/clang/test/CodeGen/PowerPC/ppc-x86gprintrin.c
index 238ce7c7ee574ef..ac90a5f8c530ba4 100644
--- a/clang/test/CodeGen/PowerPC/ppc-x86gprintrin.c
+++ b/clang/test/CodeGen/PowerPC/ppc-x86gprintrin.c
@@ -12,6 +12,9 @@
 // RUN: %clang -S -emit-llvm -target powerpc64-ibm-aix -mcpu=pwr7 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \
 // RUN:   -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt -n | FileCheck %s
 
+// RUN: %clang -x c++ -S -emit-llvm -target powerpc64le-unknown-linux-gnu -mcpu=pwr7 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \
+// RUN:   -fno-discard-value-names -mllvm -disable-llvm-optzns -fsyntax-only
+
 #include <x86gprintrin.h>
 
 unsigned short us;

>From 2c7688c2076cce1d9e33a6881b70e042041078a8 Mon Sep 17 00:00:00 2001
From: Qiu Chaofan <qiucofan at cn.ibm.com>
Date: Tue, 10 Oct 2023 12:10:27 +0800
Subject: [PATCH 2/2] Implement mffsl in pre-Power9 targets

---
 clang/include/clang/Basic/BuiltinsPPC.def   |  2 +-
 clang/lib/Headers/ppc_wrappers/smmintrin.h  | 20 --------
 clang/test/CodeGen/PowerPC/builtins-ppc.c   |  6 +--
 clang/test/CodeGen/PowerPC/ppc-smmintrin.c  | 12 ++---
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 54 +++++++++++++++++++--
 llvm/lib/Target/PowerPC/PPCInstrInfo.td     |  4 +-
 llvm/test/CodeGen/PowerPC/read-set-flm.ll   | 46 +++++++++++++++++-
 7 files changed, 104 insertions(+), 40 deletions(-)

diff --git a/clang/include/clang/Basic/BuiltinsPPC.def b/clang/include/clang/Basic/BuiltinsPPC.def
index a35488ed3dfa565..f7dcf3df7b7dd58 100644
--- a/clang/include/clang/Basic/BuiltinsPPC.def
+++ b/clang/include/clang/Basic/BuiltinsPPC.def
@@ -152,7 +152,7 @@ TARGET_BUILTIN(__builtin_ppc_extract_sig, "ULLid", "", "power9-vector")
 BUILTIN(__builtin_ppc_mtfsb0, "vUIi", "")
 BUILTIN(__builtin_ppc_mtfsb1, "vUIi", "")
 BUILTIN(__builtin_ppc_mffs, "d", "")
-TARGET_BUILTIN(__builtin_ppc_mffsl, "d", "", "isa-v30-instructions")
+TARGET_BUILTIN(__builtin_ppc_mffsl, "d", "", "")
 BUILTIN(__builtin_ppc_mtfsf, "vUIiUi", "")
 BUILTIN(__builtin_ppc_mtfsfi, "vUIiUIi", "")
 BUILTIN(__builtin_ppc_set_fpscr_rn, "di", "")
diff --git a/clang/lib/Headers/ppc_wrappers/smmintrin.h b/clang/lib/Headers/ppc_wrappers/smmintrin.h
index 19cdecb18d2b83f..7174a83af1b8120 100644
--- a/clang/lib/Headers/ppc_wrappers/smmintrin.h
+++ b/clang/lib/Headers/ppc_wrappers/smmintrin.h
@@ -83,12 +83,7 @@ extern __inline __m128d
 
   switch (__rounding) {
   case _MM_FROUND_TO_NEAREST_INT:
-#ifdef _ARCH_PWR9
     __fpscr_save.__fr = __builtin_ppc_mffsl();
-#else
-    __fpscr_save.__fr = __builtin_ppc_mffs();
-    __fpscr_save.__fpscr &= 0x70007f0ffL;
-#endif
     __attribute__((fallthrough));
   case _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC:
     __builtin_ppc_set_fpscr_rn(0b00);
@@ -133,12 +128,7 @@ extern __inline __m128d
      */
     __asm__("" : : "wa"(__r));
     /* Restore enabled exceptions.  */
-#ifdef _ARCH_PWR9
     __fpscr_save.__fr = __builtin_ppc_mffsl();
-#else
-    __fpscr_save.__fr = __builtin_ppc_mffs();
-    __fpscr_save.__fpscr &= 0x70007f0ffL;
-#endif
     __fpscr_save.__fpscr |= __enables_save.__fpscr;
     __builtin_ppc_mtfsf(0b00000011, __fpscr_save.__fr);
   }
@@ -184,12 +174,7 @@ extern __inline __m128
 
   switch (__rounding) {
   case _MM_FROUND_TO_NEAREST_INT:
-#ifdef _ARCH_PWR9
     __fpscr_save.__fr = __builtin_ppc_mffsl();
-#else
-    __fpscr_save.__fr = __builtin_ppc_mffs();
-    __fpscr_save.__fpscr &= 0x70007f0ffL;
-#endif
     __attribute__((fallthrough));
   case _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC:
     __builtin_ppc_set_fpscr_rn(0b00);
@@ -234,12 +219,7 @@ extern __inline __m128
      */
     __asm__("" : : "wa"(__r));
     /* Restore enabled exceptions.  */
-#ifdef _ARCH_PWR9
     __fpscr_save.__fr = __builtin_ppc_mffsl();
-#else
-    __fpscr_save.__fr = __builtin_ppc_mffs();
-    __fpscr_save.__fpscr &= 0x70007f0ffL;
-#endif
     __fpscr_save.__fpscr |= __enables_save.__fpscr;
     __builtin_ppc_mtfsf(0b00000011, __fpscr_save.__fr);
   }
diff --git a/clang/test/CodeGen/PowerPC/builtins-ppc.c b/clang/test/CodeGen/PowerPC/builtins-ppc.c
index c13edf44cdcbd2a..b94e79910650a6d 100644
--- a/clang/test/CodeGen/PowerPC/builtins-ppc.c
+++ b/clang/test/CodeGen/PowerPC/builtins-ppc.c
@@ -1,8 +1,6 @@
 // REQUIRES: powerpc-registered-target
 // RUN: %clang_cc1 -triple powerpc-unknown-unknown -emit-llvm %s -o - \
 // RUN:   | FileCheck %s
-// RUN: %clang_cc1 -triple powerpc-unknown-unknown -emit-llvm %s -o - \
-// RUN:   -target-cpu pwr9 | FileCheck %s --check-prefixes=P9,CHECK
 
 void test_eh_return_data_regno()
 {
@@ -45,10 +43,8 @@ void test_builtin_ppc_flm() {
   // CHECK: call double @llvm.ppc.setflm(double %2)
   res = __builtin_setflm(res);
 
-#ifdef _ARCH_PWR9
-  // P9: call double @llvm.ppc.mffsl()
+  // CHECK: call double @llvm.ppc.mffsl()
   res = __builtin_ppc_mffsl();
-#endif
 }
 
 double test_builtin_unpack_ldbl(long double x) {
diff --git a/clang/test/CodeGen/PowerPC/ppc-smmintrin.c b/clang/test/CodeGen/PowerPC/ppc-smmintrin.c
index 7daef71a61c329e..9bee798ff1c071b 100644
--- a/clang/test/CodeGen/PowerPC/ppc-smmintrin.c
+++ b/clang/test/CodeGen/PowerPC/ppc-smmintrin.c
@@ -247,8 +247,7 @@ test_round() {
 // CHECK: call double @llvm.ppc.readflm()
 // CHECK: call void @llvm.ppc.mtfsf(i32 3, double %{{[0-9a-zA-Z_.]+}})
 // CHECK: %{{[0-9a-zA-Z_.]+}} = call <4 x float> asm "", "=^wa,0"
-// CHECK: call double @llvm.ppc.readflm()
-// P10: call double @llvm.ppc.mffsl()
+// CHECK: call double @llvm.ppc.mffsl()
 // CHECK: call double @llvm.ppc.setrnd(i32 0)
 // CHECK: %{{[0-9a-zA-Z_.]+}} = call <4 x float> asm "", "=^wa,0"
 // CHECK: call <4 x float> @vec_rint(float vector[4])
@@ -259,8 +258,7 @@ test_round() {
 // CHECK: call <4 x float> @vec_trunc(float vector[4])
 // CHECK: call <4 x float> @vec_rint(float vector[4])
 // CHECK: call void asm sideeffect "", "^wa"
-// CHECK: call double @llvm.ppc.readflm()
-// P10: call double @llvm.ppc.mffsl()
+// CHECK: call double @llvm.ppc.mffsl()
 // CHECK: call void @llvm.ppc.mtfsf(i32 3, double %{{[0-9a-zA-Z_.]+}})
 
 // CHECK-LABEL: define available_externally <4 x float> @_mm_round_ss(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x float> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef signext %{{[0-9a-zA-Z_.]+}})
@@ -271,8 +269,7 @@ test_round() {
 // CHECK: call double @llvm.ppc.readflm()
 // CHECK: call void @llvm.ppc.mtfsf(i32 3, double %{{[0-9a-zA-Z_.]+}})
 // CHECK: %{{[0-9a-zA-Z_.]+}} = call <2 x double> asm "", "=^wa,0"
-// CHECK: call double @llvm.ppc.readflm()
-// P10: call double @llvm.ppc.mffsl()
+// CHECK: call double @llvm.ppc.mffsl()
 // CHECK: call double @llvm.ppc.setrnd(i32 0)
 // CHECK: %{{[0-9a-zA-Z_.]+}} = call <2 x double> asm "", "=^wa,0"
 // CHECK: call <2 x double> @vec_rint(double vector[2])
@@ -283,8 +280,7 @@ test_round() {
 // CHECK: call <2 x double> @vec_trunc(double vector[2])
 // CHECK: call <2 x double> @vec_rint(double vector[2])
 // CHECK: call void asm sideeffect "", "^wa"
-// CHECK: call double @llvm.ppc.readflm()
-// P10: call double @llvm.ppc.mffsl()
+// CHECK: call double @llvm.ppc.mffsl()
 // CHECK: call void @llvm.ppc.mtfsf(i32 3, double %{{[0-9a-zA-Z_.]+}})
 
 // CHECK-LABEL: define available_externally <2 x double> @_mm_round_sd(<2 x double> noundef %{{[0-9a-zA-Z_.]+}}, <2 x double> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef signext %{{[0-9a-zA-Z_.]+}})
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index c6257dcdf76f633..526f7b1419d9924 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -646,8 +646,8 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v4f32, Custom);
   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v2f64, Custom);
 
-  // To handle counter-based loop conditions.
   setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i1, Custom);
+  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
 
   setOperationAction(ISD::INTRINSIC_VOID, MVT::i8, Custom);
   setOperationAction(ISD::INTRINSIC_VOID, MVT::i16, Custom);
@@ -11595,6 +11595,50 @@ SDValue PPCTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
   llvm_unreachable("ERROR:Should return for all cases within swtich.");
 }
 
+// Lower mffsl intrinsic with mffs in targets without ISA 3.0
+static SDValue lowerMFFSL(SDValue Op, SelectionDAG &DAG,
+                          const PPCSubtarget &Subtarget) {
+  assert(cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue() ==
+             Intrinsic::ppc_mffsl &&
+         "Should only be called on int_ppc_mffsl");
+  if (Subtarget.isISA3_0())
+    return Op;
+
+  SDLoc dl(Op);
+  SDValue Chain = Op.getOperand(0);
+  SDValue MFFS = DAG.getNode(PPCISD::MFFS, dl, {MVT::f64, MVT::Other}, Chain);
+  Chain = MFFS.getValue(1);
+
+  if (Subtarget.isPPC64()) {
+    SDValue Int = DAG.getNode(ISD::BITCAST, dl, MVT::i64, MFFS);
+    // Mask 29-31, 45-51 and 56-63 bits
+    SDValue Masked = DAG.getNode(ISD::AND, dl, MVT::i64, Int,
+                                 DAG.getConstant(0x70007f0ffULL, dl, MVT::i64));
+    SDValue Cast = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Masked);
+    return DAG.getMergeValues({Cast, Chain}, dl);
+  }
+
+  MachineFunction &MF = DAG.getMachineFunction();
+  MachinePointerInfo PtrInfo;
+  int SSFI = MF.getFrameInfo().CreateStackObject(8, Align(8), false);
+  SDValue Base = DAG.getFrameIndex(SSFI, MVT::i32);
+  Chain = DAG.getStore(Chain, dl, MFFS, Base, PtrInfo);
+
+  assert(!Subtarget.isLittleEndian() && "32-bit little endian is unsupported!");
+  SDValue Offset4 = DAG.getNode(ISD::ADD, dl, MVT::i32, Base,
+                                DAG.getConstant(4, dl, MVT::i32));
+  SDValue Hi = DAG.getLoad(MVT::i32, dl, Chain, Base, PtrInfo);
+  SDValue Lo = DAG.getLoad(MVT::i32, dl, Hi.getValue(1), Offset4, PtrInfo);
+  Chain = Lo.getValue(1);
+  Hi =
+      DAG.getNode(ISD::AND, dl, MVT::i32, Hi, DAG.getConstant(7, dl, MVT::i32));
+  Lo = DAG.getNode(ISD::AND, dl, MVT::i32, Lo,
+                   DAG.getConstant(0x7f0ffULL, dl, MVT::i32));
+  Chain = DAG.getStore(Chain, dl, Hi, Base, PtrInfo);
+  Chain = DAG.getStore(Chain, dl, Lo, Offset4, PtrInfo);
+  return DAG.getLoad(MVT::f64, dl, Chain, Base, PtrInfo);
+}
+
 /// LowerOperation - Provide custom lowering hooks for some operations.
 ///
 SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
@@ -11669,8 +11713,12 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
     return LowerFP_ROUND(Op, DAG);
   case ISD::ROTL:               return LowerROTL(Op, DAG);
 
-  // For counter-based loop handling.
-  case ISD::INTRINSIC_W_CHAIN:  return SDValue();
+  case ISD::INTRINSIC_W_CHAIN: {
+    if (cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue() ==
+        Intrinsic::ppc_mffsl)
+      return lowerMFFSL(Op, DAG, Subtarget);
+    return SDValue();
+  }
 
   case ISD::BITCAST:            return LowerBITCAST(Op, DAG);
 
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index a97062e0c643fb2..74311b00fe7ffe9 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -3188,7 +3188,6 @@ def : Pat<(PPCtc_return CTRRC:$dst, imm:$imm),
           (TCRETURNri CTRRC:$dst, imm:$imm)>;
 
 def : Pat<(int_ppc_readflm), (MFFS)>;
-def : Pat<(int_ppc_mffsl), (MFFSL)>;
 
 // Hi and Lo for Darwin Global Addresses.
 def : Pat<(PPChi tglobaladdr:$in, 0), (LIS tglobaladdr:$in)>;
@@ -4510,6 +4509,9 @@ def : Pat<(int_ppc_dcbfl xoaddr:$dst),
 def : Pat<(int_ppc_dcbflp xoaddr:$dst),
           (DCBF 3, xoaddr:$dst)>;
 
+let Predicates = [IsISA3_0] in
+def : Pat<(int_ppc_mffsl), (MFFSL)>;
+
 let Predicates = [IsISA3_1] in {
   def DCBFPS  : PPCAsmPseudo<"dcbfps $dst", (ins memrr:$dst)>;
   def DCBSTPS : PPCAsmPseudo<"dcbstps $dst", (ins memrr:$dst)>;
diff --git a/llvm/test/CodeGen/PowerPC/read-set-flm.ll b/llvm/test/CodeGen/PowerPC/read-set-flm.ll
index d9f392474555288..71747e282372cf1 100644
--- a/llvm/test/CodeGen/PowerPC/read-set-flm.ll
+++ b/llvm/test/CodeGen/PowerPC/read-set-flm.ll
@@ -1,5 +1,8 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple powerpc64le-unknown-linux | FileCheck %s
+; RUN: llc < %s -mtriple powerpc64le-unknown-linux -mcpu=pwr9 | FileCheck %s \
+; RUN:   --check-prefix=P9
+; RUN: llc < %s -mtriple powerpc64-ibm-aix | FileCheck %s --check-prefix=BE
+; RUN: llc < %s -mtriple powerpc-ibm-aix | FileCheck %s --check-prefix=BE32
 ; RUN: llc < %s -mtriple powerpc64le-unknown-linux -debug-only=machine-scheduler \
 ; RUN:   2>&1 | FileCheck %s --check-prefix=LOG
 ; REQUIRES: asserts
@@ -151,8 +154,47 @@ entry:
 define double @mffsl() {
 ; CHECK-LABEL: mffsl:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    mffsl 1
+; CHECK-NEXT:    mffs 0
+; CHECK-NEXT:    lis 4, -8192
+; CHECK-NEXT:    mffprd 3, 0
+; CHECK-NEXT:    ori 4, 4, 65055
+; CHECK-NEXT:    rldicl 4, 4, 3, 29
+; CHECK-NEXT:    and 3, 3, 4
+; CHECK-NEXT:    mtfprd 1, 3
 ; CHECK-NEXT:    blr
+;
+; P9-LABEL: mffsl:
+; P9:       # %bb.0: # %entry
+; P9-NEXT:    mffsl 1
+; P9-NEXT:    blr
+;
+; BE-LABEL: mffsl:
+; BE:       # %bb.0: # %entry
+; BE-NEXT:    mffs 0
+; BE-NEXT:    stfd 0, -16(1)
+; BE-NEXT:    ld 3, -16(1)
+; BE-NEXT:    lis 4, -8192
+; BE-NEXT:    ori 4, 4, 65055
+; BE-NEXT:    rldicl 4, 4, 3, 29
+; BE-NEXT:    and 3, 3, 4
+; BE-NEXT:    std 3, -8(1)
+; BE-NEXT:    lfd 1, -8(1)
+; BE-NEXT:    blr
+;
+; BE32-LABEL: mffsl:
+; BE32:       # %bb.0: # %entry
+; BE32-NEXT:    mffs 0
+; BE32-NEXT:    stfd 0, -8(1)
+; BE32-NEXT:    lis 4, 7
+; BE32-NEXT:    lwz 3, -4(1)
+; BE32-NEXT:    ori 4, 4, 61695
+; BE32-NEXT:    lwz 5, -8(1)
+; BE32-NEXT:    and 3, 3, 4
+; BE32-NEXT:    stw 3, -4(1)
+; BE32-NEXT:    clrlwi 3, 5, 29
+; BE32-NEXT:    stw 3, -8(1)
+; BE32-NEXT:    lfd 1, -8(1)
+; BE32-NEXT:    blr
 entry:
   %x = call double @llvm.ppc.mffsl()
   ret double %x



More information about the cfe-commits mailing list