[llvm] a4558a4 - [PowerPC] Implement 32-bit expansion for rldimi (#86783)

via llvm-commits llvm-commits at lists.llvm.org
Tue Apr 9 01:43:53 PDT 2024


Author: Qiu Chaofan
Date: 2024-04-09T16:43:49+08:00
New Revision: a4558a4a53eda8d170bbd2c358d383bb0a13f91f

URL: https://github.com/llvm/llvm-project/commit/a4558a4a53eda8d170bbd2c358d383bb0a13f91f
DIFF: https://github.com/llvm/llvm-project/commit/a4558a4a53eda8d170bbd2c358d383bb0a13f91f.diff

LOG: [PowerPC] Implement 32-bit expansion for rldimi (#86783)

rldimi is a 64-bit instruction; for backward compatibility, it needs to
be expanded into a series of rotate and mask operations in 32-bit
environments. In the future, we may improve the bit permutation selector
and remove this direct codegen.

Added: 
    

Modified: 
    clang/lib/CodeGen/CGBuiltin.cpp
    clang/lib/Sema/SemaChecking.cpp
    clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-error.c
    llvm/include/llvm/IR/IntrinsicsPowerPC.td
    llvm/test/CodeGen/PowerPC/rldimi.ll

Removed: 
    


################################################################################
diff  --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index df7502b8def531..c052367d287820 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -17288,6 +17288,16 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
     Value *Op1 = EmitScalarExpr(E->getArg(1));
     Value *Op2 = EmitScalarExpr(E->getArg(2));
     Value *Op3 = EmitScalarExpr(E->getArg(3));
+    // rldimi is 64-bit instruction, expand the intrinsic before isel to
+    // leverage peephole and avoid legalization efforts.
+    if (BuiltinID == PPC::BI__builtin_ppc_rldimi &&
+        !getTarget().getTriple().isPPC64()) {
+      Function *F = CGM.getIntrinsic(Intrinsic::fshl, Op0->getType());
+      Op2 = Builder.CreateZExt(Op2, Int64Ty);
+      Value *Shift = Builder.CreateCall(F, {Op0, Op0, Op2});
+      return Builder.CreateOr(Builder.CreateAnd(Shift, Op3),
+                              Builder.CreateAnd(Op1, Builder.CreateNot(Op3)));
+    }
     return Builder.CreateCall(
         CGM.getIntrinsic(BuiltinID == PPC::BI__builtin_ppc_rldimi
                              ? Intrinsic::ppc_rldimi

diff  --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index f4746647b96546..b84a779b7189c0 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -5232,7 +5232,6 @@ static bool isPPC_64Builtin(unsigned BuiltinID) {
   case PPC::BI__builtin_ppc_fetch_and_andlp:
   case PPC::BI__builtin_ppc_fetch_and_orlp:
   case PPC::BI__builtin_ppc_fetch_and_swaplp:
-  case PPC::BI__builtin_ppc_rldimi:
     return true;
   }
   return false;

diff  --git a/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-error.c b/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-error.c
index 272e0222dc9e41..f7f357df62af16 100644
--- a/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-error.c
+++ b/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-error.c
@@ -24,7 +24,6 @@ void test_trap(void) {
   __tw(ia, ib, 0); //expected-error {{argument value 0 is outside the valid range [1, 31]}}
 }
 
-#ifdef __PPC64__
 void test_builtin_ppc_rldimi() {
   unsigned int shift;
   unsigned long long mask;
@@ -33,7 +32,6 @@ void test_builtin_ppc_rldimi() {
   res = __builtin_ppc_rldimi(ull, ull, 63, 0xFFFF000000000F00);      // expected-error {{argument 3 value should represent a contiguous bit field}}
   res = __builtin_ppc_rldimi(ull, ull, 64, 0xFFFF000000000000);      // expected-error {{argument value 64 is outside the valid range [0, 63]}}
 }
-#endif
 
 void test_builtin_ppc_rlwimi() {
   unsigned int shift;
@@ -86,10 +84,6 @@ void testalignx(const void *pointer, unsigned int alignment) {
 }
 
 #ifndef __PPC64__
-unsigned long long testrldimi32() {
-  return __rldimi(ull, ui, 3, 0x7ffff8ULL); //expected-error {{this builtin is only available on 64-bit targets}}
-}
-
 long long testbpermd(long long bit_selector, long long source) {
   return __bpermd(bit_selector, source); //expected-error {{this builtin is only available on 64-bit targets}}
 }

diff  --git a/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
index ee9a04241ac2ec..aff1fc7f085c43 100644
--- a/llvm/include/llvm/IR/IntrinsicsPowerPC.td
+++ b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
@@ -182,10 +182,6 @@ let TargetPrefix = "ppc" in {  // All intrinsics start with "llvm.ppc.".
   def int_ppc_fctuwz
       : ClangBuiltin<"__builtin_ppc_fctuwz">,
         DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
-  def int_ppc_rldimi
-      : ClangBuiltin<"__builtin_ppc_rldimi">,
-        DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_i64_ty],
-                              [IntrNoMem, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>]>;
   def int_ppc_rlwimi
       : ClangBuiltin<"__builtin_ppc_rlwimi">,
         DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
@@ -194,6 +190,9 @@ let TargetPrefix = "ppc" in {  // All intrinsics start with "llvm.ppc.".
       : ClangBuiltin<"__builtin_ppc_rlwnm">,
         DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
                               [IntrNoMem, ImmArg<ArgIndex<2>>]>;
+  def int_ppc_rldimi
+      : DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_i64_ty],
+                              [IntrNoMem, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>]>;
 
   // XL compatible select functions
   // TODO: Add llvm_f128_ty support.

diff  --git a/llvm/test/CodeGen/PowerPC/rldimi.ll b/llvm/test/CodeGen/PowerPC/rldimi.ll
index 78ea9aa862f2c2..4ce015849d9ea3 100644
--- a/llvm/test/CodeGen/PowerPC/rldimi.ll
+++ b/llvm/test/CodeGen/PowerPC/rldimi.ll
@@ -139,4 +139,158 @@ define i64 @rldimi11(i64 %a, i64 %b) {
   ret i64 %r
 }
 
+define i64 @rldimi12(i64 %a, i64 %b) {
+; CHECK-LABEL: rldimi12:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    rotldi 3, 3, 20
+; CHECK-NEXT:    rldimi 4, 3, 44, 31
+; CHECK-NEXT:    mr 3, 4
+; CHECK-NEXT:    blr
+  %r = call i64 @llvm.ppc.rldimi(i64 %a, i64 %b, i32 0, i64 18446726490113441791)
+  ret i64 %r
+}
+
+define i64 @rldimi13(i64 %a, i64 %b) {
+; CHECK-LABEL: rldimi13:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    rotldi 3, 3, 62
+; CHECK-NEXT:    rldimi 4, 3, 32, 2
+; CHECK-NEXT:    mr 3, 4
+; CHECK-NEXT:    blr
+  %r = call i64 @llvm.ppc.rldimi(i64 %a, i64 %b, i32 30, i64 4611686014132420608)
+  ret i64 %r
+}
+
+define i64 @rldimi14(i64 %a, i64 %b) {
+; CHECK-LABEL: rldimi14:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    rotldi 3, 3, 23
+; CHECK-NEXT:    rldimi 4, 3, 53, 0
+; CHECK-NEXT:    mr 3, 4
+; CHECK-NEXT:    blr
+  %r = call i64 @llvm.ppc.rldimi(i64 %a, i64 %b, i32 12, i64 18437736874454810624) ; mb=0, me=10
+  ret i64 %r
+}
+
+define i64 @rldimi15(i64 %a, i64 %b) {
+; CHECK-LABEL: rldimi15:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    rotldi 3, 3, 36
+; CHECK-NEXT:    rldimi 4, 3, 40, 10
+; CHECK-NEXT:    mr 3, 4
+; CHECK-NEXT:    blr
+  %r = call i64 @llvm.ppc.rldimi(i64 %a, i64 %b, i32 12, i64 18013298997854208) ; mb=10, me=23
+  ret i64 %r
+}
+
+define i64 @rldimi16(i64 %a, i64 %b) {
+; CHECK-LABEL: rldimi16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    rotldi 3, 3, 57
+; CHECK-NEXT:    rldimi 4, 3, 19, 10
+; CHECK-NEXT:    mr 3, 4
+; CHECK-NEXT:    blr
+  %r = call i64 @llvm.ppc.rldimi(i64 %a, i64 %b, i32 12, i64 18014398508957696) ; mb=10, me=44
+  ret i64 %r
+}
+
+define i64 @rldimi17(i64 %a, i64 %b) {
+; CHECK-LABEL: rldimi17:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    rotldi 3, 3, 43
+; CHECK-NEXT:    rldimi 4, 3, 33, 25
+; CHECK-NEXT:    mr 3, 4
+; CHECK-NEXT:    blr
+  %r = call i64 @llvm.ppc.rldimi(i64 %a, i64 %b, i32 12, i64 541165879296) ; mb=25, me=30
+  ret i64 %r
+}
+
+define i64 @rldimi18(i64 %a, i64 %b) {
+; CHECK-LABEL: rldimi18:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    rotldi 3, 3, 57
+; CHECK-NEXT:    rldimi 4, 3, 19, 25
+; CHECK-NEXT:    mr 3, 4
+; CHECK-NEXT:    blr
+  %r = call i64 @llvm.ppc.rldimi(i64 %a, i64 %b, i32 12, i64 549755289600) ; mb=25, me=44
+  ret i64 %r
+}
+
+define i64 @rldimi19(i64 %a, i64 %b) {
+; CHECK-LABEL: rldimi19:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    rotldi 3, 3, 57
+; CHECK-NEXT:    rldimi 4, 3, 19, 33
+; CHECK-NEXT:    mr 3, 4
+; CHECK-NEXT:    blr
+  %r = call i64 @llvm.ppc.rldimi(i64 %a, i64 %b, i32 12, i64 2146959360) ; mb=33, me=44
+  ret i64 %r
+}
+
+define i64 @rldimi20(i64 %a, i64 %b) {
+; CHECK-LABEL: rldimi20:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    rotldi 3, 3, 23
+; CHECK-NEXT:    rldimi 4, 3, 53, 15
+; CHECK-NEXT:    mr 3, 4
+; CHECK-NEXT:    blr
+  %r = call i64 @llvm.ppc.rldimi(i64 %a, i64 %b, i32 12, i64 18438299824408231935) ; mb=15, me=10
+  ret i64 %r
+}
+
+define i64 @rldimi21(i64 %a, i64 %b) {
+; CHECK-LABEL: rldimi21:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    rotldi 3, 3, 23
+; CHECK-NEXT:    rldimi 4, 3, 53, 25
+; CHECK-NEXT:    mr 3, 4
+; CHECK-NEXT:    blr
+  %r = call i64 @llvm.ppc.rldimi(i64 %a, i64 %b, i32 12, i64 18437737424210624511) ; mb=25, me=10
+  ret i64 %r
+}
+
+define i64 @rldimi22(i64 %a, i64 %b) {
+; CHECK-LABEL: rldimi22:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    rotldi 3, 3, 34
+; CHECK-NEXT:    rldimi 4, 3, 42, 25
+; CHECK-NEXT:    mr 3, 4
+; CHECK-NEXT:    blr
+  %r = call i64 @llvm.ppc.rldimi(i64 %a, i64 %b, i32 12, i64 18446740225418854399) ; mb=25, me=21
+  ret i64 %r
+}
+
+define i64 @rldimi23(i64 %a, i64 %b) {
+; CHECK-LABEL: rldimi23:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    rotldi 3, 3, 23
+; CHECK-NEXT:    rldimi 4, 3, 53, 44
+; CHECK-NEXT:    mr 3, 4
+; CHECK-NEXT:    blr
+  %r = call i64 @llvm.ppc.rldimi(i64 %a, i64 %b, i32 12, i64 18437736874455859199) ; mb=44, me=10
+  ret i64 %r
+}
+
+define i64 @rldimi24(i64 %a, i64 %b) {
+; CHECK-LABEL: rldimi24:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    rotldi 3, 3, 38
+; CHECK-NEXT:    rldimi 4, 3, 38, 44
+; CHECK-NEXT:    mr 3, 4
+; CHECK-NEXT:    blr
+  %r = call i64 @llvm.ppc.rldimi(i64 %a, i64 %b, i32 12, i64 18446743798832693247) ; mb=44, me=25
+  ret i64 %r
+}
+
+define i64 @rldimi25(i64 %a, i64 %b) {
+; CHECK-LABEL: rldimi25:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    rotldi 3, 3, 48
+; CHECK-NEXT:    rldimi 4, 3, 28, 44
+; CHECK-NEXT:    mr 3, 4
+; CHECK-NEXT:    blr
+  %r = call i64 @llvm.ppc.rldimi(i64 %a, i64 %b, i32 12, i64 18446744073442164735) ; mb=44, me=35
+  ret i64 %r
+}
+
 declare i64 @llvm.ppc.rldimi(i64, i64, i32 immarg, i64 immarg)


        


More information about the llvm-commits mailing list