[clang] 65ae09e - [PowerPC] Fix behavior of rldimi/rlwimi/rlwnm builtins (#85040)

via cfe-commits cfe-commits at lists.llvm.org
Sun Mar 17 23:17:19 PDT 2024


Author: Qiu Chaofan
Date: 2024-03-18T14:17:16+08:00
New Revision: 65ae09eeb6773b14189fc67051870c8fc4eb9ae3

URL: https://github.com/llvm/llvm-project/commit/65ae09eeb6773b14189fc67051870c8fc4eb9ae3
DIFF: https://github.com/llvm/llvm-project/commit/65ae09eeb6773b14189fc67051870c8fc4eb9ae3.diff

LOG: [PowerPC] Fix behavior of rldimi/rlwimi/rlwnm builtins (#85040)

rldimi is 64-bit instruction, so the corresponding builtin should not
be available in 32-bit mode. Rotate amount should be in range and
cases when mask is zero needs special handling.

This change also swaps the first and second operands of rldimi/rlwimi
to match previous behavior. For masks not ending at bit 63-SH,
rotation will be inserted before rldimi.

Added: 
    

Modified: 
    clang/lib/Sema/SemaChecking.cpp
    clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-error.c
    clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-rotate.c
    llvm/lib/Target/PowerPC/PPCISelLowering.cpp
    llvm/test/CodeGen/PowerPC/rldimi.ll
    llvm/test/CodeGen/PowerPC/rlwimi.ll
    llvm/test/CodeGen/PowerPC/rlwinm.ll

Removed: 
    


################################################################################
diff  --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 46b783862e4e42..de544a03b29846 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -5189,6 +5189,7 @@ static bool isPPC_64Builtin(unsigned BuiltinID) {
   case PPC::BI__builtin_ppc_fetch_and_andlp:
   case PPC::BI__builtin_ppc_fetch_and_orlp:
   case PPC::BI__builtin_ppc_fetch_and_swaplp:
+  case PPC::BI__builtin_ppc_rldimi:
     return true;
   }
   return false;
@@ -5290,8 +5291,10 @@ bool Sema::CheckPPCBuiltinFunctionCall(const TargetInfo &TI, unsigned BuiltinID,
   case PPC::BI__builtin_ppc_rlwnm:
     return SemaValueIsRunOfOnes(TheCall, 2);
   case PPC::BI__builtin_ppc_rlwimi:
+    return SemaBuiltinConstantArgRange(TheCall, 2, 0, 31) ||
+           SemaValueIsRunOfOnes(TheCall, 3);
   case PPC::BI__builtin_ppc_rldimi:
-    return SemaBuiltinConstantArg(TheCall, 2, Result) ||
+    return SemaBuiltinConstantArgRange(TheCall, 2, 0, 63) ||
            SemaValueIsRunOfOnes(TheCall, 3);
   case PPC::BI__builtin_ppc_addex: {
     if (SemaBuiltinConstantArgRange(TheCall, 2, 0, 3))

diff  --git a/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-error.c b/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-error.c
index 5f57d7575c859a..272e0222dc9e41 100644
--- a/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-error.c
+++ b/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-error.c
@@ -24,13 +24,16 @@ void test_trap(void) {
   __tw(ia, ib, 0); //expected-error {{argument value 0 is outside the valid range [1, 31]}}
 }
 
+#ifdef __PPC64__
 void test_builtin_ppc_rldimi() {
   unsigned int shift;
   unsigned long long mask;
   unsigned long long res = __builtin_ppc_rldimi(ull, ull, shift, 7); // expected-error {{argument to '__builtin_ppc_rldimi' must be a constant integer}}
   res = __builtin_ppc_rldimi(ull, ull, 63, mask);                    // expected-error {{argument to '__builtin_ppc_rldimi' must be a constant integer}}
   res = __builtin_ppc_rldimi(ull, ull, 63, 0xFFFF000000000F00);      // expected-error {{argument 3 value should represent a contiguous bit field}}
+  res = __builtin_ppc_rldimi(ull, ull, 64, 0xFFFF000000000000);      // expected-error {{argument value 64 is outside the valid range [0, 63]}}
 }
+#endif
 
 void test_builtin_ppc_rlwimi() {
   unsigned int shift;
@@ -83,6 +86,10 @@ void testalignx(const void *pointer, unsigned int alignment) {
 }
 
 #ifndef __PPC64__
+unsigned long long testrldimi32() {
+  return __rldimi(ull, ui, 3, 0x7ffff8ULL); //expected-error {{this builtin is only available on 64-bit targets}}
+}
+
 long long testbpermd(long long bit_selector, long long source) {
   return __bpermd(bit_selector, source); //expected-error {{this builtin is only available on 64-bit targets}}
 }

diff  --git a/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-rotate.c b/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-rotate.c
index b218547c00d931..4773d6cb1a0cfd 100644
--- a/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-rotate.c
+++ b/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-rotate.c
@@ -1,8 +1,10 @@
 // REQUIRES: powerpc-registered-target
 // RUN: %clang_cc1 -triple powerpc64-unknown-linux-gnu \
-// RUN:   -emit-llvm %s -o - -target-cpu pwr7 | FileCheck %s
+// RUN:   -emit-llvm %s -o - -target-cpu pwr7 | FileCheck %s \
+// RUN:   -check-prefixes=PPC64,CHECK
 // RUN: %clang_cc1 -triple powerpc64le-unknown-linux-gnu \
-// RUN:   -emit-llvm %s -o - -target-cpu pwr8 | FileCheck %s
+// RUN:   -emit-llvm %s -o - -target-cpu pwr8 | FileCheck %s \
+// RUN:   -check-prefixes=PPC64,CHECK
 // RUN: %clang_cc1 -triple powerpc-unknown-aix \
 // RUN:   -emit-llvm %s -o - -target-cpu pwr7 | FileCheck %s
 // RUN: %clang_cc1 -triple powerpc64-unknown-aix \
@@ -11,18 +13,20 @@
 extern unsigned int ui;
 extern unsigned long long ull;
 
+#ifdef __PPC64__
 void test_builtin_ppc_rldimi() {
-  // CHECK-LABEL: test_builtin_ppc_rldimi
-  // CHECK:       %res = alloca i64, align 8
-  // CHECK-NEXT:  [[RA:%[0-9]+]] = load i64, ptr @ull, align 8
-  // CHECK-NEXT:  [[RB:%[0-9]+]] = load i64, ptr @ull, align 8
-  // CHECK-NEXT:  [[RC:%[0-9]+]] = call i64 @llvm.ppc.rldimi(i64 [[RA]], i64 [[RB]], i32 63, i64 72057593769492480)
-  // CHECK-NEXT:  store i64 [[RC]], ptr %res, align 8
-  // CHECK-NEXT:  ret void
+  // PPC64-LABEL: test_builtin_ppc_rldimi
+  // PPC64:       %res = alloca i64, align 8
+  // PPC64-NEXT:  [[RA:%[0-9]+]] = load i64, ptr @ull, align 8
+  // PPC64-NEXT:  [[RB:%[0-9]+]] = load i64, ptr @ull, align 8
+  // PPC64-NEXT:  [[RC:%[0-9]+]] = call i64 @llvm.ppc.rldimi(i64 [[RA]], i64 [[RB]], i32 63, i64 72057593769492480)
+  // PPC64-NEXT:  store i64 [[RC]], ptr %res, align 8
+  // PPC64-NEXT:  ret void
 
   /*shift = 63, mask = 0x00FFFFFFF0000000 = 72057593769492480, ~mask = 0xFF0000000FFFFFFF = -72057593769492481*/
   unsigned long long res = __builtin_ppc_rldimi(ull, ull, 63, 0x00FFFFFFF0000000);
 }
+#endif
 
 void test_builtin_ppc_rlwimi() {
   // CHECK-LABEL: test_builtin_ppc_rlwimi

diff  --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 16a0a4722ad484..28e8523436bbfe 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -10764,30 +10764,54 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
     return DAG.getRegister(PPC::R2, MVT::i32);
 
   case Intrinsic::ppc_rldimi: {
+    assert(Subtarget.isPPC64() && "rldimi is only available in 64-bit!");
+    SDValue Src = Op.getOperand(1);
+    APInt Mask = Op.getConstantOperandAPInt(4);
+    if (Mask.isZero())
+      return Op.getOperand(2);
+    if (Mask.isAllOnes())
+      return DAG.getNode(ISD::ROTL, dl, MVT::i64, Src, Op.getOperand(3));
     uint64_t SH = Op.getConstantOperandVal(3);
     unsigned MB = 0, ME = 0;
-    if (!isRunOfOnes64(Op.getConstantOperandVal(4), MB, ME) || ME != 63 - SH)
+    if (!isRunOfOnes64(Mask.getZExtValue(), MB, ME))
       report_fatal_error("invalid rldimi mask!");
-    return SDValue(DAG.getMachineNode(
-                       PPC::RLDIMI, dl, MVT::i64,
-                       {Op.getOperand(1), Op.getOperand(2), Op.getOperand(3),
-                        DAG.getTargetConstant(MB, dl, MVT::i32)}),
-                   0);
+    // rldimi requires ME=63-SH, otherwise rotation is needed before rldimi.
+    if (ME < 63 - SH) {
+      Src = DAG.getNode(ISD::ROTL, dl, MVT::i64, Src,
+                        DAG.getConstant(ME + SH + 1, dl, MVT::i32));
+    } else if (ME > 63 - SH) {
+      Src = DAG.getNode(ISD::ROTL, dl, MVT::i64, Src,
+                        DAG.getConstant(ME + SH - 63, dl, MVT::i32));
+    }
+    return SDValue(
+        DAG.getMachineNode(PPC::RLDIMI, dl, MVT::i64,
+                           {Op.getOperand(2), Src,
+                            DAG.getTargetConstant(63 - ME, dl, MVT::i32),
+                            DAG.getTargetConstant(MB, dl, MVT::i32)}),
+        0);
   }
 
   case Intrinsic::ppc_rlwimi: {
+    APInt Mask = Op.getConstantOperandAPInt(4);
+    if (Mask.isZero())
+      return Op.getOperand(2);
+    if (Mask.isAllOnes())
+      return DAG.getNode(ISD::ROTL, dl, MVT::i32, Op.getOperand(1),
+                         Op.getOperand(3));
     unsigned MB = 0, ME = 0;
-    if (!isRunOfOnes(Op.getConstantOperandVal(4), MB, ME))
+    if (!isRunOfOnes(Mask.getZExtValue(), MB, ME))
       report_fatal_error("invalid rlwimi mask!");
     return SDValue(DAG.getMachineNode(
                        PPC::RLWIMI, dl, MVT::i32,
-                       {Op.getOperand(1), Op.getOperand(2), Op.getOperand(3),
+                       {Op.getOperand(2), Op.getOperand(1), Op.getOperand(3),
                         DAG.getTargetConstant(MB, dl, MVT::i32),
                         DAG.getTargetConstant(ME, dl, MVT::i32)}),
                    0);
   }
 
   case Intrinsic::ppc_rlwnm: {
+    if (Op.getConstantOperandVal(3) == 0)
+      return DAG.getConstant(0, dl, MVT::i32);
     unsigned MB = 0, ME = 0;
     if (!isRunOfOnes(Op.getConstantOperandVal(3), MB, ME))
       report_fatal_error("invalid rlwnm mask!");

diff  --git a/llvm/test/CodeGen/PowerPC/rldimi.ll b/llvm/test/CodeGen/PowerPC/rldimi.ll
index 322975f547c996..78ea9aa862f2c2 100644
--- a/llvm/test/CodeGen/PowerPC/rldimi.ll
+++ b/llvm/test/CodeGen/PowerPC/rldimi.ll
@@ -59,8 +59,8 @@ entry:
   ret i64 %8
 }
 
-define i64 @rldimi_intrinsic(i64 %a) {
-; CHECK-LABEL: rldimi_intrinsic:
+define i64 @rldimi4(i64 %a) {
+; CHECK-LABEL: rldimi4:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    rldimi 3, 3, 8, 0
 ; CHECK-NEXT:    rldimi 3, 3, 16, 0
@@ -72,4 +72,71 @@ define i64 @rldimi_intrinsic(i64 %a) {
   ret i64 %r3
 }
 
+define i64 @rldimi5(i64 %a, i64 %b) {
+; CHECK-LABEL: rldimi5:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    rldimi 4, 3, 8, 40
+; CHECK-NEXT:    mr 3, 4
+; CHECK-NEXT:    blr
+  %r = call i64 @llvm.ppc.rldimi(i64 %a, i64 %b, i32 8, i64 16776960) ; 0xffff << 8
+  ret i64 %r
+}
+
+define i64 @rldimi6(i64 %a, i64 %b) {
+; CHECK-LABEL: rldimi6:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    rotldi 3, 3, 1
+; CHECK-NEXT:    rldimi 4, 3, 7, 41
+; CHECK-NEXT:    mr 3, 4
+; CHECK-NEXT:    blr
+  %r = call i64 @llvm.ppc.rldimi(i64 %a, i64 %b, i32 8, i64 8388480) ; 0xffff << 7
+  ret i64 %r
+}
+
+define i64 @rldimi7(i64 %a, i64 %b) {
+; CHECK-LABEL: rldimi7:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    rotldi 3, 3, 63
+; CHECK-NEXT:    rldimi 4, 3, 9, 39
+; CHECK-NEXT:    mr 3, 4
+; CHECK-NEXT:    blr
+  %r = call i64 @llvm.ppc.rldimi(i64 %a, i64 %b, i32 8, i64 33553920) ; 0xffff << 9
+  ret i64 %r
+}
+
+define i64 @rldimi8(i64 %a, i64 %b) {
+; CHECK-LABEL: rldimi8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    mr 3, 4
+; CHECK-NEXT:    blr
+  %r = call i64 @llvm.ppc.rldimi(i64 %a, i64 %b, i32 0, i64 0)
+  ret i64 %r
+}
+
+define i64 @rldimi9(i64 %a, i64 %b) {
+; CHECK-LABEL: rldimi9:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    mr 3, 4
+; CHECK-NEXT:    blr
+  %r = call i64 @llvm.ppc.rldimi(i64 %a, i64 %b, i32 63, i64 0)
+  ret i64 %r
+}
+
+define i64 @rldimi10(i64 %a, i64 %b) {
+; CHECK-LABEL: rldimi10:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    blr
+  %r = call i64 @llvm.ppc.rldimi(i64 %a, i64 %b, i32 0, i64 -1)
+  ret i64 %r
+}
+
+define i64 @rldimi11(i64 %a, i64 %b) {
+; CHECK-LABEL: rldimi11:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    rotldi 3, 3, 8
+; CHECK-NEXT:    blr
+  %r = call i64 @llvm.ppc.rldimi(i64 %a, i64 %b, i32 8, i64 -1)
+  ret i64 %r
+}
+
 declare i64 @llvm.ppc.rldimi(i64, i64, i32 immarg, i64 immarg)

diff  --git a/llvm/test/CodeGen/PowerPC/rlwimi.ll b/llvm/test/CodeGen/PowerPC/rlwimi.ll
index 8b126cd3393c10..8da769508c9ae8 100644
--- a/llvm/test/CodeGen/PowerPC/rlwimi.ll
+++ b/llvm/test/CodeGen/PowerPC/rlwimi.ll
@@ -107,11 +107,51 @@ entry:
 define i32 @test9(i32 %a, i32 %b) {
 ; CHECK-LABEL: test9:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    rlwimi 3, 4, 8, 20, 26
+; CHECK-NEXT:    rlwimi 4, 3, 8, 20, 26
+; CHECK-NEXT:    mr 3, 4
 ; CHECK-NEXT:    blr
 entry:
   %r = call i32 @llvm.ppc.rlwimi(i32 %a, i32 %b, i32 8, i32 4064)
   ret i32 %r
 }
 
+define i32 @test10(i32 %a, i32 %b) {
+; CHECK-LABEL: test10:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    blr
+entry:
+  %r = call i32 @llvm.ppc.rlwimi(i32 %a, i32 %b, i32 0, i32 -1)
+  ret i32 %r
+}
+
+define i32 @test11(i32 %a, i32 %b) {
+; CHECK-LABEL: test11:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    rotlwi 3, 3, 8
+; CHECK-NEXT:    blr
+entry:
+  %r = call i32 @llvm.ppc.rlwimi(i32 %a, i32 %b, i32 8, i32 -1)
+  ret i32 %r
+}
+
+define i32 @test12(i32 %a, i32 %b) {
+; CHECK-LABEL: test12:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    mr 3, 4
+; CHECK-NEXT:    blr
+entry:
+  %r = call i32 @llvm.ppc.rlwimi(i32 %a, i32 %b, i32 0, i32 0)
+  ret i32 %r
+}
+
+define i32 @test13(i32 %a, i32 %b) {
+; CHECK-LABEL: test13:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    rlwimi 3, 4, 0, 27, 19
+; CHECK-NEXT:    blr
+entry:
+  %r = call i32 @llvm.ppc.rlwimi(i32 %a, i32 %b, i32 0, i32 4064)
+  ret i32 %r
+}
+
 declare i32 @llvm.ppc.rlwimi(i32, i32, i32 immarg, i32 immarg)

diff  --git a/llvm/test/CodeGen/PowerPC/rlwinm.ll b/llvm/test/CodeGen/PowerPC/rlwinm.ll
index c6d4e5bb000040..363eb171276566 100644
--- a/llvm/test/CodeGen/PowerPC/rlwinm.ll
+++ b/llvm/test/CodeGen/PowerPC/rlwinm.ll
@@ -97,4 +97,24 @@ entry:
   ret i32 %r
 }
 
+define i32 @test10(i32 %a, i32 %s) {
+; CHECK-LABEL: test10:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    li 3, 0
+; CHECK-NEXT:    blr
+entry:
+  %r = call i32 @llvm.ppc.rlwnm(i32 %a, i32 %s, i32 0)
+  ret i32 %r
+}
+
+define i32 @test11(i32 %a, i32 %s) {
+; CHECK-LABEL: test11:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    rotlw 3, 3, 4
+; CHECK-NEXT:    blr
+entry:
+  %r = call i32 @llvm.ppc.rlwnm(i32 %a, i32 %s, i32 -1)
+  ret i32 %r
+}
+
 declare i32 @llvm.ppc.rlwnm(i32, i32, i32 immarg)


        


More information about the cfe-commits mailing list