[clang] [llvm] [PowerPC] Fix behavior of rldimi/rlwimi/rlwnm builtins (PR #85040)

Qiu Chaofan via cfe-commits cfe-commits at lists.llvm.org
Thu Mar 14 20:57:37 PDT 2024


https://github.com/ecnelises updated https://github.com/llvm/llvm-project/pull/85040

>From 4977659b16a7f220e1a738a0b9841102fe9f1d07 Mon Sep 17 00:00:00 2001
From: Qiu Chaofan <qiucofan at cn.ibm.com>
Date: Wed, 13 Mar 2024 15:46:51 +0800
Subject: [PATCH 1/2] [PowerPC] Fix behavior of rldimi/rlwimi/rlwnm builtins

rldimi is 64-bit instruction, so the corresponding builtin should not
be available in 32-bit mode. Rotate amount should be in range and
cases when mask is zero needs special handling.

This change also swaps the first and second operands of rldimi/rlwimi
to match previous behavior. For masks not ending at bit 63-SH,
rotation will be inserted before rldimi.
---
 clang/lib/Sema/SemaChecking.cpp               |  5 ++-
 .../PowerPC/builtins-ppc-xlcompat-error.c     |  7 ++++
 .../PowerPC/builtins-ppc-xlcompat-rotate.c    | 22 +++++++-----
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp   | 35 +++++++++++++++----
 llvm/test/CodeGen/PowerPC/rlwimi.ll           |  3 +-
 5 files changed, 54 insertions(+), 18 deletions(-)

diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index a5f42b630c3fa2..b032ea1db344a8 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -4992,6 +4992,7 @@ static bool isPPC_64Builtin(unsigned BuiltinID) {
   case PPC::BI__builtin_ppc_fetch_and_andlp:
   case PPC::BI__builtin_ppc_fetch_and_orlp:
   case PPC::BI__builtin_ppc_fetch_and_swaplp:
+  case PPC::BI__builtin_ppc_rldimi:
     return true;
   }
   return false;
@@ -5093,8 +5094,10 @@ bool Sema::CheckPPCBuiltinFunctionCall(const TargetInfo &TI, unsigned BuiltinID,
   case PPC::BI__builtin_ppc_rlwnm:
     return SemaValueIsRunOfOnes(TheCall, 2);
   case PPC::BI__builtin_ppc_rlwimi:
+    return SemaBuiltinConstantArgRange(TheCall, 2, 0, 31) ||
+           SemaValueIsRunOfOnes(TheCall, 3);
   case PPC::BI__builtin_ppc_rldimi:
-    return SemaBuiltinConstantArg(TheCall, 2, Result) ||
+    return SemaBuiltinConstantArgRange(TheCall, 2, 0, 63) ||
            SemaValueIsRunOfOnes(TheCall, 3);
   case PPC::BI__builtin_ppc_addex: {
     if (SemaBuiltinConstantArgRange(TheCall, 2, 0, 3))
diff --git a/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-error.c b/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-error.c
index 5f57d7575c859a..272e0222dc9e41 100644
--- a/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-error.c
+++ b/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-error.c
@@ -24,13 +24,16 @@ void test_trap(void) {
   __tw(ia, ib, 0); //expected-error {{argument value 0 is outside the valid range [1, 31]}}
 }
 
+#ifdef __PPC64__
 void test_builtin_ppc_rldimi() {
   unsigned int shift;
   unsigned long long mask;
   unsigned long long res = __builtin_ppc_rldimi(ull, ull, shift, 7); // expected-error {{argument to '__builtin_ppc_rldimi' must be a constant integer}}
   res = __builtin_ppc_rldimi(ull, ull, 63, mask);                    // expected-error {{argument to '__builtin_ppc_rldimi' must be a constant integer}}
   res = __builtin_ppc_rldimi(ull, ull, 63, 0xFFFF000000000F00);      // expected-error {{argument 3 value should represent a contiguous bit field}}
+  res = __builtin_ppc_rldimi(ull, ull, 64, 0xFFFF000000000000);      // expected-error {{argument value 64 is outside the valid range [0, 63]}}
 }
+#endif
 
 void test_builtin_ppc_rlwimi() {
   unsigned int shift;
@@ -83,6 +86,10 @@ void testalignx(const void *pointer, unsigned int alignment) {
 }
 
 #ifndef __PPC64__
+unsigned long long testrldimi32() {
+  return __rldimi(ull, ui, 3, 0x7ffff8ULL); //expected-error {{this builtin is only available on 64-bit targets}}
+}
+
 long long testbpermd(long long bit_selector, long long source) {
   return __bpermd(bit_selector, source); //expected-error {{this builtin is only available on 64-bit targets}}
 }
diff --git a/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-rotate.c b/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-rotate.c
index b218547c00d931..4773d6cb1a0cfd 100644
--- a/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-rotate.c
+++ b/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-rotate.c
@@ -1,8 +1,10 @@
 // REQUIRES: powerpc-registered-target
 // RUN: %clang_cc1 -triple powerpc64-unknown-linux-gnu \
-// RUN:   -emit-llvm %s -o - -target-cpu pwr7 | FileCheck %s
+// RUN:   -emit-llvm %s -o - -target-cpu pwr7 | FileCheck %s \
+// RUN:   -check-prefixes=PPC64,CHECK
 // RUN: %clang_cc1 -triple powerpc64le-unknown-linux-gnu \
-// RUN:   -emit-llvm %s -o - -target-cpu pwr8 | FileCheck %s
+// RUN:   -emit-llvm %s -o - -target-cpu pwr8 | FileCheck %s \
+// RUN:   -check-prefixes=PPC64,CHECK
 // RUN: %clang_cc1 -triple powerpc-unknown-aix \
 // RUN:   -emit-llvm %s -o - -target-cpu pwr7 | FileCheck %s
 // RUN: %clang_cc1 -triple powerpc64-unknown-aix \
@@ -11,18 +13,20 @@
 extern unsigned int ui;
 extern unsigned long long ull;
 
+#ifdef __PPC64__
 void test_builtin_ppc_rldimi() {
-  // CHECK-LABEL: test_builtin_ppc_rldimi
-  // CHECK:       %res = alloca i64, align 8
-  // CHECK-NEXT:  [[RA:%[0-9]+]] = load i64, ptr @ull, align 8
-  // CHECK-NEXT:  [[RB:%[0-9]+]] = load i64, ptr @ull, align 8
-  // CHECK-NEXT:  [[RC:%[0-9]+]] = call i64 @llvm.ppc.rldimi(i64 [[RA]], i64 [[RB]], i32 63, i64 72057593769492480)
-  // CHECK-NEXT:  store i64 [[RC]], ptr %res, align 8
-  // CHECK-NEXT:  ret void
+  // PPC64-LABEL: test_builtin_ppc_rldimi
+  // PPC64:       %res = alloca i64, align 8
+  // PPC64-NEXT:  [[RA:%[0-9]+]] = load i64, ptr @ull, align 8
+  // PPC64-NEXT:  [[RB:%[0-9]+]] = load i64, ptr @ull, align 8
+  // PPC64-NEXT:  [[RC:%[0-9]+]] = call i64 @llvm.ppc.rldimi(i64 [[RA]], i64 [[RB]], i32 63, i64 72057593769492480)
+  // PPC64-NEXT:  store i64 [[RC]], ptr %res, align 8
+  // PPC64-NEXT:  ret void
 
   /*shift = 63, mask = 0x00FFFFFFF0000000 = 72057593769492480, ~mask = 0xFF0000000FFFFFFF = -72057593769492481*/
   unsigned long long res = __builtin_ppc_rldimi(ull, ull, 63, 0x00FFFFFFF0000000);
 }
+#endif
 
 void test_builtin_ppc_rlwimi() {
   // CHECK-LABEL: test_builtin_ppc_rlwimi
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 68c80dd9aa5c76..306a04f47ee84d 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -10764,30 +10764,51 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
     return DAG.getRegister(PPC::R2, MVT::i32);
 
   case Intrinsic::ppc_rldimi: {
+    assert(Subtarget.isPPC64() && "rldimi is only available in 64-bit!");
+    if (Op.getConstantOperandVal(4) == 0)
+      return Op.getOperand(2);
     uint64_t SH = Op.getConstantOperandVal(3);
     unsigned MB = 0, ME = 0;
-    if (!isRunOfOnes64(Op.getConstantOperandVal(4), MB, ME) || ME != 63 - SH)
+    if (!isRunOfOnes64(Op.getConstantOperandVal(4), MB, ME))
       report_fatal_error("invalid rldimi mask!");
-    return SDValue(DAG.getMachineNode(
-                       PPC::RLDIMI, dl, MVT::i64,
-                       {Op.getOperand(1), Op.getOperand(2), Op.getOperand(3),
-                        DAG.getTargetConstant(MB, dl, MVT::i32)}),
-                   0);
+
+    // For all-one mask, MB will be set to 0, adjust it next to 63-SH.
+    if (MB == 0 && ME == 63 && SH != 0)
+      MB = 64 - SH;
+    SDValue Src = Op.getOperand(1);
+    // rldimi requires ME=63-SH, otherwise rotation is needed before rldimi.
+    if (ME < 63 - SH) {
+      Src = DAG.getNode(ISD::ROTL, dl, MVT::i64, Src,
+                        DAG.getConstant(ME + SH + 1, dl, MVT::i32));
+    } else if (ME > 63 - SH) {
+      Src = DAG.getNode(ISD::ROTL, dl, MVT::i64, Src,
+                        DAG.getConstant(ME + SH - 63, dl, MVT::i32));
+    }
+    return SDValue(
+        DAG.getMachineNode(PPC::RLDIMI, dl, MVT::i64,
+                           {Op.getOperand(2), Src,
+                            DAG.getTargetConstant(63 - ME, dl, MVT::i32),
+                            DAG.getTargetConstant(MB, dl, MVT::i32)}),
+        0);
   }
 
   case Intrinsic::ppc_rlwimi: {
+    if (Op.getConstantOperandVal(4) == 0)
+      return Op.getOperand(2);
     unsigned MB = 0, ME = 0;
     if (!isRunOfOnes(Op.getConstantOperandVal(4), MB, ME))
       report_fatal_error("invalid rlwimi mask!");
     return SDValue(DAG.getMachineNode(
                        PPC::RLWIMI, dl, MVT::i32,
-                       {Op.getOperand(1), Op.getOperand(2), Op.getOperand(3),
+                       {Op.getOperand(2), Op.getOperand(1), Op.getOperand(3),
                         DAG.getTargetConstant(MB, dl, MVT::i32),
                         DAG.getTargetConstant(ME, dl, MVT::i32)}),
                    0);
   }
 
   case Intrinsic::ppc_rlwnm: {
+    if (Op.getConstantOperandVal(3) == 0)
+      return DAG.getConstant(0, dl, MVT::i32);
     unsigned MB = 0, ME = 0;
     if (!isRunOfOnes(Op.getConstantOperandVal(3), MB, ME))
       report_fatal_error("invalid rlwnm mask!");
diff --git a/llvm/test/CodeGen/PowerPC/rlwimi.ll b/llvm/test/CodeGen/PowerPC/rlwimi.ll
index 8b126cd3393c10..b7a2ded7bd8df4 100644
--- a/llvm/test/CodeGen/PowerPC/rlwimi.ll
+++ b/llvm/test/CodeGen/PowerPC/rlwimi.ll
@@ -107,7 +107,8 @@ entry:
 define i32 @test9(i32 %a, i32 %b) {
 ; CHECK-LABEL: test9:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    rlwimi 3, 4, 8, 20, 26
+; CHECK-NEXT:    rlwimi 4, 3, 8, 20, 26
+; CHECK-NEXT:    mr 3, 4
 ; CHECK-NEXT:    blr
 entry:
   %r = call i32 @llvm.ppc.rlwimi(i32 %a, i32 %b, i32 8, i32 4064)

>From 15854d47fe394244ec799230d4d5f55ca55679a8 Mon Sep 17 00:00:00 2001
From: Qiu Chaofan <qiucofan at cn.ibm.com>
Date: Fri, 15 Mar 2024 11:57:21 +0800
Subject: [PATCH 2/2] Add CodeGen tests

---
 llvm/test/CodeGen/PowerPC/rldimi.ll | 64 ++++++++++++++++++++++++++++-
 llvm/test/CodeGen/PowerPC/rlwimi.ll | 42 +++++++++++++++++++
 llvm/test/CodeGen/PowerPC/rlwinm.ll | 20 +++++++++
 3 files changed, 124 insertions(+), 2 deletions(-)

diff --git a/llvm/test/CodeGen/PowerPC/rldimi.ll b/llvm/test/CodeGen/PowerPC/rldimi.ll
index 322975f547c996..e3068de53af143 100644
--- a/llvm/test/CodeGen/PowerPC/rldimi.ll
+++ b/llvm/test/CodeGen/PowerPC/rldimi.ll
@@ -59,8 +59,8 @@ entry:
   ret i64 %8
 }
 
-define i64 @rldimi_intrinsic(i64 %a) {
-; CHECK-LABEL: rldimi_intrinsic:
+define i64 @rldimi4(i64 %a) {
+; CHECK-LABEL: rldimi4:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    rldimi 3, 3, 8, 0
 ; CHECK-NEXT:    rldimi 3, 3, 16, 0
@@ -72,4 +72,64 @@ define i64 @rldimi_intrinsic(i64 %a) {
   ret i64 %r3
 }
 
+define i64 @rldimi5(i64 %a, i64 %b) {
+; CHECK-LABEL: rldimi5:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    rldimi 4, 3, 8, 40
+; CHECK-NEXT:    mr 3, 4
+; CHECK-NEXT:    blr
+  %r = call i64 @llvm.ppc.rldimi(i64 %a, i64 %b, i32 8, i64 16776960) ; 0xffff << 8
+  ret i64 %r
+}
+
+define i64 @rldimi6(i64 %a, i64 %b) {
+; CHECK-LABEL: rldimi6:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    rotldi 3, 3, 1
+; CHECK-NEXT:    rldimi 4, 3, 7, 41
+; CHECK-NEXT:    mr 3, 4
+; CHECK-NEXT:    blr
+  %r = call i64 @llvm.ppc.rldimi(i64 %a, i64 %b, i32 8, i64 8388480) ; 0xffff << 7
+  ret i64 %r
+}
+
+define i64 @rldimi7(i64 %a, i64 %b) {
+; CHECK-LABEL: rldimi7:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    rotldi 3, 3, 63
+; CHECK-NEXT:    rldimi 4, 3, 9, 39
+; CHECK-NEXT:    mr 3, 4
+; CHECK-NEXT:    blr
+  %r = call i64 @llvm.ppc.rldimi(i64 %a, i64 %b, i32 8, i64 33553920) ; 0xffff << 9
+  ret i64 %r
+}
+
+define i64 @rldimi8(i64 %a, i64 %b) {
+; CHECK-LABEL: rldimi8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    mr 3, 4
+; CHECK-NEXT:    blr
+  %r = call i64 @llvm.ppc.rldimi(i64 %a, i64 %b, i32 0, i64 0)
+  ret i64 %r
+}
+
+define i64 @rldimi9(i64 %a, i64 %b) {
+; CHECK-LABEL: rldimi9:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    mr 3, 4
+; CHECK-NEXT:    blr
+  %r = call i64 @llvm.ppc.rldimi(i64 %a, i64 %b, i32 63, i64 0)
+  ret i64 %r
+}
+
+define i64 @rldimi10(i64 %a, i64 %b) {
+; CHECK-LABEL: rldimi10:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    rldimi 4, 3, 0, 0
+; CHECK-NEXT:    mr 3, 4
+; CHECK-NEXT:    blr
+  %r = call i64 @llvm.ppc.rldimi(i64 %a, i64 %b, i32 0, i64 -1)
+  ret i64 %r
+}
+
 declare i64 @llvm.ppc.rldimi(i64, i64, i32 immarg, i64 immarg)
diff --git a/llvm/test/CodeGen/PowerPC/rlwimi.ll b/llvm/test/CodeGen/PowerPC/rlwimi.ll
index b7a2ded7bd8df4..cce217d8fb56ed 100644
--- a/llvm/test/CodeGen/PowerPC/rlwimi.ll
+++ b/llvm/test/CodeGen/PowerPC/rlwimi.ll
@@ -115,4 +115,46 @@ entry:
   ret i32 %r
 }
 
+define i32 @test10(i32 %a, i32 %b) {
+; CHECK-LABEL: test10:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    rlwimi 4, 3, 0, 0, 31
+; CHECK-NEXT:    mr 3, 4
+; CHECK-NEXT:    blr
+entry:
+  %r = call i32 @llvm.ppc.rlwimi(i32 %a, i32 %b, i32 0, i32 -1)
+  ret i32 %r
+}
+
+define i32 @test11(i32 %a, i32 %b) {
+; CHECK-LABEL: test11:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    rlwimi 4, 3, 8, 0, 31
+; CHECK-NEXT:    mr 3, 4
+; CHECK-NEXT:    blr
+entry:
+  %r = call i32 @llvm.ppc.rlwimi(i32 %a, i32 %b, i32 8, i32 -1)
+  ret i32 %r
+}
+
+define i32 @test12(i32 %a, i32 %b) {
+; CHECK-LABEL: test12:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    mr 3, 4
+; CHECK-NEXT:    blr
+entry:
+  %r = call i32 @llvm.ppc.rlwimi(i32 %a, i32 %b, i32 0, i32 0)
+  ret i32 %r
+}
+
+define i32 @test13(i32 %a, i32 %b) {
+; CHECK-LABEL: test13:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    rlwimi 3, 4, 0, 27, 19
+; CHECK-NEXT:    blr
+entry:
+  %r = call i32 @llvm.ppc.rlwimi(i32 %a, i32 %b, i32 0, i32 4064)
+  ret i32 %r
+}
+
 declare i32 @llvm.ppc.rlwimi(i32, i32, i32 immarg, i32 immarg)
diff --git a/llvm/test/CodeGen/PowerPC/rlwinm.ll b/llvm/test/CodeGen/PowerPC/rlwinm.ll
index c6d4e5bb000040..363eb171276566 100644
--- a/llvm/test/CodeGen/PowerPC/rlwinm.ll
+++ b/llvm/test/CodeGen/PowerPC/rlwinm.ll
@@ -97,4 +97,24 @@ entry:
   ret i32 %r
 }
 
+define i32 @test10(i32 %a, i32 %s) {
+; CHECK-LABEL: test10:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    li 3, 0
+; CHECK-NEXT:    blr
+entry:
+  %r = call i32 @llvm.ppc.rlwnm(i32 %a, i32 %s, i32 0)
+  ret i32 %r
+}
+
+define i32 @test11(i32 %a, i32 %s) {
+; CHECK-LABEL: test11:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    rotlw 3, 3, 4
+; CHECK-NEXT:    blr
+entry:
+  %r = call i32 @llvm.ppc.rlwnm(i32 %a, i32 %s, i32 -1)
+  ret i32 %r
+}
+
 declare i32 @llvm.ppc.rlwnm(i32, i32, i32 immarg)



More information about the cfe-commits mailing list