[clang] [llvm] [PowerPC] Fix behavior of rldimi/rlwimi/rlwnm builtins (PR #85040)
Qiu Chaofan via cfe-commits
cfe-commits at lists.llvm.org
Thu Mar 14 20:57:37 PDT 2024
https://github.com/ecnelises updated https://github.com/llvm/llvm-project/pull/85040
>From 4977659b16a7f220e1a738a0b9841102fe9f1d07 Mon Sep 17 00:00:00 2001
From: Qiu Chaofan <qiucofan at cn.ibm.com>
Date: Wed, 13 Mar 2024 15:46:51 +0800
Subject: [PATCH 1/2] [PowerPC] Fix behavior of rldimi/rlwimi/rlwnm builtins
rldimi is a 64-bit instruction, so the corresponding builtin should not
be available in 32-bit mode. The rotate amount should be in range, and
cases where the mask is zero need special handling.
This change also swaps the first and second operands of rldimi/rlwimi
to match previous behavior. For masks not ending at bit 63-SH, a
rotation will be inserted before rldimi.
---
clang/lib/Sema/SemaChecking.cpp | 5 ++-
.../PowerPC/builtins-ppc-xlcompat-error.c | 7 ++++
.../PowerPC/builtins-ppc-xlcompat-rotate.c | 22 +++++++-----
llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 35 +++++++++++++++----
llvm/test/CodeGen/PowerPC/rlwimi.ll | 3 +-
5 files changed, 54 insertions(+), 18 deletions(-)
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index a5f42b630c3fa2..b032ea1db344a8 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -4992,6 +4992,7 @@ static bool isPPC_64Builtin(unsigned BuiltinID) {
case PPC::BI__builtin_ppc_fetch_and_andlp:
case PPC::BI__builtin_ppc_fetch_and_orlp:
case PPC::BI__builtin_ppc_fetch_and_swaplp:
+ case PPC::BI__builtin_ppc_rldimi:
return true;
}
return false;
@@ -5093,8 +5094,10 @@ bool Sema::CheckPPCBuiltinFunctionCall(const TargetInfo &TI, unsigned BuiltinID,
case PPC::BI__builtin_ppc_rlwnm:
return SemaValueIsRunOfOnes(TheCall, 2);
case PPC::BI__builtin_ppc_rlwimi:
+ return SemaBuiltinConstantArgRange(TheCall, 2, 0, 31) ||
+ SemaValueIsRunOfOnes(TheCall, 3);
case PPC::BI__builtin_ppc_rldimi:
- return SemaBuiltinConstantArg(TheCall, 2, Result) ||
+ return SemaBuiltinConstantArgRange(TheCall, 2, 0, 63) ||
SemaValueIsRunOfOnes(TheCall, 3);
case PPC::BI__builtin_ppc_addex: {
if (SemaBuiltinConstantArgRange(TheCall, 2, 0, 3))
diff --git a/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-error.c b/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-error.c
index 5f57d7575c859a..272e0222dc9e41 100644
--- a/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-error.c
+++ b/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-error.c
@@ -24,13 +24,16 @@ void test_trap(void) {
__tw(ia, ib, 0); //expected-error {{argument value 0 is outside the valid range [1, 31]}}
}
+#ifdef __PPC64__
void test_builtin_ppc_rldimi() {
unsigned int shift;
unsigned long long mask;
unsigned long long res = __builtin_ppc_rldimi(ull, ull, shift, 7); // expected-error {{argument to '__builtin_ppc_rldimi' must be a constant integer}}
res = __builtin_ppc_rldimi(ull, ull, 63, mask); // expected-error {{argument to '__builtin_ppc_rldimi' must be a constant integer}}
res = __builtin_ppc_rldimi(ull, ull, 63, 0xFFFF000000000F00); // expected-error {{argument 3 value should represent a contiguous bit field}}
+ res = __builtin_ppc_rldimi(ull, ull, 64, 0xFFFF000000000000); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
}
+#endif
void test_builtin_ppc_rlwimi() {
unsigned int shift;
@@ -83,6 +86,10 @@ void testalignx(const void *pointer, unsigned int alignment) {
}
#ifndef __PPC64__
+unsigned long long testrldimi32() {
+ return __rldimi(ull, ui, 3, 0x7ffff8ULL); //expected-error {{this builtin is only available on 64-bit targets}}
+}
+
long long testbpermd(long long bit_selector, long long source) {
return __bpermd(bit_selector, source); //expected-error {{this builtin is only available on 64-bit targets}}
}
diff --git a/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-rotate.c b/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-rotate.c
index b218547c00d931..4773d6cb1a0cfd 100644
--- a/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-rotate.c
+++ b/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-rotate.c
@@ -1,8 +1,10 @@
// REQUIRES: powerpc-registered-target
// RUN: %clang_cc1 -triple powerpc64-unknown-linux-gnu \
-// RUN: -emit-llvm %s -o - -target-cpu pwr7 | FileCheck %s
+// RUN: -emit-llvm %s -o - -target-cpu pwr7 | FileCheck %s \
+// RUN: -check-prefixes=PPC64,CHECK
// RUN: %clang_cc1 -triple powerpc64le-unknown-linux-gnu \
-// RUN: -emit-llvm %s -o - -target-cpu pwr8 | FileCheck %s
+// RUN: -emit-llvm %s -o - -target-cpu pwr8 | FileCheck %s \
+// RUN: -check-prefixes=PPC64,CHECK
// RUN: %clang_cc1 -triple powerpc-unknown-aix \
// RUN: -emit-llvm %s -o - -target-cpu pwr7 | FileCheck %s
// RUN: %clang_cc1 -triple powerpc64-unknown-aix \
@@ -11,18 +13,20 @@
extern unsigned int ui;
extern unsigned long long ull;
+#ifdef __PPC64__
void test_builtin_ppc_rldimi() {
- // CHECK-LABEL: test_builtin_ppc_rldimi
- // CHECK: %res = alloca i64, align 8
- // CHECK-NEXT: [[RA:%[0-9]+]] = load i64, ptr @ull, align 8
- // CHECK-NEXT: [[RB:%[0-9]+]] = load i64, ptr @ull, align 8
- // CHECK-NEXT: [[RC:%[0-9]+]] = call i64 @llvm.ppc.rldimi(i64 [[RA]], i64 [[RB]], i32 63, i64 72057593769492480)
- // CHECK-NEXT: store i64 [[RC]], ptr %res, align 8
- // CHECK-NEXT: ret void
+ // PPC64-LABEL: test_builtin_ppc_rldimi
+ // PPC64: %res = alloca i64, align 8
+ // PPC64-NEXT: [[RA:%[0-9]+]] = load i64, ptr @ull, align 8
+ // PPC64-NEXT: [[RB:%[0-9]+]] = load i64, ptr @ull, align 8
+ // PPC64-NEXT: [[RC:%[0-9]+]] = call i64 @llvm.ppc.rldimi(i64 [[RA]], i64 [[RB]], i32 63, i64 72057593769492480)
+ // PPC64-NEXT: store i64 [[RC]], ptr %res, align 8
+ // PPC64-NEXT: ret void
/*shift = 63, mask = 0x00FFFFFFF0000000 = 72057593769492480, ~mask = 0xFF0000000FFFFFFF = -72057593769492481*/
unsigned long long res = __builtin_ppc_rldimi(ull, ull, 63, 0x00FFFFFFF0000000);
}
+#endif
void test_builtin_ppc_rlwimi() {
// CHECK-LABEL: test_builtin_ppc_rlwimi
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 68c80dd9aa5c76..306a04f47ee84d 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -10764,30 +10764,51 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return DAG.getRegister(PPC::R2, MVT::i32);
case Intrinsic::ppc_rldimi: {
+ assert(Subtarget.isPPC64() && "rldimi is only available in 64-bit!");
+ if (Op.getConstantOperandVal(4) == 0)
+ return Op.getOperand(2);
uint64_t SH = Op.getConstantOperandVal(3);
unsigned MB = 0, ME = 0;
- if (!isRunOfOnes64(Op.getConstantOperandVal(4), MB, ME) || ME != 63 - SH)
+ if (!isRunOfOnes64(Op.getConstantOperandVal(4), MB, ME))
report_fatal_error("invalid rldimi mask!");
- return SDValue(DAG.getMachineNode(
- PPC::RLDIMI, dl, MVT::i64,
- {Op.getOperand(1), Op.getOperand(2), Op.getOperand(3),
- DAG.getTargetConstant(MB, dl, MVT::i32)}),
- 0);
+
+ // For all-one mask, MB will be set to 0, adjust it next to 63-SH.
+ if (MB == 0 && ME == 63 && SH != 0)
+ MB = 64 - SH;
+ SDValue Src = Op.getOperand(1);
+ // rldimi requires ME=63-SH, otherwise rotation is needed before rldimi.
+ if (ME < 63 - SH) {
+ Src = DAG.getNode(ISD::ROTL, dl, MVT::i64, Src,
+ DAG.getConstant(ME + SH + 1, dl, MVT::i32));
+ } else if (ME > 63 - SH) {
+ Src = DAG.getNode(ISD::ROTL, dl, MVT::i64, Src,
+ DAG.getConstant(ME + SH - 63, dl, MVT::i32));
+ }
+ return SDValue(
+ DAG.getMachineNode(PPC::RLDIMI, dl, MVT::i64,
+ {Op.getOperand(2), Src,
+ DAG.getTargetConstant(63 - ME, dl, MVT::i32),
+ DAG.getTargetConstant(MB, dl, MVT::i32)}),
+ 0);
}
case Intrinsic::ppc_rlwimi: {
+ if (Op.getConstantOperandVal(4) == 0)
+ return Op.getOperand(2);
unsigned MB = 0, ME = 0;
if (!isRunOfOnes(Op.getConstantOperandVal(4), MB, ME))
report_fatal_error("invalid rlwimi mask!");
return SDValue(DAG.getMachineNode(
PPC::RLWIMI, dl, MVT::i32,
- {Op.getOperand(1), Op.getOperand(2), Op.getOperand(3),
+ {Op.getOperand(2), Op.getOperand(1), Op.getOperand(3),
DAG.getTargetConstant(MB, dl, MVT::i32),
DAG.getTargetConstant(ME, dl, MVT::i32)}),
0);
}
case Intrinsic::ppc_rlwnm: {
+ if (Op.getConstantOperandVal(3) == 0)
+ return DAG.getConstant(0, dl, MVT::i32);
unsigned MB = 0, ME = 0;
if (!isRunOfOnes(Op.getConstantOperandVal(3), MB, ME))
report_fatal_error("invalid rlwnm mask!");
diff --git a/llvm/test/CodeGen/PowerPC/rlwimi.ll b/llvm/test/CodeGen/PowerPC/rlwimi.ll
index 8b126cd3393c10..b7a2ded7bd8df4 100644
--- a/llvm/test/CodeGen/PowerPC/rlwimi.ll
+++ b/llvm/test/CodeGen/PowerPC/rlwimi.ll
@@ -107,7 +107,8 @@ entry:
define i32 @test9(i32 %a, i32 %b) {
; CHECK-LABEL: test9:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: rlwimi 3, 4, 8, 20, 26
+; CHECK-NEXT: rlwimi 4, 3, 8, 20, 26
+; CHECK-NEXT: mr 3, 4
; CHECK-NEXT: blr
entry:
%r = call i32 @llvm.ppc.rlwimi(i32 %a, i32 %b, i32 8, i32 4064)
>From 15854d47fe394244ec799230d4d5f55ca55679a8 Mon Sep 17 00:00:00 2001
From: Qiu Chaofan <qiucofan at cn.ibm.com>
Date: Fri, 15 Mar 2024 11:57:21 +0800
Subject: [PATCH 2/2] Add CodeGen tests
---
llvm/test/CodeGen/PowerPC/rldimi.ll | 64 ++++++++++++++++++++++++++++-
llvm/test/CodeGen/PowerPC/rlwimi.ll | 42 +++++++++++++++++++
llvm/test/CodeGen/PowerPC/rlwinm.ll | 20 +++++++++
3 files changed, 124 insertions(+), 2 deletions(-)
diff --git a/llvm/test/CodeGen/PowerPC/rldimi.ll b/llvm/test/CodeGen/PowerPC/rldimi.ll
index 322975f547c996..e3068de53af143 100644
--- a/llvm/test/CodeGen/PowerPC/rldimi.ll
+++ b/llvm/test/CodeGen/PowerPC/rldimi.ll
@@ -59,8 +59,8 @@ entry:
ret i64 %8
}
-define i64 @rldimi_intrinsic(i64 %a) {
-; CHECK-LABEL: rldimi_intrinsic:
+define i64 @rldimi4(i64 %a) {
+; CHECK-LABEL: rldimi4:
; CHECK: # %bb.0:
; CHECK-NEXT: rldimi 3, 3, 8, 0
; CHECK-NEXT: rldimi 3, 3, 16, 0
@@ -72,4 +72,64 @@ define i64 @rldimi_intrinsic(i64 %a) {
ret i64 %r3
}
+define i64 @rldimi5(i64 %a, i64 %b) {
+; CHECK-LABEL: rldimi5:
+; CHECK: # %bb.0:
+; CHECK-NEXT: rldimi 4, 3, 8, 40
+; CHECK-NEXT: mr 3, 4
+; CHECK-NEXT: blr
+ %r = call i64 @llvm.ppc.rldimi(i64 %a, i64 %b, i32 8, i64 16776960) ; 0xffff << 8
+ ret i64 %r
+}
+
+define i64 @rldimi6(i64 %a, i64 %b) {
+; CHECK-LABEL: rldimi6:
+; CHECK: # %bb.0:
+; CHECK-NEXT: rotldi 3, 3, 1
+; CHECK-NEXT: rldimi 4, 3, 7, 41
+; CHECK-NEXT: mr 3, 4
+; CHECK-NEXT: blr
+ %r = call i64 @llvm.ppc.rldimi(i64 %a, i64 %b, i32 8, i64 8388480) ; 0xffff << 7
+ ret i64 %r
+}
+
+define i64 @rldimi7(i64 %a, i64 %b) {
+; CHECK-LABEL: rldimi7:
+; CHECK: # %bb.0:
+; CHECK-NEXT: rotldi 3, 3, 63
+; CHECK-NEXT: rldimi 4, 3, 9, 39
+; CHECK-NEXT: mr 3, 4
+; CHECK-NEXT: blr
+ %r = call i64 @llvm.ppc.rldimi(i64 %a, i64 %b, i32 8, i64 33553920) ; 0xffff << 9
+ ret i64 %r
+}
+
+define i64 @rldimi8(i64 %a, i64 %b) {
+; CHECK-LABEL: rldimi8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: mr 3, 4
+; CHECK-NEXT: blr
+ %r = call i64 @llvm.ppc.rldimi(i64 %a, i64 %b, i32 0, i64 0)
+ ret i64 %r
+}
+
+define i64 @rldimi9(i64 %a, i64 %b) {
+; CHECK-LABEL: rldimi9:
+; CHECK: # %bb.0:
+; CHECK-NEXT: mr 3, 4
+; CHECK-NEXT: blr
+ %r = call i64 @llvm.ppc.rldimi(i64 %a, i64 %b, i32 63, i64 0)
+ ret i64 %r
+}
+
+define i64 @rldimi10(i64 %a, i64 %b) {
+; CHECK-LABEL: rldimi10:
+; CHECK: # %bb.0:
+; CHECK-NEXT: rldimi 4, 3, 0, 0
+; CHECK-NEXT: mr 3, 4
+; CHECK-NEXT: blr
+ %r = call i64 @llvm.ppc.rldimi(i64 %a, i64 %b, i32 0, i64 -1)
+ ret i64 %r
+}
+
declare i64 @llvm.ppc.rldimi(i64, i64, i32 immarg, i64 immarg)
diff --git a/llvm/test/CodeGen/PowerPC/rlwimi.ll b/llvm/test/CodeGen/PowerPC/rlwimi.ll
index b7a2ded7bd8df4..cce217d8fb56ed 100644
--- a/llvm/test/CodeGen/PowerPC/rlwimi.ll
+++ b/llvm/test/CodeGen/PowerPC/rlwimi.ll
@@ -115,4 +115,46 @@ entry:
ret i32 %r
}
+define i32 @test10(i32 %a, i32 %b) {
+; CHECK-LABEL: test10:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: rlwimi 4, 3, 0, 0, 31
+; CHECK-NEXT: mr 3, 4
+; CHECK-NEXT: blr
+entry:
+ %r = call i32 @llvm.ppc.rlwimi(i32 %a, i32 %b, i32 0, i32 -1)
+ ret i32 %r
+}
+
+define i32 @test11(i32 %a, i32 %b) {
+; CHECK-LABEL: test11:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: rlwimi 4, 3, 8, 0, 31
+; CHECK-NEXT: mr 3, 4
+; CHECK-NEXT: blr
+entry:
+ %r = call i32 @llvm.ppc.rlwimi(i32 %a, i32 %b, i32 8, i32 -1)
+ ret i32 %r
+}
+
+define i32 @test12(i32 %a, i32 %b) {
+; CHECK-LABEL: test12:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: mr 3, 4
+; CHECK-NEXT: blr
+entry:
+ %r = call i32 @llvm.ppc.rlwimi(i32 %a, i32 %b, i32 0, i32 0)
+ ret i32 %r
+}
+
+define i32 @test13(i32 %a, i32 %b) {
+; CHECK-LABEL: test13:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: rlwimi 3, 4, 0, 27, 19
+; CHECK-NEXT: blr
+entry:
+ %r = call i32 @llvm.ppc.rlwimi(i32 %a, i32 %b, i32 0, i32 4064)
+ ret i32 %r
+}
+
declare i32 @llvm.ppc.rlwimi(i32, i32, i32 immarg, i32 immarg)
diff --git a/llvm/test/CodeGen/PowerPC/rlwinm.ll b/llvm/test/CodeGen/PowerPC/rlwinm.ll
index c6d4e5bb000040..363eb171276566 100644
--- a/llvm/test/CodeGen/PowerPC/rlwinm.ll
+++ b/llvm/test/CodeGen/PowerPC/rlwinm.ll
@@ -97,4 +97,24 @@ entry:
ret i32 %r
}
+define i32 @test10(i32 %a, i32 %s) {
+; CHECK-LABEL: test10:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: li 3, 0
+; CHECK-NEXT: blr
+entry:
+ %r = call i32 @llvm.ppc.rlwnm(i32 %a, i32 %s, i32 0)
+ ret i32 %r
+}
+
+define i32 @test11(i32 %a, i32 %s) {
+; CHECK-LABEL: test11:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: rotlw 3, 3, 4
+; CHECK-NEXT: blr
+entry:
+ %r = call i32 @llvm.ppc.rlwnm(i32 %a, i32 %s, i32 -1)
+ ret i32 %r
+}
+
declare i32 @llvm.ppc.rlwnm(i32, i32, i32 immarg)
More information about the cfe-commits
mailing list