[clang] [llvm] [PowerPC] Add intrinsics for rldimi/rlwimi/rlwnm (PR #82968)
Qiu Chaofan via cfe-commits
cfe-commits at lists.llvm.org
Mon Feb 26 22:09:08 PST 2024
https://github.com/ecnelises updated https://github.com/llvm/llvm-project/pull/82968
>From a06fa5e18313ad50019d50006e34a6b8249d95cd Mon Sep 17 00:00:00 2001
From: Qiu Chaofan <qiucofan at cn.ibm.com>
Date: Mon, 26 Feb 2024 16:32:28 +0800
Subject: [PATCH 1/2] [PowerPC] Add intrinsics for rldimi/rlwimi/rlwnm
These builtins already exist in Clang; however, the current codegen may
produce suboptimal results due to their complex behavior. Implement them
as intrinsics to ensure the expected instructions are emitted.
---
clang/lib/CodeGen/CGBuiltin.cpp | 29 ++---
.../PowerPC/builtins-ppc-xlcompat-rotate.c | 24 ++--
llvm/include/llvm/IR/IntrinsicsPowerPC.td | 12 ++
llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 52 ++++++++
llvm/test/CodeGen/PowerPC/rldimi.ll | 15 +++
llvm/test/CodeGen/PowerPC/rlwimi.ll | 123 ++++++++++++------
llvm/test/CodeGen/PowerPC/rlwinm.ll | 108 ++++++++++-----
7 files changed, 259 insertions(+), 104 deletions(-)
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 734eb5a035ca49..5d55be6e9e99df 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -17080,37 +17080,24 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
}
return Builder.CreateCall(CGM.getIntrinsic(ID), Ops, "");
}
- // Rotate and insert under mask operation.
- // __rldimi(rs, is, shift, mask)
- // (rotl64(rs, shift) & mask) | (is & ~mask)
- // __rlwimi(rs, is, shift, mask)
- // (rotl(rs, shift) & mask) | (is & ~mask)
case PPC::BI__builtin_ppc_rldimi:
case PPC::BI__builtin_ppc_rlwimi: {
Value *Op0 = EmitScalarExpr(E->getArg(0));
Value *Op1 = EmitScalarExpr(E->getArg(1));
Value *Op2 = EmitScalarExpr(E->getArg(2));
Value *Op3 = EmitScalarExpr(E->getArg(3));
- llvm::Type *Ty = Op0->getType();
- Function *F = CGM.getIntrinsic(Intrinsic::fshl, Ty);
- if (BuiltinID == PPC::BI__builtin_ppc_rldimi)
- Op2 = Builder.CreateZExt(Op2, Int64Ty);
- Value *Shift = Builder.CreateCall(F, {Op0, Op0, Op2});
- Value *X = Builder.CreateAnd(Shift, Op3);
- Value *Y = Builder.CreateAnd(Op1, Builder.CreateNot(Op3));
- return Builder.CreateOr(X, Y);
- }
- // Rotate and insert under mask operation.
- // __rlwnm(rs, shift, mask)
- // rotl(rs, shift) & mask
+ return Builder.CreateCall(
+ CGM.getIntrinsic(BuiltinID == PPC::BI__builtin_ppc_rldimi
+ ? Intrinsic::ppc_rldimi
+ : Intrinsic::ppc_rlwimi),
+ {Op0, Op1, Op2, Op3});
+ }
case PPC::BI__builtin_ppc_rlwnm: {
Value *Op0 = EmitScalarExpr(E->getArg(0));
Value *Op1 = EmitScalarExpr(E->getArg(1));
Value *Op2 = EmitScalarExpr(E->getArg(2));
- llvm::Type *Ty = Op0->getType();
- Function *F = CGM.getIntrinsic(Intrinsic::fshl, Ty);
- Value *Shift = Builder.CreateCall(F, {Op0, Op0, Op1});
- return Builder.CreateAnd(Shift, Op2);
+ return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_rlwnm),
+ {Op0, Op1, Op2});
}
case PPC::BI__builtin_ppc_poppar4:
case PPC::BI__builtin_ppc_poppar8: {
diff --git a/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-rotate.c b/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-rotate.c
index d96bfb4621421e..b218547c00d931 100644
--- a/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-rotate.c
+++ b/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-rotate.c
@@ -16,11 +16,8 @@ void test_builtin_ppc_rldimi() {
// CHECK: %res = alloca i64, align 8
// CHECK-NEXT: [[RA:%[0-9]+]] = load i64, ptr @ull, align 8
// CHECK-NEXT: [[RB:%[0-9]+]] = load i64, ptr @ull, align 8
- // CHECK-NEXT: [[RC:%[0-9]+]] = call i64 @llvm.fshl.i64(i64 [[RA]], i64 [[RA]], i64 63)
- // CHECK-NEXT: [[RD:%[0-9]+]] = and i64 [[RC]], 72057593769492480
- // CHECK-NEXT: [[RE:%[0-9]+]] = and i64 [[RB]], -72057593769492481
- // CHECK-NEXT: [[RF:%[0-9]+]] = or i64 [[RD]], [[RE]]
- // CHECK-NEXT: store i64 [[RF]], ptr %res, align 8
+ // CHECK-NEXT: [[RC:%[0-9]+]] = call i64 @llvm.ppc.rldimi(i64 [[RA]], i64 [[RB]], i32 63, i64 72057593769492480)
+ // CHECK-NEXT: store i64 [[RC]], ptr %res, align 8
// CHECK-NEXT: ret void
/*shift = 63, mask = 0x00FFFFFFF0000000 = 72057593769492480, ~mask = 0xFF0000000FFFFFFF = -72057593769492481*/
@@ -32,11 +29,8 @@ void test_builtin_ppc_rlwimi() {
// CHECK: %res = alloca i32, align 4
// CHECK-NEXT: [[RA:%[0-9]+]] = load i32, ptr @ui, align 4
// CHECK-NEXT: [[RB:%[0-9]+]] = load i32, ptr @ui, align 4
- // CHECK-NEXT: [[RC:%[0-9]+]] = call i32 @llvm.fshl.i32(i32 [[RA]], i32 [[RA]], i32 31)
- // CHECK-NEXT: [[RD:%[0-9]+]] = and i32 [[RC]], 16776960
- // CHECK-NEXT: [[RE:%[0-9]+]] = and i32 [[RB]], -16776961
- // CHECK-NEXT: [[RF:%[0-9]+]] = or i32 [[RD]], [[RE]]
- // CHECK-NEXT: store i32 [[RF]], ptr %res, align 4
+ // CHECK-NEXT: [[RC:%[0-9]+]] = call i32 @llvm.ppc.rlwimi(i32 [[RA]], i32 [[RB]], i32 31, i32 16776960)
+ // CHECK-NEXT: store i32 [[RC]], ptr %res, align 4
// CHECK-NEXT: ret void
/*shift = 31, mask = 0xFFFF00 = 16776960, ~mask = 0xFFFFFFFFFF0000FF = -16776961*/
@@ -47,9 +41,8 @@ void test_builtin_ppc_rlwnm() {
// CHECK-LABEL: test_builtin_ppc_rlwnm
// CHECK: %res = alloca i32, align 4
// CHECK-NEXT: [[RA:%[0-9]+]] = load i32, ptr @ui, align 4
- // CHECK-NEXT: [[RB:%[0-9]+]] = call i32 @llvm.fshl.i32(i32 [[RA]], i32 [[RA]], i32 31)
- // CHECK-NEXT: [[RC:%[0-9]+]] = and i32 [[RB]], 511
- // CHECK-NEXT: store i32 [[RC]], ptr %res, align 4
+ // CHECK-NEXT: [[RB:%[0-9]+]] = call i32 @llvm.ppc.rlwnm(i32 [[RA]], i32 31, i32 511)
+ // CHECK-NEXT: store i32 [[RB]], ptr %res, align 4
// CHECK-NEXT: ret void
/*shift = 31, mask = 0x1FF = 511*/
@@ -63,9 +56,8 @@ void test_builtin_ppc_rlwnm2(unsigned int shift) {
// CHECK-NEXT: store i32 %shift, ptr %shift.addr, align 4
// CHECK-NEXT: [[RA:%[0-9]+]] = load i32, ptr @ui, align 4
// CHECK-NEXT: [[RB:%[0-9]+]] = load i32, ptr %shift.addr, align 4
- // CHECK-NEXT: [[RC:%[0-9]+]] = call i32 @llvm.fshl.i32(i32 [[RA]], i32 [[RA]], i32 [[RB]])
- // CHECK-NEXT: [[RD:%[0-9]+]] = and i32 [[RC]], 511
- // CHECK-NEXT: store i32 [[RD]], ptr %res, align 4
+ // CHECK-NEXT: [[RC:%[0-9]+]] = call i32 @llvm.ppc.rlwnm(i32 [[RA]], i32 [[RB]], i32 511)
+ // CHECK-NEXT: store i32 [[RC]], ptr %res, align 4
// CHECK-NEXT: ret void
/*mask = 0x1FF = 511*/
diff --git a/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
index bfc2b17043bc79..ee9a04241ac2ec 100644
--- a/llvm/include/llvm/IR/IntrinsicsPowerPC.td
+++ b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
@@ -182,6 +182,18 @@ let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.".
def int_ppc_fctuwz
: ClangBuiltin<"__builtin_ppc_fctuwz">,
DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
+ def int_ppc_rldimi
+ : ClangBuiltin<"__builtin_ppc_rldimi">,
+ DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_i64_ty],
+ [IntrNoMem, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>]>;
+ def int_ppc_rlwimi
+ : ClangBuiltin<"__builtin_ppc_rlwimi">,
+ DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>]>;
+ def int_ppc_rlwnm
+ : ClangBuiltin<"__builtin_ppc_rlwnm">,
+ DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<ArgIndex<2>>]>;
// XL compatible select functions
// TODO: Add llvm_f128_ty support.
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 51becf1d5b8584..f84addbf728ad0 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -641,6 +641,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
// We want to custom lower some of our intrinsics.
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
+ // setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::f64, Custom);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::ppcf128, Custom);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v4f32, Custom);
@@ -10722,6 +10723,20 @@ static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc,
return true;
}
+bool isContiguousMask(const APInt &Val, unsigned &MB, unsigned &ME,
+ unsigned BitWidth) {
+ unsigned MaskLen = 0;
+ if (Val.isShiftedMask(MB, MaskLen)) {
+ MB = (BitWidth - MB - MaskLen) % BitWidth;
+ } else if ((~Val).isShiftedMask(MB, MaskLen)) {
+ MB = (BitWidth - MB) % BitWidth;
+ } else {
+ return false;
+ }
+ ME = (MB + MaskLen - 1) % BitWidth;
+ return true;
+}
+
/// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
/// lower, do it, otherwise return null.
SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
@@ -10737,6 +10752,43 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return DAG.getRegister(PPC::X13, MVT::i64);
return DAG.getRegister(PPC::R2, MVT::i32);
+ case Intrinsic::ppc_rldimi: {
+ uint64_t SH = Op.getConstantOperandVal(3);
+ unsigned MB = 0, ME = 0;
+ if (!isContiguousMask(Op.getConstantOperandAPInt(4), MB, ME, 64) ||
+ ME != 63 - SH)
+ llvm_unreachable("invalid rldimi mask!");
+ return SDValue(DAG.getMachineNode(
+ PPC::RLDIMI, dl, MVT::i64,
+ {Op.getOperand(1), Op.getOperand(2), Op.getOperand(3),
+ DAG.getTargetConstant(MB, dl, MVT::i32)}),
+ 0);
+ }
+
+ case Intrinsic::ppc_rlwimi: {
+ unsigned MB = 0, ME = 0;
+ if (!isContiguousMask(Op.getConstantOperandAPInt(4), MB, ME, 32))
+ llvm_unreachable("invalid rlwimi mask!");
+ return SDValue(DAG.getMachineNode(
+ PPC::RLWIMI, dl, MVT::i32,
+ {Op.getOperand(1), Op.getOperand(2), Op.getOperand(3),
+ DAG.getTargetConstant(MB, dl, MVT::i32),
+ DAG.getTargetConstant(ME, dl, MVT::i32)}),
+ 0);
+ }
+
+ case Intrinsic::ppc_rlwnm: {
+ unsigned MB = 0, ME = 0;
+ if (!isContiguousMask(Op.getConstantOperandAPInt(3), MB, ME, 32))
+ llvm_unreachable("invalid rlwnm mask!");
+ return SDValue(
+ DAG.getMachineNode(PPC::RLWNM, dl, MVT::i32,
+ {Op.getOperand(1), Op.getOperand(2),
+ DAG.getTargetConstant(MB, dl, MVT::i32),
+ DAG.getTargetConstant(ME, dl, MVT::i32)}),
+ 0);
+ }
+
case Intrinsic::ppc_mma_disassemble_acc: {
if (Subtarget.isISAFuture()) {
EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
diff --git a/llvm/test/CodeGen/PowerPC/rldimi.ll b/llvm/test/CodeGen/PowerPC/rldimi.ll
index 4e26ddfc37f99e..322975f547c996 100644
--- a/llvm/test/CodeGen/PowerPC/rldimi.ll
+++ b/llvm/test/CodeGen/PowerPC/rldimi.ll
@@ -58,3 +58,18 @@ entry:
%8 = or i64 %6, %7
ret i64 %8
}
+
+define i64 @rldimi_intrinsic(i64 %a) {
+; CHECK-LABEL: rldimi_intrinsic:
+; CHECK: # %bb.0:
+; CHECK-NEXT: rldimi 3, 3, 8, 0
+; CHECK-NEXT: rldimi 3, 3, 16, 0
+; CHECK-NEXT: rldimi 3, 3, 32, 0
+; CHECK-NEXT: blr
+ %r1 = call i64 @llvm.ppc.rldimi(i64 %a, i64 %a, i32 8, i64 -256)
+ %r2 = call i64 @llvm.ppc.rldimi(i64 %r1, i64 %r1, i32 16, i64 -65536)
+ %r3 = call i64 @llvm.ppc.rldimi(i64 %r2, i64 %r2, i32 32, i64 -4294967296)
+ ret i64 %r3
+}
+
+declare i64 @llvm.ppc.rldimi(i64, i64, i32 immarg, i64 immarg)
diff --git a/llvm/test/CodeGen/PowerPC/rlwimi.ll b/llvm/test/CodeGen/PowerPC/rlwimi.ll
index e701236b840b2c..8b126cd3393c10 100644
--- a/llvm/test/CodeGen/PowerPC/rlwimi.ll
+++ b/llvm/test/CodeGen/PowerPC/rlwimi.ll
@@ -1,70 +1,117 @@
-; All of these ands and shifts should be folded into rlwimi's
-; RUN: llc -verify-machineinstrs < %s -mtriple=ppc32-- | not grep and
-; RUN: llc -verify-machineinstrs < %s -mtriple=ppc32-- | grep rlwimi | count 8
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64le-unknown-linux-gnu | FileCheck %s
define i32 @test1(i32 %x, i32 %y) {
+; CHECK-LABEL: test1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: rlwimi 4, 3, 16, 0, 15
+; CHECK-NEXT: mr 3, 4
+; CHECK-NEXT: blr
entry:
- %tmp.3 = shl i32 %x, 16 ; <i32> [#uses=1]
- %tmp.7 = and i32 %y, 65535 ; <i32> [#uses=1]
- %tmp.9 = or i32 %tmp.7, %tmp.3 ; <i32> [#uses=1]
- ret i32 %tmp.9
+ %tmp.3 = shl i32 %x, 16
+ %tmp.7 = and i32 %y, 65535
+ %tmp.9 = or i32 %tmp.7, %tmp.3
+ ret i32 %tmp.9
}
define i32 @test2(i32 %x, i32 %y) {
+; CHECK-LABEL: test2:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: rlwimi 3, 4, 16, 0, 15
+; CHECK-NEXT: blr
entry:
- %tmp.7 = and i32 %x, 65535 ; <i32> [#uses=1]
- %tmp.3 = shl i32 %y, 16 ; <i32> [#uses=1]
- %tmp.9 = or i32 %tmp.7, %tmp.3 ; <i32> [#uses=1]
- ret i32 %tmp.9
+ %tmp.7 = and i32 %x, 65535
+ %tmp.3 = shl i32 %y, 16
+ %tmp.9 = or i32 %tmp.7, %tmp.3
+ ret i32 %tmp.9
}
define i32 @test3(i32 %x, i32 %y) {
+; CHECK-LABEL: test3:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: rlwimi 4, 3, 16, 16, 31
+; CHECK-NEXT: mr 3, 4
+; CHECK-NEXT: blr
entry:
- %tmp.3 = lshr i32 %x, 16 ; <i32> [#uses=1]
- %tmp.6 = and i32 %y, -65536 ; <i32> [#uses=1]
- %tmp.7 = or i32 %tmp.6, %tmp.3 ; <i32> [#uses=1]
- ret i32 %tmp.7
+ %tmp.3 = lshr i32 %x, 16
+ %tmp.6 = and i32 %y, -65536
+ %tmp.7 = or i32 %tmp.6, %tmp.3
+ ret i32 %tmp.7
}
define i32 @test4(i32 %x, i32 %y) {
+; CHECK-LABEL: test4:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: rlwimi 3, 4, 16, 16, 31
+; CHECK-NEXT: blr
entry:
- %tmp.6 = and i32 %x, -65536 ; <i32> [#uses=1]
- %tmp.3 = lshr i32 %y, 16 ; <i32> [#uses=1]
- %tmp.7 = or i32 %tmp.6, %tmp.3 ; <i32> [#uses=1]
- ret i32 %tmp.7
+ %tmp.6 = and i32 %x, -65536
+ %tmp.3 = lshr i32 %y, 16
+ %tmp.7 = or i32 %tmp.6, %tmp.3
+ ret i32 %tmp.7
}
define i32 @test5(i32 %x, i32 %y) {
+; CHECK-LABEL: test5:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: rlwimi 4, 3, 1, 0, 15
+; CHECK-NEXT: mr 3, 4
+; CHECK-NEXT: blr
entry:
- %tmp.3 = shl i32 %x, 1 ; <i32> [#uses=1]
- %tmp.4 = and i32 %tmp.3, -65536 ; <i32> [#uses=1]
- %tmp.7 = and i32 %y, 65535 ; <i32> [#uses=1]
- %tmp.9 = or i32 %tmp.4, %tmp.7 ; <i32> [#uses=1]
- ret i32 %tmp.9
+ %tmp.3 = shl i32 %x, 1
+ %tmp.4 = and i32 %tmp.3, -65536
+ %tmp.7 = and i32 %y, 65535
+ %tmp.9 = or i32 %tmp.4, %tmp.7
+ ret i32 %tmp.9
}
define i32 @test6(i32 %x, i32 %y) {
+; CHECK-LABEL: test6:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: rlwimi 3, 4, 1, 0, 15
+; CHECK-NEXT: blr
entry:
- %tmp.7 = and i32 %x, 65535 ; <i32> [#uses=1]
- %tmp.3 = shl i32 %y, 1 ; <i32> [#uses=1]
- %tmp.4 = and i32 %tmp.3, -65536 ; <i32> [#uses=1]
- %tmp.9 = or i32 %tmp.4, %tmp.7 ; <i32> [#uses=1]
- ret i32 %tmp.9
+ %tmp.7 = and i32 %x, 65535
+ %tmp.3 = shl i32 %y, 1
+ %tmp.4 = and i32 %tmp.3, -65536
+ %tmp.9 = or i32 %tmp.4, %tmp.7
+ ret i32 %tmp.9
}
define i32 @test7(i32 %x, i32 %y) {
+; CHECK-LABEL: test7:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: andis. 3, 3, 65535
+; CHECK-NEXT: rldimi 3, 4, 0, 48
+; CHECK-NEXT: blr
entry:
- %tmp.2 = and i32 %x, -65536 ; <i32> [#uses=1]
- %tmp.5 = and i32 %y, 65535 ; <i32> [#uses=1]
- %tmp.7 = or i32 %tmp.5, %tmp.2 ; <i32> [#uses=1]
- ret i32 %tmp.7
+ %tmp.2 = and i32 %x, -65536
+ %tmp.5 = and i32 %y, 65535
+ %tmp.7 = or i32 %tmp.5, %tmp.2
+ ret i32 %tmp.7
}
define i32 @test8(i32 %bar) {
+; CHECK-LABEL: test8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: rlwimi 3, 3, 1, 30, 30
+; CHECK-NEXT: blr
entry:
- %tmp.3 = shl i32 %bar, 1 ; <i32> [#uses=1]
- %tmp.4 = and i32 %tmp.3, 2 ; <i32> [#uses=1]
- %tmp.6 = and i32 %bar, -3 ; <i32> [#uses=1]
- %tmp.7 = or i32 %tmp.4, %tmp.6 ; <i32> [#uses=1]
- ret i32 %tmp.7
+ %tmp.3 = shl i32 %bar, 1
+ %tmp.4 = and i32 %tmp.3, 2
+ %tmp.6 = and i32 %bar, -3
+ %tmp.7 = or i32 %tmp.4, %tmp.6
+ ret i32 %tmp.7
}
+
+define i32 @test9(i32 %a, i32 %b) {
+; CHECK-LABEL: test9:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: rlwimi 3, 4, 8, 20, 26
+; CHECK-NEXT: blr
+entry:
+ %r = call i32 @llvm.ppc.rlwimi(i32 %a, i32 %b, i32 8, i32 4064)
+ ret i32 %r
+}
+
+declare i32 @llvm.ppc.rlwimi(i32, i32, i32 immarg, i32 immarg)
diff --git a/llvm/test/CodeGen/PowerPC/rlwinm.ll b/llvm/test/CodeGen/PowerPC/rlwinm.ll
index 2f3b3bf003cf65..73e4b5f6b7ff60 100644
--- a/llvm/test/CodeGen/PowerPC/rlwinm.ll
+++ b/llvm/test/CodeGen/PowerPC/rlwinm.ll
@@ -1,61 +1,111 @@
-; All of these ands and shifts should be folded into rlwimi's
-; RUN: llc -verify-machineinstrs < %s -mtriple=ppc32-- -o %t
-; RUN: not grep and %t
-; RUN: not grep srawi %t
-; RUN: not grep srwi %t
-; RUN: not grep slwi %t
-; RUN: grep rlwinm %t | count 8
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64le-unknown-linux-gnu | FileCheck %s
define i32 @test1(i32 %a) {
+; CHECK-LABEL: test1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: rlwinm 3, 3, 0, 4, 19
+; CHECK-NEXT: blr
entry:
- %tmp.1 = and i32 %a, 268431360 ; <i32> [#uses=1]
- ret i32 %tmp.1
+ %tmp.1 = and i32 %a, 268431360
+ ret i32 %tmp.1
}
define i32 @test2(i32 %a) {
+; CHECK-LABEL: test2:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: rldicl 3, 3, 36, 24
+; CHECK-NEXT: rldicl 3, 3, 28, 32
+; CHECK-NEXT: blr
entry:
- %tmp.1 = and i32 %a, -268435441 ; <i32> [#uses=1]
- ret i32 %tmp.1
+ %tmp.1 = and i32 %a, -268435441
+ ret i32 %tmp.1
}
define i32 @test3(i32 %a) {
+; CHECK-LABEL: test3:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: rlwinm 3, 3, 24, 24, 31
+; CHECK-NEXT: blr
entry:
- %tmp.2 = ashr i32 %a, 8 ; <i32> [#uses=1]
- %tmp.3 = and i32 %tmp.2, 255 ; <i32> [#uses=1]
- ret i32 %tmp.3
+ %tmp.2 = ashr i32 %a, 8
+ %tmp.3 = and i32 %tmp.2, 255
+ ret i32 %tmp.3
}
define i32 @test4(i32 %a) {
+; CHECK-LABEL: test4:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: rlwinm 3, 3, 24, 24, 31
+; CHECK-NEXT: blr
entry:
- %tmp.3 = lshr i32 %a, 8 ; <i32> [#uses=1]
- %tmp.4 = and i32 %tmp.3, 255 ; <i32> [#uses=1]
- ret i32 %tmp.4
+ %tmp.3 = lshr i32 %a, 8
+ %tmp.4 = and i32 %tmp.3, 255
+ ret i32 %tmp.4
}
define i32 @test5(i32 %a) {
+; CHECK-LABEL: test5:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: rlwinm 3, 3, 8, 0, 8
+; CHECK-NEXT: blr
entry:
- %tmp.2 = shl i32 %a, 8 ; <i32> [#uses=1]
- %tmp.3 = and i32 %tmp.2, -8388608 ; <i32> [#uses=1]
- ret i32 %tmp.3
+ %tmp.2 = shl i32 %a, 8
+ %tmp.3 = and i32 %tmp.2, -8388608
+ ret i32 %tmp.3
}
define i32 @test6(i32 %a) {
+; CHECK-LABEL: test6:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: rlwinm 3, 3, 24, 24, 31
+; CHECK-NEXT: blr
entry:
- %tmp.1 = and i32 %a, 65280 ; <i32> [#uses=1]
- %tmp.2 = ashr i32 %tmp.1, 8 ; <i32> [#uses=1]
- ret i32 %tmp.2
+ %tmp.1 = and i32 %a, 65280
+ %tmp.2 = ashr i32 %tmp.1, 8
+ ret i32 %tmp.2
}
define i32 @test7(i32 %a) {
+; CHECK-LABEL: test7:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: rlwinm 3, 3, 24, 24, 31
+; CHECK-NEXT: blr
entry:
- %tmp.1 = and i32 %a, 65280 ; <i32> [#uses=1]
- %tmp.2 = lshr i32 %tmp.1, 8 ; <i32> [#uses=1]
- ret i32 %tmp.2
+ %tmp.1 = and i32 %a, 65280
+ %tmp.2 = lshr i32 %tmp.1, 8
+ ret i32 %tmp.2
}
define i32 @test8(i32 %a) {
+; CHECK-LABEL: test8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: rlwinm 3, 3, 8, 0, 7
+; CHECK-NEXT: blr
entry:
- %tmp.1 = and i32 %a, 16711680 ; <i32> [#uses=1]
- %tmp.2 = shl i32 %tmp.1, 8 ; <i32> [#uses=1]
- ret i32 %tmp.2
+ %tmp.1 = and i32 %a, 16711680
+ %tmp.2 = shl i32 %tmp.1, 8
+ ret i32 %tmp.2
}
+
+define i32 @test9(i32 %a, i32 %s) {
+; CHECK-LABEL: test9:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: rlwnm 3, 3, 4, 23, 31
+; CHECK-NEXT: blr
+entry:
+ %r = call i32 @llvm.ppc.rlwnm(i32 %a, i32 %s, i32 511)
+ ret i32 %r
+}
+
+define i32 @test10(i32 %a) {
+; CHECK-LABEL: test10:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: rlwinm 3, 3, 31, 23, 31
+; CHECK-NEXT: blr
+entry:
+ %r = call i32 @llvm.ppc.rlwnm(i32 %a, i32 31, i32 511)
+ ret i32 %r
+}
+
+declare i32 @llvm.ppc.rlwnm(i32, i32, i32 immarg)
>From d9c9b4eb91ca3cec0bc469364914706b89ab1eeb Mon Sep 17 00:00:00 2001
From: Qiu Chaofan <qiucofan at cn.ibm.com>
Date: Tue, 27 Feb 2024 14:07:55 +0800
Subject: [PATCH 2/2] Use isRunOfOnes
---
llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 28 ++++++---------------
1 file changed, 7 insertions(+), 21 deletions(-)
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index f84addbf728ad0..178904d1e38d29 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -11,6 +11,7 @@
//===----------------------------------------------------------------------===//
#include "PPCISelLowering.h"
+#include "MCTargetDesc/PPCMCTargetDesc.h"
#include "MCTargetDesc/PPCPredicates.h"
#include "PPC.h"
#include "PPCCCState.h"
@@ -641,7 +642,6 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
// We want to custom lower some of our intrinsics.
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
- // setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::f64, Custom);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::ppcf128, Custom);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v4f32, Custom);
@@ -10723,20 +10723,6 @@ static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc,
return true;
}
-bool isContiguousMask(const APInt &Val, unsigned &MB, unsigned &ME,
- unsigned BitWidth) {
- unsigned MaskLen = 0;
- if (Val.isShiftedMask(MB, MaskLen)) {
- MB = (BitWidth - MB - MaskLen) % BitWidth;
- } else if ((~Val).isShiftedMask(MB, MaskLen)) {
- MB = (BitWidth - MB) % BitWidth;
- } else {
- return false;
- }
- ME = (MB + MaskLen - 1) % BitWidth;
- return true;
-}
-
/// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
/// lower, do it, otherwise return null.
SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
@@ -10755,9 +10741,9 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
case Intrinsic::ppc_rldimi: {
uint64_t SH = Op.getConstantOperandVal(3);
unsigned MB = 0, ME = 0;
- if (!isContiguousMask(Op.getConstantOperandAPInt(4), MB, ME, 64) ||
+ if (!isRunOfOnes64(Op.getConstantOperandVal(4), MB, ME) ||
ME != 63 - SH)
- llvm_unreachable("invalid rldimi mask!");
+ report_fatal_error("invalid rldimi mask!");
return SDValue(DAG.getMachineNode(
PPC::RLDIMI, dl, MVT::i64,
{Op.getOperand(1), Op.getOperand(2), Op.getOperand(3),
@@ -10767,8 +10753,8 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
case Intrinsic::ppc_rlwimi: {
unsigned MB = 0, ME = 0;
- if (!isContiguousMask(Op.getConstantOperandAPInt(4), MB, ME, 32))
- llvm_unreachable("invalid rlwimi mask!");
+ if (!isRunOfOnes(Op.getConstantOperandVal(4), MB, ME))
+ report_fatal_error("invalid rlwimi mask!");
return SDValue(DAG.getMachineNode(
PPC::RLWIMI, dl, MVT::i32,
{Op.getOperand(1), Op.getOperand(2), Op.getOperand(3),
@@ -10779,8 +10765,8 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
case Intrinsic::ppc_rlwnm: {
unsigned MB = 0, ME = 0;
- if (!isContiguousMask(Op.getConstantOperandAPInt(3), MB, ME, 32))
- llvm_unreachable("invalid rlwnm mask!");
+ if (!isRunOfOnes(Op.getConstantOperandVal(3), MB, ME))
+ report_fatal_error("invalid rlwnm mask!");
return SDValue(
DAG.getMachineNode(PPC::RLWNM, dl, MVT::i32,
{Op.getOperand(1), Op.getOperand(2),
More information about the cfe-commits
mailing list