[clang] [llvm] [AMDGPU][WIP] Extend permlane16, permlanex16 and permlane64 intrinsic lowering for generic types (PR #92725)
via llvm-commits
llvm-commits at lists.llvm.org
Mon May 20 01:51:31 PDT 2024
github-actions[bot] wrote:
<!--LLVM CODE FORMAT COMMENT: {clang-format}-->
:warning: C/C++ code formatter, clang-format found issues in your code. :warning:
<details>
<summary>
You can test this locally with the following command:
</summary>
``````````bash
git-clang-format --diff e76b257483e6c6743de0fa6eca4d0cc60e08385d db1933033fd37bbbab0b845eed53405db365b0e6 -- clang/lib/CodeGen/CGBuiltin.cpp llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h llvm/lib/Target/AMDGPU/SIISelLowering.cpp
``````````
</details>
<details>
<summary>
View the diff from clang-format here.
</summary>
``````````diff
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index a0f949495e..9ce2f5b6c1 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -18482,9 +18482,9 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
case AMDGPU::BI__builtin_amdgcn_permlane16:
case AMDGPU::BI__builtin_amdgcn_permlanex16: {
Intrinsic::ID IID;
- IID = BuiltinID == AMDGPU::BI__builtin_amdgcn_permlane16
- ? Intrinsic::amdgcn_permlane16
- : Intrinsic::amdgcn_permlanex16;
+ IID = BuiltinID == AMDGPU::BI__builtin_amdgcn_permlane16
+ ? Intrinsic::amdgcn_permlane16
+ : Intrinsic::amdgcn_permlanex16;
llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index cc4797b42d..b28c3521d6 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -5416,10 +5416,12 @@ bool AMDGPULegalizerInfo::legalizeLaneOp(LegalizerHelper &Helper,
Register Src3 = MI.getOperand(5).getReg();
Register Src4 = MI.getOperand(6).getImm();
Register Src5 = MI.getOperand(7).getImm();
- return LaneOp.addUse(Src1).addUse(Src2).
- addUse(Src3).
- addImm(Src4).
- addImm(Src5).getReg(0);
+ return LaneOp.addUse(Src1)
+ .addUse(Src2)
+ .addUse(Src3)
+ .addImm(Src4)
+ .addImm(Src5)
+ .getReg(0);
}
default:
llvm_unreachable("unhandled lane op");
@@ -5427,7 +5429,8 @@ bool AMDGPULegalizerInfo::legalizeLaneOp(LegalizerHelper &Helper,
};
Register Src1, Src2;
- if (IID == Intrinsic::amdgcn_readlane || IID == Intrinsic::amdgcn_writelane || IsPermLane16) {
+ if (IID == Intrinsic::amdgcn_readlane || IID == Intrinsic::amdgcn_writelane ||
+ IsPermLane16) {
Src1 = MI.getOperand(3).getReg();
if (IID == Intrinsic::amdgcn_writelane || IsPermLane16) {
Src2 = MI.getOperand(4).getReg();
@@ -5514,9 +5517,7 @@ bool AMDGPULegalizerInfo::legalizeLaneOp(LegalizerHelper &Helper,
Src0 = IsS16Vec ? B.buildBitcast(S32, Src0Parts.getReg(i)).getReg(0)
: Src0Parts.getReg(i);
PartialRes.push_back(
- (B.buildIntrinsic(IID, {S32})
- .addUse(Src0)
- .getReg(0)));
+ (B.buildIntrinsic(IID, {S32}).addUse(Src0).getReg(0)));
}
break;
@@ -5526,7 +5527,7 @@ bool AMDGPULegalizerInfo::legalizeLaneOp(LegalizerHelper &Helper,
case Intrinsic::amdgcn_permlanex16: {
Register Src1 = MI.getOperand(3).getReg();
Register Src2 = MI.getOperand(4).getReg();
-
+
Register SrcX = IsPermLane16 ? Src1 : Src2;
MachineInstrBuilder SrcXParts;
@@ -5547,9 +5548,8 @@ bool AMDGPULegalizerInfo::legalizeLaneOp(LegalizerHelper &Helper,
: Src0Parts.getReg(i);
SrcX = IsS16Vec ? B.buildBitcast(S32, SrcXParts.getReg(i)).getReg(0)
: SrcXParts.getReg(i);
- PartialRes.push_back( IsPermLane16 ?
- createLaneOp(Src0, SrcX, Src2) :
- createLaneOp(Src0, Src1, SrcX));
+ PartialRes.push_back(IsPermLane16 ? createLaneOp(Src0, SrcX, Src2)
+ : createLaneOp(Src0, Src1, SrcX));
}
break;
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 9e77d20813..5d34ed089f 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -6092,35 +6092,36 @@ static SDValue lowerLaneOp(const SITargetLowering &TLI, SDNode *N,
unsigned ValSize = VT.getSizeInBits();
unsigned IntrinsicID = N->getConstantOperandVal(0);
bool IsPermLane16 = IntrinsicID == Intrinsic::amdgcn_permlane16 ||
- IntrinsicID == Intrinsic::amdgcn_permlanex16;
+ IntrinsicID == Intrinsic::amdgcn_permlanex16;
bool IsPermLane64 = IntrinsicID == Intrinsic::amdgcn_permlane64;
SDValue Src0 = N->getOperand(1);
SDLoc SL(N);
MVT IntVT = MVT::getIntegerVT(ValSize);
auto createLaneOp = [&](SDValue Src0, SDValue Src1, SDValue Src2,
- MVT ValueT) -> SDValue {
+ MVT ValueT) -> SDValue {
if (IsPermLane16 || IsPermLane64) {
if (IsPermLane16) {
- SDValue Src3 = N->getOperand(4);
- SDValue Src4 = N->getOperand(5);
- SDValue Src5 = N->getOperand(6);
- return DAG.getNode(IntrinsicID == Intrinsic::amdgcn_permlane16
- ? AMDGPUISD::PERMLANE16 : AMDGPUISD::PERMLANEX16,
- SL, ValueT, {Src0, Src1, Src2, Src3, Src4, Src5});
+ SDValue Src3 = N->getOperand(4);
+ SDValue Src4 = N->getOperand(5);
+ SDValue Src5 = N->getOperand(6);
+ return DAG.getNode(IntrinsicID == Intrinsic::amdgcn_permlane16
+ ? AMDGPUISD::PERMLANE16
+ : AMDGPUISD::PERMLANEX16,
+ SL, ValueT, {Src0, Src1, Src2, Src3, Src4, Src5});
}
return DAG.getNode(AMDGPUISD::PERMLANE64, SL, ValueT, {Src0});
}
- return (Src2 ? DAG.getNode(AMDGPUISD::WRITELANE, SL, ValueT, {Src0, Src1, Src2})
- : Src1 ? DAG.getNode(AMDGPUISD::READLANE, SL, ValueT, {Src0, Src1})
- : DAG.getNode(AMDGPUISD::READFIRSTLANE, SL, ValueT, {Src0}));
+ return (
+ Src2 ? DAG.getNode(AMDGPUISD::WRITELANE, SL, ValueT, {Src0, Src1, Src2})
+ : Src1 ? DAG.getNode(AMDGPUISD::READLANE, SL, ValueT, {Src0, Src1})
+ : DAG.getNode(AMDGPUISD::READFIRSTLANE, SL, ValueT, {Src0}));
};
SDValue Src1, Src2;
if (IntrinsicID == Intrinsic::amdgcn_readlane ||
- IntrinsicID == Intrinsic::amdgcn_writelane ||
- IsPermLane16) {
+ IntrinsicID == Intrinsic::amdgcn_writelane || IsPermLane16) {
Src1 = N->getOperand(2);
if (IntrinsicID == Intrinsic::amdgcn_writelane || IsPermLane16)
Src2 = N->getOperand(3);
``````````
</details>
https://github.com/llvm/llvm-project/pull/92725
More information about the llvm-commits mailing list