[clang] [llvm] [HLSL] Implement elementwise firstbitlow builtin (PR #116858)
Steven Perron via cfe-commits
cfe-commits at lists.llvm.org
Fri Jan 10 06:41:34 PST 2025
================
@@ -3181,136 +3193,250 @@ Register SPIRVInstructionSelector::buildPointerToResource(
return AcReg;
}
-bool SPIRVInstructionSelector::selectFirstBitHigh16(Register ResVReg,
- const SPIRVType *ResType,
- MachineInstr &I,
- bool IsSigned) const {
- unsigned Opcode = IsSigned ? SPIRV::OpSConvert : SPIRV::OpUConvert;
- // zero or sign extend
+bool SPIRVInstructionSelector::selectFirstBitSet16(
+ Register ResVReg, const SPIRVType *ResType, MachineInstr &I,
+ unsigned ExtendOpcode, unsigned BitSetOpcode) const {
Register ExtReg = MRI->createVirtualRegister(GR.getRegClass(ResType));
- bool Result =
- selectOpWithSrcs(ExtReg, ResType, I, {I.getOperand(2).getReg()}, Opcode);
- return Result && selectFirstBitHigh32(ResVReg, ResType, I, ExtReg, IsSigned);
+ bool Result = selectOpWithSrcs(ExtReg, ResType, I, {I.getOperand(2).getReg()},
+ ExtendOpcode);
+
+ return Result &&
+ selectFirstBitSet32(ResVReg, ResType, I, ExtReg, BitSetOpcode);
}
-bool SPIRVInstructionSelector::selectFirstBitHigh32(Register ResVReg,
- const SPIRVType *ResType,
- MachineInstr &I,
- Register SrcReg,
- bool IsSigned) const {
- unsigned Opcode = IsSigned ? GL::FindSMsb : GL::FindUMsb;
+bool SPIRVInstructionSelector::selectFirstBitSet32(
+ Register ResVReg, const SPIRVType *ResType, MachineInstr &I,
+ Register SrcReg, unsigned BitSetOpcode) const {
return BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(SPIRV::OpExtInst))
.addDef(ResVReg)
.addUse(GR.getSPIRVTypeID(ResType))
.addImm(static_cast<uint32_t>(SPIRV::InstructionSet::GLSL_std_450))
- .addImm(Opcode)
+ .addImm(BitSetOpcode)
.addUse(SrcReg)
.constrainAllUses(TII, TRI, RBI);
}
-bool SPIRVInstructionSelector::selectFirstBitHigh64(Register ResVReg,
- const SPIRVType *ResType,
- MachineInstr &I,
- bool IsSigned) const {
- Register OpReg = I.getOperand(2).getReg();
- // 1. split our int64 into 2 pieces using a bitcast
- unsigned count = GR.getScalarOrVectorComponentCount(ResType);
- SPIRVType *baseType = GR.retrieveScalarOrVectorIntType(ResType);
+bool SPIRVInstructionSelector::selectFirstBitSet64Overflow(
+ Register ResVReg, const SPIRVType *ResType, MachineInstr &I,
+ Register SrcReg, unsigned BitSetOpcode, bool SwapPrimarySide) const {
+
+ // SPIR-V only allows vecs of size 2, 3, or 4. Calling with a larger vec requires
+ // creating a param reg and return reg with an invalid vec size. If that is
+ // resolved then this function is valid for vectors of any component size.
+ unsigned ComponentCount = GR.getScalarOrVectorComponentCount(ResType);
+ assert(ComponentCount < 5 && "Vec 5+ will generate invalid SPIR-V ops");
+
+ bool ZeroAsNull = STI.isOpenCLEnv();
MachineIRBuilder MIRBuilder(I);
- SPIRVType *postCastT =
- GR.getOrCreateSPIRVVectorType(baseType, 2 * count, MIRBuilder);
- Register bitcastReg = MRI->createVirtualRegister(GR.getRegClass(postCastT));
- bool Result =
- selectOpWithSrcs(bitcastReg, postCastT, I, {OpReg}, SPIRV::OpBitcast);
+ SPIRVType *BaseType = GR.retrieveScalarOrVectorIntType(ResType);
+ SPIRVType *I64Type = GR.getOrCreateSPIRVIntegerType(64, MIRBuilder);
+ SPIRVType *I64x2Type = GR.getOrCreateSPIRVVectorType(I64Type, 2, MIRBuilder);
+ SPIRVType *Vec2ResType =
+ GR.getOrCreateSPIRVVectorType(BaseType, 2, MIRBuilder);
+
+ std::vector<Register> PartialRegs;
+
+ // Loops 0, 2, 4, ... but stops one loop early when ComponentCount is odd
+ unsigned CurrentComponent = 0;
+ for (; CurrentComponent + 1 < ComponentCount; CurrentComponent += 2) {
+ // This register holds the firstbitX result for each of the i64x2 vectors
+ // extracted from SrcReg
+ Register BitSetResult =
+ MRI->createVirtualRegister(GR.getRegClass(I64x2Type));
+
+ auto MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(),
+ TII.get(SPIRV::OpVectorShuffle))
+ .addDef(BitSetResult)
+ .addUse(GR.getSPIRVTypeID(I64x2Type))
+ .addUse(SrcReg)
+ // Per the spec, repeat the vector if only one vec is needed
+ .addUse(SrcReg);
+
+ MIB.addImm(CurrentComponent);
+ MIB.addImm(CurrentComponent + 1);
+
+ if (!MIB.constrainAllUses(TII, TRI, RBI))
+ return false;
+
+ Register SubVecBitSetReg =
+ MRI->createVirtualRegister(GR.getRegClass(Vec2ResType));
+
+ if (!selectFirstBitSet64(SubVecBitSetReg, Vec2ResType, I, BitSetResult,
+ BitSetOpcode, SwapPrimarySide))
+ return false;
+
+ PartialRegs.push_back(SubVecBitSetReg);
+ }
+
+ // On odd component counts we need to handle one more component
+ if (CurrentComponent != ComponentCount) {
+ Register FinalElemReg = MRI->createVirtualRegister(GR.getRegClass(I64Type));
+ Register ConstIntLastIdx = GR.getOrCreateConstInt(
+ ComponentCount - 1, I, BaseType, TII, ZeroAsNull);
+
+ if (!selectOpWithSrcs(FinalElemReg, I64Type, I, {SrcReg, ConstIntLastIdx},
+ SPIRV::OpVectorExtractDynamic))
+ return false;
+
+ Register FinalElemBitSetReg =
+ MRI->createVirtualRegister(GR.getRegClass(BaseType));
+
+ if (!selectFirstBitSet64(FinalElemBitSetReg, BaseType, I, FinalElemReg,
+ BitSetOpcode, SwapPrimarySide))
+ return false;
+
+ PartialRegs.push_back(FinalElemBitSetReg);
+ }
+
+ // Join all the resulting registers back into the return type in order
+ // (i.e. i32x2, i32x2, i32x1 -> i32x5)
+ return selectOpWithSrcs(ResVReg, ResType, I, PartialRegs,
+ SPIRV::OpCompositeConstruct);
+}
+
+bool SPIRVInstructionSelector::selectFirstBitSet64(
+ Register ResVReg, const SPIRVType *ResType, MachineInstr &I,
+ Register SrcReg, unsigned BitSetOpcode, bool SwapPrimarySide) const {
+ unsigned ComponentCount = GR.getScalarOrVectorComponentCount(ResType);
+ SPIRVType *BaseType = GR.retrieveScalarOrVectorIntType(ResType);
+ bool ZeroAsNull = STI.isOpenCLEnv();
+ Register ConstIntZero =
+ GR.getOrCreateConstInt(0, I, BaseType, TII, ZeroAsNull);
+ Register ConstIntOne =
+ GR.getOrCreateConstInt(1, I, BaseType, TII, ZeroAsNull);
+
+ // SPIRV doesn't support vectors with more than 4 components. Since the
+ // algoritm below converts i64 -> i32x2 and i64x4 -> i32x8 it can only
+ // operate on vectors with 2 or less components. When largers vectors are
+ // seen. Split them, recurse, then recombine them.
----------------
s-perron wrote:
```suggestion
// SPIRV doesn't support vectors with more than 4 components. Since the
// algorithm below converts i64 -> i32x2 and i64x4 -> i32x8 it can only
// operate on vectors with 2 or less components. When larger vectors are
// seen, split them, recurse, and then recombine them.
```
https://github.com/llvm/llvm-project/pull/116858
More information about the cfe-commits
mailing list