[clang] [llvm] [HLSL] Implement elementwise firstbitlow builtin (PR #116858)

Sarah Spall via llvm-commits llvm-commits at lists.llvm.org
Thu Dec 12 14:41:16 PST 2024


================
@@ -3166,109 +3171,228 @@ bool SPIRVInstructionSelector::selectFirstBitHigh32(Register ResVReg,
       .constrainAllUses(TII, TRI, RBI);
 }
 
-bool SPIRVInstructionSelector::selectFirstBitHigh64(Register ResVReg,
-                                                    const SPIRVType *ResType,
-                                                    MachineInstr &I,
-                                                    bool IsSigned) const {
-  Register OpReg = I.getOperand(2).getReg();
-  // 1. split our int64 into 2 pieces using a bitcast
-  unsigned count = GR.getScalarOrVectorComponentCount(ResType);
-  SPIRVType *baseType = GR.retrieveScalarOrVectorIntType(ResType);
+bool SPIRVInstructionSelector::selectFirstBitSet64(
+    Register ResVReg, const SPIRVType *ResType, MachineInstr &I,
+    Register SrcReg, unsigned BitSetOpcode, bool SwapPrimarySide) const {
+  unsigned ComponentCount = GR.getScalarOrVectorComponentCount(ResType);
+  SPIRVType *BaseType = GR.retrieveScalarOrVectorIntType(ResType);
+  bool ZeroAsNull = STI.isOpenCLEnv();
+  Register ConstIntZero =
+      GR.getOrCreateConstInt(0, I, BaseType, TII, ZeroAsNull);
+  Register ConstIntOne =
+      GR.getOrCreateConstInt(1, I, BaseType, TII, ZeroAsNull);
+
+  // SPIRV doesn't support vectors with more than 4 components. Since the
+  // algoritm below converts i64 -> i32x2 and i64x4 -> i32x8 it can only
+  // operate on vectors with 2 or less components. When largers vectors are
+  // seen. Split them, recurse, then recombine them.
+  if (ComponentCount > 2) {
+    unsigned LeftComponentCount = ComponentCount / 2;
+    unsigned RightComponentCount = ComponentCount - LeftComponentCount;
+    bool LeftIsVector = LeftComponentCount > 1;
+
+    // Split the SrcReg in half into 2 smaller vec registers
+    // (ie i64x4 -> i64x2, i64x2)
+    MachineIRBuilder MIRBuilder(I);
+    SPIRVType *OpType = GR.getOrCreateSPIRVIntegerType(64, MIRBuilder);
+    SPIRVType *LeftVecOpType;
+    SPIRVType *LeftVecResType;
+    if (LeftIsVector) {
+      LeftVecOpType =
+          GR.getOrCreateSPIRVVectorType(OpType, LeftComponentCount, MIRBuilder);
+      LeftVecResType = GR.getOrCreateSPIRVVectorType(
+          BaseType, LeftComponentCount, MIRBuilder);
+    } else {
+      LeftVecOpType = OpType;
+      LeftVecResType = BaseType;
+    }
+
+    SPIRVType *RightVecOpType =
+        GR.getOrCreateSPIRVVectorType(OpType, RightComponentCount, MIRBuilder);
+    SPIRVType *RightVecResType = GR.getOrCreateSPIRVVectorType(
+        BaseType, RightComponentCount, MIRBuilder);
+
+    Register LeftSideIn =
+        MRI->createVirtualRegister(GR.getRegClass(LeftVecOpType));
+    Register RightSideIn =
+        MRI->createVirtualRegister(GR.getRegClass(RightVecOpType));
+
+    bool Result;
+
+    if (LeftIsVector) {
+      auto MIB =
+          BuildMI(*I.getParent(), I, I.getDebugLoc(),
+                  TII.get(SPIRV::OpVectorShuffle))
+              .addDef(LeftSideIn)
+              .addUse(GR.getSPIRVTypeID(LeftVecOpType))
+              .addUse(SrcReg)
+              // Per the spec, repeat the vector if only one vec is needed
+              .addUse(SrcReg);
+
+      for (unsigned J = 0; J < LeftComponentCount; J++) {
+        MIB.addImm(J);
+      }
+
+      Result = MIB.constrainAllUses(TII, TRI, RBI);
+    } else {
+      Result =
+          selectOpWithSrcs(LeftSideIn, LeftVecOpType, I, {SrcReg, ConstIntZero},
+                           SPIRV::OpVectorExtractDynamic);
+    }
+
+    auto MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(),
+                       TII.get(SPIRV::OpVectorShuffle))
+                   .addDef(RightSideIn)
+                   .addUse(GR.getSPIRVTypeID(RightVecOpType))
+                   .addUse(SrcReg)
+                   // Per the spec, repeat the vector if only one vec is needed
+                   .addUse(SrcReg);
+
+    for (unsigned J = LeftComponentCount; J < ComponentCount; J++) {
+      MIB.addImm(J);
+    }
+
+    Result = Result && MIB.constrainAllUses(TII, TRI, RBI);
+
+    // Recursively call selectFirstBitSet64 on the 2 registers
+    Register LeftSideOut =
+        MRI->createVirtualRegister(GR.getRegClass(LeftVecResType));
+    Register RightSideOut =
+        MRI->createVirtualRegister(GR.getRegClass(RightVecResType));
+    Result = Result &&
+             selectFirstBitSet64(LeftSideOut, LeftVecResType, I, LeftSideIn,
+                                 BitSetOpcode, SwapPrimarySide);
+    Result = Result &&
+             selectFirstBitSet64(RightSideOut, RightVecResType, I, RightSideIn,
+                                 BitSetOpcode, SwapPrimarySide);
+
+    // Join the two resulting registers back into the return type
+    // (ie i32x2, i32x2 -> i32x4)
+    return Result &&
+           selectOpWithSrcs(ResVReg, ResType, I, {LeftSideOut, RightSideOut},
+                            SPIRV::OpCompositeConstruct);
+  }
+
+  // 1. Split int64 into 2 pieces using a bitcast
   MachineIRBuilder MIRBuilder(I);
-  SPIRVType *postCastT =
-      GR.getOrCreateSPIRVVectorType(baseType, 2 * count, MIRBuilder);
-  Register bitcastReg = MRI->createVirtualRegister(GR.getRegClass(postCastT));
+  SPIRVType *PostCastType =
+      GR.getOrCreateSPIRVVectorType(BaseType, 2 * ComponentCount, MIRBuilder);
+  Register BitcastReg =
+      MRI->createVirtualRegister(GR.getRegClass(PostCastType));
   bool Result =
-      selectOpWithSrcs(bitcastReg, postCastT, I, {OpReg}, SPIRV::OpBitcast);
+      selectOpWithSrcs(BitcastReg, PostCastType, I, {SrcReg}, SPIRV::OpBitcast);
 
-  // 2. call firstbithigh
-  Register FBHReg = MRI->createVirtualRegister(GR.getRegClass(postCastT));
-  Result &= selectFirstBitHigh32(FBHReg, postCastT, I, bitcastReg, IsSigned);
+  // 2. Find the first set bit from the primary side for all the pieces in #1
+  Register FBSReg = MRI->createVirtualRegister(GR.getRegClass(PostCastType));
+  Result = Result && selectFirstBitSet32(FBSReg, PostCastType, I, BitcastReg,
+                                         BitSetOpcode);
 
-  // 3. split result vector into high bits and low bits
+  // 3. Split result vector into high bits and low bits
   Register HighReg = MRI->createVirtualRegister(GR.getRegClass(ResType));
   Register LowReg = MRI->createVirtualRegister(GR.getRegClass(ResType));
 
-  bool ZeroAsNull = STI.isOpenCLEnv();
-  bool isScalarRes = ResType->getOpcode() != SPIRV::OpTypeVector;
-  if (isScalarRes) {
+  bool IsScalarRes = ResType->getOpcode() != SPIRV::OpTypeVector;
+  if (IsScalarRes) {
     // if scalar do a vector extract
-    Result &= selectOpWithSrcs(
-        HighReg, ResType, I,
-        {FBHReg, GR.getOrCreateConstInt(0, I, ResType, TII, ZeroAsNull)},
-        SPIRV::OpVectorExtractDynamic);
-    Result &= selectOpWithSrcs(
-        LowReg, ResType, I,
-        {FBHReg, GR.getOrCreateConstInt(1, I, ResType, TII, ZeroAsNull)},
-        SPIRV::OpVectorExtractDynamic);
-  } else { // vector case do a shufflevector
+    Result =
+        Result && selectOpWithSrcs(HighReg, ResType, I, {FBSReg, ConstIntZero},
+                                   SPIRV::OpVectorExtractDynamic);
+    Result =
+        Result && selectOpWithSrcs(LowReg, ResType, I, {FBSReg, ConstIntOne},
+                                   SPIRV::OpVectorExtractDynamic);
+  } else {
+    // if vector do a shufflevector
     auto MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(),
                        TII.get(SPIRV::OpVectorShuffle))
                    .addDef(HighReg)
                    .addUse(GR.getSPIRVTypeID(ResType))
-                   .addUse(FBHReg)
-                   .addUse(FBHReg);
-    // ^^ this vector will not be selected from; could be empty
-    unsigned j;
-    for (j = 0; j < count * 2; j += 2) {
-      MIB.addImm(j);
+                   .addUse(FBSReg)
+                   // Per the spec, repeat the vector if only one vec is needed
+                   .addUse(FBSReg);
+
+    // high bits are stored in even indexes. Extract them from FBSReg
+    for (unsigned J = 0; J < ComponentCount * 2; J += 2) {
+      MIB.addImm(J);
     }
-    Result &= MIB.constrainAllUses(TII, TRI, RBI);
+    Result = Result && MIB.constrainAllUses(TII, TRI, RBI);
 
-    // get low bits
     MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(),
                   TII.get(SPIRV::OpVectorShuffle))
               .addDef(LowReg)
               .addUse(GR.getSPIRVTypeID(ResType))
-              .addUse(FBHReg)
-              .addUse(FBHReg);
-    // ^^ this vector will not be selected from; could be empty
-    for (j = 1; j < count * 2; j += 2) {
-      MIB.addImm(j);
+              .addUse(FBSReg)
+              // Per the spec, repeat the vector if only one vec is needed
+              .addUse(FBSReg);
+
+    // low bits are stored in odd indexes. Extract them from FBSReg
+    for (unsigned J = 1; J < ComponentCount * 2; J += 2) {
+      MIB.addImm(J);
     }
-    Result &= MIB.constrainAllUses(TII, TRI, RBI);
+    Result = Result && MIB.constrainAllUses(TII, TRI, RBI);
   }
 
-  // 4. check if result of each top 32 bits is == -1
+  // 4. Check the result. When primary bits == -1 use secondary, otherwise use
+  // primary
   SPIRVType *BoolType = GR.getOrCreateSPIRVBoolType(I, TII);
   Register NegOneReg;
   Register Reg0;
   Register Reg32;
-  unsigned selectOp;
-  unsigned addOp;
-  if (isScalarRes) {
+  unsigned SelectOp;
+  unsigned AddOp;
+
+  if (IsScalarRes) {
     NegOneReg =
         GR.getOrCreateConstInt((unsigned)-1, I, ResType, TII, ZeroAsNull);
     Reg0 = GR.getOrCreateConstInt(0, I, ResType, TII, ZeroAsNull);
     Reg32 = GR.getOrCreateConstInt(32, I, ResType, TII, ZeroAsNull);
-    selectOp = SPIRV::OpSelectSISCond;
-    addOp = SPIRV::OpIAddS;
+    SelectOp = SPIRV::OpSelectSISCond;
+    AddOp = SPIRV::OpIAddS;
   } else {
-    BoolType = GR.getOrCreateSPIRVVectorType(BoolType, count, MIRBuilder);
+    BoolType =
+        GR.getOrCreateSPIRVVectorType(BoolType, ComponentCount, MIRBuilder);
     NegOneReg =
         GR.getOrCreateConstVector((unsigned)-1, I, ResType, TII, ZeroAsNull);
     Reg0 = GR.getOrCreateConstVector(0, I, ResType, TII, ZeroAsNull);
     Reg32 = GR.getOrCreateConstVector(32, I, ResType, TII, ZeroAsNull);
-    selectOp = SPIRV::OpSelectVIVCond;
-    addOp = SPIRV::OpIAddV;
+    SelectOp = SPIRV::OpSelectVIVCond;
+    AddOp = SPIRV::OpIAddV;
+  }
+
+  Register PrimaryReg;
+  Register SecondaryReg;
+  Register PrimaryShiftReg;
+  Register SecondaryShiftReg;
+  if (SwapPrimarySide) {
----------------
spall wrote:

Can you leave a comment explaining what SwapPrimarySide is about

https://github.com/llvm/llvm-project/pull/116858


More information about the llvm-commits mailing list