[clang] [llvm] [HLSL] Implement elementwise firstbitlow builtin (PR #116858)

Thu Dec 12 14:41:15 PST 2024

================
@@ -3166,109 +3171,228 @@ bool SPIRVInstructionSelector::selectFirstBitHigh32(Register ResVReg,
       .constrainAllUses(TII, TRI, RBI);
 }
 
-bool SPIRVInstructionSelector::selectFirstBitHigh64(Register ResVReg,
-                                                    const SPIRVType *ResType,
-                                                    MachineInstr &I,
-                                                    bool IsSigned) const {
-  Register OpReg = I.getOperand(2).getReg();
-  // 1. split our int64 into 2 pieces using a bitcast
-  unsigned count = GR.getScalarOrVectorComponentCount(ResType);
-  SPIRVType *baseType = GR.retrieveScalarOrVectorIntType(ResType);
+bool SPIRVInstructionSelector::selectFirstBitSet64(
+    Register ResVReg, const SPIRVType *ResType, MachineInstr &I,
+    Register SrcReg, unsigned BitSetOpcode, bool SwapPrimarySide) const {
+  unsigned ComponentCount = GR.getScalarOrVectorComponentCount(ResType);
+  SPIRVType *BaseType = GR.retrieveScalarOrVectorIntType(ResType);
+  bool ZeroAsNull = STI.isOpenCLEnv();
+  Register ConstIntZero =
+      GR.getOrCreateConstInt(0, I, BaseType, TII, ZeroAsNull);
+  Register ConstIntOne =
+      GR.getOrCreateConstInt(1, I, BaseType, TII, ZeroAsNull);
+
+  // SPIRV doesn't support vectors with more than 4 components. Since the
+  // algorithm below converts i64 -> i32x2 and i64x4 -> i32x8, it can only
+  // operate on vectors with 2 or fewer components. When larger vectors are
+  // seen, split them, recurse, then recombine them.
+  if (ComponentCount > 2) {
+    unsigned LeftComponentCount = ComponentCount / 2;
+    unsigned RightComponentCount = ComponentCount - LeftComponentCount;
+    bool LeftIsVector = LeftComponentCount > 1;
+
+    // Split the SrcReg in half into 2 smaller vec registers
+    // (ie i64x4 -> i64x2, i64x2)
+    MachineIRBuilder MIRBuilder(I);
+    SPIRVType *OpType = GR.getOrCreateSPIRVIntegerType(64, MIRBuilder);
+    SPIRVType *LeftVecOpType;
+    SPIRVType *LeftVecResType;
+    if (LeftIsVector) {
+      LeftVecOpType =
+          GR.getOrCreateSPIRVVectorType(OpType, LeftComponentCount, MIRBuilder);
+      LeftVecResType = GR.getOrCreateSPIRVVectorType(
+          BaseType, LeftComponentCount, MIRBuilder);
+    } else {
+      LeftVecOpType = OpType;
+      LeftVecResType = BaseType;
+    }
+
+    SPIRVType *RightVecOpType =
+        GR.getOrCreateSPIRVVectorType(OpType, RightComponentCount, MIRBuilder);
+    SPIRVType *RightVecResType = GR.getOrCreateSPIRVVectorType(
+        BaseType, RightComponentCount, MIRBuilder);
+
+    Register LeftSideIn =
+        MRI->createVirtualRegister(GR.getRegClass(LeftVecOpType));
+    Register RightSideIn =
+        MRI->createVirtualRegister(GR.getRegClass(RightVecOpType));
+
+    bool Result;
+
+    if (LeftIsVector) {
+      auto MIB =
+          BuildMI(*I.getParent(), I, I.getDebugLoc(),
+                  TII.get(SPIRV::OpVectorShuffle))
+              .addDef(LeftSideIn)
+              .addUse(GR.getSPIRVTypeID(LeftVecOpType))
+              .addUse(SrcReg)
+              // Per the spec, repeat the vector if only one vec is needed
+              .addUse(SrcReg);
+
+      for (unsigned J = 0; J < LeftComponentCount; J++) {
+        MIB.addImm(J);
+      }
+
+      Result = MIB.constrainAllUses(TII, TRI, RBI);
+    } else {
+      Result =
+          selectOpWithSrcs(LeftSideIn, LeftVecOpType, I, {SrcReg, ConstIntZero},
+                           SPIRV::OpVectorExtractDynamic);
+    }
+
+    auto MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(),
+                       TII.get(SPIRV::OpVectorShuffle))
+                   .addDef(RightSideIn)
+                   .addUse(GR.getSPIRVTypeID(RightVecOpType))
+                   .addUse(SrcReg)
+                   // Per the spec, repeat the vector if only one vec is needed
+                   .addUse(SrcReg);
+
+    for (unsigned J = LeftComponentCount; J < ComponentCount; J++) {
+      MIB.addImm(J);
+    }
+
+    Result = Result && MIB.constrainAllUses(TII, TRI, RBI);
+
+    // Recursively call selectFirstBitSet64 on the 2 registers
+    Register LeftSideOut =
+        MRI->createVirtualRegister(GR.getRegClass(LeftVecResType));
+    Register RightSideOut =
+        MRI->createVirtualRegister(GR.getRegClass(RightVecResType));
+    Result = Result &&
+             selectFirstBitSet64(LeftSideOut, LeftVecResType, I, LeftSideIn,
+                                 BitSetOpcode, SwapPrimarySide);
+    Result = Result &&
+             selectFirstBitSet64(RightSideOut, RightVecResType, I, RightSideIn,
+                                 BitSetOpcode, SwapPrimarySide);
+
+    // Join the two resulting registers back into the return type
+    // (ie i32x2, i32x2 -> i32x4)
+    return Result &&
+           selectOpWithSrcs(ResVReg, ResType, I, {LeftSideOut, RightSideOut},
+                            SPIRV::OpCompositeConstruct);
+  }
+
+  // 1. Split int64 into 2 pieces using a bitcast
----------------
spall wrote:

I wonder if you could move the code below (the part that operates on vectors guaranteed to have 2 or fewer components) into its own function, just to make this one shorter, and then call that new function above instead of recursing.

https://github.com/llvm/llvm-project/pull/116858