[llvm] [AMDGPU] Enable vectorization of i8 values. (PR #134934)

Tue Apr 15 09:13:49 PDT 2025

================
@@ -1423,3 +1430,25 @@ void GCNTTIImpl::collectKernelLaunchBounds(
   LB.push_back({"amdgpu-waves-per-eu[0]", WavesPerEU.first});
   LB.push_back({"amdgpu-waves-per-eu[1]", WavesPerEU.second});
 }
+
+InstructionCost GCNTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
+                                            Align Alignment,
+                                            unsigned AddressSpace,
+                                            TTI::TargetCostKind CostKind,
+                                            TTI::OperandValueInfo OpInfo,
+                                            const Instruction *I) {
+  VectorType *VecTy = dyn_cast<VectorType>(Src);
+  if (VecTy && Opcode == Instruction::Load &&
+      VecTy->getElementType() == IntegerType::getInt8Ty(VecTy->getContext()))
+    return 1;
+  return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, CostKind,
+                                OpInfo, I);
+}
+
+unsigned GCNTTIImpl::getNumberOfParts(Type *Tp) {
+  VectorType *VecTy = dyn_cast<VectorType>(Tp);
+  if (VecTy &&
+      VecTy->getElementType() == IntegerType::getInt8Ty(VecTy->getContext()))
+    return 1;
+  return BaseT::getNumberOfParts(Tp);
+}
----------------
shiltian wrote:

Missing newline at the end of the file.

https://github.com/llvm/llvm-project/pull/134934