[Mlir-commits] [mlir] [mlir][Vector] Add fastmath flags to vector.reduction (PR #66905)

Wed Sep 20 06:42:17 PDT 2023

https://github.com/nicolasvasilache created https://github.com/llvm/llvm-project/pull/66905

This revision pipes the fastmath attribute support through the vector.reduction op. This seemingly simple first step already requires quite some genuflexions, file and builder reorganization. In the process, retire the boolean reassoc flag deep in the LLVM dialect builders and just use the fastmath attribute.

During conversions, templated builders for predicated intrinsics are partially cleaned up. In the future, to finalize the cleanups, one should consider adding fastmath to the VPIntrinsic ops.

>From bd2d055940bf6d4e6c83f3dbb0803607e5befb6e Mon Sep 17 00:00:00 2001
From: Nicolas Vasilache <nicolasvasilache at users.noreply.github.com>
Date: Wed, 20 Sep 2023 12:20:40 +0200
Subject: [PATCH] [mlir][Vector] Add fastmath flags to vector.reduction

This revision pipes the fastmath attribute support through the vector.reduction op.
This seemingly simple first step already requires quite some genuflexions, file and builder reorganization.
In the process, retire the boolean reassoc flag deep in the LLVM dialect builders and just use the fastmath attribute.

During conversions, templated builders for predicated intrinsics are partially cleaned up.
In the future, to finalize the cleanups, one should consider adding fastmath to the VPIntrinsic ops.
---
 .../mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td   |  13 +-
 .../mlir/Dialect/Vector/IR/CMakeLists.txt     |  24 ++-
 mlir/include/mlir/Dialect/Vector/IR/Vector.td |  31 ++++
 .../Dialect/Vector/IR/VectorAttributes.td     |  85 ++++++++++
 .../mlir/Dialect/Vector/IR/VectorOps.h        |   7 +-
 .../mlir/Dialect/Vector/IR/VectorOps.td       | 107 +++---------
 .../VectorToLLVM/ConvertVectorToLLVM.cpp      | 156 +++++++++---------
 mlir/lib/Dialect/Vector/IR/CMakeLists.txt     |   2 +-
 mlir/lib/Dialect/Vector/IR/VectorOps.cpp      |  28 +++-
 mlir/python/mlir/dialects/VectorOps.td        |  10 +-
 10 files changed, 264 insertions(+), 199 deletions(-)
 create mode 100644 mlir/include/mlir/Dialect/Vector/IR/Vector.td
 create mode 100644 mlir/include/mlir/Dialect/Vector/IR/VectorAttributes.td

diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td
index 51017b5e050ffef..5af84c9e8646f5a 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td
@@ -656,8 +656,9 @@ class LLVM_VecReductionI<string mnem>
 class LLVM_VecReductionAccBase<string mnem, Type element>
     : LLVM_OneResultIntrOp<"vector.reduce." # mnem, [], [0],
                            [Pure, SameOperandsAndResultElementType]>,
-      Arguments<(ins element:$start_value, LLVM_VectorOf<element>:$input,
-                 DefaultValuedAttr<BoolAttr, "false">:$reassoc)> {
+      Arguments<(ins element:$start_value,
+                     LLVM_VectorOf<element>:$input,
+                     DefaultValuedAttr<LLVM_FastmathFlagsAttr, "{}">:$fastmathFlags)> {
   let llvmBuilder = [{
     llvm::Module *module = builder.GetInsertBlock()->getModule();
     llvm::Function *fn = llvm::Intrinsic::getDeclaration(
@@ -667,17 +668,11 @@ class LLVM_VecReductionAccBase<string mnem, Type element>
                            ", ") # [{
         });
     auto operands = moduleTranslation.lookupValues(opInst.getOperands());
-    llvm::FastMathFlags origFM = builder.getFastMathFlags();
-    llvm::FastMathFlags tempFM = origFM;
-    tempFM.setAllowReassoc($reassoc);
-    builder.setFastMathFlags(tempFM);  // set fastmath flag
     $res = builder.CreateCall(fn, operands);
-    builder.setFastMathFlags(origFM);  // restore fastmath flag
   }];
   let mlirBuilder = [{
-    bool allowReassoc = inst->getFastMathFlags().allowReassoc();
     $res = $_builder.create<$_qualCppClassName>($_location,
-      $_resultType, $start_value, $input, allowReassoc);
+      $_resultType, $start_value, $input, inst->getFastMathFlags());
   }];
 }
 
diff --git a/mlir/include/mlir/Dialect/Vector/IR/CMakeLists.txt b/mlir/include/mlir/Dialect/Vector/IR/CMakeLists.txt
index 2e56afe727ac0c9..23bed7e0f447e42 100644
--- a/mlir/include/mlir/Dialect/Vector/IR/CMakeLists.txt
+++ b/mlir/include/mlir/Dialect/Vector/IR/CMakeLists.txt
@@ -1,10 +1,18 @@
-add_mlir_dialect(VectorOps vector)
-add_mlir_doc(VectorOps VectorOps Dialects/ -gen-op-doc)
+add_mlir_dialect(Vector vector)
+add_mlir_doc(Vector Vector Dialects/ -gen-op-doc -dialect=vector)
 
+# Add Vector operations
 set(LLVM_TARGET_DEFINITIONS VectorOps.td)
-mlir_tablegen(VectorOpsEnums.h.inc -gen-enum-decls)
-mlir_tablegen(VectorOpsEnums.cpp.inc -gen-enum-defs)
-mlir_tablegen(VectorOpsAttrDefs.h.inc -gen-attrdef-decls)
-mlir_tablegen(VectorOpsAttrDefs.cpp.inc -gen-attrdef-defs)
-add_public_tablegen_target(MLIRVectorOpsEnumsIncGen)
-add_dependencies(mlir-headers MLIRVectorOpsEnumsIncGen)
+mlir_tablegen(VectorOps.h.inc -gen-op-decls)
+mlir_tablegen(VectorOps.cpp.inc -gen-op-defs)
+add_public_tablegen_target(MLIRVectorOpsIncGen)
+add_dependencies(mlir-generic-headers MLIRVectorOpsIncGen)
+
+# Add Vector attributes
+set(LLVM_TARGET_DEFINITIONS VectorAttributes.td)
+mlir_tablegen(VectorEnums.h.inc -gen-enum-decls)
+mlir_tablegen(VectorEnums.cpp.inc -gen-enum-defs)
+mlir_tablegen(VectorAttributes.h.inc -gen-attrdef-decls)
+mlir_tablegen(VectorAttributes.cpp.inc -gen-attrdef-defs)
+add_public_tablegen_target(MLIRVectorAttributesIncGen)
+add_dependencies(mlir-generic-headers MLIRVectorAttributesIncGen)
diff --git a/mlir/include/mlir/Dialect/Vector/IR/Vector.td b/mlir/include/mlir/Dialect/Vector/IR/Vector.td
new file mode 100644
index 000000000000000..c439ca083e2e09b
--- /dev/null
+++ b/mlir/include/mlir/Dialect/Vector/IR/Vector.td
@@ -0,0 +1,31 @@
+//===- Vector.td - Vector Dialect --------------------------*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the Vector dialect.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_DIALECT_VECTOR_IR_VECTOR
+#define MLIR_DIALECT_VECTOR_IR_VECTOR
+
+include "mlir/IR/OpBase.td"
+
+def Vector_Dialect : Dialect {
+  let name = "vector";
+  let cppNamespace = "::mlir::vector";
+
+  let useDefaultAttributePrinterParser = 1;
+  let hasConstantMaterializer = 1;
+  let dependentDialects = ["arith::ArithDialect"];
+}
+
+// Base class for Vector dialect ops.
+class Vector_Op<string mnemonic, list<Trait> traits = []> :
+    Op<Vector_Dialect, mnemonic, traits>;
+
+#endif // MLIR_DIALECT_VECTOR_IR_VECTOR
diff --git a/mlir/include/mlir/Dialect/Vector/IR/VectorAttributes.td b/mlir/include/mlir/Dialect/Vector/IR/VectorAttributes.td
new file mode 100644
index 000000000000000..2db944b4ceaf139
--- /dev/null
+++ b/mlir/include/mlir/Dialect/Vector/IR/VectorAttributes.td
@@ -0,0 +1,85 @@
+//===- VectorAttributes.td - Vector Dialect ----------------*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the attributes used in the Vector dialect.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_DIALECT_VECTOR_IR_VECTOR_ATTRIBUTES
+#define MLIR_DIALECT_VECTOR_IR_VECTOR_ATTRIBUTES
+
+include "Vector.td"
+include "mlir/IR/EnumAttr.td"
+
+// The "kind" of combining function for contractions and reductions.
+def COMBINING_KIND_ADD : I32BitEnumAttrCaseBit<"ADD", 0, "add">;
+def COMBINING_KIND_MUL : I32BitEnumAttrCaseBit<"MUL", 1, "mul">;
+def COMBINING_KIND_MINUI : I32BitEnumAttrCaseBit<"MINUI", 2, "minui">;
+def COMBINING_KIND_MINSI : I32BitEnumAttrCaseBit<"MINSI", 3, "minsi">;
+def COMBINING_KIND_MINF : I32BitEnumAttrCaseBit<"MINF", 4, "minf">;
+def COMBINING_KIND_MAXUI : I32BitEnumAttrCaseBit<"MAXUI", 5, "maxui">;
+def COMBINING_KIND_MAXSI : I32BitEnumAttrCaseBit<"MAXSI", 6, "maxsi">;
+def COMBINING_KIND_MAXF : I32BitEnumAttrCaseBit<"MAXF", 7, "maxf">;
+def COMBINING_KIND_AND : I32BitEnumAttrCaseBit<"AND", 8, "and">;
+def COMBINING_KIND_OR  : I32BitEnumAttrCaseBit<"OR", 9, "or">;
+def COMBINING_KIND_XOR : I32BitEnumAttrCaseBit<"XOR", 10, "xor">;
+def COMBINING_KIND_MINIMUMF : I32BitEnumAttrCaseBit<"MINIMUMF", 11, "minimumf">;
+def COMBINING_KIND_MAXIMUMF : I32BitEnumAttrCaseBit<"MAXIMUMF", 12, "maximumf">;
+
+def CombiningKind : I32BitEnumAttr<
+    "CombiningKind",
+    "Kind of combining function for contractions and reductions",
+    [COMBINING_KIND_ADD, COMBINING_KIND_MUL, COMBINING_KIND_MINUI,
+     COMBINING_KIND_MINSI, COMBINING_KIND_MINF, COMBINING_KIND_MAXUI,
+     COMBINING_KIND_MAXSI, COMBINING_KIND_MAXF, COMBINING_KIND_AND,
+     COMBINING_KIND_OR, COMBINING_KIND_XOR,
+     COMBINING_KIND_MAXIMUMF, COMBINING_KIND_MINIMUMF]> {
+  let cppNamespace = "::mlir::vector";
+  let genSpecializedAttr = 0;
+}
+
+/// An attribute that specifies the combining function for `vector.contract`,
+/// and `vector.reduction`.
+def Vector_CombiningKindAttr : EnumAttr<Vector_Dialect, CombiningKind, "kind"> {
+  let assemblyFormat = "`<` $value `>`";
+}
+
+def Vector_IteratorType : I32EnumAttr<"IteratorType", "Iterator type", [
+  I32EnumAttrCase<"parallel", 0>,
+  I32EnumAttrCase<"reduction", 1>
+]> {
+    let genSpecializedAttr = 0;
+    let cppNamespace = "::mlir::vector";
+}
+
+def Vector_IteratorTypeEnum
+    : EnumAttr<Vector_Dialect, Vector_IteratorType, "iterator_type"> {
+    let assemblyFormat = "`<` $value `>`";
+}
+
+def Vector_IteratorTypeArrayAttr
+    : TypedArrayAttrBase<Vector_IteratorTypeEnum,
+                         "Iterator type should be an enum.">;
+
+def PrintPunctuation : I32EnumAttr<"PrintPunctuation",
+                                  "Punctuation for separating vectors or vector elements", [
+  I32EnumAttrCase<"NoPunctuation", 0, "no_punctuation">,
+  I32EnumAttrCase<"NewLine", 1, "newline">,
+  I32EnumAttrCase<"Comma", 2, "comma">,
+  I32EnumAttrCase<"Open", 3, "open">,
+  I32EnumAttrCase<"Close", 4, "close">
+]> {
+  let cppNamespace = "::mlir::vector";
+  let genSpecializedAttr = 0;
+}
+
+def Vector_PrintPunctuation : EnumAttr<Vector_Dialect, PrintPunctuation, "punctuation"> {
+  let assemblyFormat = "`<` $value `>`";
+}
+
+#endif // MLIR_DIALECT_VECTOR_IR_VECTOR_ATTRIBUTES
diff --git a/mlir/include/mlir/Dialect/Vector/IR/VectorOps.h b/mlir/include/mlir/Dialect/Vector/IR/VectorOps.h
index 4a624bd5f1ccdc8..fcf7eb4a616b073 100644
--- a/mlir/include/mlir/Dialect/Vector/IR/VectorOps.h
+++ b/mlir/include/mlir/Dialect/Vector/IR/VectorOps.h
@@ -14,6 +14,7 @@
 #define MLIR_DIALECT_VECTOR_IR_VECTOROPS_H
 
 #include "mlir/Bytecode/BytecodeOpInterface.h"
+#include "mlir/Dialect/Arith/IR/Arith.h"
 #include "mlir/Dialect/Vector/Interfaces/MaskableOpInterface.h"
 #include "mlir/Dialect/Vector/Interfaces/MaskingOpInterface.h"
 #include "mlir/IR/AffineMap.h"
@@ -31,10 +32,10 @@
 #include "llvm/ADT/StringExtras.h"
 
 // Pull in all enum type definitions and utility function declarations.
-#include "mlir/Dialect/Vector/IR/VectorOpsEnums.h.inc"
+#include "mlir/Dialect/Vector/IR/VectorEnums.h.inc"
 
 #define GET_ATTRDEF_CLASSES
-#include "mlir/Dialect/Vector/IR/VectorOpsAttrDefs.h.inc"
+#include "mlir/Dialect/Vector/IR/VectorAttributes.h.inc"
 
 namespace mlir {
 class MLIRContext;
@@ -157,7 +158,7 @@ Value selectPassthru(OpBuilder &builder, Value mask, Value newValue,
 } // namespace mlir
 
 #define GET_OP_CLASSES
+#include "mlir/Dialect/Vector/IR/VectorDialect.h.inc"
 #include "mlir/Dialect/Vector/IR/VectorOps.h.inc"
-#include "mlir/Dialect/Vector/IR/VectorOpsDialect.h.inc"
 
 #endif // MLIR_DIALECT_VECTOR_IR_VECTOROPS_H
diff --git a/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td b/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td
index 28b5864914f6920..ab77c31b418e537 100644
--- a/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td
+++ b/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td
@@ -10,9 +10,13 @@
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef VECTOR_OPS
-#define VECTOR_OPS
+#ifndef MLIR_DIALECT_VECTOR_IR_VECTOR_OPS
+#define MLIR_DIALECT_VECTOR_IR_VECTOR_OPS
 
+include "Vector.td"
+include "VectorAttributes.td"
+include "mlir/Dialect/Arith/IR/ArithBase.td"
+include "mlir/Dialect/Arith/IR/ArithOpsInterfaces.td"
 include "mlir/Dialect/Vector/Interfaces/MaskableOpInterface.td"
 include "mlir/Dialect/Vector/Interfaces/MaskingOpInterface.td"
 include "mlir/IR/EnumAttr.td"
@@ -23,69 +27,6 @@ include "mlir/Interfaces/SideEffectInterfaces.td"
 include "mlir/Interfaces/VectorInterfaces.td"
 include "mlir/Interfaces/ViewLikeInterface.td"
 
-def Vector_Dialect : Dialect {
-  let name = "vector";
-  let cppNamespace = "::mlir::vector";
-
-  let useDefaultAttributePrinterParser = 1;
-  let hasConstantMaterializer = 1;
-  let dependentDialects = ["arith::ArithDialect"];
-}
-
-// Base class for Vector dialect ops.
-class Vector_Op<string mnemonic, list<Trait> traits = []> :
-    Op<Vector_Dialect, mnemonic, traits>;
-
-// The "kind" of combining function for contractions and reductions.
-def COMBINING_KIND_ADD : I32BitEnumAttrCaseBit<"ADD", 0, "add">;
-def COMBINING_KIND_MUL : I32BitEnumAttrCaseBit<"MUL", 1, "mul">;
-def COMBINING_KIND_MINUI : I32BitEnumAttrCaseBit<"MINUI", 2, "minui">;
-def COMBINING_KIND_MINSI : I32BitEnumAttrCaseBit<"MINSI", 3, "minsi">;
-def COMBINING_KIND_MINF : I32BitEnumAttrCaseBit<"MINF", 4, "minf">;
-def COMBINING_KIND_MAXUI : I32BitEnumAttrCaseBit<"MAXUI", 5, "maxui">;
-def COMBINING_KIND_MAXSI : I32BitEnumAttrCaseBit<"MAXSI", 6, "maxsi">;
-def COMBINING_KIND_MAXF : I32BitEnumAttrCaseBit<"MAXF", 7, "maxf">;
-def COMBINING_KIND_AND : I32BitEnumAttrCaseBit<"AND", 8, "and">;
-def COMBINING_KIND_OR  : I32BitEnumAttrCaseBit<"OR", 9, "or">;
-def COMBINING_KIND_XOR : I32BitEnumAttrCaseBit<"XOR", 10, "xor">;
-def COMBINING_KIND_MINIMUMF : I32BitEnumAttrCaseBit<"MINIMUMF", 11, "minimumf">;
-def COMBINING_KIND_MAXIMUMF : I32BitEnumAttrCaseBit<"MAXIMUMF", 12, "maximumf">;
-
-def CombiningKind : I32BitEnumAttr<
-    "CombiningKind",
-    "Kind of combining function for contractions and reductions",
-    [COMBINING_KIND_ADD, COMBINING_KIND_MUL, COMBINING_KIND_MINUI,
-     COMBINING_KIND_MINSI, COMBINING_KIND_MINF, COMBINING_KIND_MAXUI,
-     COMBINING_KIND_MAXSI, COMBINING_KIND_MAXF, COMBINING_KIND_AND,
-     COMBINING_KIND_OR, COMBINING_KIND_XOR,
-     COMBINING_KIND_MAXIMUMF, COMBINING_KIND_MINIMUMF]> {
-  let cppNamespace = "::mlir::vector";
-  let genSpecializedAttr = 0;
-}
-
-/// An attribute that specifies the combining function for `vector.contract`,
-/// and `vector.reduction`.
-def Vector_CombiningKindAttr : EnumAttr<Vector_Dialect, CombiningKind, "kind"> {
-  let assemblyFormat = "`<` $value `>`";
-}
-
-def Vector_IteratorType : I32EnumAttr<"IteratorType", "Iterator type", [
-  I32EnumAttrCase<"parallel", 0>,
-  I32EnumAttrCase<"reduction", 1>
-]> {
-    let genSpecializedAttr = 0;
-    let cppNamespace = "::mlir::vector";
-}
-
-def Vector_IteratorTypeEnum
-    : EnumAttr<Vector_Dialect, Vector_IteratorType, "iterator_type"> {
-    let assemblyFormat = "`<` $value `>`";
-}
-
-def Vector_IteratorTypeArrayAttr
-    : TypedArrayAttrBase<Vector_IteratorTypeEnum,
-                         "Iterator type should be an enum.">;
-
 // TODO: Add an attribute to specify a different algebra with operators other
 // than the current set: {*, +}.
 def Vector_ContractionOp :
@@ -274,12 +215,16 @@ def Vector_ReductionOp :
   Vector_Op<"reduction", [Pure,
      PredOpTrait<"source operand and result have same element type",
                  TCresVTEtIsSameAsOpBase<0, 0>>,
+     DeclareOpInterfaceMethods<ArithFastMathInterface>,
      DeclareOpInterfaceMethods<MaskableOpInterface>,
-     DeclareOpInterfaceMethods<VectorUnrollOpInterface,
-                               ["getShapeForUnroll"]>]>,
+     DeclareOpInterfaceMethods<VectorUnrollOpInterface, ["getShapeForUnroll"]>
+    ]>,
     Arguments<(ins Vector_CombiningKindAttr:$kind,
                AnyVectorOfAnyRank:$vector,
-               Optional<AnyType>:$acc)>,
+               Optional<AnyType>:$acc,
+               DefaultValuedAttr<
+                 Arith_FastMathAttr,
+                 "::mlir::arith::FastMathFlags::none">:$fastmath)>,
     Results<(outs AnyType:$dest)> {
   let summary = "reduction operation";
   let description = [{
@@ -309,9 +254,13 @@ def Vector_ReductionOp :
   }];
   let builders = [
     // Builder that infers the type of `dest`.
-    OpBuilder<(ins "CombiningKind":$kind, "Value":$vector, "Value":$acc)>,
+    OpBuilder<(ins "CombiningKind":$kind, "Value":$vector, "Value":$acc,
+                    CArg<"::mlir::arith::FastMathFlags",
+                         "::mlir::arith::FastMathFlags::none">:$fastMathFlags)>,
     // Builder that infers the type of `dest` and has no accumulator.
-    OpBuilder<(ins "CombiningKind":$kind, "Value":$vector)>
+    OpBuilder<(ins "CombiningKind":$kind, "Value":$vector,
+                    CArg<"::mlir::arith::FastMathFlags",
+                         "::mlir::arith::FastMathFlags::none">:$fastMathFlags)>
   ];
 
   // TODO: Migrate to assemblyFormat once `AllTypesMatch` supports optional
@@ -2466,22 +2415,6 @@ def Vector_TransposeOp :
   let hasVerifier = 1;
 }
 
-def PrintPunctuation : I32EnumAttr<"PrintPunctuation",
-                                  "Punctuation for separating vectors or vector elements", [
-  I32EnumAttrCase<"NoPunctuation", 0, "no_punctuation">,
-  I32EnumAttrCase<"NewLine", 1, "newline">,
-  I32EnumAttrCase<"Comma", 2, "comma">,
-  I32EnumAttrCase<"Open", 3, "open">,
-  I32EnumAttrCase<"Close", 4, "close">
-]> {
-  let cppNamespace = "::mlir::vector";
-  let genSpecializedAttr = 0;
-}
-
-def Vector_PrintPunctuation : EnumAttr<Vector_Dialect, PrintPunctuation, "punctuation"> {
-  let assemblyFormat = "`<` $value `>`";
-}
-
 def Vector_PrintOp :
   Vector_Op<"print", []>,
   Arguments<(ins Optional<Type<Or<[
@@ -2936,4 +2869,4 @@ def Vector_WarpExecuteOnLane0Op : Vector_Op<"warp_execute_on_lane_0",
   }];
 }
 
-#endif // VECTOR_OPS
+#endif // MLIR_DIALECT_VECTOR_IR_VECTOR_OPS
diff --git a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp
index a979237d1f63e17..2c08257fc3089b6 100644
--- a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp
+++ b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp
@@ -8,6 +8,7 @@
 
 #include "mlir/Conversion/VectorToLLVM/ConvertVectorToLLVM.h"
 
+#include "mlir/Conversion/ArithCommon/AttrToLLVMConverter.h"
 #include "mlir/Conversion/LLVMCommon/TypeConverter.h"
 #include "mlir/Conversion/LLVMCommon/VectorPattern.h"
 #include "mlir/Dialect/Arith/IR/Arith.h"
@@ -592,11 +593,11 @@ struct VectorToScalarMapper<LLVM::vector_reduce_fmin> {
 } // namespace
 
 template <class LLVMRedIntrinOp>
-static Value
-createFPReductionComparisonOpLowering(ConversionPatternRewriter &rewriter,
-                                      Location loc, Type llvmType,
-                                      Value vectorOperand, Value accumulator) {
-  Value result = rewriter.create<LLVMRedIntrinOp>(loc, llvmType, vectorOperand);
+static Value createFPReductionComparisonOpLowering(
+    ConversionPatternRewriter &rewriter, Location loc, Type llvmType,
+    Value vectorOperand, Value accumulator, LLVM::FastmathFlagsAttr fmf) {
+  Value result =
+      rewriter.create<LLVMRedIntrinOp>(loc, llvmType, vectorOperand, fmf);
 
   if (accumulator) {
     result =
@@ -641,87 +642,72 @@ static Value createMaskNeutralValue(ConversionPatternRewriter &rewriter,
 /// `fmaximum`/`fminimum`.
 /// More information: https://github.com/llvm/llvm-project/issues/64940
 template <class LLVMRedIntrinOp, class MaskNeutral>
-static Value lowerMaskedReductionWithRegular(
-    ConversionPatternRewriter &rewriter, Location loc, Type llvmType,
-    Value vectorOperand, Value accumulator, Value mask) {
+static Value
+lowerMaskedReductionWithRegular(ConversionPatternRewriter &rewriter,
+                                Location loc, Type llvmType,
+                                Value vectorOperand, Value accumulator,
+                                Value mask, LLVM::FastmathFlagsAttr fmf) {
   const Value vectorMaskNeutral = createMaskNeutralValue<MaskNeutral>(
       rewriter, loc, llvmType, vectorOperand.getType());
   const Value selectedVectorByMask = rewriter.create<LLVM::SelectOp>(
       loc, mask, vectorOperand, vectorMaskNeutral);
   return createFPReductionComparisonOpLowering<LLVMRedIntrinOp>(
-      rewriter, loc, llvmType, selectedVectorByMask, accumulator);
+      rewriter, loc, llvmType, selectedVectorByMask, accumulator, fmf);
 }
 
-/// Overloaded methods to lower a reduction to an llvm instrinsic that requires
-/// a start value. This start value format spans across fp reductions without
-/// mask and all the masked reduction intrinsics.
-template <class LLVMVPRedIntrinOp, class ReductionNeutral>
-static Value lowerReductionWithStartValue(ConversionPatternRewriter &rewriter,
-                                          Location loc, Type llvmType,
-                                          Value vectorOperand,
-                                          Value accumulator) {
-  accumulator = getOrCreateAccumulator<ReductionNeutral>(rewriter, loc,
-                                                         llvmType, accumulator);
-  return rewriter.create<LLVMVPRedIntrinOp>(loc, llvmType,
-                                            /*startValue=*/accumulator,
-                                            vectorOperand);
-}
-
-template <class LLVMVPRedIntrinOp, class ReductionNeutral>
+template <class LLVMRedIntrinOp, class ReductionNeutral>
 static Value
 lowerReductionWithStartValue(ConversionPatternRewriter &rewriter, Location loc,
                              Type llvmType, Value vectorOperand,
-                             Value accumulator, bool reassociateFPReds) {
+                             Value accumulator, LLVM::FastmathFlagsAttr fmf) {
   accumulator = getOrCreateAccumulator<ReductionNeutral>(rewriter, loc,
                                                          llvmType, accumulator);
-  return rewriter.create<LLVMVPRedIntrinOp>(loc, llvmType,
-                                            /*startValue=*/accumulator,
-                                            vectorOperand, reassociateFPReds);
+  return rewriter.create<LLVMRedIntrinOp>(loc, llvmType,
+                                          /*startValue=*/accumulator,
+                                          vectorOperand, fmf);
 }
 
+/// Overloaded methods to lower a *predicated* reduction to an llvm intrinsic
+/// that requires a start value. This start value format spans across fp
+/// reductions without mask and all the masked reduction intrinsics.
 template <class LLVMVPRedIntrinOp, class ReductionNeutral>
-static Value lowerReductionWithStartValue(ConversionPatternRewriter &rewriter,
-                                          Location loc, Type llvmType,
-                                          Value vectorOperand,
-                                          Value accumulator, Value mask) {
+static Value
+lowerPredicatedReductionWithStartValue(ConversionPatternRewriter &rewriter,
+                                       Location loc, Type llvmType,
+                                       Value vectorOperand, Value accumulator) {
   accumulator = getOrCreateAccumulator<ReductionNeutral>(rewriter, loc,
                                                          llvmType, accumulator);
-  Value vectorLength =
-      createVectorLengthValue(rewriter, loc, vectorOperand.getType());
   return rewriter.create<LLVMVPRedIntrinOp>(loc, llvmType,
                                             /*startValue=*/accumulator,
-                                            vectorOperand, mask, vectorLength);
+                                            vectorOperand);
 }
 
 template <class LLVMVPRedIntrinOp, class ReductionNeutral>
-static Value lowerReductionWithStartValue(ConversionPatternRewriter &rewriter,
-                                          Location loc, Type llvmType,
-                                          Value vectorOperand,
-                                          Value accumulator, Value mask,
-                                          bool reassociateFPReds) {
+static Value lowerPredicatedReductionWithStartValue(
+    ConversionPatternRewriter &rewriter, Location loc, Type llvmType,
+    Value vectorOperand, Value accumulator, Value mask) {
   accumulator = getOrCreateAccumulator<ReductionNeutral>(rewriter, loc,
                                                          llvmType, accumulator);
   Value vectorLength =
       createVectorLengthValue(rewriter, loc, vectorOperand.getType());
   return rewriter.create<LLVMVPRedIntrinOp>(loc, llvmType,
                                             /*startValue=*/accumulator,
-                                            vectorOperand, mask, vectorLength,
-                                            reassociateFPReds);
+                                            vectorOperand, mask, vectorLength);
 }
 
 template <class LLVMIntVPRedIntrinOp, class IntReductionNeutral,
           class LLVMFPVPRedIntrinOp, class FPReductionNeutral>
-static Value lowerReductionWithStartValue(ConversionPatternRewriter &rewriter,
-                                          Location loc, Type llvmType,
-                                          Value vectorOperand,
-                                          Value accumulator, Value mask) {
+static Value lowerPredicatedReductionWithStartValue(
+    ConversionPatternRewriter &rewriter, Location loc, Type llvmType,
+    Value vectorOperand, Value accumulator, Value mask) {
   if (llvmType.isIntOrIndex())
-    return lowerReductionWithStartValue<LLVMIntVPRedIntrinOp,
-                                        IntReductionNeutral>(
+    return lowerPredicatedReductionWithStartValue<LLVMIntVPRedIntrinOp,
+                                                  IntReductionNeutral>(
         rewriter, loc, llvmType, vectorOperand, accumulator, mask);
 
   // FP dispatch.
-  return lowerReductionWithStartValue<LLVMFPVPRedIntrinOp, FPReductionNeutral>(
+  return lowerPredicatedReductionWithStartValue<LLVMFPVPRedIntrinOp,
+                                                FPReductionNeutral>(
       rewriter, loc, llvmType, vectorOperand, accumulator, mask);
 }
 
@@ -809,30 +795,39 @@ class VectorReductionOpConversion
     if (!isa<FloatType>(eltType))
       return failure();
 
+    arith::FastMathFlagsAttr fMFAttr = reductionOp.getFastMathFlagsAttr();
+    LLVM::FastmathFlagsAttr fmf = LLVM::FastmathFlagsAttr::get(
+        reductionOp.getContext(),
+        convertArithFastMathFlagsToLLVM(fMFAttr.getValue()));
+    fmf = LLVM::FastmathFlagsAttr::get(
+        reductionOp.getContext(),
+        fmf.getValue() | (reassociateFPReductions ? LLVM::FastmathFlags::reassoc
+                                                  : LLVM::FastmathFlags::none));
+
     // Floating-point reductions: add/mul/min/max
     Value result;
     if (kind == vector::CombiningKind::ADD) {
       result = lowerReductionWithStartValue<LLVM::vector_reduce_fadd,
                                             ReductionNeutralZero>(
-          rewriter, loc, llvmType, operand, acc, reassociateFPReductions);
+          rewriter, loc, llvmType, operand, acc, fmf);
     } else if (kind == vector::CombiningKind::MUL) {
       result = lowerReductionWithStartValue<LLVM::vector_reduce_fmul,
                                             ReductionNeutralFPOne>(
-          rewriter, loc, llvmType, operand, acc, reassociateFPReductions);
+          rewriter, loc, llvmType, operand, acc, fmf);
     } else if (kind == vector::CombiningKind::MINIMUMF) {
       result =
           createFPReductionComparisonOpLowering<LLVM::vector_reduce_fminimum>(
-              rewriter, loc, llvmType, operand, acc);
+              rewriter, loc, llvmType, operand, acc, fmf);
     } else if (kind == vector::CombiningKind::MAXIMUMF) {
       result =
           createFPReductionComparisonOpLowering<LLVM::vector_reduce_fmaximum>(
-              rewriter, loc, llvmType, operand, acc);
+              rewriter, loc, llvmType, operand, acc, fmf);
     } else if (kind == vector::CombiningKind::MINF) {
       result = createFPReductionComparisonOpLowering<LLVM::vector_reduce_fmin>(
-          rewriter, loc, llvmType, operand, acc);
+          rewriter, loc, llvmType, operand, acc, fmf);
     } else if (kind == vector::CombiningKind::MAXF) {
       result = createFPReductionComparisonOpLowering<LLVM::vector_reduce_fmax>(
-          rewriter, loc, llvmType, operand, acc);
+          rewriter, loc, llvmType, operand, acc, fmf);
     } else
       return failure();
 
@@ -893,74 +888,79 @@ class MaskedReductionOpConversion
     Value acc = reductionOp.getAcc();
     Location loc = reductionOp.getLoc();
 
+    arith::FastMathFlagsAttr fMFAttr = reductionOp.getFastMathFlagsAttr();
+    LLVM::FastmathFlagsAttr fmf = LLVM::FastmathFlagsAttr::get(
+        reductionOp.getContext(),
+        convertArithFastMathFlagsToLLVM(fMFAttr.getValue()));
+
     Value result;
     switch (kind) {
     case vector::CombiningKind::ADD:
-      result = lowerReductionWithStartValue<
+      result = lowerPredicatedReductionWithStartValue<
           LLVM::VPReduceAddOp, ReductionNeutralZero, LLVM::VPReduceFAddOp,
           ReductionNeutralZero>(rewriter, loc, llvmType, operand, acc,
                                 maskOp.getMask());
       break;
     case vector::CombiningKind::MUL:
-      result = lowerReductionWithStartValue<
+      result = lowerPredicatedReductionWithStartValue<
           LLVM::VPReduceMulOp, ReductionNeutralIntOne, LLVM::VPReduceFMulOp,
           ReductionNeutralFPOne>(rewriter, loc, llvmType, operand, acc,
                                  maskOp.getMask());
       break;
     case vector::CombiningKind::MINUI:
-      result = lowerReductionWithStartValue<LLVM::VPReduceUMinOp,
-                                            ReductionNeutralUIntMax>(
+      result = lowerPredicatedReductionWithStartValue<LLVM::VPReduceUMinOp,
+                                                      ReductionNeutralUIntMax>(
           rewriter, loc, llvmType, operand, acc, maskOp.getMask());
       break;
     case vector::CombiningKind::MINSI:
-      result = lowerReductionWithStartValue<LLVM::VPReduceSMinOp,
-                                            ReductionNeutralSIntMax>(
+      result = lowerPredicatedReductionWithStartValue<LLVM::VPReduceSMinOp,
+                                                      ReductionNeutralSIntMax>(
           rewriter, loc, llvmType, operand, acc, maskOp.getMask());
       break;
     case vector::CombiningKind::MAXUI:
-      result = lowerReductionWithStartValue<LLVM::VPReduceUMaxOp,
-                                            ReductionNeutralUIntMin>(
+      result = lowerPredicatedReductionWithStartValue<LLVM::VPReduceUMaxOp,
+                                                      ReductionNeutralUIntMin>(
           rewriter, loc, llvmType, operand, acc, maskOp.getMask());
       break;
     case vector::CombiningKind::MAXSI:
-      result = lowerReductionWithStartValue<LLVM::VPReduceSMaxOp,
-                                            ReductionNeutralSIntMin>(
+      result = lowerPredicatedReductionWithStartValue<LLVM::VPReduceSMaxOp,
+                                                      ReductionNeutralSIntMin>(
           rewriter, loc, llvmType, operand, acc, maskOp.getMask());
       break;
     case vector::CombiningKind::AND:
-      result = lowerReductionWithStartValue<LLVM::VPReduceAndOp,
-                                            ReductionNeutralAllOnes>(
+      result = lowerPredicatedReductionWithStartValue<LLVM::VPReduceAndOp,
+                                                      ReductionNeutralAllOnes>(
           rewriter, loc, llvmType, operand, acc, maskOp.getMask());
       break;
     case vector::CombiningKind::OR:
-      result = lowerReductionWithStartValue<LLVM::VPReduceOrOp,
-                                            ReductionNeutralZero>(
+      result = lowerPredicatedReductionWithStartValue<LLVM::VPReduceOrOp,
+                                                      ReductionNeutralZero>(
           rewriter, loc, llvmType, operand, acc, maskOp.getMask());
       break;
     case vector::CombiningKind::XOR:
-      result = lowerReductionWithStartValue<LLVM::VPReduceXorOp,
-                                            ReductionNeutralZero>(
+      result = lowerPredicatedReductionWithStartValue<LLVM::VPReduceXorOp,
+                                                      ReductionNeutralZero>(
           rewriter, loc, llvmType, operand, acc, maskOp.getMask());
       break;
     case vector::CombiningKind::MINF:
-      result = lowerReductionWithStartValue<LLVM::VPReduceFMinOp,
-                                            ReductionNeutralFPMax>(
+      result = lowerPredicatedReductionWithStartValue<LLVM::VPReduceFMinOp,
+                                                      ReductionNeutralFPMax>(
           rewriter, loc, llvmType, operand, acc, maskOp.getMask());
       break;
     case vector::CombiningKind::MAXF:
-      result = lowerReductionWithStartValue<LLVM::VPReduceFMaxOp,
-                                            ReductionNeutralFPMin>(
+      result = lowerPredicatedReductionWithStartValue<LLVM::VPReduceFMaxOp,
+                                                      ReductionNeutralFPMin>(
           rewriter, loc, llvmType, operand, acc, maskOp.getMask());
       break;
     case CombiningKind::MAXIMUMF:
       result = lowerMaskedReductionWithRegular<LLVM::vector_reduce_fmaximum,
                                                MaskNeutralFMaximum>(
-          rewriter, loc, llvmType, operand, acc, maskOp.getMask());
+          rewriter, loc, llvmType, operand, acc, maskOp.getMask(), fmf);
       break;
     case CombiningKind::MINIMUMF:
       result = lowerMaskedReductionWithRegular<LLVM::vector_reduce_fminimum,
                                                MaskNeutralFMinimum>(
-          rewriter, loc, llvmType, operand, acc, maskOp.getMask());
+          rewriter, loc, llvmType, operand, acc, maskOp.getMask(), fmf);
       break;
     }
 
diff --git a/mlir/lib/Dialect/Vector/IR/CMakeLists.txt b/mlir/lib/Dialect/Vector/IR/CMakeLists.txt
index 596f6422807cc60..9ec919423b3428f 100644
--- a/mlir/lib/Dialect/Vector/IR/CMakeLists.txt
+++ b/mlir/lib/Dialect/Vector/IR/CMakeLists.txt
@@ -8,7 +8,7 @@ add_mlir_dialect_library(MLIRVectorDialect
   MLIRMaskableOpInterfaceIncGen
   MLIRMaskingOpInterfaceIncGen
   MLIRVectorOpsIncGen
-  MLIRVectorOpsEnumsIncGen
+  MLIRVectorAttributesIncGen
 
   LINK_LIBS PUBLIC
   MLIRArithDialect
diff --git a/mlir/lib/Dialect/Vector/IR/VectorOps.cpp b/mlir/lib/Dialect/Vector/IR/VectorOps.cpp
index a8ad05f7bc1cabf..54a3de660850584 100644
--- a/mlir/lib/Dialect/Vector/IR/VectorOps.cpp
+++ b/mlir/lib/Dialect/Vector/IR/VectorOps.cpp
@@ -42,9 +42,9 @@
 #include <cstdint>
 #include <numeric>
 
-#include "mlir/Dialect/Vector/IR/VectorOpsDialect.cpp.inc"
+#include "mlir/Dialect/Vector/IR/VectorDialect.cpp.inc"
 // Pull in all enum type and utility function definitions.
-#include "mlir/Dialect/Vector/IR/VectorOpsEnums.cpp.inc"
+#include "mlir/Dialect/Vector/IR/VectorEnums.cpp.inc"
 
 using namespace mlir;
 using namespace mlir::vector;
@@ -256,7 +256,7 @@ struct BitmaskEnumStorage : public AttributeStorage {
 void VectorDialect::initialize() {
   addAttributes<
 #define GET_ATTRDEF_LIST
-#include "mlir/Dialect/Vector/IR/VectorOpsAttrDefs.cpp.inc"
+#include "mlir/Dialect/Vector/IR/VectorAttributes.cpp.inc"
       >();
 
   addOperations<
@@ -415,15 +415,17 @@ void MultiDimReductionOp::getCanonicalizationPatterns(
 //===----------------------------------------------------------------------===//
 
 void vector::ReductionOp::build(OpBuilder &builder, OperationState &result,
-                                CombiningKind kind, Value vector) {
-  build(builder, result, kind, vector, /*acc=*/Value());
+                                CombiningKind kind, Value vector,
+                                arith::FastMathFlags fastMathFlags) {
+  build(builder, result, kind, vector, /*acc=*/Value(), fastMathFlags);
 }
 
 void vector::ReductionOp::build(OpBuilder &builder, OperationState &result,
-                                CombiningKind kind, Value vector, Value acc) {
+                                CombiningKind kind, Value vector, Value acc,
+                                arith::FastMathFlags fastMathFlags) {
   build(builder, result,
         llvm::cast<VectorType>(vector.getType()).getElementType(), kind, vector,
-        acc);
+        acc, fastMathFlags);
 }
 
 LogicalResult ReductionOp::verify() {
@@ -447,9 +449,13 @@ ParseResult ReductionOp::parse(OpAsmParser &parser, OperationState &result) {
   Type redType;
   Type resType;
   CombiningKindAttr kindAttr;
+  arith::FastMathFlagsAttr fastMathAttr;
   if (parser.parseCustomAttributeWithFallback(kindAttr, Type{}, "kind",
                                               result.attributes) ||
       parser.parseComma() || parser.parseOperandList(operandsInfo) ||
+      (succeeded(parser.parseOptionalKeyword("fastmath")) &&
+       parser.parseCustomAttributeWithFallback(fastMathAttr, Type{}, "fastmath",
+                                               result.attributes)) ||
       parser.parseColonType(redType) ||
       parser.parseKeywordType("into", resType) ||
       (!operandsInfo.empty() &&
@@ -470,6 +476,12 @@ void ReductionOp::print(OpAsmPrinter &p) {
   p << ", " << getVector();
   if (getAcc())
     p << ", " << getAcc();
+
+  if (getFastmathAttr() &&
+      getFastmathAttr().getValue() != arith::FastMathFlags::none) {
+    p << ' ' << getFastmathAttrName().getValue();
+    p.printStrippedAttrOrType(getFastmathAttr());
+  }
   p << " : " << getVector().getType() << " into " << getDest().getType();
 }
 
@@ -6052,7 +6064,7 @@ Value mlir::vector::selectPassthru(OpBuilder &builder, Value mask,
 //===----------------------------------------------------------------------===//
 
 #define GET_ATTRDEF_CLASSES
-#include "mlir/Dialect/Vector/IR/VectorOpsAttrDefs.cpp.inc"
+#include "mlir/Dialect/Vector/IR/VectorAttributes.cpp.inc"
 
 #define GET_OP_CLASSES
 #include "mlir/Dialect/Vector/IR/VectorOps.cpp.inc"
diff --git a/mlir/python/mlir/dialects/VectorOps.td b/mlir/python/mlir/dialects/VectorOps.td
index 69a1028c9be61c9..f659f754b66a7f2 100644
--- a/mlir/python/mlir/dialects/VectorOps.td
+++ b/mlir/python/mlir/dialects/VectorOps.td
@@ -1,4 +1,4 @@
-//===-- VectorOps.td - Entry point for VectorOps bind ------*- tablegen -*-===//
+//===-- VectorOps.td - Entry point for Vector bindings -----*- tablegen -*-===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -6,9 +6,9 @@
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef PYTHON_BINDINGS_VECTOR_OPS
-#define PYTHON_BINDINGS_VECTOR_OPS
+#ifndef PYTHON_BINDINGS_VECTOR
+#define PYTHON_BINDINGS_VECTOR
 
-include "mlir/Dialect/Vector/IR/VectorOps.td"
+include "mlir/Dialect/Vector/IR/Vector.td"
 
-#endif
+#endif // PYTHON_BINDINGS_VECTOR