[llvm] Handle VECREDUCE intrinsics in NVPTX backend (PR #136253)

Tue Jun 10 23:16:34 PDT 2025

Prince781 wrote:

Proposed use:

```llvm
; Default strategy, no metadata on the call: shuffle reduction for f16x2 and f32x2.
define float @reduce_fadd_reassoc(<16 x float> %in) {
entry:
  ; fadd reduction takes a scalar start value followed by the vector operand.
  %sum = call reassoc float @llvm.vector.reduce.fadd(float 0.0, <16 x float> %in)
  ret float %sum
}

; default (tree reduction on SM100 for fmin3; shuffle reduction otherwise)
define float @reduce_fmin_reassoc(<16 x float> %in) {
  ; Unlike fadd/fmul, the fmin reduction intrinsic has no start-value operand:
  ; it takes only the vector to reduce.
  %res = call reassoc float @llvm.vector.reduce.fmin(<16 x float> %in)
  ret float %res
}

; force tree for reassoc float
; NOTE: this reuses the symbol name of the default fadd example; rename one of
; them if both definitions are placed in the same module.
define float @reduce_fadd_reassoc(<16 x float> %in) {
  ; Instruction metadata is attached as `, !kind !node`. The proposed
  ; !reduce.tree kind carries no operands here, so an empty node is attached.
  %res = call reassoc float @llvm.vector.reduce.fadd(float 0.0, <16 x float> %in), !reduce.tree !{}
  ret float %res
}

; force shuffle
; NOTE: this reuses the symbol name of the default fmin example; rename one of
; them if both definitions are placed in the same module.
define float @reduce_fmin_reassoc(<16 x float> %in) {
  ; The fmin reduction intrinsic takes only the vector operand (no start value).
  ; Instruction metadata is attached as `, !kind !node`. The proposed
  ; !reduce.shuffle kind carries no operands here, so an empty node is attached.
  %res = call reassoc float @llvm.vector.reduce.fmin(<16 x float> %in), !reduce.shuffle !{}
  ret float %res
}

; force sequential
define i32 @reduce_umin(<16 x i32> %in) {
  ; The integer umin reduction intrinsic takes only the vector operand (no
  ; start value).
  ; Instruction metadata is attached as `, !kind !node`. The proposed
  ; !reduce.sequential kind carries no operands here, so an empty node is attached.
  %res = call i32 @llvm.vector.reduce.umin(<16 x i32> %in), !reduce.sequential !{}
  ret i32 %res
}

```

https://github.com/llvm/llvm-project/pull/136253