[llvm] Handle VECREDUCE intrinsics in NVPTX backend (PR #136253)
Princeton Ferro via llvm-commits
llvm-commits at lists.llvm.org
Tue Jun 10 23:16:34 PDT 2025
Prince781 wrote:
Proposed use:
```llvm
; No strategy metadata attached: the backend picks its default lowering
; (shuffle reduction for f16x2 and f32x2).
define float @reduce_fadd_reassoc(<16 x float> %in) {
entry:
  ; `reassoc` permits the additions to be reordered.
  %sum = call reassoc float @llvm.vector.reduce.fadd(float 0.000000e+00, <16 x float> %in)
  ret float %sum
}
; default (tree reduction on SM100 for fmin3; shuffle reduction otherwise)
; Note: unlike llvm.vector.reduce.fadd/fmul, llvm.vector.reduce.fmin has no
; scalar start-value operand; it takes only the vector being reduced.
define float @reduce_fmin_reassoc(<16 x float> %in) {
  %res = call reassoc float @llvm.vector.reduce.fmin(<16 x float> %in)
  ret float %res
}
; force tree for reassoc float
; Standalone alternative to the default variant: it reuses the name
; @reduce_fadd_reassoc, so the two cannot be placed in the same module as-is.
define float @reduce_fadd_reassoc(<16 x float> %in) {
  ; Instruction metadata is attached as `, !kind !node`; an empty node (!{})
  ; is used here because the proposed `reduce.tree` kind carries no payload.
  %res = call reassoc float @llvm.vector.reduce.fadd(float 0.0, <16 x float> %in), !reduce.tree !{}
  ret float %res
}
; force shuffle
; Standalone alternative to the default variant: it reuses the name
; @reduce_fmin_reassoc, so the two cannot be placed in the same module as-is.
define float @reduce_fmin_reassoc(<16 x float> %in) {
  ; llvm.vector.reduce.fmin takes only the vector operand (no start value).
  ; Instruction metadata is attached as `, !kind !node`; an empty node (!{})
  ; is used here because the proposed `reduce.shuffle` kind carries no payload.
  %res = call reassoc float @llvm.vector.reduce.fmin(<16 x float> %in), !reduce.shuffle !{}
  ret float %res
}
; force sequential
define i32 @reduce_umin(<16 x i32> %in) {
  ; Integer reductions such as llvm.vector.reduce.umin take only the vector
  ; operand; there is no scalar start value.
  ; Instruction metadata is attached as `, !kind !node`; an empty node (!{})
  ; is used here because the proposed `reduce.sequential` kind carries no payload.
  %res = call i32 @llvm.vector.reduce.umin(<16 x i32> %in), !reduce.sequential !{}
  ret i32 %res
}
```
https://github.com/llvm/llvm-project/pull/136253
More information about the llvm-commits
mailing list