[llvm] 22a2d74 - [NVPTX] Emit ld.v4.b16 for loading <4 x bfloat> (#109069)

via llvm-commits llvm-commits at lists.llvm.org
Tue Sep 17 21:06:50 PDT 2024


Author: Ahmed S. Taei
Date: 2024-09-17T21:06:46-07:00
New Revision: 22a2d74c0c6dbde6b3503ec51486d6cf5d0c83f1

URL: https://github.com/llvm/llvm-project/commit/22a2d74c0c6dbde6b3503ec51486d6cf5d0c83f1
DIFF: https://github.com/llvm/llvm-project/commit/22a2d74c0c6dbde6b3503ec51486d6cf5d0c83f1.diff

LOG: [NVPTX] Emit ld.v4.b16 for loading <4 x bfloat> (#109069)

This PR enables emitting a single vector load instruction (ld.v4.b16) for
<4 x bfloat>; without it, two ld.b32 loads are generated.

Added: 
    

Modified: 
    llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
    llvm/test/CodeGen/NVPTX/vector-loads.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index c5a40e4308860c..31a5e937adae96 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -6179,6 +6179,7 @@ static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG,
   case MVT::v4i16:
   case MVT::v4i32:
   case MVT::v4f16:
+  case MVT::v4bf16:
   case MVT::v4f32:
   case MVT::v8f16:  // <4 x f16x2>
   case MVT::v8bf16: // <4 x bf16x2>

diff  --git a/llvm/test/CodeGen/NVPTX/vector-loads.ll b/llvm/test/CodeGen/NVPTX/vector-loads.ll
index 9322b9e0fe6c82..f582ebc166dd0d 100644
--- a/llvm/test/CodeGen/NVPTX/vector-loads.ll
+++ b/llvm/test/CodeGen/NVPTX/vector-loads.ll
@@ -198,3 +198,12 @@ define void @extv8f16_generic_a4(ptr noalias readonly align 16 %dst, ptr noalias
 
 
 !1 = !{i32 0, i32 64}
+
+; CHECK-LABEL: bf16_v4_align_load_store
+define dso_local void @bf16_v4_align_load_store(ptr noundef %0, ptr noundef %1) #0 {
+  ; CHECK: ld.v4.b16
+  ; CHECK: st.v4.b16
+  %3 = load <4 x bfloat>, ptr %1, align 8
+  store <4 x bfloat> %3, ptr %0, align 8
+  ret void
+}


        


More information about the llvm-commits mailing list