[llvm] 20964c9 - DAG: Fix dropping flags when widening unary vector ops

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Wed Jul 5 14:32:50 PDT 2023


Author: Matt Arsenault
Date: 2023-07-05T17:25:24-04:00
New Revision: 20964c901ae0ae6b47b1e5eb3f79bc885c4d3702

URL: https://github.com/llvm/llvm-project/commit/20964c901ae0ae6b47b1e5eb3f79bc885c4d3702
DIFF: https://github.com/llvm/llvm-project/commit/20964c901ae0ae6b47b1e5eb3f79bc885c4d3702.diff

LOG: DAG: Fix dropping flags when widening unary vector ops

Added: 
    

Modified: 
    llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
    llvm/test/CodeGen/AMDGPU/llvm.exp.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 6bff3658252fab..c201aea8004a65 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -4902,7 +4902,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Unary(SDNode *N) {
   EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
   SDValue InOp = GetWidenedVector(N->getOperand(0));
   if (N->getNumOperands() == 1)
-    return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, InOp);
+    return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, InOp, N->getFlags());
 
   assert(N->getNumOperands() == 3 && "Unexpected number of operands!");
   assert(N->isVPOpcode() && "Expected VP opcode");

diff --git a/llvm/test/CodeGen/AMDGPU/llvm.exp.ll b/llvm/test/CodeGen/AMDGPU/llvm.exp.ll
index ec956fbb24d413..a4dbf34746f9f8 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.exp.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.exp.ll
@@ -7510,52 +7510,29 @@ define <3 x half> @v_exp_v3f16(<3 x half> %in) {
 }
 
 define <3 x half> @v_exp_v3f16_afn(<3 x half> %in) {
-; VI-SDAG-LABEL: v_exp_v3f16_afn:
-; VI-SDAG:       ; %bb.0:
-; VI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-SDAG-NEXT:    v_cvt_f32_f16_e32 v2, v0
-; VI-SDAG-NEXT:    v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; VI-SDAG-NEXT:    v_cvt_f32_f16_e32 v1, v1
-; VI-SDAG-NEXT:    v_mul_f32_e32 v2, 0x3fb8aa3b, v2
-; VI-SDAG-NEXT:    v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; VI-SDAG-NEXT:    v_exp_f32_e32 v2, v2
-; VI-SDAG-NEXT:    v_exp_f32_e32 v0, v0
-; VI-SDAG-NEXT:    v_mul_f32_e32 v1, 0x3fb8aa3b, v1
-; VI-SDAG-NEXT:    v_exp_f32_e32 v1, v1
-; VI-SDAG-NEXT:    v_cvt_f16_f32_e32 v2, v2
-; VI-SDAG-NEXT:    v_cvt_f16_f32_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
-; VI-SDAG-NEXT:    v_cvt_f16_f32_e32 v1, v1
-; VI-SDAG-NEXT:    v_or_b32_e32 v0, v2, v0
-; VI-SDAG-NEXT:    s_setpc_b64 s[30:31]
-;
-; VI-GISEL-LABEL: v_exp_v3f16_afn:
-; VI-GISEL:       ; %bb.0:
-; VI-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x3dc5
-; VI-GISEL-NEXT:    v_mul_f16_e32 v2, 0x3dc5, v0
-; VI-GISEL-NEXT:    v_mul_f16_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; VI-GISEL-NEXT:    v_exp_f16_e32 v2, v2
-; VI-GISEL-NEXT:    v_exp_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
-; VI-GISEL-NEXT:    v_mul_f16_e32 v1, 0x3dc5, v1
-; VI-GISEL-NEXT:    v_exp_f16_e32 v1, v1
-; VI-GISEL-NEXT:    v_or_b32_e32 v0, v2, v0
-; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
+; VI-LABEL: v_exp_v3f16_afn:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT:    v_mov_b32_e32 v3, 0x3dc5
+; VI-NEXT:    v_mul_f16_e32 v2, 0x3dc5, v0
+; VI-NEXT:    v_mul_f16_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; VI-NEXT:    v_exp_f16_e32 v2, v2
+; VI-NEXT:    v_exp_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
+; VI-NEXT:    v_mul_f16_e32 v1, 0x3dc5, v1
+; VI-NEXT:    v_exp_f16_e32 v1, v1
+; VI-NEXT:    v_or_b32_e32 v0, v2, v0
+; VI-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX900-SDAG-LABEL: v_exp_v3f16_afn:
 ; GFX900-SDAG:       ; %bb.0:
 ; GFX900-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-SDAG-NEXT:    v_cvt_f32_f16_e32 v2, v0
-; GFX900-SDAG-NEXT:    v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; GFX900-SDAG-NEXT:    v_cvt_f32_f16_e32 v1, v1
-; GFX900-SDAG-NEXT:    v_mul_f32_e32 v2, 0x3fb8aa3b, v2
-; GFX900-SDAG-NEXT:    v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; GFX900-SDAG-NEXT:    v_exp_f32_e32 v2, v2
-; GFX900-SDAG-NEXT:    v_exp_f32_e32 v0, v0
-; GFX900-SDAG-NEXT:    v_mul_f32_e32 v1, 0x3fb8aa3b, v1
-; GFX900-SDAG-NEXT:    v_exp_f32_e32 v1, v1
-; GFX900-SDAG-NEXT:    v_cvt_f16_f32_e32 v2, v2
-; GFX900-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
-; GFX900-SDAG-NEXT:    v_cvt_f16_f32_e32 v1, v1
+; GFX900-SDAG-NEXT:    s_movk_i32 s4, 0x3dc5
+; GFX900-SDAG-NEXT:    v_mul_f16_e32 v2, 0x3dc5, v0
+; GFX900-SDAG-NEXT:    v_mul_f16_sdwa v0, v0, s4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX900-SDAG-NEXT:    v_exp_f16_e32 v2, v2
+; GFX900-SDAG-NEXT:    v_exp_f16_e32 v0, v0
+; GFX900-SDAG-NEXT:    v_mul_f16_e32 v1, 0x3dc5, v1
+; GFX900-SDAG-NEXT:    v_exp_f16_e32 v1, v1
 ; GFX900-SDAG-NEXT:    v_pack_b32_f16 v0, v2, v0
 ; GFX900-SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;


        


More information about the llvm-commits mailing list