[llvm] 6e34e71 - [AMDGPU] Enable divergence driven ISel for ADD/SUB i64

via llvm-commits <llvm-commits at lists.llvm.org>
Fri Mar 20 07:06:31 PDT 2020


Author: alex-t
Date: 2020-03-20T17:06:11+03:00
New Revision: 6e34e71869ab57ee33cb361426789ee85e1cde87

URL: https://github.com/llvm/llvm-project/commit/6e34e71869ab57ee33cb361426789ee85e1cde87
DIFF: https://github.com/llvm/llvm-project/commit/6e34e71869ab57ee33cb361426789ee85e1cde87.diff

LOG: [AMDGPU] Enable divergence driven ISel for ADD/SUB i64

Summary:
Currently we custom-select add/sub with carry-out to the scalar form, relying on a later pass to replace them with the vector form if necessary.
This change enables the custom selection code to take the divergence of the adde/addc SDNodes into account and select the appropriate form in one step.
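
For context, a minimal standalone sketch (not the actual AMDGPUISelDAGToDAG code) of the decision this patch moves into the selector: a uniform value can live on the scalar ALU, while a divergent value, one that differs across the lanes of a wavefront, must go to the vector ALU.

// Hedged sketch, not the real LLVM code: the selector asks the node whether
// its value is divergent and picks the scalar or vector ALU form directly,
// instead of always emitting the scalar form and relying on a later fix-up
// to move it to the VALU.
#include <cassert>

enum class Unit { SALU, VALU };

Unit selectUnitForAdd64(bool IsDivergent) {
  return IsDivergent ? Unit::VALU : Unit::SALU;
}

int main() {
  assert(selectUnitForAdd64(false) == Unit::SALU); // e.g. a uniform loop counter
  assert(selectUnitForAdd64(true) == Unit::VALU);  // e.g. per-lane address math
}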

Reviewers: arsenm, vpykhtin, rampitec

Reviewed By: arsenm, vpykhtin

Subscribers: kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, kerbowa

Differential Revision: https://reviews.llvm.org/D76371

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
    llvm/test/CodeGen/AMDGPU/bypass-div.ll
    llvm/test/CodeGen/AMDGPU/sdiv64.ll
    llvm/test/CodeGen/AMDGPU/srem64.ll
    llvm/test/CodeGen/AMDGPU/udiv64.ll
    llvm/test/CodeGen/AMDGPU/urem64.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 507c14a63d9e..269434d31e21 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -1017,8 +1017,14 @@ void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
 
   SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue);
 
-  unsigned Opc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
-  unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;
+  static const unsigned OpcMap[2][2][2] = {
+      {{AMDGPU::S_SUB_U32, AMDGPU::S_ADD_U32},
+       {AMDGPU::V_SUB_I32_e32, AMDGPU::V_ADD_I32_e32}},
+      {{AMDGPU::S_SUBB_U32, AMDGPU::S_ADDC_U32},
+       {AMDGPU::V_SUBB_U32_e32, AMDGPU::V_ADDC_U32_e32}}};
+
+  unsigned Opc = OpcMap[0][N->isDivergent()][IsAdd];
+  unsigned CarryOpc = OpcMap[1][N->isDivergent()][IsAdd];
 
   SDNode *AddLo;
   if (!ConsumeCarry) {
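
As a standalone illustration, here is a hedged sketch of the table lookup above, with placeholder constants standing in for the real AMDGPU::* opcode enums; the index order is [consumes carry-in][node is divergent][operation is an add]:

#include <cstdio>

enum Opc { S_SUB, S_ADD, V_SUB, V_ADD, S_SUBB, S_ADDC, V_SUBB, V_ADDC };

static const Opc OpcMap[2][2][2] = {
    // Low half of the i64 op: produces a carry-out only.
    {{S_SUB, S_ADD},   // uniform   -> scalar ALU
     {V_SUB, V_ADD}},  // divergent -> vector ALU
    // High half: also consumes the carry from the low half.
    {{S_SUBB, S_ADDC},
     {V_SUBB, V_ADDC}}};

int main() {
  bool IsAdd = true, Divergent = true;
  // A divergent 64-bit add selects V_ADD for the low half and V_ADDC
  // (carry-consuming) for the high half.
  printf("lo=%d hi=%d\n", OpcMap[0][Divergent][IsAdd],
         OpcMap[1][Divergent][IsAdd]);
}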

diff --git a/llvm/test/CodeGen/AMDGPU/bypass-div.ll b/llvm/test/CodeGen/AMDGPU/bypass-div.ll
index 299ae9083703..5cc320a3658b 100644
--- a/llvm/test/CodeGen/AMDGPU/bypass-div.ll
+++ b/llvm/test/CodeGen/AMDGPU/bypass-div.ll
@@ -48,7 +48,7 @@ define i64 @sdiv64(i64 %a, i64 %b) {
 ; GFX9-NEXT:    v_mul_lo_u32 v14, v7, v13
 ; GFX9-NEXT:    v_addc_co_u32_e32 v11, vcc, v16, v11, vcc
 ; GFX9-NEXT:    v_mul_hi_u32 v13, v7, v13
-; GFX9-NEXT:    v_add_co_u32_e32 v12, vcc, v14, v12
+; GFX9-NEXT:    v_add_co_u32_e32 v12, vcc, v12, v14
 ; GFX9-NEXT:    v_mul_hi_u32 v12, v7, v10
 ; GFX9-NEXT:    v_mul_lo_u32 v10, v7, v10
 ; GFX9-NEXT:    v_addc_co_u32_e32 v11, vcc, v11, v13, vcc
@@ -70,7 +70,7 @@ define i64 @sdiv64(i64 %a, i64 %b) {
 ; GFX9-NEXT:    v_mul_hi_u32 v13, v10, v8
 ; GFX9-NEXT:    v_mul_lo_u32 v8, v10, v8
 ; GFX9-NEXT:    v_addc_co_u32_e32 v14, vcc, v16, v14, vcc
-; GFX9-NEXT:    v_add_co_u32_e32 v8, vcc, v8, v12
+; GFX9-NEXT:    v_add_co_u32_e32 v8, vcc, v12, v8
 ; GFX9-NEXT:    v_mul_hi_u32 v8, v10, v9
 ; GFX9-NEXT:    v_mul_lo_u32 v9, v10, v9
 ; GFX9-NEXT:    v_addc_co_u32_e32 v12, vcc, v14, v13, vcc
@@ -94,7 +94,7 @@ define i64 @sdiv64(i64 %a, i64 %b) {
 ; GFX9-NEXT:    v_mul_hi_u32 v6, v1, v6
 ; GFX9-NEXT:    v_mul_hi_u32 v13, v1, v7
 ; GFX9-NEXT:    v_mul_lo_u32 v7, v1, v7
-; GFX9-NEXT:    v_add_co_u32_e32 v10, vcc, v12, v10
+; GFX9-NEXT:    v_add_co_u32_e32 v10, vcc, v10, v12
 ; GFX9-NEXT:    v_addc_co_u32_e32 v6, vcc, v11, v6, vcc
 ; GFX9-NEXT:    v_addc_co_u32_e32 v10, vcc, v13, v15, vcc
 ; GFX9-NEXT:    v_add_co_u32_e32 v6, vcc, v6, v7
@@ -215,7 +215,7 @@ define i64 @udiv64(i64 %a, i64 %b) {
 ; GFX9-NEXT:    v_addc_co_u32_e32 v10, vcc, v13, v14, vcc
 ; GFX9-NEXT:    v_mul_lo_u32 v14, v5, v11
 ; GFX9-NEXT:    v_mul_hi_u32 v11, v5, v11
-; GFX9-NEXT:    v_add_co_u32_e32 v9, vcc, v14, v9
+; GFX9-NEXT:    v_add_co_u32_e32 v9, vcc, v9, v14
 ; GFX9-NEXT:    v_addc_co_u32_e32 v9, vcc, v10, v11, vcc
 ; GFX9-NEXT:    v_addc_co_u32_e32 v10, vcc, v15, v12, vcc
 ; GFX9-NEXT:    v_add_co_u32_e32 v8, vcc, v9, v8
@@ -237,7 +237,7 @@ define i64 @udiv64(i64 %a, i64 %b) {
 ; GFX9-NEXT:    v_mul_hi_u32 v11, v8, v6
 ; GFX9-NEXT:    v_mul_lo_u32 v6, v8, v6
 ; GFX9-NEXT:    v_addc_co_u32_e32 v15, vcc, v13, v15, vcc
-; GFX9-NEXT:    v_add_co_u32_e32 v6, vcc, v6, v10
+; GFX9-NEXT:    v_add_co_u32_e32 v6, vcc, v10, v6
 ; GFX9-NEXT:    v_addc_co_u32_e32 v6, vcc, v15, v11, vcc
 ; GFX9-NEXT:    v_addc_co_u32_e32 v8, vcc, v14, v12, vcc
 ; GFX9-NEXT:    v_add_co_u32_e32 v6, vcc, v6, v7
@@ -254,7 +254,7 @@ define i64 @udiv64(i64 %a, i64 %b) {
 ; GFX9-NEXT:    v_addc_co_u32_e32 v7, vcc, v13, v8, vcc
 ; GFX9-NEXT:    v_mul_lo_u32 v8, v1, v4
 ; GFX9-NEXT:    v_mul_hi_u32 v4, v1, v4
-; GFX9-NEXT:    v_add_co_u32_e32 v6, vcc, v8, v6
+; GFX9-NEXT:    v_add_co_u32_e32 v6, vcc, v6, v8
 ; GFX9-NEXT:    v_addc_co_u32_e32 v4, vcc, v7, v4, vcc
 ; GFX9-NEXT:    v_addc_co_u32_e32 v6, vcc, v9, v12, vcc
 ; GFX9-NEXT:    v_add_co_u32_e32 v4, vcc, v4, v5
@@ -376,7 +376,7 @@ define i64 @srem64(i64 %a, i64 %b) {
 ; GFX9-NEXT:    v_mul_lo_u32 v13, v6, v12
 ; GFX9-NEXT:    v_mul_hi_u32 v12, v6, v12
 ; GFX9-NEXT:    v_addc_co_u32_e32 v10, vcc, v15, v10, vcc
-; GFX9-NEXT:    v_add_co_u32_e32 v11, vcc, v13, v11
+; GFX9-NEXT:    v_add_co_u32_e32 v11, vcc, v11, v13
 ; GFX9-NEXT:    v_addc_co_u32_e32 v10, vcc, v10, v12, vcc
 ; GFX9-NEXT:    v_addc_co_u32_e32 v11, vcc, v16, v14, vcc
 ; GFX9-NEXT:    v_add_co_u32_e32 v9, vcc, v10, v9
@@ -398,7 +398,7 @@ define i64 @srem64(i64 %a, i64 %b) {
 ; GFX9-NEXT:    v_mul_hi_u32 v12, v9, v7
 ; GFX9-NEXT:    v_mul_lo_u32 v7, v9, v7
 ; GFX9-NEXT:    v_addc_co_u32_e32 v16, vcc, v15, v16, vcc
-; GFX9-NEXT:    v_add_co_u32_e32 v7, vcc, v7, v11
+; GFX9-NEXT:    v_add_co_u32_e32 v7, vcc, v11, v7
 ; GFX9-NEXT:    v_addc_co_u32_e32 v7, vcc, v16, v12, vcc
 ; GFX9-NEXT:    v_addc_co_u32_e32 v9, vcc, v13, v14, vcc
 ; GFX9-NEXT:    v_add_co_u32_e32 v7, vcc, v7, v8
@@ -420,7 +420,7 @@ define i64 @srem64(i64 %a, i64 %b) {
 ; GFX9-NEXT:    v_mul_hi_u32 v4, v1, v4
 ; GFX9-NEXT:    v_mul_hi_u32 v12, v1, v6
 ; GFX9-NEXT:    v_mul_lo_u32 v6, v1, v6
-; GFX9-NEXT:    v_add_co_u32_e32 v9, vcc, v11, v9
+; GFX9-NEXT:    v_add_co_u32_e32 v9, vcc, v9, v11
 ; GFX9-NEXT:    v_addc_co_u32_e32 v4, vcc, v10, v4, vcc
 ; GFX9-NEXT:    v_addc_co_u32_e32 v9, vcc, v12, v14, vcc
 ; GFX9-NEXT:    v_add_co_u32_e32 v4, vcc, v4, v6
@@ -539,7 +539,7 @@ define i64 @urem64(i64 %a, i64 %b) {
 ; GFX9-NEXT:    v_addc_co_u32_e32 v10, vcc, v13, v14, vcc
 ; GFX9-NEXT:    v_mul_lo_u32 v14, v5, v11
 ; GFX9-NEXT:    v_mul_hi_u32 v11, v5, v11
-; GFX9-NEXT:    v_add_co_u32_e32 v9, vcc, v14, v9
+; GFX9-NEXT:    v_add_co_u32_e32 v9, vcc, v9, v14
 ; GFX9-NEXT:    v_addc_co_u32_e32 v9, vcc, v10, v11, vcc
 ; GFX9-NEXT:    v_addc_co_u32_e32 v10, vcc, v15, v12, vcc
 ; GFX9-NEXT:    v_add_co_u32_e32 v8, vcc, v9, v8
@@ -561,7 +561,7 @@ define i64 @urem64(i64 %a, i64 %b) {
 ; GFX9-NEXT:    v_mul_hi_u32 v11, v8, v6
 ; GFX9-NEXT:    v_mul_lo_u32 v6, v8, v6
 ; GFX9-NEXT:    v_addc_co_u32_e32 v15, vcc, v13, v15, vcc
-; GFX9-NEXT:    v_add_co_u32_e32 v6, vcc, v6, v10
+; GFX9-NEXT:    v_add_co_u32_e32 v6, vcc, v10, v6
 ; GFX9-NEXT:    v_addc_co_u32_e32 v6, vcc, v15, v11, vcc
 ; GFX9-NEXT:    v_addc_co_u32_e32 v8, vcc, v14, v12, vcc
 ; GFX9-NEXT:    v_add_co_u32_e32 v6, vcc, v6, v7
@@ -578,7 +578,7 @@ define i64 @urem64(i64 %a, i64 %b) {
 ; GFX9-NEXT:    v_addc_co_u32_e32 v7, vcc, v13, v8, vcc
 ; GFX9-NEXT:    v_mul_lo_u32 v8, v1, v4
 ; GFX9-NEXT:    v_mul_hi_u32 v4, v1, v4
-; GFX9-NEXT:    v_add_co_u32_e32 v6, vcc, v8, v6
+; GFX9-NEXT:    v_add_co_u32_e32 v6, vcc, v6, v8
 ; GFX9-NEXT:    v_addc_co_u32_e32 v4, vcc, v7, v4, vcc
 ; GFX9-NEXT:    v_addc_co_u32_e32 v6, vcc, v9, v12, vcc
 ; GFX9-NEXT:    v_add_co_u32_e32 v4, vcc, v4, v5
@@ -843,7 +843,7 @@ define <2 x i64> @sdivrem64(i64 %a, i64 %b) {
 ; GFX9-NEXT:    v_mul_lo_u32 v14, v7, v13
 ; GFX9-NEXT:    v_addc_co_u32_e32 v11, vcc, v16, v11, vcc
 ; GFX9-NEXT:    v_mul_hi_u32 v13, v7, v13
-; GFX9-NEXT:    v_add_co_u32_e32 v12, vcc, v14, v12
+; GFX9-NEXT:    v_add_co_u32_e32 v12, vcc, v12, v14
 ; GFX9-NEXT:    v_mul_hi_u32 v12, v7, v10
 ; GFX9-NEXT:    v_mul_lo_u32 v10, v7, v10
 ; GFX9-NEXT:    v_addc_co_u32_e32 v11, vcc, v11, v13, vcc
@@ -865,7 +865,7 @@ define <2 x i64> @sdivrem64(i64 %a, i64 %b) {
 ; GFX9-NEXT:    v_mul_hi_u32 v13, v10, v8
 ; GFX9-NEXT:    v_mul_lo_u32 v8, v10, v8
 ; GFX9-NEXT:    v_addc_co_u32_e32 v14, vcc, v16, v14, vcc
-; GFX9-NEXT:    v_add_co_u32_e32 v8, vcc, v8, v12
+; GFX9-NEXT:    v_add_co_u32_e32 v8, vcc, v12, v8
 ; GFX9-NEXT:    v_mul_hi_u32 v8, v10, v9
 ; GFX9-NEXT:    v_mul_lo_u32 v9, v10, v9
 ; GFX9-NEXT:    v_addc_co_u32_e32 v12, vcc, v14, v13, vcc
@@ -889,7 +889,7 @@ define <2 x i64> @sdivrem64(i64 %a, i64 %b) {
 ; GFX9-NEXT:    v_mul_hi_u32 v4, v1, v4
 ; GFX9-NEXT:    v_mul_hi_u32 v13, v1, v7
 ; GFX9-NEXT:    v_mul_lo_u32 v7, v1, v7
-; GFX9-NEXT:    v_add_co_u32_e32 v10, vcc, v12, v10
+; GFX9-NEXT:    v_add_co_u32_e32 v10, vcc, v10, v12
 ; GFX9-NEXT:    v_addc_co_u32_e32 v4, vcc, v11, v4, vcc
 ; GFX9-NEXT:    v_addc_co_u32_e32 v10, vcc, v13, v15, vcc
 ; GFX9-NEXT:    v_add_co_u32_e32 v4, vcc, v4, v7
@@ -1032,7 +1032,7 @@ define <2 x i64> @udivrem64(i64 %a, i64 %b) {
 ; GFX9-NEXT:    v_addc_co_u32_e32 v10, vcc, v13, v14, vcc
 ; GFX9-NEXT:    v_mul_lo_u32 v14, v5, v11
 ; GFX9-NEXT:    v_mul_hi_u32 v11, v5, v11
-; GFX9-NEXT:    v_add_co_u32_e32 v9, vcc, v14, v9
+; GFX9-NEXT:    v_add_co_u32_e32 v9, vcc, v9, v14
 ; GFX9-NEXT:    v_addc_co_u32_e32 v9, vcc, v10, v11, vcc
 ; GFX9-NEXT:    v_addc_co_u32_e32 v10, vcc, v15, v12, vcc
 ; GFX9-NEXT:    v_add_co_u32_e32 v8, vcc, v9, v8
@@ -1054,7 +1054,7 @@ define <2 x i64> @udivrem64(i64 %a, i64 %b) {
 ; GFX9-NEXT:    v_mul_hi_u32 v11, v8, v6
 ; GFX9-NEXT:    v_mul_lo_u32 v6, v8, v6
 ; GFX9-NEXT:    v_addc_co_u32_e32 v15, vcc, v13, v15, vcc
-; GFX9-NEXT:    v_add_co_u32_e32 v6, vcc, v6, v10
+; GFX9-NEXT:    v_add_co_u32_e32 v6, vcc, v10, v6
 ; GFX9-NEXT:    v_addc_co_u32_e32 v6, vcc, v15, v11, vcc
 ; GFX9-NEXT:    v_addc_co_u32_e32 v8, vcc, v14, v12, vcc
 ; GFX9-NEXT:    v_add_co_u32_e32 v6, vcc, v6, v7
@@ -1071,7 +1071,7 @@ define <2 x i64> @udivrem64(i64 %a, i64 %b) {
 ; GFX9-NEXT:    v_addc_co_u32_e32 v7, vcc, v13, v8, vcc
 ; GFX9-NEXT:    v_mul_lo_u32 v8, v1, v4
 ; GFX9-NEXT:    v_mul_hi_u32 v4, v1, v4
-; GFX9-NEXT:    v_add_co_u32_e32 v6, vcc, v8, v6
+; GFX9-NEXT:    v_add_co_u32_e32 v6, vcc, v6, v8
 ; GFX9-NEXT:    v_addc_co_u32_e32 v4, vcc, v7, v4, vcc
 ; GFX9-NEXT:    v_addc_co_u32_e32 v6, vcc, v9, v12, vcc
 ; GFX9-NEXT:    v_add_co_u32_e32 v4, vcc, v4, v5

diff --git a/llvm/test/CodeGen/AMDGPU/sdiv64.ll b/llvm/test/CodeGen/AMDGPU/sdiv64.ll
index f37dfbd92e25..b066cebe5486 100644
--- a/llvm/test/CodeGen/AMDGPU/sdiv64.ll
+++ b/llvm/test/CodeGen/AMDGPU/sdiv64.ll
@@ -293,7 +293,7 @@ define i64 @v_test_sdiv(i64 %x, i64 %y) {
 ; GCN-NEXT:    v_mul_lo_u32 v13, v6, v10
 ; GCN-NEXT:    v_mul_hi_u32 v10, v6, v10
 ; GCN-NEXT:    v_addc_u32_e32 v11, vcc, v15, v11, vcc
-; GCN-NEXT:    v_add_i32_e32 v12, vcc, v13, v12
+; GCN-NEXT:    v_add_i32_e32 v12, vcc, v12, v13
 ; GCN-NEXT:    v_addc_u32_e32 v10, vcc, v11, v10, vcc
 ; GCN-NEXT:    v_addc_u32_e32 v11, vcc, v16, v14, vcc
 ; GCN-NEXT:    v_add_i32_e32 v9, vcc, v10, v9
@@ -315,7 +315,7 @@ define i64 @v_test_sdiv(i64 %x, i64 %y) {
 ; GCN-NEXT:    v_mul_hi_u32 v11, v9, v8
 ; GCN-NEXT:    v_addc_u32_e32 v16, vcc, v15, v17, vcc
 ; GCN-NEXT:    v_mul_lo_u32 v8, v9, v8
-; GCN-NEXT:    v_add_i32_e32 v7, vcc, v7, v13
+; GCN-NEXT:    v_add_i32_e32 v7, vcc, v13, v7
 ; GCN-NEXT:    v_addc_u32_e32 v7, vcc, v16, v12, vcc
 ; GCN-NEXT:    v_addc_u32_e32 v9, vcc, v11, v14, vcc
 ; GCN-NEXT:    v_add_i32_e32 v7, vcc, v7, v8
@@ -338,7 +338,7 @@ define i64 @v_test_sdiv(i64 %x, i64 %y) {
 ; GCN-NEXT:    v_mul_hi_u32 v5, v1, v5
 ; GCN-NEXT:    v_mul_hi_u32 v11, v1, v6
 ; GCN-NEXT:    v_mul_lo_u32 v6, v1, v6
-; GCN-NEXT:    v_add_i32_e32 v8, vcc, v10, v8
+; GCN-NEXT:    v_add_i32_e32 v8, vcc, v8, v10
 ; GCN-NEXT:    v_addc_u32_e32 v5, vcc, v9, v5, vcc
 ; GCN-NEXT:    v_addc_u32_e32 v8, vcc, v11, v14, vcc
 ; GCN-NEXT:    v_add_i32_e32 v5, vcc, v5, v6
@@ -1388,7 +1388,7 @@ define i64 @v_test_sdiv_k_num_i64(i64 %x) {
 ; GCN-NEXT:    v_mul_lo_u32 v11, v4, v8
 ; GCN-NEXT:    v_mul_hi_u32 v8, v4, v8
 ; GCN-NEXT:    v_addc_u32_e32 v9, vcc, v13, v9, vcc
-; GCN-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
+; GCN-NEXT:    v_add_i32_e32 v10, vcc, v10, v11
 ; GCN-NEXT:    v_addc_u32_e32 v8, vcc, v9, v8, vcc
 ; GCN-NEXT:    v_addc_u32_e32 v9, vcc, v14, v12, vcc
 ; GCN-NEXT:    v_add_i32_e32 v7, vcc, v8, v7
@@ -1410,7 +1410,7 @@ define i64 @v_test_sdiv_k_num_i64(i64 %x) {
 ; GCN-NEXT:    v_mul_hi_u32 v9, v7, v6
 ; GCN-NEXT:    v_addc_u32_e32 v14, vcc, v13, v15, vcc
 ; GCN-NEXT:    v_mul_lo_u32 v6, v7, v6
-; GCN-NEXT:    v_add_i32_e32 v5, vcc, v5, v11
+; GCN-NEXT:    v_add_i32_e32 v5, vcc, v11, v5
 ; GCN-NEXT:    v_addc_u32_e32 v5, vcc, v14, v10, vcc
 ; GCN-NEXT:    v_addc_u32_e32 v7, vcc, v9, v12, vcc
 ; GCN-NEXT:    v_add_i32_e32 v5, vcc, v5, v6
@@ -1600,7 +1600,7 @@ define i64 @v_test_sdiv_pow2_k_num_i64(i64 %x) {
 ; GCN-NEXT:    v_mul_lo_u32 v11, v4, v8
 ; GCN-NEXT:    v_mul_hi_u32 v8, v4, v8
 ; GCN-NEXT:    v_addc_u32_e32 v9, vcc, v13, v9, vcc
-; GCN-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
+; GCN-NEXT:    v_add_i32_e32 v10, vcc, v10, v11
 ; GCN-NEXT:    v_addc_u32_e32 v8, vcc, v9, v8, vcc
 ; GCN-NEXT:    v_addc_u32_e32 v9, vcc, v14, v12, vcc
 ; GCN-NEXT:    v_add_i32_e32 v7, vcc, v8, v7
@@ -1622,7 +1622,7 @@ define i64 @v_test_sdiv_pow2_k_num_i64(i64 %x) {
 ; GCN-NEXT:    v_mul_hi_u32 v9, v7, v6
 ; GCN-NEXT:    v_addc_u32_e32 v14, vcc, v13, v15, vcc
 ; GCN-NEXT:    v_mul_lo_u32 v6, v7, v6
-; GCN-NEXT:    v_add_i32_e32 v5, vcc, v5, v11
+; GCN-NEXT:    v_add_i32_e32 v5, vcc, v11, v5
 ; GCN-NEXT:    v_addc_u32_e32 v5, vcc, v14, v10, vcc
 ; GCN-NEXT:    v_addc_u32_e32 v7, vcc, v9, v12, vcc
 ; GCN-NEXT:    v_add_i32_e32 v5, vcc, v5, v6

diff --git a/llvm/test/CodeGen/AMDGPU/srem64.ll b/llvm/test/CodeGen/AMDGPU/srem64.ll
index 96145858a303..97a6c3757b0b 100644
--- a/llvm/test/CodeGen/AMDGPU/srem64.ll
+++ b/llvm/test/CodeGen/AMDGPU/srem64.ll
@@ -271,7 +271,7 @@ define i64 @v_test_srem(i64 %x, i64 %y) {
 ; GCN-NEXT:    v_mul_lo_u32 v12, v5, v9
 ; GCN-NEXT:    v_mul_hi_u32 v9, v5, v9
 ; GCN-NEXT:    v_addc_u32_e32 v10, vcc, v14, v10, vcc
-; GCN-NEXT:    v_add_i32_e32 v11, vcc, v12, v11
+; GCN-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
 ; GCN-NEXT:    v_addc_u32_e32 v9, vcc, v10, v9, vcc
 ; GCN-NEXT:    v_addc_u32_e32 v10, vcc, v15, v13, vcc
 ; GCN-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
@@ -293,7 +293,7 @@ define i64 @v_test_srem(i64 %x, i64 %y) {
 ; GCN-NEXT:    v_mul_hi_u32 v10, v8, v7
 ; GCN-NEXT:    v_addc_u32_e32 v15, vcc, v14, v16, vcc
 ; GCN-NEXT:    v_mul_lo_u32 v7, v8, v7
-; GCN-NEXT:    v_add_i32_e32 v6, vcc, v6, v12
+; GCN-NEXT:    v_add_i32_e32 v6, vcc, v12, v6
 ; GCN-NEXT:    v_addc_u32_e32 v6, vcc, v15, v11, vcc
 ; GCN-NEXT:    v_addc_u32_e32 v8, vcc, v10, v13, vcc
 ; GCN-NEXT:    v_add_i32_e32 v6, vcc, v6, v7
@@ -316,7 +316,7 @@ define i64 @v_test_srem(i64 %x, i64 %y) {
 ; GCN-NEXT:    v_mul_hi_u32 v4, v1, v4
 ; GCN-NEXT:    v_mul_hi_u32 v10, v1, v5
 ; GCN-NEXT:    v_mul_lo_u32 v5, v1, v5
-; GCN-NEXT:    v_add_i32_e32 v7, vcc, v9, v7
+; GCN-NEXT:    v_add_i32_e32 v7, vcc, v7, v9
 ; GCN-NEXT:    v_addc_u32_e32 v4, vcc, v8, v4, vcc
 ; GCN-NEXT:    v_addc_u32_e32 v7, vcc, v10, v13, vcc
 ; GCN-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
@@ -1572,7 +1572,7 @@ define i64 @v_test_srem_k_num_i64(i64 %x) {
 ; GCN-NEXT:    v_mul_lo_u32 v10, v3, v7
 ; GCN-NEXT:    v_mul_hi_u32 v7, v3, v7
 ; GCN-NEXT:    v_addc_u32_e32 v8, vcc, v12, v8, vcc
-; GCN-NEXT:    v_add_i32_e32 v9, vcc, v10, v9
+; GCN-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
 ; GCN-NEXT:    v_addc_u32_e32 v7, vcc, v8, v7, vcc
 ; GCN-NEXT:    v_addc_u32_e32 v8, vcc, v13, v11, vcc
 ; GCN-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
@@ -1594,7 +1594,7 @@ define i64 @v_test_srem_k_num_i64(i64 %x) {
 ; GCN-NEXT:    v_mul_hi_u32 v8, v6, v5
 ; GCN-NEXT:    v_addc_u32_e32 v13, vcc, v12, v14, vcc
 ; GCN-NEXT:    v_mul_lo_u32 v5, v6, v5
-; GCN-NEXT:    v_add_i32_e32 v4, vcc, v4, v10
+; GCN-NEXT:    v_add_i32_e32 v4, vcc, v10, v4
 ; GCN-NEXT:    v_addc_u32_e32 v4, vcc, v13, v9, vcc
 ; GCN-NEXT:    v_addc_u32_e32 v6, vcc, v8, v11, vcc
 ; GCN-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
@@ -1782,7 +1782,7 @@ define i64 @v_test_srem_pow2_k_num_i64(i64 %x) {
 ; GCN-NEXT:    v_mul_lo_u32 v10, v3, v7
 ; GCN-NEXT:    v_mul_hi_u32 v7, v3, v7
 ; GCN-NEXT:    v_addc_u32_e32 v8, vcc, v12, v8, vcc
-; GCN-NEXT:    v_add_i32_e32 v9, vcc, v10, v9
+; GCN-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
 ; GCN-NEXT:    v_addc_u32_e32 v7, vcc, v8, v7, vcc
 ; GCN-NEXT:    v_addc_u32_e32 v8, vcc, v13, v11, vcc
 ; GCN-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
@@ -1804,7 +1804,7 @@ define i64 @v_test_srem_pow2_k_num_i64(i64 %x) {
 ; GCN-NEXT:    v_mul_hi_u32 v8, v6, v5
 ; GCN-NEXT:    v_addc_u32_e32 v13, vcc, v12, v14, vcc
 ; GCN-NEXT:    v_mul_lo_u32 v5, v6, v5
-; GCN-NEXT:    v_add_i32_e32 v4, vcc, v4, v10
+; GCN-NEXT:    v_add_i32_e32 v4, vcc, v10, v4
 ; GCN-NEXT:    v_addc_u32_e32 v4, vcc, v13, v9, vcc
 ; GCN-NEXT:    v_addc_u32_e32 v6, vcc, v8, v11, vcc
 ; GCN-NEXT:    v_add_i32_e32 v4, vcc, v4, v5

diff --git a/llvm/test/CodeGen/AMDGPU/udiv64.ll b/llvm/test/CodeGen/AMDGPU/udiv64.ll
index 86b4a39057c3..7a4065eeac46 100644
--- a/llvm/test/CodeGen/AMDGPU/udiv64.ll
+++ b/llvm/test/CodeGen/AMDGPU/udiv64.ll
@@ -256,7 +256,7 @@ define i64 @v_test_udiv_i64(i64 %x, i64 %y) {
 ; GCN-NEXT:    v_mul_lo_u32 v12, v5, v9
 ; GCN-NEXT:    v_mul_hi_u32 v9, v5, v9
 ; GCN-NEXT:    v_addc_u32_e32 v10, vcc, v14, v10, vcc
-; GCN-NEXT:    v_add_i32_e32 v11, vcc, v12, v11
+; GCN-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
 ; GCN-NEXT:    v_addc_u32_e32 v9, vcc, v10, v9, vcc
 ; GCN-NEXT:    v_addc_u32_e32 v10, vcc, v15, v13, vcc
 ; GCN-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
@@ -278,7 +278,7 @@ define i64 @v_test_udiv_i64(i64 %x, i64 %y) {
 ; GCN-NEXT:    v_mul_hi_u32 v10, v8, v7
 ; GCN-NEXT:    v_addc_u32_e32 v15, vcc, v14, v16, vcc
 ; GCN-NEXT:    v_mul_lo_u32 v7, v8, v7
-; GCN-NEXT:    v_add_i32_e32 v6, vcc, v6, v12
+; GCN-NEXT:    v_add_i32_e32 v6, vcc, v12, v6
 ; GCN-NEXT:    v_addc_u32_e32 v6, vcc, v15, v11, vcc
 ; GCN-NEXT:    v_addc_u32_e32 v8, vcc, v10, v13, vcc
 ; GCN-NEXT:    v_add_i32_e32 v6, vcc, v6, v7
@@ -296,7 +296,7 @@ define i64 @v_test_udiv_i64(i64 %x, i64 %y) {
 ; GCN-NEXT:    v_addc_u32_e32 v7, vcc, v14, v8, vcc
 ; GCN-NEXT:    v_mul_lo_u32 v8, v1, v4
 ; GCN-NEXT:    v_mul_hi_u32 v4, v1, v4
-; GCN-NEXT:    v_add_i32_e32 v6, vcc, v8, v6
+; GCN-NEXT:    v_add_i32_e32 v6, vcc, v6, v8
 ; GCN-NEXT:    v_addc_u32_e32 v4, vcc, v7, v4, vcc
 ; GCN-NEXT:    v_addc_u32_e32 v6, vcc, v9, v13, vcc
 ; GCN-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
@@ -1174,7 +1174,7 @@ define i64 @v_test_udiv_pow2_k_num_i64(i64 %x) {
 ; GCN-NEXT:    v_mul_lo_u32 v10, v3, v9
 ; GCN-NEXT:    v_mul_hi_u32 v9, v3, v9
 ; GCN-NEXT:    v_addc_u32_e32 v7, vcc, v12, v7, vcc
-; GCN-NEXT:    v_add_i32_e32 v8, vcc, v10, v8
+; GCN-NEXT:    v_add_i32_e32 v8, vcc, v8, v10
 ; GCN-NEXT:    v_addc_u32_e32 v7, vcc, v7, v9, vcc
 ; GCN-NEXT:    v_addc_u32_e32 v8, vcc, v13, v11, vcc
 ; GCN-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
@@ -1196,7 +1196,7 @@ define i64 @v_test_udiv_pow2_k_num_i64(i64 %x) {
 ; GCN-NEXT:    v_mul_hi_u32 v8, v6, v5
 ; GCN-NEXT:    v_addc_u32_e32 v13, vcc, v12, v14, vcc
 ; GCN-NEXT:    v_mul_lo_u32 v5, v6, v5
-; GCN-NEXT:    v_add_i32_e32 v4, vcc, v4, v10
+; GCN-NEXT:    v_add_i32_e32 v4, vcc, v10, v4
 ; GCN-NEXT:    v_addc_u32_e32 v4, vcc, v13, v9, vcc
 ; GCN-NEXT:    v_addc_u32_e32 v6, vcc, v8, v11, vcc
 ; GCN-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
@@ -1682,7 +1682,7 @@ define i64 @v_test_udiv_k_den_i64(i64 %x) {
 ; GCN-NEXT:    v_addc_u32_e32 v5, vcc, v10, v6, vcc
 ; GCN-NEXT:    v_mul_lo_u32 v6, v1, v2
 ; GCN-NEXT:    v_mul_hi_u32 v2, v1, v2
-; GCN-NEXT:    v_add_i32_e32 v4, vcc, v6, v4
+; GCN-NEXT:    v_add_i32_e32 v4, vcc, v4, v6
 ; GCN-NEXT:    v_addc_u32_e32 v2, vcc, v5, v2, vcc
 ; GCN-NEXT:    v_addc_u32_e32 v4, vcc, v7, v9, vcc
 ; GCN-NEXT:    v_add_i32_e32 v2, vcc, v2, v3

diff --git a/llvm/test/CodeGen/AMDGPU/urem64.ll b/llvm/test/CodeGen/AMDGPU/urem64.ll
index 9df153381d83..9a6f7002ca87 100644
--- a/llvm/test/CodeGen/AMDGPU/urem64.ll
+++ b/llvm/test/CodeGen/AMDGPU/urem64.ll
@@ -266,7 +266,7 @@ define i64 @v_test_urem_i64(i64 %x, i64 %y) {
 ; GCN-NEXT:    v_mul_lo_u32 v12, v5, v9
 ; GCN-NEXT:    v_mul_hi_u32 v9, v5, v9
 ; GCN-NEXT:    v_addc_u32_e32 v10, vcc, v14, v10, vcc
-; GCN-NEXT:    v_add_i32_e32 v11, vcc, v12, v11
+; GCN-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
 ; GCN-NEXT:    v_addc_u32_e32 v9, vcc, v10, v9, vcc
 ; GCN-NEXT:    v_addc_u32_e32 v10, vcc, v15, v13, vcc
 ; GCN-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
@@ -288,7 +288,7 @@ define i64 @v_test_urem_i64(i64 %x, i64 %y) {
 ; GCN-NEXT:    v_mul_hi_u32 v10, v8, v7
 ; GCN-NEXT:    v_addc_u32_e32 v15, vcc, v14, v16, vcc
 ; GCN-NEXT:    v_mul_lo_u32 v7, v8, v7
-; GCN-NEXT:    v_add_i32_e32 v6, vcc, v6, v12
+; GCN-NEXT:    v_add_i32_e32 v6, vcc, v12, v6
 ; GCN-NEXT:    v_addc_u32_e32 v6, vcc, v15, v11, vcc
 ; GCN-NEXT:    v_addc_u32_e32 v8, vcc, v10, v13, vcc
 ; GCN-NEXT:    v_add_i32_e32 v6, vcc, v6, v7
@@ -306,7 +306,7 @@ define i64 @v_test_urem_i64(i64 %x, i64 %y) {
 ; GCN-NEXT:    v_addc_u32_e32 v7, vcc, v14, v8, vcc
 ; GCN-NEXT:    v_mul_lo_u32 v8, v1, v4
 ; GCN-NEXT:    v_mul_hi_u32 v4, v1, v4
-; GCN-NEXT:    v_add_i32_e32 v6, vcc, v8, v6
+; GCN-NEXT:    v_add_i32_e32 v6, vcc, v6, v8
 ; GCN-NEXT:    v_addc_u32_e32 v4, vcc, v7, v4, vcc
 ; GCN-NEXT:    v_addc_u32_e32 v6, vcc, v9, v13, vcc
 ; GCN-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
@@ -1191,7 +1191,7 @@ define i64 @v_test_urem_pow2_k_num_i64(i64 %x) {
 ; GCN-NEXT:    v_mul_lo_u32 v10, v3, v9
 ; GCN-NEXT:    v_mul_hi_u32 v9, v3, v9
 ; GCN-NEXT:    v_addc_u32_e32 v7, vcc, v12, v7, vcc
-; GCN-NEXT:    v_add_i32_e32 v8, vcc, v10, v8
+; GCN-NEXT:    v_add_i32_e32 v8, vcc, v8, v10
 ; GCN-NEXT:    v_addc_u32_e32 v7, vcc, v7, v9, vcc
 ; GCN-NEXT:    v_addc_u32_e32 v8, vcc, v13, v11, vcc
 ; GCN-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
@@ -1213,7 +1213,7 @@ define i64 @v_test_urem_pow2_k_num_i64(i64 %x) {
 ; GCN-NEXT:    v_mul_hi_u32 v8, v6, v5
 ; GCN-NEXT:    v_addc_u32_e32 v13, vcc, v12, v14, vcc
 ; GCN-NEXT:    v_mul_lo_u32 v5, v6, v5
-; GCN-NEXT:    v_add_i32_e32 v4, vcc, v4, v10
+; GCN-NEXT:    v_add_i32_e32 v4, vcc, v10, v4
 ; GCN-NEXT:    v_addc_u32_e32 v4, vcc, v13, v9, vcc
 ; GCN-NEXT:    v_addc_u32_e32 v6, vcc, v8, v11, vcc
 ; GCN-NEXT:    v_add_i32_e32 v4, vcc, v4, v5


        

