[llvm] 66237d6 - [X86][CodeGen] Add entries for NDD SHLD/SHRD to the commuteInstructionImpl

Shengchen Kan via llvm-commits llvm-commits at lists.llvm.org
Tue Jan 23 01:05:26 PST 2024


Author: Shengchen Kan
Date: 2024-01-23T17:05:09+08:00
New Revision: 66237d647ed95d7df92a438f8181c11423addc7d

URL: https://github.com/llvm/llvm-project/commit/66237d647ed95d7df92a438f8181c11423addc7d
DIFF: https://github.com/llvm/llvm-project/commit/66237d647ed95d7df92a438f8181c11423addc7d.diff

LOG: [X86][CodeGen] Add entries for NDD SHLD/SHRD to the commuteInstructionImpl

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86CompressEVEX.cpp
    llvm/lib/Target/X86/X86InstrInfo.cpp
    llvm/test/CodeGen/X86/apx/compress-evex.mir
    llvm/test/CodeGen/X86/apx/shrd.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/X86/X86CompressEVEX.cpp b/llvm/lib/Target/X86/X86CompressEVEX.cpp
index 78a7e9850abdb2..a9704e30478d13 100644
--- a/llvm/lib/Target/X86/X86CompressEVEX.cpp
+++ b/llvm/lib/Target/X86/X86CompressEVEX.cpp
@@ -200,7 +200,6 @@ static bool isRedundantNewDataDest(MachineInstr &MI, const X86Subtarget &ST) {
       !MI.getOperand(2).isReg() || MI.getOperand(2).getReg() != Reg0)
     return false;
   // Opcode may change after commute, e.g. SHRD -> SHLD
-  // TODO: Add test for this after ND SHRD/SHLD is supported
   ST.getInstrInfo()->commuteInstruction(MI, false, 1, 2);
   return true;
 }

diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index b7c2a5c4771c22..6dcaaf754a18d5 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -2275,7 +2275,13 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
   case X86::SHRD32rri8:
   case X86::SHLD32rri8:
   case X86::SHRD64rri8:
-  case X86::SHLD64rri8: {
+  case X86::SHLD64rri8:
+  case X86::SHRD16rri8_ND:
+  case X86::SHLD16rri8_ND:
+  case X86::SHRD32rri8_ND:
+  case X86::SHLD32rri8_ND:
+  case X86::SHRD64rri8_ND:
+  case X86::SHLD64rri8_ND: {
     unsigned Size;
     switch (Opc) {
     default:
@@ -2304,6 +2310,30 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
       Size = 64;
       Opc = X86::SHRD64rri8;
       break;
+    case X86::SHRD16rri8_ND:
+      Size = 16;
+      Opc = X86::SHLD16rri8_ND;
+      break;
+    case X86::SHLD16rri8_ND:
+      Size = 16;
+      Opc = X86::SHRD16rri8_ND;
+      break;
+    case X86::SHRD32rri8_ND:
+      Size = 32;
+      Opc = X86::SHLD32rri8_ND;
+      break;
+    case X86::SHLD32rri8_ND:
+      Size = 32;
+      Opc = X86::SHRD32rri8_ND;
+      break;
+    case X86::SHRD64rri8_ND:
+      Size = 64;
+      Opc = X86::SHLD64rri8_ND;
+      break;
+    case X86::SHLD64rri8_ND:
+      Size = 64;
+      Opc = X86::SHRD64rri8_ND;
+      break;
     }
     WorkingMI = CloneIfNew(MI);
     WorkingMI->setDesc(get(Opc));

diff --git a/llvm/test/CodeGen/X86/apx/compress-evex.mir b/llvm/test/CodeGen/X86/apx/compress-evex.mir
index 7e13896e985907..5a3d7ceb10c432 100644
--- a/llvm/test/CodeGen/X86/apx/compress-evex.mir
+++ b/llvm/test/CodeGen/X86/apx/compress-evex.mir
@@ -32,6 +32,16 @@ body:             |
     RET64 $rax
 ...
 ---
+name:            ndd_2_non_ndd_commutable_new_opcode
+body:             |
+  bb.0.entry:
+    liveins: $rdi, $rsi
+    ; CHECK: shldq   $52, %rsi, %rax                 # EVEX TO LEGACY Compression encoding: [0x48,0x0f,0xa4,0xf0,0x34]
+    renamable $rax = ADD64rr_ND killed renamable $rdi, renamable $rsi, implicit-def dead $eflags
+    renamable $rax = SHRD64rri8_ND killed renamable $rsi, killed renamable $rax, 12, implicit-def dead $eflags
+    RET64 $rax
+...
+---
 name:            ndd_2_non_ndd_incommutable
 body:             |
   bb.0.entry:

diff --git a/llvm/test/CodeGen/X86/apx/shrd.ll b/llvm/test/CodeGen/X86/apx/shrd.ll
index 453d7a5d17e05c..3eaa06b123bd00 100644
--- a/llvm/test/CodeGen/X86/apx/shrd.ll
+++ b/llvm/test/CodeGen/X86/apx/shrd.ll
@@ -157,7 +157,7 @@ entry:
 define i16 @shrd16mri8(ptr %ptr, i16 noundef %b) {
 ; CHECK-LABEL: shrd16mri8:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    shrdw $12, %si, (%rdi), %ax
+; CHECK-NEXT:    shldw $4, %si, (%rdi), %ax
 ; CHECK-NEXT:    retq
 entry:
     %a = load i16, ptr %ptr
@@ -168,7 +168,7 @@ entry:
 define i32 @shrd32mri8(ptr %ptr, i32 noundef %b) {
 ; CHECK-LABEL: shrd32mri8:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    shrdl $12, %esi, (%rdi), %eax
+; CHECK-NEXT:    shldl $20, %esi, (%rdi), %eax
 ; CHECK-NEXT:    retq
 entry:
     %a = load i32, ptr %ptr
@@ -179,7 +179,7 @@ entry:
 define i64 @shrd64mri8(ptr %ptr, i64 noundef %b) {
 ; CHECK-LABEL: shrd64mri8:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    shrdq $12, %rsi, (%rdi), %rax
+; CHECK-NEXT:    shldq $52, %rsi, (%rdi), %rax
 ; CHECK-NEXT:    retq
 entry:
     %a = load i64, ptr %ptr
@@ -190,15 +190,13 @@ entry:
 define void @shrd16mrcl_legacy(ptr %ptr, i16 noundef %b, i8 %cl) {
 ; CHECK-LABEL: shrd16mrcl_legacy:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    movzwl (%rdi), %eax
 ; CHECK-NEXT:    andb $15, %dl, %cl
-; CHECK-NEXT:    shrdw %cl, %ax, %si, %ax
-; CHECK-NEXT:    movw %ax, (%rdi)
+; CHECK-NEXT:    shrdw %cl, %si, (%rdi)
 ; CHECK-NEXT:    retq
 entry:
     %a = load i16, ptr %ptr
     %clin = sext i8 %cl to i16
-    %shrd = call i16 @llvm.fshr.i16(i16 %a, i16 %b, i16 %clin)
+    %shrd = call i16 @llvm.fshr.i16(i16 %b, i16 %a, i16 %clin)
     store i16 %shrd, ptr %ptr
     ret void
 }
@@ -207,15 +205,13 @@ define void @shrd32mrcl_legacy(ptr %ptr, i32 noundef %b, i8 %cl) {
 ; CHECK-LABEL: shrd32mrcl_legacy:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    movl %edx, %ecx
-; CHECK-NEXT:    movl (%rdi), %eax
 ; CHECK-NEXT:    # kill: def $cl killed $cl killed $ecx
-; CHECK-NEXT:    shrdl %cl, %eax, %esi, %eax
-; CHECK-NEXT:    movl %eax, (%rdi)
+; CHECK-NEXT:    shrdl %cl, %esi, (%rdi)
 ; CHECK-NEXT:    retq
 entry:
     %a = load i32, ptr %ptr
     %clin = sext i8 %cl to i32
-    %shrd = call i32 @llvm.fshr.i32(i32 %a, i32 %b, i32 %clin)
+    %shrd = call i32 @llvm.fshr.i32(i32 %b, i32 %a, i32 %clin)
     store i32 %shrd, ptr %ptr
     ret void
 }
@@ -224,15 +220,13 @@ define void @shrd64mrcl_legacy(ptr %ptr, i64 noundef %b, i8 %cl) {
 ; CHECK-LABEL: shrd64mrcl_legacy:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    movl %edx, %ecx
-; CHECK-NEXT:    movq (%rdi), %rax
 ; CHECK-NEXT:    # kill: def $cl killed $cl killed $ecx
-; CHECK-NEXT:    shrdq %cl, %rax, %rsi, %rax
-; CHECK-NEXT:    movq %rax, (%rdi)
+; CHECK-NEXT:    shrdq %cl, %rsi, (%rdi)
 ; CHECK-NEXT:    retq
 entry:
     %a = load i64, ptr %ptr
     %clin = sext i8 %cl to i64
-    %shrd = call i64 @llvm.fshr.i64(i64 %a, i64 %b, i64 %clin)
+    %shrd = call i64 @llvm.fshr.i64(i64 %b, i64 %a, i64 %clin)
     store i64 %shrd, ptr %ptr
     ret void
 }
@@ -240,12 +234,11 @@ entry:
 define void @shrd16mri8_legacy(ptr %ptr, i16 noundef %b) {
 ; CHECK-LABEL: shrd16mri8_legacy:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    shrdw $12, %si, (%rdi), %ax
-; CHECK-NEXT:    movw %ax, (%rdi)
+; CHECK-NEXT:    shrdw $12, %si, (%rdi)
 ; CHECK-NEXT:    retq
 entry:
     %a = load i16, ptr %ptr
-    %shrd = call i16 @llvm.fshr.i16(i16 %a, i16 %b, i16 12)
+    %shrd = call i16 @llvm.fshr.i16(i16 %b, i16 %a, i16 12)
     store i16 %shrd, ptr %ptr
     ret void
 }
@@ -253,12 +246,11 @@ entry:
 define void @shrd32mri8_legacy(ptr %ptr, i32 noundef %b) {
 ; CHECK-LABEL: shrd32mri8_legacy:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    shrdl $12, %esi, (%rdi), %eax
-; CHECK-NEXT:    movl %eax, (%rdi)
+; CHECK-NEXT:    shrdl $12, %esi, (%rdi)
 ; CHECK-NEXT:    retq
 entry:
     %a = load i32, ptr %ptr
-    %shrd = call i32 @llvm.fshr.i32(i32 %a, i32 %b, i32 12)
+    %shrd = call i32 @llvm.fshr.i32(i32 %b, i32 %a, i32 12)
     store i32 %shrd, ptr %ptr
     ret void
 }
@@ -266,12 +258,11 @@ entry:
 define void @shrd64mri8_legacy(ptr %ptr, i64 noundef %b) {
 ; CHECK-LABEL: shrd64mri8_legacy:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    shrdq $12, %rsi, (%rdi), %rax
-; CHECK-NEXT:    movq %rax, (%rdi)
+; CHECK-NEXT:    shrdq $12, %rsi, (%rdi)
 ; CHECK-NEXT:    retq
 entry:
     %a = load i64, ptr %ptr
-    %shrd = call i64 @llvm.fshr.i64(i64 %a, i64 %b, i64 12)
+    %shrd = call i64 @llvm.fshr.i64(i64 %b, i64 %a, i64 12)
     store i64 %shrd, ptr %ptr
     ret void
 }


        


More information about the llvm-commits mailing list